pandas-dev
diff --git a/‎doc/source/whatsnew/v0.17.0.txt
Lines changed: 1 addition & 1 deletion b/‎doc/source/whatsnew/v0.17.0.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/io/packers.py
Lines changed: 18 additions & 11 deletions b/‎pandas/io/packers.py
Lines changed: 18 additions & 11 deletions
diff --git a/‎pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack
4.57 KB b/‎pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_2.7.10.msgpack
4.57 KB
diff --git a/‎pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack
4.57 KB b/‎pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_AMD64_windows_3.4.3.msgpack
4.57 KB
diff --git a/‎pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack
4.57 KB b/‎pandas/io/tests/data/legacy_msgpack/0.16.2/0.16.2_x86_64_darwin_2.7.10.msgpack
4.57 KB
diff --git a/‎pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle
14.7 KB b/‎pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_2.7.10.pickle
14.7 KB
diff --git a/‎pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle
13.4 KB b/‎pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_AMD64_windows_3.4.3.pickle
13.4 KB
diff --git a/‎pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle
14.5 KB b/‎pandas/io/tests/data/legacy_pickle/0.16.2/0.16.2_x86_64_darwin_2.7.10.pickle
14.5 KB
diff --git a/‎pandas/io/tests/test_cparser.py
Lines changed: 39 additions & 0 deletions b/‎pandas/io/tests/test_cparser.py
Lines changed: 39 additions & 0 deletions
diff --git a/‎pandas/io/tests/test_json/test_ujson.py
Lines changed: 3 additions & 0 deletions b/‎pandas/io/tests/test_json/test_ujson.py
Lines changed: 3 additions & 0 deletions
@@ -370,7 +370,7 @@ Bug Fixes
 - Bug in ``DatetimeIndex`` and ``PeriodIndex.value_counts`` resets name from its result, but retains in result's ``Index``. (:issue:`10150`)
 - Bug in `pd.eval` using ``numexpr`` engine coerces 1 element numpy array to scalar (:issue:`10546`)
 - Bug in `pandas.concat` with ``axis=0`` when column is of dtype ``category`` (:issue:`10177`)
-- Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`)
+- Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`, :issue:`10630`)
 - Bug in `pandas.read_csv` with kwargs ``index_col=False``, ``index_col=['a', 'b']`` or ``dtype``
   (:issue:`10413`, :issue:`10467`, :issue:`10577`)
 - Bug in `Series.from_csv` with ``header`` kwarg not setting the ``Series.name`` or the ``Series.index.name`` (:issue:`10483`)
 
@@ -169,10 +169,16 @@ def read(fh):
               u('datetime64[us]'): np.dtype('M8[us]'),
               22: np.dtype('m8[ns]'),
               u('timedelta64[ns]'): np.dtype('m8[ns]'),
-              u('timedelta64[us]'): np.dtype('m8[us]')}
+              u('timedelta64[us]'): np.dtype('m8[us]'),
+
+              # this is platform int, which we need to remap to np.int64
+              # for compat on windows platforms
+              7: np.dtype('int64'),
+}
 
 
 def dtype_for(t):
+    """ dtype for t (number or name): dtype_dict first, then np.typeDict """
     if t in dtype_dict:
         return dtype_dict[t]
     return np.typeDict[t]
@@ -266,7 +272,7 @@ def encode(obj):
                     'klass': obj.__class__.__name__,
                     'name': getattr(obj, 'name', None),
                     'freq': getattr(obj, 'freqstr', None),
-                    'dtype': obj.dtype.num,
+                    'dtype': obj.dtype.name,
                     'data': convert(obj.asi8),
                     'compress': compressor}
         elif isinstance(obj, DatetimeIndex):
@@ -279,7 +285,7 @@ def encode(obj):
             return {'typ': 'datetime_index',
                     'klass': obj.__class__.__name__,
                     'name': getattr(obj, 'name', None),
-                    'dtype': obj.dtype.num,
+                    'dtype': obj.dtype.name,
                     'data': convert(obj.asi8),
                     'freq': getattr(obj, 'freqstr', None),
                     'tz': tz,
@@ -288,14 +294,14 @@ def encode(obj):
             return {'typ': 'multi_index',
                     'klass': obj.__class__.__name__,
                     'names': getattr(obj, 'names', None),
-                    'dtype': obj.dtype.num,
+                    'dtype': obj.dtype.name,
                     'data': convert(obj.values),
                     'compress': compressor}
         else:
             return {'typ': 'index',
                     'klass': obj.__class__.__name__,
                     'name': getattr(obj, 'name', None),
-                    'dtype': obj.dtype.num,
+                    'dtype': obj.dtype.name,
                     'data': convert(obj.values),
                     'compress': compressor}
     elif isinstance(obj, Series):
@@ -305,7 +311,7 @@ def encode(obj):
             )
             #d = {'typ': 'sparse_series',
             #     'klass': obj.__class__.__name__,
-            #     'dtype': obj.dtype.num,
+            #     'dtype': obj.dtype.name,
             #     'index': obj.index,
             #     'sp_index': obj.sp_index,
             #     'sp_values': convert(obj.sp_values),
@@ -318,7 +324,7 @@ def encode(obj):
                     'klass': obj.__class__.__name__,
                     'name': getattr(obj, 'name', None),
                     'index': obj.index,
-                    'dtype': obj.dtype.num,
+                    'dtype': obj.dtype.name,
                     'data': convert(obj.values),
                     'compress': compressor}
     elif issubclass(tobj, NDFrame):
@@ -360,7 +366,7 @@ def encode(obj):
                                 'locs': b.mgr_locs.as_array,
                                 'values': convert(b.values),
                                 'shape': b.values.shape,
-                                'dtype': b.dtype.num,
+                                'dtype': b.dtype.name,
                                 'klass': b.__class__.__name__,
                                 'compress': compressor
                                 } for b in data.blocks]}
@@ -413,7 +419,7 @@ def encode(obj):
         return {'typ': 'ndarray',
                 'shape': obj.shape,
                 'ndim': obj.ndim,
-                'dtype': obj.dtype.num,
+                'dtype': obj.dtype.name,
                 'data': convert(obj),
                 'compress': compressor}
     elif isinstance(obj, np.number):
@@ -449,11 +455,12 @@ def decode(obj):
         return Period(ordinal=obj['ordinal'], freq=obj['freq'])
     elif typ == 'index':
         dtype = dtype_for(obj['dtype'])
-        data = unconvert(obj['data'], np.typeDict[obj['dtype']],
+        data = unconvert(obj['data'], dtype,
                          obj.get('compress'))
         return globals()[obj['klass']](data, dtype=dtype, name=obj['name'])
     elif typ == 'multi_index':
-        data = unconvert(obj['data'], np.typeDict[obj['dtype']],
+        dtype = dtype_for(obj['dtype'])
+        data = unconvert(obj['data'], dtype,
                          obj.get('compress'))
         data = [tuple(x) for x in data]
         return globals()[obj['klass']].from_tuples(data, names=obj['names'])
 
@@ -186,6 +186,30 @@ def test_header_not_enough_lines(self):
                 '1,2,3\n'
                 '4,5,6')
 
+        reader = TextReader(StringIO(data), delimiter=',', header=2)
+        header = reader.header
+        expected = [['a', 'b', 'c']]
+        self.assertEqual(header, expected)
+
+        recs = reader.read()
+        expected = {0 : [1, 4], 1 : [2, 5], 2 : [3, 6]}
+        assert_array_dicts_equal(expected, recs)
+
+        # not enough rows
+        self.assertRaises(parser.CParserError, TextReader, StringIO(data),
+                          delimiter=',', header=5, as_recarray=True)
+
+    def test_header_not_enough_lines_as_recarray(self):
+
+        if compat.is_platform_windows():
+            raise nose.SkipTest("segfaults on win-64, only when all tests are run")
+
+        data = ('skip this\n'
+                'skip this\n'
+                'a,b,c\n'
+                '1,2,3\n'
+                '4,5,6')
+
         reader = TextReader(StringIO(data), delimiter=',', header=2,
                             as_recarray=True)
         header = reader.header
@@ -246,6 +270,21 @@ def _make_reader(**kwds):
         self.assertTrue((result[0] == ex_values).all())
         self.assertEqual(result[1].dtype, 'S4')
 
+    def test_numpy_string_dtype_as_recarray(self):
+        data = """\
+a,1
+aa,2
+aaa,3
+aaaa,4
+aaaaa,5"""
+
+        if compat.is_platform_windows():
+            raise nose.SkipTest("segfaults on win-64, only when all tests are run")
+
+        def _make_reader(**kwds):
+            return TextReader(StringIO(data), delimiter=',', header=None,
+                              **kwds)
+
         reader = _make_reader(dtype='S4', as_recarray=True)
         result = reader.read()
         self.assertEqual(result['0'].dtype, 'S4')
 
@@ -114,6 +114,9 @@ def test_decimalDecodeTestPrecise(self):
         self.assertEqual(sut, decoded)
 
     def test_encodeDoubleTinyExponential(self):
+        if compat.is_platform_windows() and not compat.PY3:
+            raise nose.SkipTest("buggy on win-64 for py2")
+
         num = 1e-40
         self.assertEqual(num, ujson.decode(ujson.encode(num)))
         num = 1e-100