From cf9f51336b0ca99ef7ed686481e406c559ffb3d4 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 21 Dec 2017 21:09:28 -0500 Subject: [PATCH] Fixed read_json int overflow --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/io/json/json.py | 2 +- pandas/tests/io/json/test_pandas.py | 14 ++++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 40e1e2011479c..348c1c6dafbcb 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -310,6 +310,7 @@ I/O - Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) - Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`) - Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) +- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) - Plotting diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 0e0aae0506809..bb435c625ff35 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -724,7 +724,7 @@ def _try_convert_to_date(self, data): if new_data.dtype == 'object': try: new_data = data.astype('int64') - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): pass # ignore numbers that are out of range diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7cf3d6cd7b612..10139eb07a925 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1074,6 +1074,20 @@ def test_read_jsonl_unicode_chars(self): columns=['a', 'b']) assert_frame_equal(result, expected) + def test_read_json_large_numbers(self): + # GH18842 + json = '{"articleId": "1404366058080022500245"}' + json = StringIO(json) + result = read_json(json, typ="series") + expected = Series(1.404366e+21, index=['articleId']) + assert_series_equal(result, expected) + + json = '{"0": {"articleId": "1404366058080022500245"}}' + json = StringIO(json) + result = read_json(json) + expected = DataFrame(1.404366e+21, index=['articleId'], columns=[0]) + assert_frame_equal(result, expected) + def test_to_jsonl(self): # GH9180 df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])