From 3a4033c7296a665fcbbbcd49a05a420611fa3f3c Mon Sep 17 00:00:00 2001 From: "Lumberbot (aka Jack)" <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 21 Feb 2024 21:46:10 +0100 Subject: [PATCH] Backport PR #57439 on branch 2.2.x (BUG: read_json returning Index instead of RangeIndex) (#57552) Backport PR #57439: BUG: read_json returning Index instead of RangeIndex Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v2.2.1.rst | 1 + pandas/io/json/_json.py | 25 ++++++++++++++----------- pandas/tests/io/json/test_pandas.py | 16 ++++++++++++++-- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst index ab62a7c7598d5..d81032fafa730 100644 --- a/doc/source/whatsnew/v2.2.1.rst +++ b/doc/source/whatsnew/v2.2.1.rst @@ -18,6 +18,7 @@ Fixed regressions - Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`) - Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`) - Fixed regression in :func:`pandas.testing.assert_series_equal` defaulting to ``check_exact=True`` when checking the :class:`Index` (:issue:`57067`) +- Fixed regression in :func:`read_json` where an :class:`Index` would be returned instead of a :class:`RangeIndex` (:issue:`57429`) - Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`) - Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`) - Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 4c490c6b2cda2..9414f45215029 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1266,6 +1266,7 @@ def _try_convert_data( if result: return new_data, True + converted = False if self.dtype_backend is not lib.no_default and not is_axis: # Fall through for conversion later on return data, True @@ -1273,16 +1274,17 @@ def _try_convert_data( # try float try: data = data.astype("float64") + converted = True except (TypeError, ValueError): pass - if data.dtype.kind == "f": - if data.dtype != "float64": - # coerce floats to 64 - try: - data = data.astype("float64") - except (TypeError, ValueError): - pass + if data.dtype.kind == "f" and data.dtype != "float64": + # coerce floats to 64 + try: + data = data.astype("float64") + converted = True + except (TypeError, ValueError): + pass # don't coerce 0-len data if len(data) and data.dtype in ("float", "object"): @@ -1291,14 +1293,15 @@ def _try_convert_data( new_data = data.astype("int64") if (new_data == data).all(): data = new_data + converted = True except (TypeError, ValueError, OverflowError): pass - # coerce ints to 64 - if data.dtype == "int": - # coerce floats to 64 + if data.dtype == "int" and data.dtype != "int64": + # coerce ints to 64 try: data = data.astype("int64") + converted = True except (TypeError, ValueError): pass @@ -1307,7 +1310,7 @@ def _try_convert_data( if self.orient == "split": return data, False - return data, True + return data, converted @final def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]: diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index caa25841d3596..5279f3f1cdfbe 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -24,6 +24,7 @@ DataFrame, DatetimeIndex, Index, + RangeIndex, Series, Timestamp, date_range, @@ -493,12 +494,12 @@ def test_frame_mixedtype_orient(self): # GH10289 left = read_json(inp, orient=orient, convert_axes=False) tm.assert_frame_equal(left, right) - right.index = pd.RangeIndex(len(df)) + right.index = RangeIndex(len(df)) inp = StringIO(df.to_json(orient="records")) left = read_json(inp, orient="records", convert_axes=False) tm.assert_frame_equal(left, right) - right.columns = pd.RangeIndex(df.shape[1]) + right.columns = RangeIndex(df.shape[1]) inp = StringIO(df.to_json(orient="values")) left = read_json(inp, orient="values", convert_axes=False) tm.assert_frame_equal(left, right) @@ -2188,3 +2189,14 @@ def test_to_json_ea_null(): {"a":null,"b":null} """ assert result == expected + + +def test_read_json_lines_rangeindex(): + # GH 57429 + data = """ +{"a": 1, "b": 2} +{"a": 3, "b": 4} +""" + result = read_json(StringIO(data), lines=True).index + expected = RangeIndex(2) + tm.assert_index_equal(result, expected, exact=True)