
Backport PR #57439 on branch 2.2.x (BUG: read_json returning Index instead of RangeIndex) (#57552)

Backport PR #57439: BUG: read_json returning Index instead of RangeIndex

Co-authored-by: Matthew Roeschke <[email protected]>
meeseeksmachine and mroeschke authored Feb 21, 2024
1 parent c101d30 commit 3a4033c
Showing 3 changed files with 29 additions and 13 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.1.rst
@@ -18,6 +18,7 @@ Fixed regressions
 - Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`)
 - Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
 - Fixed regression in :func:`pandas.testing.assert_series_equal` defaulting to ``check_exact=True`` when checking the :class:`Index` (:issue:`57067`)
+- Fixed regression in :func:`read_json` where an :class:`Index` would be returned instead of a :class:`RangeIndex` (:issue:`57429`)
 - Fixed regression in :func:`wide_to_long` raising an ``AttributeError`` for string columns (:issue:`57066`)
 - Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
 - Fixed regression in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
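
For reference, a minimal check of the restored behavior (mirroring the new regression test below; assumes pandas 2.2.1 or later):

    from io import StringIO
    import pandas as pd

    data = '{"a": 1, "b": 2}\n{"a": 3, "b": 4}\n'
    df = pd.read_json(StringIO(data), lines=True)
    # With the regression fixed, the default index is a RangeIndex again,
    # not a materialized integer Index.
    print(type(df.index))  # <class 'pandas.core.indexes.range.RangeIndex'>
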
25 changes: 14 additions & 11 deletions pandas/io/json/_json.py
@@ -1266,23 +1266,25 @@ def _try_convert_data(
         if result:
             return new_data, True

+        converted = False
         if self.dtype_backend is not lib.no_default and not is_axis:
             # Fall through for conversion later on
             return data, True
         elif is_string_dtype(data.dtype):
             # try float
             try:
                 data = data.astype("float64")
+                converted = True
             except (TypeError, ValueError):
                 pass

-        if data.dtype.kind == "f":
-            if data.dtype != "float64":
-                # coerce floats to 64
-                try:
-                    data = data.astype("float64")
-                except (TypeError, ValueError):
-                    pass
+        if data.dtype.kind == "f" and data.dtype != "float64":
+            # coerce floats to 64
+            try:
+                data = data.astype("float64")
+                converted = True
+            except (TypeError, ValueError):
+                pass

         # don't coerce 0-len data
         if len(data) and data.dtype in ("float", "object"):
@@ -1291,14 +1293,15 @@
                 new_data = data.astype("int64")
                 if (new_data == data).all():
                     data = new_data
+                    converted = True
             except (TypeError, ValueError, OverflowError):
                 pass

-        # coerce ints to 64
-        if data.dtype == "int":
-            # coerce floats to 64
+        if data.dtype == "int" and data.dtype != "int64":
+            # coerce ints to 64
             try:
                 data = data.astype("int64")
+                converted = True
             except (TypeError, ValueError):
                 pass

@@ -1307,7 +1310,7 @@
         if self.orient == "split":
             return data, False

-        return data, True
+        return data, converted

     @final
     def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:
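
The substance of the patch is the new converted flag: _try_convert_data now reports whether any dtype coercion actually happened, so the caller can leave an untouched axis (typically a RangeIndex) alone instead of rebuilding it as an Index. A toy sketch of that pattern, not the actual pandas internals:

    import pandas as pd

    def try_convert(values: pd.Series) -> tuple[pd.Series, bool]:
        # Report whether a conversion actually happened, mirroring the
        # converted flag introduced in the patch.
        converted = False
        if values.dtype.kind == "f" and values.dtype != "float64":
            values = values.astype("float64")
            converted = True
        if values.dtype == "int" and values.dtype != "int64":
            values = values.astype("int64")
            converted = True
        return values, converted

    axis = pd.RangeIndex(3)
    new_values, converted = try_convert(pd.Series(axis))
    # Only replace the axis when something was coerced; otherwise the cheap
    # RangeIndex survives instead of being materialized into an Index.
    result = pd.Index(new_values) if converted else axis
    print(type(result))  # <class 'pandas.core.indexes.range.RangeIndex'>
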
16 changes: 14 additions & 2 deletions pandas/tests/io/json/test_pandas.py
@@ -24,6 +24,7 @@
     DataFrame,
     DatetimeIndex,
     Index,
+    RangeIndex,
     Series,
     Timestamp,
     date_range,
@@ -493,12 +494,12 @@ def test_frame_mixedtype_orient(self):  # GH10289
         left = read_json(inp, orient=orient, convert_axes=False)
         tm.assert_frame_equal(left, right)

-        right.index = pd.RangeIndex(len(df))
+        right.index = RangeIndex(len(df))
         inp = StringIO(df.to_json(orient="records"))
         left = read_json(inp, orient="records", convert_axes=False)
         tm.assert_frame_equal(left, right)

-        right.columns = pd.RangeIndex(df.shape[1])
+        right.columns = RangeIndex(df.shape[1])
         inp = StringIO(df.to_json(orient="values"))
         left = read_json(inp, orient="values", convert_axes=False)
         tm.assert_frame_equal(left, right)
@@ -2188,3 +2189,14 @@ def test_to_json_ea_null():
 {"a":null,"b":null}
 """
     assert result == expected
+
+
+def test_read_json_lines_rangeindex():
+    # GH 57429
+    data = """
+{"a": 1, "b": 2}
+{"a": 3, "b": 4}
+"""
+    result = read_json(StringIO(data), lines=True).index
+    expected = RangeIndex(2)
+    tm.assert_index_equal(result, expected, exact=True)
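
Note the exact=True in the assertion: with assert_index_equal's default exact="equiv", a RangeIndex and an equal int64 Index compare as equivalent, so only the strict check would catch this regression. A small illustration, assuming the public pandas.testing API:

    import pandas as pd
    from pandas.testing import assert_index_equal

    left = pd.RangeIndex(2)
    right = pd.Index([0, 1])

    assert_index_equal(left, right)  # passes: default exact="equiv" treats these as equivalent
    try:
        assert_index_equal(left, right, exact=True)
    except AssertionError:
        print("exact=True distinguishes RangeIndex from Index")
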
