Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Index.format #55439

Merged
merged 8 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ For example:
Other Deprecations
^^^^^^^^^^^^^^^^^^
- Changed :meth:`Timedelta.resolution_string` to return ``h``, ``min``, ``s``, ``ms``, ``us``, and ``ns`` instead of ``H``, ``T``, ``S``, ``L``, ``U``, and ``N``, for compatibility with respective deprecations in frequency aliases (:issue:`52536`)
- Deprecated :meth:`Index.format`, use ``index.astype(str)`` or ``index.map(formatter)`` instead (:issue:`55413`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_clipboard`. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_csv` except ``path_or_buf``. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_dict`. (:issue:`54229`)
Expand Down Expand Up @@ -260,6 +261,7 @@ Other Deprecations
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecating downcasting the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_220.performance:
Expand Down
33 changes: 33 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1370,6 +1370,14 @@ def format(
"""
Render a string representation of the Index.
"""
warnings.warn(
# GH#55413
f"{type(self).__name__}.format is deprecated and will be removed "
"in a future version. Convert using index.astype(str) or "
"index.map(formatter) instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
header = []
if name:
header.append(
Expand All @@ -1383,6 +1391,31 @@ def format(

return self._format_with_header(header=header, na_rep=na_rep)

_default_na_rep = "NaN"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this correct if the data is a masked array with NA?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ATM Index.format and DTI.format have different defaults for na_rep. Putting this here keeps the same behavior for _format_flat while avoiding the need for overriding the whole method in DTI.

Looks like na_rep is ignored inside format_with_header anyway

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like na_rep is ignored inside format_with_header anyway

That's probably not good, but I guess that can be tackled elsewhere


@final
def _format_flat(
    self,
    *,
    include_name: bool,
    formatter: Callable | None = None,
) -> list[str_t]:
    """
    Render the Index as a flat list of entries, optionally with a header.

    Parameters
    ----------
    include_name : bool
        If True, the result starts with one header entry: the
        pretty-printed ``self.name``, or an empty string when the name
        is None.
    formatter : callable, optional
        If given, it is mapped over the index and the mapped values are
        returned after the header. Otherwise rendering is delegated to
        ``_format_with_header`` using ``self._default_na_rep``.

    Returns
    -------
    list
        The header (if requested) followed by one entry per element.
    """
    # Build the optional one-element header first.
    if not include_name:
        header = []
    elif self.name is None:
        header = [""]
    else:
        header = [pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))]

    if formatter is None:
        return self._format_with_header(header=header, na_rep=self._default_na_rep)

    mapped = self.map(formatter)
    return header + list(mapped)

def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str_t]:
from pandas.io.formats.format import format_array

Expand Down
11 changes: 11 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
cast,
final,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -42,6 +43,7 @@
cache_readonly,
doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_integer,
Expand Down Expand Up @@ -187,6 +189,7 @@ def _convert_tolerance(self, tolerance, target):

# --------------------------------------------------------------------
# Rendering Methods
_default_na_rep = "NaT"

def format(
self,
Expand All @@ -198,6 +201,14 @@ def format(
"""
Render a string representation of the Index.
"""
warnings.warn(
# GH#55413
f"{type(self).__name__}.format is deprecated and will be removed "
"in a future version. Convert using index.astype(str) or "
"index.map(formatter) instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
header = []
if name:
header.append(
Expand Down
72 changes: 72 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,15 @@ def format(
sparsify=None,
adjoin: bool = True,
) -> list:
warnings.warn(
# GH#55413
f"{type(self).__name__}.format is deprecated and will be removed "
"in a future version. Convert using index.astype(str) or "
"index.map(formatter) instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

if name is not None:
names = name

Expand Down Expand Up @@ -1445,6 +1454,69 @@ def format(
else:
return result_levels

def _format_multi(
    self,
    *,
    include_names: bool,
    sparsify: bool | None | lib.NoDefault,
    formatter: Callable | None = None,
) -> list:
    """
    Render each level of the MultiIndex as a list of strings.

    Parameters
    ----------
    include_names : bool
        If True, each level's list starts with the pretty-printed level
        name (an empty string when the name is None).
    sparsify : bool, None or lib.no_default
        None means "use the ``display.multi_sparse`` option". Any truthy
        value passes the levels through ``sparsify_labels``; when it is
        ``lib.no_default`` that object itself is used as the sentinel
        instead of the empty string.
    formatter : callable, optional
        Forwarded to ``_format_flat`` for every non-empty level.

    Returns
    -------
    list
        One list per level; an empty list when the index has no entries.
    """
    if len(self) == 0:
        return []

    stringified_levels = []
    for lev, level_codes in zip(self.levels, self.codes):
        na = _get_na_rep(lev.dtype)

        if len(lev) > 0:
            taken = lev.take(level_codes)
            formatted = taken._format_flat(include_name=False, formatter=formatter)

            # we have some NA: positions with code -1 get the NA representation
            mask = level_codes == -1
            if mask.any():
                formatted = np.array(formatted, dtype=object)
                formatted[mask] = na
                formatted = formatted.tolist()

        else:
            # weird all NA case
            formatted = [
                pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                for x in algos.take_nd(lev._values, level_codes)
            ]
        stringified_levels.append(formatted)

    result_levels = []
    for lev, lev_name in zip(stringified_levels, self.names):
        level = []

        if include_names:
            level.append(
                pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                if lev_name is not None
                else ""
            )

        level.extend(np.array(lev, dtype=object))
        result_levels.append(level)

    if sparsify is None:
        sparsify = get_option("display.multi_sparse")

    if sparsify:
        sentinel: Literal[""] | bool | lib.NoDefault = ""
        # GH3547 use value of sparsify as sentinel if it's "Falsey"
        assert isinstance(sparsify, bool) or sparsify is lib.no_default
        if sparsify is lib.no_default:
            sentinel = sparsify
        # little bit of a kludge job for #1217
        # start=int(include_names) passes the header offset (0 or 1) on
        result_levels = sparsify_labels(
            result_levels, start=int(include_names), sentinel=sentinel
        )

    return result_levels

# --------------------------------------------------------------------
# Names Methods

Expand Down
8 changes: 4 additions & 4 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,8 +623,8 @@ def _format_header_mi(self) -> Iterable[ExcelCell]:
return

columns = self.columns
level_strs = columns.format(
sparsify=self.merge_cells, adjoin=False, names=False
level_strs = columns._format_multi(
sparsify=self.merge_cells, include_names=False
)
level_lengths = get_level_lengths(level_strs)
coloffset = 0
Expand Down Expand Up @@ -813,8 +813,8 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:

if self.merge_cells:
# Format hierarchical rows as merged cells.
level_strs = self.df.index.format(
sparsify=True, adjoin=False, names=False
level_strs = self.df.index._format_multi(
sparsify=True, include_names=False
)
level_lengths = get_level_lengths(level_strs)

Expand Down
23 changes: 15 additions & 8 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,14 @@ def to_string(self) -> str:
if len(series) == 0:
return f"{type(self.series).__name__}([], {footer})"

have_header = _has_names(series.index)
fmt_index = self.tr_series.index.format(name=True)
index = series.index
have_header = _has_names(index)
if isinstance(index, MultiIndex):
fmt_index = index._format_multi(include_names=True, sparsify=None)
adj = printing.get_adjustment()
fmt_index = adj.adjoin(2, *fmt_index).split("\n")
else:
fmt_index = index._format_flat(include_name=True)
fmt_values = self._get_formatted_values()

if self.is_truncated_vertically:
Expand Down Expand Up @@ -776,7 +782,7 @@ def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:
columns = frame.columns

if isinstance(columns, MultiIndex):
fmt_columns = columns.format(sparsify=False, adjoin=False)
fmt_columns = columns._format_multi(sparsify=False, include_names=False)
fmt_columns = list(zip(*fmt_columns))
dtypes = self.frame.dtypes._values

Expand All @@ -801,7 +807,7 @@ def space_format(x, y):

str_columns = [list(x) for x in zip(*str_columns)]
else:
fmt_columns = columns.format()
fmt_columns = columns._format_flat(include_name=False)
dtypes = self.frame.dtypes
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
str_columns = [
Expand All @@ -820,14 +826,15 @@ def _get_formatted_index(self, frame: DataFrame) -> list[str]:
fmt = self._get_formatter("__index__")

if isinstance(index, MultiIndex):
fmt_index = index.format(
fmt_index = index._format_multi(
sparsify=self.sparsify,
adjoin=False,
names=self.show_row_idx_names,
include_names=self.show_row_idx_names,
formatter=fmt,
)
else:
fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]
fmt_index = [
index._format_flat(include_name=self.show_row_idx_names, formatter=fmt)
]

fmt_index = [
tuple(
Expand Down
14 changes: 8 additions & 6 deletions pandas/io/formats/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def _write_col_header(self, indent: int) -> None:
sentinel = lib.no_default
else:
sentinel = False
levels = self.columns.format(sparsify=sentinel, adjoin=False, names=False)
levels = self.columns._format_multi(sparsify=sentinel, include_names=False)
level_lengths = get_level_lengths(levels, sentinel)
inner_lvl = len(level_lengths) - 1
for lnum, (records, values) in enumerate(zip(level_lengths, levels)):
Expand Down Expand Up @@ -437,7 +437,8 @@ def _write_regular_rows(
if fmt is not None:
index_values = self.fmt.tr_frame.index.map(fmt)
else:
index_values = self.fmt.tr_frame.index.format()
# only reached with non-Multi index
index_values = self.fmt.tr_frame.index._format_flat(include_name=False)

row: list[str] = []
for i in range(nrows):
Expand Down Expand Up @@ -480,13 +481,13 @@ def _write_hierarchical_rows(
nrows = len(frame)

assert isinstance(frame.index, MultiIndex)
idx_values = frame.index.format(sparsify=False, adjoin=False, names=False)
idx_values = frame.index._format_multi(sparsify=False, include_names=False)
idx_values = list(zip(*idx_values))

if self.fmt.sparsify:
# GH3547
sentinel = lib.no_default
levels = frame.index.format(sparsify=sentinel, adjoin=False, names=False)
levels = frame.index._format_multi(sparsify=sentinel, include_names=False)

level_lengths = get_level_lengths(levels, sentinel)
inner_lvl = len(level_lengths) - 1
Expand Down Expand Up @@ -579,7 +580,7 @@ def _write_hierarchical_rows(
)

idx_values = list(
zip(*frame.index.format(sparsify=False, adjoin=False, names=False))
zip(*frame.index._format_multi(sparsify=False, include_names=False))
)
row = []
row.extend(idx_values[i])
Expand All @@ -606,7 +607,8 @@ def _get_formatted_values(self) -> dict[int, list[str]]:
return {i: self.fmt.format_col(i) for i in range(self.ncols)}

def _get_columns_formatted_values(self) -> list[str]:
return self.columns.format()
# only reached with non-Multi Index
return self.columns._format_flat(include_name=False)

def write_style(self) -> None:
# We use the "scoped" attribute here so that the desired
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -1652,9 +1652,9 @@ def _get_level_lengths(
Result is a dictionary of (level, initial_position): span
"""
if isinstance(index, MultiIndex):
levels = index.format(sparsify=lib.no_default, adjoin=False)
levels = index._format_multi(sparsify=lib.no_default, include_names=False)
else:
levels = index.format()
levels = index._format_flat(include_name=False)

if hidden_elements is None:
hidden_elements = []
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/indexes/base_class/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas._config.config as cf

from pandas import Index
import pandas._testing as tm


class TestIndexRendering:
Expand Down Expand Up @@ -133,7 +134,9 @@ def test_summary_bug(self):
def test_index_repr_bool_nan(self):
# GH32146
arr = Index([True, False, np.nan], dtype=object)
exp1 = arr.format()
msg = "Index.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
exp1 = arr.format()
out1 = ["True", "False", "NaN"]
assert out1 == exp1

Expand All @@ -145,4 +148,6 @@ def test_format_different_scalar_lengths(self):
# GH#35439
idx = Index(["aaaaaaaaa", "b"])
expected = ["aaaaaaaaa", "b"]
assert idx.format() == expected
msg = r"Index\.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert idx.format() == expected
5 changes: 4 additions & 1 deletion pandas/tests/indexes/categorical/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@
import pandas._config.config as cf

from pandas import CategoricalIndex
import pandas._testing as tm


class TestCategoricalIndexRepr:
def test_format_different_scalar_lengths(self):
# GH#35439
idx = CategoricalIndex(["aaaaaaaaa", "b"])
expected = ["aaaaaaaaa", "b"]
assert idx.format() == expected
msg = r"CategoricalIndex\.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert idx.format() == expected

def test_string_categorical_index_repr(self):
# short
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/indexes/datetimes/test_datetimelike.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
""" generic tests from the Datetimelike class """
from pandas import date_range
import pandas._testing as tm


class TestDatetimeIndex:
def test_format(self):
# GH35439
idx = date_range("20130101", periods=5)
expected = [f"{x:%Y-%m-%d}" for x in idx]
assert idx.format() == expected
msg = r"DatetimeIndex\.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert idx.format() == expected
Loading