Skip to content

Commit

Permalink
REF: simplify Categorical.__repr__ (#55391)
Browse files Browse the repository at this point in the history
* REF: remove unused arguments from private Categorical methods

* REF: remove unused args from CategoricalFormatter

* REF: remove CategoricalFormatter
  • Loading branch information
jbrockmendel authored Oct 4, 2023
1 parent 4145278 commit 6d4819b
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 101 deletions.
67 changes: 34 additions & 33 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2151,21 +2151,6 @@ def _formatter(self, boxed: bool = False):
# Defer to CategoricalFormatter's formatter.
return None

def _tidy_repr(self, max_vals: int = 10, footer: bool = True) -> str:
"""
a short repr displaying only max_vals and an optional (but default
footer)
"""
num = max_vals // 2
head = self[:num]._get_repr(length=False, footer=False)
tail = self[-(max_vals - num) :]._get_repr(length=False, footer=False)

result = f"{head[:-1]}, ..., {tail[1:]}"
if footer:
result = f"{result}\n{self._repr_footer()}"

return str(result)

def _repr_categories(self) -> list[str]:
"""
return the base repr for the categories
Expand Down Expand Up @@ -2221,33 +2206,49 @@ def _repr_categories_info(self) -> str:
# Replace ' < ... < ' with ' ... ' to save space
return f"{levheader}[{levstring.replace(' < ... < ', ' ... ')}]"

def _repr_footer(self) -> str:
info = self._repr_categories_info()
return f"Length: {len(self)}\n{info}"

def _get_repr(
self, length: bool = True, na_rep: str = "NaN", footer: bool = True
) -> str:
def _get_values_repr(self) -> str:
from pandas.io.formats import format as fmt

formatter = fmt.CategoricalFormatter(
self, length=length, na_rep=na_rep, footer=footer
assert len(self) > 0

vals = self._internal_get_values()
fmt_values = fmt.format_array(
vals,
None,
float_format=None,
na_rep="NaN",
quoting=QUOTE_NONNUMERIC,
)
result = formatter.to_string()
return str(result)

fmt_values = [i.strip() for i in fmt_values]
joined = ", ".join(fmt_values)
result = "[" + joined + "]"
return result

def __repr__(self) -> str:
"""
String representation.
"""
_maxlen = 10
if len(self._codes) > _maxlen:
result = self._tidy_repr(_maxlen)
elif len(self._codes) > 0:
result = self._get_repr(length=len(self) > _maxlen)
footer = self._repr_categories_info()
length = len(self)
max_len = 10
if length > max_len:
# In long cases we do not display all entries, so we add Length
# information to the __repr__.
num = max_len // 2
head = self[:num]._get_values_repr()
tail = self[-(max_len - num) :]._get_values_repr()
body = f"{head[:-1]}, ..., {tail[1:]}"
length_info = f"Length: {len(self)}"
result = f"{body}\n{length_info}\n{footer}"
elif length > 0:
body = self._get_values_repr()
result = f"{body}\n{footer}"
else:
msg = self._get_repr(length=False, footer=True).replace("\n", ", ")
result = f"[], {msg}"
# In the empty case we use a comma instead of newline to get
# a more compact __repr__
body = "[]"
result = f"{body}, {footer}"

return result

Expand Down
69 changes: 1 addition & 68 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
Sequence,
)
from contextlib import contextmanager
from csv import (
QUOTE_NONE,
QUOTE_NONNUMERIC,
)
from csv import QUOTE_NONE
from decimal import Decimal
from functools import partial
from io import StringIO
Expand Down Expand Up @@ -200,70 +197,6 @@
"""


class CategoricalFormatter:
def __init__(
self,
categorical: Categorical,
buf: IO[str] | None = None,
length: bool = True,
na_rep: str = "NaN",
footer: bool = True,
) -> None:
self.categorical = categorical
self.buf = buf if buf is not None else StringIO("")
self.na_rep = na_rep
self.length = length
self.footer = footer
self.quoting = QUOTE_NONNUMERIC

def _get_footer(self) -> str:
footer = ""

if self.length:
if footer:
footer += ", "
footer += f"Length: {len(self.categorical)}"

level_info = self.categorical._repr_categories_info()

# Levels are added in a newline
if footer:
footer += "\n"
footer += level_info

return str(footer)

def _get_formatted_values(self) -> list[str]:
return format_array(
self.categorical._internal_get_values(),
None,
float_format=None,
na_rep=self.na_rep,
quoting=self.quoting,
)

def to_string(self) -> str:
categorical = self.categorical

if len(categorical) == 0:
if self.footer:
return self._get_footer()
else:
return ""

fmt_values = self._get_formatted_values()

fmt_values = [i.strip() for i in fmt_values]
values = ", ".join(fmt_values)
result = ["[" + values + "]"]
if self.footer:
footer = self._get_footer()
if footer:
result.append(footer)

return str("\n".join(result))


class SeriesFormatter:
def __init__(
self,
Expand Down

0 comments on commit 6d4819b

Please sign in to comment.