From 88af8871caa80d90a4417264b9fbadc551240388 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Oct 2023 10:26:42 -0700 Subject: [PATCH] REF: privatize/annotate in io.formats (#55438) * REF: privatize in formats.info * privatize in formats.xml * REF: privatize in formats.xml * TYP: annotate in style_render * lint fixup --- pandas/io/formats/info.py | 52 +++++++-------- pandas/io/formats/style_render.py | 8 +-- pandas/io/formats/xml.py | 103 +++++++++++++++++------------- 3 files changed, 90 insertions(+), 73 deletions(-) diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index d20c2a62c61e2..552affbd053f2 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -356,7 +356,7 @@ def _initialize_memory_usage( return memory_usage -class BaseInfo(ABC): +class _BaseInfo(ABC): """ Base class for DataFrameInfo and SeriesInfo. @@ -439,7 +439,7 @@ def render( pass -class DataFrameInfo(BaseInfo): +class DataFrameInfo(_BaseInfo): """ Class storing dataframe-specific info. """ @@ -503,7 +503,7 @@ def render( verbose: bool | None, show_counts: bool | None, ) -> None: - printer = DataFrameInfoPrinter( + printer = _DataFrameInfoPrinter( info=self, max_cols=max_cols, verbose=verbose, @@ -512,7 +512,7 @@ def render( printer.to_buffer(buf) -class SeriesInfo(BaseInfo): +class SeriesInfo(_BaseInfo): """ Class storing series-specific info. """ @@ -538,7 +538,7 @@ def render( "Argument `max_cols` can only be passed " "in DataFrame.info, not Series.info" ) - printer = SeriesInfoPrinter( + printer = _SeriesInfoPrinter( info=self, verbose=verbose, show_counts=show_counts, @@ -572,7 +572,7 @@ def memory_usage_bytes(self) -> int: return self.data.memory_usage(index=True, deep=deep) -class InfoPrinterAbstract: +class _InfoPrinterAbstract: """ Class for printing dataframe or series info. """ @@ -586,11 +586,11 @@ def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None: fmt.buffer_put_lines(buf, lines) @abstractmethod - def _create_table_builder(self) -> TableBuilderAbstract: + def _create_table_builder(self) -> _TableBuilderAbstract: """Create instance of table builder.""" -class DataFrameInfoPrinter(InfoPrinterAbstract): +class _DataFrameInfoPrinter(_InfoPrinterAbstract): """ Class for printing dataframe info. @@ -650,27 +650,27 @@ def _initialize_show_counts(self, show_counts: bool | None) -> bool: else: return show_counts - def _create_table_builder(self) -> DataFrameTableBuilder: + def _create_table_builder(self) -> _DataFrameTableBuilder: """ Create instance of table builder based on verbosity and display settings. """ if self.verbose: - return DataFrameTableBuilderVerbose( + return _DataFrameTableBuilderVerbose( info=self.info, with_counts=self.show_counts, ) elif self.verbose is False: # specifically set to False, not necessarily None - return DataFrameTableBuilderNonVerbose(info=self.info) + return _DataFrameTableBuilderNonVerbose(info=self.info) elif self.exceeds_info_cols: - return DataFrameTableBuilderNonVerbose(info=self.info) + return _DataFrameTableBuilderNonVerbose(info=self.info) else: - return DataFrameTableBuilderVerbose( + return _DataFrameTableBuilderVerbose( info=self.info, with_counts=self.show_counts, ) -class SeriesInfoPrinter(InfoPrinterAbstract): +class _SeriesInfoPrinter(_InfoPrinterAbstract): """Class for printing series info. Parameters @@ -694,17 +694,17 @@ def __init__( self.verbose = verbose self.show_counts = self._initialize_show_counts(show_counts) - def _create_table_builder(self) -> SeriesTableBuilder: + def _create_table_builder(self) -> _SeriesTableBuilder: """ Create instance of table builder based on verbosity. """ if self.verbose or self.verbose is None: - return SeriesTableBuilderVerbose( + return _SeriesTableBuilderVerbose( info=self.info, with_counts=self.show_counts, ) else: - return SeriesTableBuilderNonVerbose(info=self.info) + return _SeriesTableBuilderNonVerbose(info=self.info) def _initialize_show_counts(self, show_counts: bool | None) -> bool: if show_counts is None: @@ -713,13 +713,13 @@ def _initialize_show_counts(self, show_counts: bool | None) -> bool: return show_counts -class TableBuilderAbstract(ABC): +class _TableBuilderAbstract(ABC): """ Abstract builder for info table. """ _lines: list[str] - info: BaseInfo + info: _BaseInfo @abstractmethod def get_lines(self) -> list[str]: @@ -769,7 +769,7 @@ def add_dtypes_line(self) -> None: self._lines.append(f"dtypes: {', '.join(collected_dtypes)}") -class DataFrameTableBuilder(TableBuilderAbstract): +class _DataFrameTableBuilder(_TableBuilderAbstract): """ Abstract builder for dataframe info table. @@ -820,7 +820,7 @@ def add_memory_usage_line(self) -> None: self._lines.append(f"memory usage: {self.memory_usage_string}") -class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder): +class _DataFrameTableBuilderNonVerbose(_DataFrameTableBuilder): """ Dataframe info table builder for non-verbose output. """ @@ -838,7 +838,7 @@ def add_columns_summary_line(self) -> None: self._lines.append(self.ids._summary(name="Columns")) -class TableBuilderVerboseMixin(TableBuilderAbstract): +class _TableBuilderVerboseMixin(_TableBuilderAbstract): """ Mixin for verbose info output. """ @@ -931,7 +931,7 @@ def _gen_dtypes(self) -> Iterator[str]: yield pprint_thing(dtype) -class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin): +class _DataFrameTableBuilderVerbose(_DataFrameTableBuilder, _TableBuilderVerboseMixin): """ Dataframe info table builder for verbose output. """ @@ -997,7 +997,7 @@ def _gen_columns(self) -> Iterator[str]: yield pprint_thing(col) -class SeriesTableBuilder(TableBuilderAbstract): +class _SeriesTableBuilder(_TableBuilderAbstract): """ Abstract builder for series info table. @@ -1029,7 +1029,7 @@ def _fill_non_empty_info(self) -> None: """Add lines to the info table, pertaining to non-empty series.""" -class SeriesTableBuilderNonVerbose(SeriesTableBuilder): +class _SeriesTableBuilderNonVerbose(_SeriesTableBuilder): """ Series info table builder for non-verbose output. """ @@ -1043,7 +1043,7 @@ def _fill_non_empty_info(self) -> None: self.add_memory_usage_line() -class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin): +class _SeriesTableBuilderVerbose(_SeriesTableBuilder, _TableBuilderVerboseMixin): """ Series info table builder for verbose output. """ diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 90e9b1f0486db..829ed4a33f6a4 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -2357,7 +2357,7 @@ def color(value, user_arg, command, comm_arg): return latex_styles -def _escape_latex(s): +def _escape_latex(s: str) -> str: r""" Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``, ``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences. @@ -2392,7 +2392,7 @@ def _escape_latex(s): ) -def _math_mode_with_dollar(s): +def _math_mode_with_dollar(s: str) -> str: r""" All characters in LaTeX math mode are preserved. @@ -2425,7 +2425,7 @@ def _math_mode_with_dollar(s): return "".join(res).replace(r"rt8§=§7wz", r"\$") -def _math_mode_with_parentheses(s): +def _math_mode_with_parentheses(s: str) -> str: r""" All characters in LaTeX math mode are preserved. @@ -2461,7 +2461,7 @@ def _math_mode_with_parentheses(s): return "".join(res) -def _escape_latex_math(s): +def _escape_latex_math(s: str) -> str: r""" All characters in LaTeX math mode are preserved. diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index a6ee8407988ec..f56fca8d7ef44 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -8,11 +8,15 @@ from typing import ( TYPE_CHECKING, Any, + final, ) import warnings from pandas.errors import AbstractMethodError -from pandas.util._decorators import doc +from pandas.util._decorators import ( + cache_readonly, + doc, +) from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.missing import isna @@ -41,7 +45,7 @@ storage_options=_shared_docs["storage_options"], compression_options=_shared_docs["compression_options"] % "path_or_buffer", ) -class BaseXMLFormatter: +class _BaseXMLFormatter: """ Subclass for formatting data in XML. @@ -138,14 +142,14 @@ def __init__( self.storage_options = storage_options self.orig_cols = self.frame.columns.tolist() - self.frame_dicts = self.process_dataframe() + self.frame_dicts = self._process_dataframe() - self.validate_columns() - self.validate_encoding() - self.prefix_uri = self.get_prefix_uri() - self.handle_indexes() + self._validate_columns() + self._validate_encoding() + self.prefix_uri = self._get_prefix_uri() + self._handle_indexes() - def build_tree(self) -> bytes: + def _build_tree(self) -> bytes: """ Build tree from data. @@ -154,7 +158,8 @@ def build_tree(self) -> bytes: """ raise AbstractMethodError(self) - def validate_columns(self) -> None: + @final + def _validate_columns(self) -> None: """ Validate elems_cols and attrs_cols. @@ -175,7 +180,8 @@ def validate_columns(self) -> None: f"{type(self.elem_cols).__name__} is not a valid type for elem_cols" ) - def validate_encoding(self) -> None: + @final + def _validate_encoding(self) -> None: """ Validate encoding. @@ -189,7 +195,8 @@ def validate_encoding(self) -> None: codecs.lookup(self.encoding) - def process_dataframe(self) -> dict[int | str, dict[str, Any]]: + @final + def _process_dataframe(self) -> dict[int | str, dict[str, Any]]: """ Adjust Data Frame to fit xml output. @@ -213,7 +220,8 @@ def process_dataframe(self) -> dict[int | str, dict[str, Any]]: return df.to_dict(orient="index") - def handle_indexes(self) -> None: + @final + def _handle_indexes(self) -> None: """ Handle indexes. @@ -234,7 +242,7 @@ def handle_indexes(self) -> None: if self.elem_cols: self.elem_cols = indexes + self.elem_cols - def get_prefix_uri(self) -> str: + def _get_prefix_uri(self) -> str: """ Get uri of namespace prefix. @@ -248,7 +256,8 @@ def get_prefix_uri(self) -> str: raise AbstractMethodError(self) - def other_namespaces(self) -> dict: + @final + def _other_namespaces(self) -> dict: """ Define other namespaces. @@ -267,7 +276,8 @@ def other_namespaces(self) -> dict: return nmsp_dict - def build_attribs(self, d: dict[str, Any], elem_row: Any) -> Any: + @final + def _build_attribs(self, d: dict[str, Any], elem_row: Any) -> Any: """ Create attributes of row. @@ -287,6 +297,7 @@ def build_attribs(self, d: dict[str, Any], elem_row: Any) -> Any: raise KeyError(f"no valid column, {col}") return elem_row + @final def _get_flat_col_name(self, col: str | tuple) -> str: flat_col = col if isinstance(col, tuple): @@ -297,17 +308,20 @@ def _get_flat_col_name(self, col: str | tuple) -> str: ) return f"{self.prefix_uri}{flat_col}" - def build_elems(self, d: dict[str, Any], elem_row: Any) -> None: + @cache_readonly + def _sub_element_cls(self): + raise AbstractMethodError(self) + + @final + def _build_elems(self, d: dict[str, Any], elem_row: Any) -> None: """ Create child elements of row. This method adds child elements using elem_cols to row element and works with tuples for multindex or hierarchical columns. """ + sub_element_cls = self._sub_element_cls - raise AbstractMethodError(self) - - def _build_elems(self, sub_element_cls, d: dict[str, Any], elem_row: Any) -> None: if not self.elem_cols: return @@ -319,8 +333,9 @@ def _build_elems(self, sub_element_cls, d: dict[str, Any], elem_row: Any) -> Non except KeyError: raise KeyError(f"no valid column, {col}") + @final def write_output(self) -> str | None: - xml_doc = self.build_tree() + xml_doc = self._build_tree() if self.path_or_buffer is not None: with get_handle( @@ -337,13 +352,13 @@ def write_output(self) -> str | None: return xml_doc.decode(self.encoding).rstrip() -class EtreeXMLFormatter(BaseXMLFormatter): +class EtreeXMLFormatter(_BaseXMLFormatter): """ Class for formatting data in xml using Python standard library modules: `xml.etree.ElementTree` and `xml.dom.minidom`. """ - def build_tree(self) -> bytes: + def _build_tree(self) -> bytes: from xml.etree.ElementTree import ( Element, SubElement, @@ -351,7 +366,7 @@ def build_tree(self) -> bytes: ) self.root = Element( - f"{self.prefix_uri}{self.root_name}", attrib=self.other_namespaces() + f"{self.prefix_uri}{self.root_name}", attrib=self._other_namespaces() ) for d in self.frame_dicts.values(): @@ -359,11 +374,11 @@ def build_tree(self) -> bytes: if not self.attr_cols and not self.elem_cols: self.elem_cols = list(d.keys()) - self.build_elems(d, elem_row) + self._build_elems(d, elem_row) else: - elem_row = self.build_attribs(d, elem_row) - self.build_elems(d, elem_row) + elem_row = self._build_attribs(d, elem_row) + self._build_elems(d, elem_row) self.out_xml = tostring( self.root, @@ -373,7 +388,7 @@ def build_tree(self) -> bytes: ) if self.pretty_print: - self.out_xml = self.prettify_tree() + self.out_xml = self._prettify_tree() if self.stylesheet is not None: raise ValueError( @@ -382,7 +397,7 @@ def build_tree(self) -> bytes: return self.out_xml - def get_prefix_uri(self) -> str: + def _get_prefix_uri(self) -> str: from xml.etree.ElementTree import register_namespace uri = "" @@ -402,12 +417,13 @@ def get_prefix_uri(self) -> str: return uri - def build_elems(self, d: dict[str, Any], elem_row: Any) -> None: + @cache_readonly + def _sub_element_cls(self): from xml.etree.ElementTree import SubElement - self._build_elems(SubElement, d, elem_row) + return SubElement - def prettify_tree(self) -> bytes: + def _prettify_tree(self) -> bytes: """ Output tree for pretty print format. @@ -421,7 +437,7 @@ def prettify_tree(self) -> bytes: return dom.toprettyxml(indent=" ", encoding=self.encoding) -class LxmlXMLFormatter(BaseXMLFormatter): +class LxmlXMLFormatter(_BaseXMLFormatter): """ Class for formatting data in xml using Python standard library modules: `xml.etree.ElementTree` and `xml.dom.minidom`. @@ -430,9 +446,9 @@ class LxmlXMLFormatter(BaseXMLFormatter): def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) - self.convert_empty_str_key() + self._convert_empty_str_key() - def build_tree(self) -> bytes: + def _build_tree(self) -> bytes: """ Build tree from data. @@ -452,11 +468,11 @@ def build_tree(self) -> bytes: if not self.attr_cols and not self.elem_cols: self.elem_cols = list(d.keys()) - self.build_elems(d, elem_row) + self._build_elems(d, elem_row) else: - elem_row = self.build_attribs(d, elem_row) - self.build_elems(d, elem_row) + elem_row = self._build_attribs(d, elem_row) + self._build_elems(d, elem_row) self.out_xml = tostring( self.root, @@ -467,11 +483,11 @@ def build_tree(self) -> bytes: ) if self.stylesheet is not None: - self.out_xml = self.transform_doc() + self.out_xml = self._transform_doc() return self.out_xml - def convert_empty_str_key(self) -> None: + def _convert_empty_str_key(self) -> None: """ Replace zero-length string in `namespaces`. @@ -482,7 +498,7 @@ def convert_empty_str_key(self) -> None: if self.namespaces and "" in self.namespaces.keys(): self.namespaces[None] = self.namespaces.pop("", "default") - def get_prefix_uri(self) -> str: + def _get_prefix_uri(self) -> str: uri = "" if self.namespaces: if self.prefix: @@ -497,12 +513,13 @@ def get_prefix_uri(self) -> str: return uri - def build_elems(self, d: dict[str, Any], elem_row: Any) -> None: + @cache_readonly + def _sub_element_cls(self): from lxml.etree import SubElement - self._build_elems(SubElement, d, elem_row) + return SubElement - def transform_doc(self) -> bytes: + def _transform_doc(self) -> bytes: """ Parse stylesheet from file or buffer and run it.