Merge branch 'main' into allow_pep8_validation_of_multiple_files

pandas-dev · Mar 20, 2024 · 39b9e2e · 39b9e2e
2 parents 574de8a + 924f246
commit 39b9e2e
Show file tree

Hide file tree

Showing 16 changed files with 84 additions and 58 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -78,7 +78,7 @@ repos:
     hooks:
     -   id: pylint
         stages: [manual]
-        args: [--load-plugins=pylint.extensions.redefined_loop_name]
+        args: [--load-plugins=pylint.extensions.redefined_loop_name, --fail-on=I0021]
     -   id: pylint
         alias: redefined-outer-name
         name: Redefining name from outer scope

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -357,6 +357,7 @@ MultiIndex
 I/O
 ^^^
 - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
+- All ``Mapping`` objects are now pretty printed correctly; previously only ``dict`` instances were (:issue:`57915`)
 -
 -
 

diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd
@@ -185,11 +185,11 @@ cdef inline const char* get_c_string(str py_string) except NULL:
     return get_c_string_buf_and_size(py_string, NULL)
 
 
-cdef inline bytes string_encode_locale(str py_string) noexcept:
+cdef inline bytes string_encode_locale(str py_string):
     """As opposed to PyUnicode_Encode, use current system locale to encode."""
     return PyUnicode_EncodeLocale(py_string, NULL)
 
 
-cdef inline object char_to_string_locale(const char* data) noexcept:
+cdef inline object char_to_string_locale(const char* data):
     """As opposed to PyUnicode_FromString, use current system locale to decode."""
     return PyUnicode_DecodeLocale(data, NULL)
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -1231,10 +1231,6 @@ def tz_aware_fixture(request):
     return request.param
 
 
-# Generate cartesian product of tz_aware_fixture:
-tz_aware_fixture2 = tz_aware_fixture
-
-
 _UTCS = ["utc", "dateutil/UTC", utc, tzutc(), timezone.utc]
 if zoneinfo is not None:
     _UTCS.append(zoneinfo.ZoneInfo("UTC"))

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -87,12 +87,6 @@
 
 
 _shared_docs: dict[str, str] = {}
-_indexops_doc_kwargs = {
-    "klass": "IndexOpsMixin",
-    "inplace": "",
-    "unique": "IndexOpsMixin",
-    "duplicated": "IndexOpsMixin",
-}
 
 
 class PandasObject(DirNamesMixin):

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
@@ -37,8 +37,6 @@
 
     from pandas.core.arrays import ExtensionArray
 
-_dtype_obj = np.dtype(object)
-
 
 @overload
 def _astype_nansafe(

diff --git a/pandas/core/groupby/indexing.py b/pandas/core/groupby/indexing.py
@@ -114,7 +114,6 @@ def _positional_selector(self) -> GroupByPositionalSelector:
         4  b  5
         """
         if TYPE_CHECKING:
-            # pylint: disable-next=used-before-assignment
             groupby_self = cast(groupby.GroupBy, self)
         else:
             groupby_self = self

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -7154,6 +7154,43 @@ def shape(self) -> Shape:
         return (len(self),)
 
 
+def maybe_sequence_to_range(sequence) -> Any | range:
+    """
+    Convert a 1D, non-pandas sequence to a range if possible.
+
+    Returns the input if not possible.
+
+    Parameters
+    ----------
+    sequence : 1D sequence
+        Sequence to inspect; a Series or Index is returned unchanged.
+
+    Returns
+    -------
+    Any : input or range
+    """
+    if isinstance(sequence, (ABCSeries, Index)):
+        return sequence
+    np_sequence = np.asarray(sequence)
+    if np_sequence.dtype.kind != "i" or len(np_sequence) == 1:
+        return sequence
+    elif len(np_sequence) == 0:
+        return range(0)
+    diff = np_sequence[1] - np_sequence[0]
+    if diff == 0:
+        return sequence
+    elif len(np_sequence) == 2:
+        return range(np_sequence[0], np_sequence[1] + diff, diff)
+    maybe_range_indexer, remainder = np.divmod(np_sequence - np_sequence[0], diff)
+    if (
+        lib.is_range_indexer(maybe_range_indexer, len(maybe_range_indexer))
+        and not remainder.any()
+    ):
+        return range(np_sequence[0], np_sequence[-1] + diff, diff)
+    else:
+        return sequence
+
+
 def ensure_index_from_sequences(sequences, names=None) -> Index:
     """
     Construct an index from sequences of data.
@@ -7172,8 +7209,8 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
 
     Examples
     --------
-    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
-    Index([1, 2, 3], dtype='int64', name='name')
+    >>> ensure_index_from_sequences([[1, 2, 4]], names=["name"])
+    Index([1, 2, 4], dtype='int64', name='name')
 
     >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
     MultiIndex([('a', 'a'),
@@ -7189,8 +7226,9 @@ def ensure_index_from_sequences(sequences, names=None) -> Index:
     if len(sequences) == 1:
         if names is not None:
             names = names[0]
-        return Index(sequences[0], name=names)
+        return Index(maybe_sequence_to_range(sequences[0]), name=names)
     else:
+        # TODO: Apply maybe_sequence_to_range to sequences?
         return MultiIndex.from_arrays(sequences, names=names)
 
 

diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -29,7 +29,6 @@
     doc,
 )
 
-from pandas.core.dtypes import missing
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.common import (
     ensure_platform_int,
@@ -475,28 +474,13 @@ def _shallow_copy(self, values, name: Hashable = no_default):
         if values.dtype.kind == "i" and values.ndim == 1:
             # GH 46675 & 43885: If values is equally spaced, return a
             # more memory-compact RangeIndex instead of Index with 64-bit dtype
-            if len(values) == 0:
-                return type(self)._simple_new(_empty_range, name=name)
-            elif len(values) == 1:
+            if len(values) == 1:
                 start = values[0]
                 new_range = range(start, start + self.step, self.step)
                 return type(self)._simple_new(new_range, name=name)
-            diff = values[1] - values[0]
-            if not missing.isna(diff) and diff != 0:
-                if len(values) == 2:
-                    # Can skip is_range_indexer check
-                    new_range = range(values[0], values[-1] + diff, diff)
-                    return type(self)._simple_new(new_range, name=name)
-                else:
-                    maybe_range_indexer, remainder = np.divmod(values - values[0], diff)
-                    if (
-                        lib.is_range_indexer(
-                            maybe_range_indexer, len(maybe_range_indexer)
-                        )
-                        and not remainder.any()
-                    ):
-                        new_range = range(values[0], values[-1] + diff, diff)
-                        return type(self)._simple_new(new_range, name=name)
+            maybe_range = ibase.maybe_sequence_to_range(values)
+            if isinstance(maybe_range, range):
+                return type(self)._simple_new(maybe_range, name=name)
         return self._constructor._simple_new(values, name=name)
 
     def _view(self) -> Self:

diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -993,7 +993,6 @@ def to_datetime(
         errors=errors,
         exact=exact,
     )
-    # pylint: disable-next=used-before-assignment
     result: Timestamp | NaTType | Series | Index
 
     if isinstance(arg, Timestamp):

diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py
@@ -187,8 +187,8 @@ def pprint_thing(
     _nest_lvl : internal use only. pprint_thing() is mutually-recursive
         with pprint_sequence, this argument is used to keep track of the
         current nesting level, and limit it.
-    escape_chars : list or dict, optional
-        Characters to escape. If a dict is passed the values are the
+    escape_chars : list[str] or Mapping[str, str], optional
+        Characters to escape. If a Mapping is passed the values are the
         replacements
     default_escapes : bool, default False
         Whether the input escape characters replaces or adds to the defaults
@@ -204,11 +204,11 @@ def as_escaped_string(
         thing: Any, escape_chars: EscapeChars | None = escape_chars
     ) -> str:
         translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
-        if isinstance(escape_chars, dict):
+        if isinstance(escape_chars, Mapping):
             if default_escapes:
                 translate.update(escape_chars)
             else:
-                translate = escape_chars
+                translate = escape_chars  # type: ignore[assignment]
             escape_chars = list(escape_chars.keys())
         else:
             escape_chars = escape_chars or ()
@@ -220,7 +220,7 @@ def as_escaped_string(
 
     if hasattr(thing, "__next__"):
         return str(thing)
-    elif isinstance(thing, dict) and _nest_lvl < get_option(
+    elif isinstance(thing, Mapping) and _nest_lvl < get_option(
         "display.pprint_nest_depth"
     ):
         result = _pprint_dict(

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -1514,8 +1514,10 @@ class TestIndexUtils:
     @pytest.mark.parametrize(
         "data, names, expected",
         [
-            ([[1, 2, 3]], None, Index([1, 2, 3])),
-            ([[1, 2, 3]], ["name"], Index([1, 2, 3], name="name")),
+            ([[1, 2, 4]], None, Index([1, 2, 4])),
+            ([[1, 2, 4]], ["name"], Index([1, 2, 4], name="name")),
+            ([[1, 2, 3]], None, RangeIndex(1, 4)),
+            ([[1, 2, 3]], ["name"], RangeIndex(1, 4, name="name")),
             (
                 [["a", "a"], ["c", "d"]],
                 None,
@@ -1530,7 +1532,7 @@ class TestIndexUtils:
     )
     def test_ensure_index_from_sequences(self, data, names, expected):
         result = ensure_index_from_sequences(data, names)
-        tm.assert_index_equal(result, expected)
+        tm.assert_index_equal(result, expected, exact=True)
 
     def test_ensure_index_mixed_closed_intervals(self):
         # GH27172

diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py
@@ -1,5 +1,6 @@
 # Note! This file is aimed specifically at pandas.io.formats.printing utility
 # functions, not the general printing of pandas objects.
+from collections.abc import Mapping
 import string
 
 import pandas._config.config as cf
@@ -16,6 +17,17 @@ def test_adjoin():
     assert adjoined == expected
 
 
+class MyMapping(Mapping):
+    def __getitem__(self, key):
+        return 4
+
+    def __iter__(self):
+        return iter(["a", "b"])
+
+    def __len__(self):
+        return 2
+
+
 class TestPPrintThing:
     def test_repr_binary_type(self):
         letters = string.ascii_letters
@@ -42,6 +54,12 @@ def test_repr_obeys_max_seq_limit(self):
     def test_repr_set(self):
         assert printing.pprint_thing({1}) == "{1}"
 
+    def test_repr_dict(self):
+        assert printing.pprint_thing({"a": 4, "b": 4}) == "{'a': 4, 'b': 4}"
+
+    def test_repr_mapping(self):
+        assert printing.pprint_thing(MyMapping()) == "{'a': 4, 'b': 4}"
+
 
 class TestFormatBase:
     def test_adjoin(self):

diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
@@ -1311,7 +1311,6 @@ def test_to_latex_multiindex_names(self, name0, name1, axes):
         )
         col_names = [n if (bool(n) and 1 in axes) else "" for n in names]
         observed = df.to_latex(multirow=False)
-        # pylint: disable-next=consider-using-f-string
         expected = r"""\begin{tabular}{llrrrr}
 \toprule
  & %s & \multicolumn{2}{r}{1} & \multicolumn{2}{r}{2} \\

diff --git a/pandas/tests/io/xml/conftest.py b/pandas/tests/io/xml/conftest.py
@@ -11,7 +11,7 @@ def xml_data_path():
     Examples
     --------
     >>> def test_read_xml(xml_data_path):
-    ...     read_xml(xml_data_path / "file.xsl")
+    ...     pd.read_xml(xml_data_path / "file.xsl")
     """
     return Path(__file__).parent.parent / "data" / "xml"
 
@@ -24,7 +24,7 @@ def xml_books(xml_data_path, datapath):
     Examples
     --------
     >>> def test_read_xml(xml_books):
-    ...     read_xml(xml_books)
+    ...     pd.read_xml(xml_books)
     """
     return datapath(xml_data_path / "books.xml")
 
@@ -37,7 +37,7 @@ def xml_doc_ch_utf(xml_data_path, datapath):
     Examples
     --------
     >>> def test_read_xml(xml_doc_ch_utf):
-    ...     read_xml(xml_doc_ch_utf)
+    ...     pd.read_xml(xml_doc_ch_utf)
     """
     return datapath(xml_data_path / "doc_ch_utf.xml")
 
@@ -50,7 +50,7 @@ def xml_baby_names(xml_data_path, datapath):
     Examples
     --------
     >>> def test_read_xml(xml_baby_names):
-    ...     read_xml(xml_baby_names)
+    ...     pd.read_xml(xml_baby_names)
     """
     return datapath(xml_data_path / "baby_names.xml")
 
@@ -63,7 +63,7 @@ def kml_cta_rail_lines(xml_data_path, datapath):
     Examples
     --------
     >>> def test_read_xml(kml_cta_rail_lines):
-    ...     read_xml(
+    ...     pd.read_xml(
     ...         kml_cta_rail_lines,
     ...         xpath=".//k:Placemark",
     ...         namespaces={"k": "http://www.opengis.net/kml/2.2"},
@@ -80,7 +80,7 @@ def xsl_flatten_doc(xml_data_path, datapath):
 
     Examples
     --------
-    >>> def test_read_xsl(xsl_flatten_doc):
+    >>> def test_read_xsl(xsl_flatten_doc, mode):
     ...     with open(
     ...         xsl_flatten_doc, mode, encoding="utf-8" if mode == "r" else None
     ...     ) as f:
@@ -96,7 +96,7 @@ def xsl_row_field_output(xml_data_path, datapath):
 
     Examples
     --------
-    >>> def test_read_xsl(xsl_row_field_output):
+    >>> def test_read_xsl(xsl_row_field_output, mode):
     ...     with open(
     ...         xsl_row_field_output, mode, encoding="utf-8" if mode == "r" else None
     ...     ) as f:

diff --git a/pandas/tests/series/test_iteration.py b/pandas/tests/series/test_iteration.py
@@ -4,12 +4,10 @@ def test_keys(self, datetime_series):
 
     def test_iter_datetimes(self, datetime_series):
         for i, val in enumerate(datetime_series):
-            # pylint: disable-next=unnecessary-list-index-lookup
             assert val == datetime_series.iloc[i]
 
     def test_iter_strings(self, string_series):
         for i, val in enumerate(string_series):
-            # pylint: disable-next=unnecessary-list-index-lookup
             assert val == string_series.iloc[i]
 
     def test_iteritems_datetimes(self, datetime_series):
-Original file line number
+Diff line change
@@ Expand Up / @@ -357,6 +357,7 @@ MultiIndex @@
     I/O
     ^^^
     - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
+    - All ``Mapping`` objects are now pretty printed correctly; previously only ``dict`` instances were (:issue:`57915`)
     -
     -
@@ Expand Down @@