TYP: misc return types (pandas-dev#57430)
twoertwein authored and pmhatre1 committed May 7, 2024
1 parent f7b860b commit 477a4c3
Showing 6 changed files with 281 additions and 62 deletions.
1 change: 1 addition & 0 deletions pandas/_typing.py
@@ -118,6 +118,7 @@
     Concatenate: Any = None
 
 HashableT = TypeVar("HashableT", bound=Hashable)
+HashableT2 = TypeVar("HashableT2", bound=Hashable)
 MutableMappingT = TypeVar("MutableMappingT", bound=MutableMapping)
 
 # array-like
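
A second `Hashable`-bound TypeVar exists because a single TypeVar must bind to one type per call: when a signature involves two mapping levels whose key types can differ, each level needs its own variable. A minimal sketch of the distinction (illustrative names, not pandas code):

```python
from collections.abc import Hashable, Mapping
from typing import TypeVar

HashableT = TypeVar("HashableT", bound=Hashable)
HashableT2 = TypeVar("HashableT2", bound=Hashable)


def outer_keys(m: Mapping[HashableT, Mapping[HashableT2, object]]) -> list[HashableT]:
    # With a single TypeVar, str outer keys and int inner keys
    # could not both bind to it; two variables track them separately.
    return list(m)


keys = outer_keys({"a": {1: 1.0}, "b": {2: 2.0}})  # inferred as list[str]
```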
5 changes: 2 additions & 3 deletions pandas/core/base.py
@@ -8,7 +8,6 @@
 from typing import (
     TYPE_CHECKING,
     Any,
-    Callable,
     Generic,
     Literal,
     cast,
@@ -105,7 +104,7 @@ class PandasObject(DirNamesMixin):
     _cache: dict[str, Any]
 
     @property
-    def _constructor(self) -> Callable[..., Self]:
+    def _constructor(self) -> type[Self]:
         """
         Class constructor (for this class it's just `__class__`).
         """
@@ -1356,7 +1355,7 @@ def searchsorted(
             sorter=sorter,
         )
 
-    def drop_duplicates(self, *, keep: DropKeep = "first"):
+    def drop_duplicates(self, *, keep: DropKeep = "first") -> Self:
         duplicated = self._duplicated(keep=keep)
         # error: Value of type "IndexOpsMixin" is not indexable
         return self[~duplicated]  # type: ignore[index]
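
The previously missing annotation meant callers got an effectively untyped result; `-> Self` both types it and keeps subclass identity through chained calls. A hedged sketch of the effect:

```python
from typing import Self


class Ops:
    def drop_duplicates(self) -> Self:
        return self  # dedup logic elided


class MyOps(Ops):
    def total(self) -> int:
        return 0


# Because the return is Self, the chain keeps the subclass type:
n = MyOps().drop_duplicates().total()  # OK; a bare "Ops" return would reject this
```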
122 changes: 105 additions & 17 deletions pandas/core/frame.py
@@ -217,6 +217,8 @@
     FormattersType,
     Frequency,
     FromDictOrient,
+    HashableT,
+    HashableT2,
     IgnoreRaise,
     IndexKeyFunc,
     IndexLabel,
@@ -239,6 +241,7 @@
     SortKind,
     StorageOptions,
     Suffixes,
+    T,
     ToStataByteorder,
     ToTimestampHow,
     UpdateJoin,
@@ -643,10 +646,10 @@ class DataFrame(NDFrame, OpsMixin):
     __pandas_priority__ = 4000
 
     @property
-    def _constructor(self) -> Callable[..., DataFrame]:
+    def _constructor(self) -> type[DataFrame]:
         return DataFrame
 
-    def _constructor_from_mgr(self, mgr, axes):
+    def _constructor_from_mgr(self, mgr, axes) -> DataFrame:
         if self._constructor is DataFrame:
             # we are pandas.DataFrame (or a subclass that doesn't override _constructor)
             return DataFrame._from_mgr(mgr, axes=axes)
@@ -659,7 +662,7 @@ def _constructor_from_mgr(self, mgr, axes):
     def _sliced_from_mgr(self, mgr, axes) -> Series:
         return Series._from_mgr(mgr, axes)
 
-    def _constructor_sliced_from_mgr(self, mgr, axes):
+    def _constructor_sliced_from_mgr(self, mgr, axes) -> Series:
         if self._constructor_sliced is Series:
             ser = self._sliced_from_mgr(mgr, axes)
             ser._name = None  # caller is responsible for setting real name
@@ -1353,7 +1356,7 @@ def _get_values_for_csv(
         decimal: str,
         na_rep: str,
         quoting,  # int csv.QUOTE_FOO from stdlib
-    ) -> Self:
+    ) -> DataFrame:
         # helper used by to_csv
         mgr = self._mgr.get_values_for_csv(
             float_format=float_format,
@@ -1831,7 +1834,7 @@ def from_dict(
         a  b       1       3
            c       2       4
         """
-        index = None
+        index: list | Index | None = None
         orient = orient.lower()  # type: ignore[assignment]
         if orient == "index":
             if len(data) > 0:
@@ -1857,7 +1860,7 @@
         else:
             realdata = data["data"]
 
-        def create_index(indexlist, namelist):
+        def create_index(indexlist, namelist) -> Index:
             index: Index
             if len(namelist) > 1:
                 index = MultiIndex.from_tuples(indexlist, names=namelist)
@@ -2700,6 +2703,42 @@ def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None:
 
         to_feather(self, path, **kwargs)
 
+    @overload
+    def to_markdown(
+        self,
+        buf: None = ...,
+        *,
+        mode: str = ...,
+        index: bool = ...,
+        storage_options: StorageOptions | None = ...,
+        **kwargs,
+    ) -> str:
+        ...
+
+    @overload
+    def to_markdown(
+        self,
+        buf: FilePath | WriteBuffer[str],
+        *,
+        mode: str = ...,
+        index: bool = ...,
+        storage_options: StorageOptions | None = ...,
+        **kwargs,
+    ) -> None:
+        ...
+
+    @overload
+    def to_markdown(
+        self,
+        buf: FilePath | WriteBuffer[str] | None,
+        *,
+        mode: str = ...,
+        index: bool = ...,
+        storage_options: StorageOptions | None = ...,
+        **kwargs,
+    ) -> str | None:
+        ...
+
     @doc(
         Series.to_markdown,
         klass=_shared_doc_kwargs["klass"],
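
The three overloads encode how the return depends on `buf`: omitting it returns the markdown text, passing a path or writable text buffer writes the output and returns None, and the final overload covers callers whose `buf` is only known as the full union. Roughly how a checker resolves a call (at runtime `to_markdown` also needs the optional `tabulate` dependency):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2]})

text = df.to_markdown()            # buf=None overload    -> str
result = df.to_markdown("out.md")  # buffer/path overload -> None
```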
@@ -2881,6 +2920,39 @@ def to_parquet(
         **kwargs,
     )
 
+    @overload
+    def to_orc(
+        self,
+        path: None = ...,
+        *,
+        engine: Literal["pyarrow"] = ...,
+        index: bool | None = ...,
+        engine_kwargs: dict[str, Any] | None = ...,
+    ) -> bytes:
+        ...
+
+    @overload
+    def to_orc(
+        self,
+        path: FilePath | WriteBuffer[bytes],
+        *,
+        engine: Literal["pyarrow"] = ...,
+        index: bool | None = ...,
+        engine_kwargs: dict[str, Any] | None = ...,
+    ) -> None:
+        ...
+
+    @overload
+    def to_orc(
+        self,
+        path: FilePath | WriteBuffer[bytes] | None,
+        *,
+        engine: Literal["pyarrow"] = ...,
+        index: bool | None = ...,
+        engine_kwargs: dict[str, Any] | None = ...,
+    ) -> bytes | None:
+        ...
+
     def to_orc(
         self,
         path: FilePath | WriteBuffer[bytes] | None = None,
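
Same pattern as the `to_markdown` overloads above, with `bytes` in place of `str`: no `path` yields the serialized ORC payload, a path or binary buffer yields None. A quick usage sketch (ORC support requires the optional `pyarrow` dependency at runtime):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2]})

payload = df.to_orc()   # path=None overload -> bytes
df.to_orc("frame.orc")  # path overload      -> None
```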
@@ -4027,7 +4099,7 @@ def _setitem_slice(self, key: slice, value) -> None:
         # backwards-compat, xref GH#31469
         self.iloc[key] = value
 
-    def _setitem_array(self, key, value):
+    def _setitem_array(self, key, value) -> None:
         # also raises Exception if object array with NA values
         if com.is_bool_indexer(key):
             # bool indexer is indexing along rows
@@ -4061,7 +4133,7 @@ def _setitem_array(self, key, value):
         elif np.ndim(value) > 1:
             # list of lists
             value = DataFrame(value).values
-            return self._setitem_array(key, value)
+            self._setitem_array(key, value)
 
         else:
             self._iset_not_inplace(key, value)
@@ -4595,7 +4667,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
 
         return _eval(expr, inplace=inplace, **kwargs)
 
-    def select_dtypes(self, include=None, exclude=None) -> Self:
+    def select_dtypes(self, include=None, exclude=None) -> DataFrame:
         """
         Return a subset of the DataFrame's columns based on the column dtypes.
 
@@ -5474,9 +5546,21 @@ def pop(self, item: Hashable) -> Series:
         """
         return super().pop(item=item)
 
+    @overload
+    def _replace_columnwise(
+        self, mapping: dict[Hashable, tuple[Any, Any]], inplace: Literal[True], regex
+    ) -> None:
+        ...
+
+    @overload
+    def _replace_columnwise(
+        self, mapping: dict[Hashable, tuple[Any, Any]], inplace: Literal[False], regex
+    ) -> Self:
+        ...
+
     def _replace_columnwise(
         self, mapping: dict[Hashable, tuple[Any, Any]], inplace: bool, regex
-    ):
+    ) -> Self | None:
         """
         Dispatch to Series.replace column-wise.
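
Splitting on `Literal[True]`/`Literal[False]` is the standard way to type an `inplace` flag: when the flag is a literal at the call site, the checker selects the precise overload instead of the pessimistic `Self | None` union. A generic sketch of the pattern with hypothetical names:

```python
from typing import Literal, Self, overload


class Frame:
    @overload
    def mutate(self, inplace: Literal[True]) -> None: ...
    @overload
    def mutate(self, inplace: Literal[False] = ...) -> Self: ...
    def mutate(self, inplace: bool = False) -> Self | None:
        if inplace:
            return None
        return self


f = Frame()
same = f.mutate()              # Self -- no None check needed
none = f.mutate(inplace=True)  # None
```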
@@ -5505,7 +5589,7 @@ def _replace_columnwise(
             res._iset_item(i, newobj, inplace=inplace)
 
         if inplace:
-            return
+            return None
         return res.__finalize__(self)
 
     @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"])
@@ -11815,19 +11899,19 @@ def kurt(
     product = prod
 
     @doc(make_doc("cummin", ndim=2))
-    def cummin(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
+    def cummin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
         return NDFrame.cummin(self, axis, skipna, *args, **kwargs)
 
     @doc(make_doc("cummax", ndim=2))
-    def cummax(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
+    def cummax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
         return NDFrame.cummax(self, axis, skipna, *args, **kwargs)
 
     @doc(make_doc("cumsum", ndim=2))
-    def cumsum(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
+    def cumsum(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
         return NDFrame.cumsum(self, axis, skipna, *args, **kwargs)
 
     @doc(make_doc("cumprod", 2))
-    def cumprod(self, axis: Axis | None = None, skipna: bool = True, *args, **kwargs):
+    def cumprod(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Self:
         return NDFrame.cumprod(self, axis, skipna, *args, **kwargs)
 
     def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series:
@@ -12710,8 +12794,12 @@ def values(self) -> np.ndarray:
         return self._mgr.as_array()
 
 
-def _from_nested_dict(data) -> collections.defaultdict:
-    new_data: collections.defaultdict = collections.defaultdict(dict)
+def _from_nested_dict(
+    data: Mapping[HashableT, Mapping[HashableT2, T]],
+) -> collections.defaultdict[HashableT2, dict[HashableT, T]]:
+    new_data: collections.defaultdict[
+        HashableT2, dict[HashableT, T]
+    ] = collections.defaultdict(dict)
     for index, s in data.items():
         for col, v in s.items():
             new_data[col][index] = v
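
The parameterized signature makes the key swap visible in the types: outer keys of the input become the inner dict keys of the result and vice versa, which is exactly the transposition that `from_dict(..., orient="index")` relies on. A hedged usage sketch:

```python
import pandas as pd

rows = {"r1": {"a": 1, "b": 2}, "r2": {"a": 3, "b": 4}}

# _from_nested_dict(rows) would yield the column-major layout
# {"a": {"r1": 1, "r2": 3}, "b": {"r1": 2, "r2": 4}},
# i.e. Mapping[str, Mapping[str, int]] -> defaultdict[str, dict[str, int]],
# matching the swapped HashableT/HashableT2 positions in the signature.
df = pd.DataFrame.from_dict(rows, orient="index")
```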