pandas-dev · twoertwein · Dec 15, 2021 · Feb 19, 2022 · Feb 19, 2022 · Feb 20, 2022
@@ -0,0 +1,120 @@
+from datetime import timedelta
+import numbers
+from typing import (
+    Any,
+    Generic,
+    TypeVar,
+    overload,
+)
+
+import numpy as np
+import numpy.typing as npt
+
+from pandas._typing import IntervalBound
+
+from pandas import (
+    Timedelta,
+    Timestamp,
+)
+
+# Constrained type variable for the bound type of IntervalMixin: one of the
+# scalar types (int, float, Timestamp, Timedelta) or a NumPy array.
+_OrderableMixinT = TypeVar(
+    "_OrderableMixinT", int, float, Timestamp, Timedelta, npt.NDArray[np.generic]
+)
+# Scalar-only counterpart (no NumPy array); used to parametrize Interval.
+_OrderableT = TypeVar("_OrderableT", int, float, Timestamp, Timedelta)
+
+# note: mypy doesn't support overloading properties
+# based on github.com/microsoft/python-type-stubs/pull/167
+#
+# Descriptor used to type the ``length`` attribute of IntervalMixin. The
+# overloads on ``__get__`` make the result type depend on the type parameter
+# of the instance the attribute is read from.
+class _LengthProperty:
+    # Timestamp-bounded intervals: the length is typed as a Timedelta.
+    @overload
+    def __get__(self, instance: IntervalMixin[Timestamp], owner: Any) -> Timedelta: ...
+    # Every other bound type: the length is typed the same as the bounds.
+    @overload
+    def __get__(
+        self, instance: IntervalMixin[_OrderableMixinT], owner: Any
+    ) -> _OrderableMixinT: ...
+
+# Typed interface shared by interval-like objects, generic over the type of
+# the interval bounds (_OrderableMixinT).
+class IntervalMixin(Generic[_OrderableMixinT]):
+    # boolean flags for the left/right endpoints
+    @property
+    def closed_left(self) -> bool: ...
+    @property
+    def closed_right(self) -> bool: ...
+    @property
+    def open_left(self) -> bool: ...
+    @property
+    def open_right(self) -> bool: ...
+    # Annotated with the class's own type variable so the result type follows
+    # the bound type; a type variable that is not bound by this class (or by
+    # an argument) cannot be resolved by the type checker.
+    # NOTE(review): for int bounds the runtime midpoint may be a float -- TODO
+    # confirm whether a more precise annotation is wanted.
+    @property
+    def mid(self) -> _OrderableMixinT: ...
+    # typed through a descriptor because mypy cannot overload properties;
+    # see _LengthProperty for the per-type result
+    length: _LengthProperty
+    @property
+    def is_empty(self) -> bool: ...
+    def _check_closed_matches(self, other: IntervalMixin, name: str = ...) -> None: ...
+
+# Interval whose bounds are scalars of type _OrderableT.
+class Interval(IntervalMixin[_OrderableT]):
+    def __init__(
+        self,
+        left: _OrderableT,
+        right: _OrderableT,
+        closed: IntervalBound = ...,
+    ) -> None: ...
+    # Same Literal type as the ``closed`` constructor argument (rather than a
+    # plain str), so the value can be passed on to other APIs that are
+    # annotated with IntervalBound.
+    @property
+    def closed(self) -> IntervalBound: ...
+    # both bounds share the interval's type parameter
+    @property
+    def left(self) -> _OrderableT: ...
+    @property
+    def right(self) -> _OrderableT: ...
+    def __str__(self) -> str: ...
+    # Arithmetic operators: all are annotated as returning an Interval with
+    # the same type parameter as ``self``.
+    # TODO: could return Interval with different type
+    def __add__(
+        self, y: numbers.Number | np.timedelta64 | timedelta
+    ) -> Interval[_OrderableT]: ...
+    def __radd__(
+        self, y: numbers.Number | np.timedelta64 | timedelta
+    ) -> Interval[_OrderableT]: ...
+    def __sub__(
+        self, y: numbers.Number | np.timedelta64 | timedelta
+    ) -> Interval[_OrderableT]: ...
+    # multiplication and division accept numbers only
+    def __mul__(self, y: numbers.Number) -> Interval[_OrderableT]: ...
+    def __rmul__(self, y: numbers.Number) -> Interval[_OrderableT]: ...
+    def __truediv__(self, y: numbers.Number) -> Interval[_OrderableT]: ...
+    def __floordiv__(self, y: numbers.Number) -> Interval[_OrderableT]: ...
+    def __hash__(self) -> int: ...
+    # ``other`` must have the same type parameter as ``self``
+    def overlaps(self, other: Interval[_OrderableT]) -> bool: ...
+
+# module-level constant, typed as a frozenset of strings
+VALID_CLOSED: frozenset[str]
+
+# Takes npt.NDArray[Interval[_OrderableT]] and returns arrays of type
+# _OrderableT, but _OrderableT is not a valid dtype, so the input is typed as
+# an object array and the two returned arrays as plain np.ndarray.
+def intervals_to_interval_bounds(
+    intervals: npt.NDArray[np.object_], validate_closed: bool = ...
+) -> tuple[np.ndarray, np.ndarray, str]: ...
+
+# from pandas/_libs/intervaltree.pxi.in
+# Type variable for the NumPy scalar type of the arrays held by IntervalTree.
+_GenericT = TypeVar("_GenericT", bound=np.generic)
+
+# IntervalTree is generic over the scalar type of its bound arrays and
+# parametrizes IntervalMixin with the matching array type, which mypy rejects
+# (hence the ignore below):
+# error: Value of type variable "_OrderableMixinT" of "IntervalMixin"
+# cannot be "ndarray"
+class IntervalTree(
+    Generic[_GenericT],
+    IntervalMixin[npt.NDArray[_GenericT]],  # type: ignore[type-var]
+):
+    _na_count: int
+    # ``left`` and ``right`` must be arrays of the same scalar type
+    def __init__(
+        self,
+        left: npt.NDArray[_GenericT],
+        right: npt.NDArray[_GenericT],
+        closed: IntervalBound = ...,
+        leaf_size: int = ...,
+    ) -> None: ...
+    # NOTE(review): the names suggest these hold sorting indices; if so
+    # npt.NDArray[np.intp] would be more precise -- TODO confirm against the
+    # implementation.
+    @property
+    def left_sorter(self) -> npt.NDArray[_GenericT]: ...
+    @property
+    def right_sorter(self) -> npt.NDArray[_GenericT]: ...
+    @property
+    def is_overlapping(self) -> bool: ...
+    @property
+    def is_monotonic_increasing(self) -> bool: ...
+    # indexer results are typed as platform-integer (np.intp) arrays
+    def get_indexer(self, target: np.ndarray) -> npt.NDArray[np.intp]: ...
+    def get_indexer_non_unique(
+        self, target: np.ndarray
+    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: ...
+    def __repr__(self) -> str: ...
+    def clear_mapping(self) -> None: ...
@@ -83,10 +83,9 @@
 PythonScalar = Union[str, int, float, bool]
 DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
 PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
-Scalar = Union[PythonScalar, PandasScalar]
+Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64]
 IntStrT = TypeVar("IntStrT", int, str)
 
-
 # timestamp and timedelta convertible types
 
 TimestampConvertibleTypes = Union[
@@ -304,3 +303,6 @@ def closed(self) -> bool:
 
 # read_xml parsers
 XMLParsers = Literal["lxml", "etree"]
+
+# on which side(s) Interval is closed
+IntervalBound = Literal["left", "right", "both", "neither"]
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -759,7 +759,7 @@ def factorize(
     else:
         dtype = values.dtype
         values = _ensure_data(values)
-        na_value: Scalar
+        na_value: Scalar | None
 
         if original.dtype.kind in ["m", "M"]:
             # Note: factorize_array will cast NaT bc it has a __int__

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -29,6 +29,7 @@
 from pandas._typing import (
     ArrayLike,
     Dtype,
+    IntervalBound,
     NpDtype,
     PositionalIndexer,
     ScalarIndexer,
@@ -196,6 +197,9 @@ class IntervalArray(IntervalMixin, ExtensionArray):
     ndim = 1
     can_hold_na = True
     _na_value = _fill_value = np.nan
+    _left: np.ndarray
+    _right: np.ndarray
+    _dtype: IntervalDtype
 
     # ---------------------------------------------------------------------
     # Constructors
@@ -657,11 +661,7 @@ def __getitem__(
             if is_scalar(left) and isna(left):
                 return self._fill_value
             return Interval(left, right, self.closed)
-        # error: Argument 1 to "ndim" has incompatible type "Union[ndarray,
-        # ExtensionArray]"; expected "Union[Union[int, float, complex, str, bytes,
-        # generic], Sequence[Union[int, float, complex, str, bytes, generic]],
-        # Sequence[Sequence[Any]], _SupportsArray]"
-        if np.ndim(left) > 1:  # type: ignore[arg-type]
+        if np.ndim(left) > 1:
             # GH#30588 multi-dimensional indexer disallowed
             raise ValueError("multi-dimensional indexing not allowed")
         return self._shallow_copy(left, right)
@@ -945,10 +945,10 @@ def _concat_same_type(
         -------
         IntervalArray
         """
-        closed = {interval.closed for interval in to_concat}
-        if len(closed) != 1:
+        closed_set = {interval.closed for interval in to_concat}
+        if len(closed_set) != 1:
             raise ValueError("Intervals must all be closed on the same side.")
-        closed = closed.pop()
+        closed = closed_set.pop()
 
         left = np.concatenate([interval.left for interval in to_concat])
         right = np.concatenate([interval.right for interval in to_concat])
@@ -1317,7 +1317,7 @@ def overlaps(self, other):
     # ---------------------------------------------------------------------
 
     @property
-    def closed(self):
+    def closed(self) -> IntervalBound:
         """
         Whether the intervals are closed on the left-side, right-side, both or
         neither.
@@ -1665,8 +1665,14 @@ def _from_combined(self, combined: np.ndarray) -> IntervalArray:
 
         dtype = self._left.dtype
         if needs_i8_conversion(dtype):
-            new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
-            new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
+            # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
+            new_left = type(self._left)._from_sequence(  # type: ignore[attr-defined]
+                nc[:, 0], dtype=dtype
+            )
+            # error: "Type[ndarray[Any, Any]]" has no attribute "_from_sequence"
+            new_right = type(self._right)._from_sequence(  # type: ignore[attr-defined]
+                nc[:, 1], dtype=dtype
+            )
         else:
             new_left = nc[:, 0].view(dtype)
             new_right = nc[:, 1].view(dtype)

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -336,7 +336,7 @@ def to_numpy(
         self,
         dtype: npt.DTypeLike | None = None,
         copy: bool = False,
-        na_value: Scalar = lib.no_default,
+        na_value: Scalar | libmissing.NAType | lib.NoDefault = lib.no_default,
     ) -> np.ndarray:
         """
         Convert to a NumPy Array.

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
@@ -762,7 +762,7 @@ def _str_replace(
         return type(self)(result)
 
     def _str_match(
-        self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
+        self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None
     ):
         if pa_version_under4p0:
             return super()._str_match(pat, case, flags, na)
@@ -771,7 +771,9 @@ def _str_match(
             pat = "^" + pat
         return self._str_contains(pat, case, flags, na, regex=True)
 
-    def _str_fullmatch(self, pat, case: bool = True, flags: int = 0, na: Scalar = None):
+    def _str_fullmatch(
+        self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None
+    ):
         if pa_version_under4p0:
             return super()._str_fullmatch(pat, case, flags, na)
 

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -35,7 +35,6 @@
     ArrayLike,
     NpDtype,
     RandomState,
-    Scalar,
     T,
 )
 from pandas.util._exceptions import find_stack_level
@@ -517,7 +516,7 @@ def f(x):
 
 
 def convert_to_list_like(
-    values: Scalar | Iterable | AnyArrayLike,
+    values: Hashable | Iterable | AnyArrayLike,
 ) -> list | AnyArrayLike:
     """
     Convert list-like or scalar input to list-like. List, numpy and pandas array-like

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -9,6 +9,7 @@
 from typing import (
     Any,
     Hashable,
+    Literal,
 )
 
 import numpy as np
@@ -28,6 +29,7 @@
 from pandas._typing import (
     Dtype,
     DtypeObj,
+    IntervalBound,
     npt,
 )
 from pandas.errors import InvalidIndexError
@@ -191,10 +193,12 @@ class IntervalIndex(ExtensionIndex):
     _typ = "intervalindex"
 
     # annotate properties pinned via inherit_names
-    closed: str
+    closed: IntervalBound
     is_non_overlapping_monotonic: bool
     closed_left: bool
     closed_right: bool
+    open_left: bool
+    open_right: bool
 
     _data: IntervalArray
     _values: IntervalArray
@@ -246,7 +250,7 @@ def __new__(
     def from_breaks(
         cls,
         breaks,
-        closed: str = "right",
+        closed: IntervalBound = "right",
         name: Hashable = None,
         copy: bool = False,
         dtype: Dtype | None = None,
@@ -277,7 +281,7 @@ def from_arrays(
         cls,
         left,
         right,
-        closed: str = "right",
+        closed: IntervalBound = "right",
         name: Hashable = None,
         copy: bool = False,
         dtype: Dtype | None = None,
@@ -307,7 +311,7 @@ def from_arrays(
     def from_tuples(
         cls,
         data,
-        closed: str = "right",
+        closed: IntervalBound = "right",
         name: Hashable = None,
         copy: bool = False,
         dtype: Dtype | None = None,
@@ -318,8 +322,10 @@ def from_tuples(
 
     # --------------------------------------------------------------------
 
+    # error: Return type "IntervalTree[Any]" of "_engine" incompatible with return type
+    # "IndexEngine" in supertype "Index"
     @cache_readonly
-    def _engine(self) -> IntervalTree:
+    def _engine(self) -> IntervalTree:  # type: ignore[override]
         left = self._maybe_convert_i8(self.left)
         right = self._maybe_convert_i8(self.right)
         return IntervalTree(left, right, closed=self.closed)
@@ -511,7 +517,10 @@ def _maybe_convert_i8(self, key):
             left = self._maybe_convert_i8(key.left)
             right = self._maybe_convert_i8(key.right)
             constructor = Interval if scalar else IntervalIndex.from_arrays
-            return constructor(left, right, closed=self.closed)
+            # error: "object" not callable
+            return constructor(  # type: ignore[operator]
+                left, right, closed=self.closed
+            )
 
         if scalar:
             # Timestamp/Timedelta
@@ -543,7 +552,7 @@ def _maybe_convert_i8(self, key):
 
         return key_i8
 
-    def _searchsorted_monotonic(self, label, side: str = "left"):
+    def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
         if not self.is_non_overlapping_monotonic:
             raise KeyError(
                 "can only get slices from an IntervalIndex if bounds are "
@@ -663,7 +672,9 @@ def _get_indexer(
             # homogeneous scalar index: use IntervalTree
             # we should always have self._should_partial_index(target) here
             target = self._maybe_convert_i8(target)
-            indexer = self._engine.get_indexer(target.values)
+            # error: Argument 1 to "get_indexer" of "IntervalTree" has incompatible type
+            # "Union[ExtensionArray, ndarray[Any, Any]]"; expected "ndarray[Any, Any]"
+            indexer = self._engine.get_indexer(target.values)  # type: ignore[arg-type]
         else:
             # heterogeneous scalar index: defer elementwise to get_loc
             # we should always have self._should_partial_index(target) here
@@ -698,7 +709,12 @@ def get_indexer_non_unique(
             # Note: this case behaves differently from other Index subclasses
             #  because IntervalIndex does partial-int indexing
             target = self._maybe_convert_i8(target)
-            indexer, missing = self._engine.get_indexer_non_unique(target.values)
+            # error: Argument 1 to "get_indexer_non_unique" of "IntervalTree" has
+            # incompatible type "Union[ExtensionArray, ndarray[Any, Any]]"; expected
+            # "ndarray[Any, Any]"  [arg-type]
+            indexer, missing = self._engine.get_indexer_non_unique(
+                target.values  # type: ignore[arg-type]
+            )
 
         return ensure_platform_int(indexer), ensure_platform_int(missing)
 
@@ -941,7 +957,12 @@ def _is_type_compatible(a, b) -> bool:
 
 
 def interval_range(
-    start=None, end=None, periods=None, freq=None, name: Hashable = None, closed="right"
+    start=None,
+    end=None,
+    periods=None,
+    freq=None,
+    name: Hashable = None,
+    closed: IntervalBound = "right",
 ) -> IntervalIndex:
     """
     Return a fixed frequency IntervalIndex.