👽️ cluster: fix new stubtest errors (#334)

jorenham · Dec 16, 2024 · 04459ed
2 parents 299edd8 + 9ff46b2
commit 04459ed
Show file tree

Hide file tree

Showing 3 changed files with 139 additions and 72 deletions.
diff --git a/.mypyignore-todo b/.mypyignore-todo
@@ -1,6 +1,3 @@
-scipy\.cluster\.hierarchy\.ClusterNode\.__init__
-scipy\.cluster\.vq\.kmeans2?
-
 scipy\.fft\._basic_backend\.complex_funcs
 
 scipy\.fftpack\.(_pseudo_diffs\.)?diff

diff --git a/scipy-stubs/cluster/hierarchy.pyi b/scipy-stubs/cluster/hierarchy.pyi
@@ -1,10 +1,11 @@
 from collections.abc import Callable
 from types import ModuleType
-from typing import Any, Literal, TypeAlias, TypedDict, overload
+from typing import Any, Final, Literal, TypeAlias, TypedDict, overload, type_check_only
 from typing_extensions import TypeVar, override
 
 import numpy as np
 import optype.numpy as onp
+import optype.typing as opt
 from scipy._lib._disjoint_set import DisjointSet
 from scipy.spatial.distance import _MetricCallback, _MetricKind
 
@@ -45,14 +46,20 @@ __all__ = [
 
 _T = TypeVar("_T")
 _SCT = TypeVar("_SCT", bound=np.number[Any], default=np.float64)
-_LinkageMethod: TypeAlias = Literal["single", "complete", "average", "weighted", "centroid", "median", "ward"]
+
 _LinkageArray: TypeAlias = onp.Array2D[_SCT]
+_LinkageMethod: TypeAlias = Literal["single", "complete", "average", "weighted", "centroid", "median", "ward"]
 _ClusterCriterion: TypeAlias = Literal["inconsistent", "distance", "maxclust", "monocrit", "maxclust_monocrit"]
 _SortOrder: TypeAlias = Literal["ascending", "descending"]
+_TruncateMode: TypeAlias = Literal["lastp", "level"]
+_Orientation: TypeAlias = Literal["top", "bottom", "left", "right"]
+_Metric: TypeAlias = _MetricKind | _MetricCallback
 
-# for the lack of a better type
+# for the lack of better types
 _MatplotlibAxes: TypeAlias = object
+_ArrayAPINamespace: TypeAlias = ModuleType
 
+@type_check_only
 class _DendrogramResult(TypedDict):
     color_list: list[str]
     icoord: list[list[int]]
@@ -61,104 +68,147 @@ class _DendrogramResult(TypedDict):
     leaves: list[int] | None
     leaves_color_list: list[str]
 
-class ClusterWarning(UserWarning): ...
+###
 
-def int_floor(arr: onp.ToArrayND, xp: ModuleType) -> int: ...
-def single(y: onp.ToArrayND) -> _LinkageArray: ...
-def complete(y: onp.ToArrayND) -> _LinkageArray: ...
-def average(y: onp.ToArrayND) -> _LinkageArray: ...
-def weighted(y: onp.ToArrayND) -> _LinkageArray: ...
-def centroid(y: onp.ToArrayND) -> _LinkageArray: ...
-def median(y: onp.ToArrayND) -> _LinkageArray: ...
-def ward(y: onp.ToArrayND) -> _LinkageArray: ...
-def linkage(
-    y: onp.ToArrayND,
-    method: _LinkageMethod = "single",
-    metric: _MetricKind | _MetricCallback = "euclidean",
-    optimal_ordering: bool = False,
-) -> _LinkageArray[np.int_ | np.float64 | np.complex128]: ...
+class ClusterWarning(UserWarning): ...
 
+# NOTE: this can't be made generic, because mypy doesn't support cyclical generic types (classic mypy...)
 class ClusterNode:  # noqa: PLW1641
-    id: int
-    left: ClusterNode | None
-    right: ClusterNode | None
-    dist: float
-    count: int
+    id: Final[int]
+    left: Final[ClusterNode | None]
+    right: Final[ClusterNode | None]
+    dist: Final[float]
+    count: Final[int]
+
+    # NOTE: either both `left` and `right` are None, or both are `ClusterNode`
+    @overload
     def __init__(
-        self,
+        self: ClusterNode,
         /,
         id: int,
-        left: ClusterNode | None = None,
-        right: ClusterNode | None = None,
-        dist: float = 0,
+        left: None = None,
+        right: None = None,
+        dist: float = 0.0,
         count: int = 1,
     ) -> None: ...
-    def __lt__(self, node: ClusterNode, /) -> bool: ...
-    def __gt__(self, node: ClusterNode, /) -> bool: ...
+    @overload
+    def __init__(self, /, id: int, left: ClusterNode, right: ClusterNode, dist: float = 0, count: int = 1) -> None: ...
+
+    # NOTE: These raise a `ValueError` if passed anything other than `ClusterNode`
     @override
     def __eq__(self, node: ClusterNode, /) -> bool: ...  # type: ignore[override]  # pyright: ignore[reportIncompatibleMethodOverride]
+    def __lt__(self, node: ClusterNode, /) -> bool: ...
+    def __gt__(self, node: ClusterNode, /) -> bool: ...
+
+    # NOTE: These getters are basically redundant, as the attributes they (directly) return are public anyway
     def get_id(self, /) -> int: ...
     def get_count(self, /) -> int: ...
-    def get_left(self, /) -> ClusterNode: ...
-    def get_right(self, /) -> ClusterNode: ...
+    def get_left(self, /) -> ClusterNode | None: ...
+    def get_right(self, /) -> ClusterNode | None: ...
+
+    # NOTE: True iff `left` (and therefore `right`) is `None`
     def is_leaf(self, /) -> bool: ...
+
+    # NOTE: `func` defaults to `(x) -> x.id`
     @overload
     def pre_order(self, /, func: Callable[[ClusterNode], int] = ...) -> list[int]: ...
     @overload
     def pre_order(self, /, func: Callable[[ClusterNode], _T]) -> list[_T]: ...
 
+#
+def int_floor(arr: onp.ToArrayND, xp: _ArrayAPINamespace) -> int: ...
+
+#
+def single(y: onp.ToArrayND) -> _LinkageArray: ...
+def complete(y: onp.ToArrayND) -> _LinkageArray: ...
+def average(y: onp.ToArrayND) -> _LinkageArray: ...
+def weighted(y: onp.ToArrayND) -> _LinkageArray: ...
+def centroid(y: onp.ToArrayND) -> _LinkageArray: ...
+def median(y: onp.ToArrayND) -> _LinkageArray: ...
+def ward(y: onp.ToArrayND) -> _LinkageArray: ...
+def linkage(
+    y: onp.ToArrayND,
+    method: _LinkageMethod = "single",
+    metric: _Metric = "euclidean",
+    optimal_ordering: bool = False,
+) -> _LinkageArray[np.int_ | np.float64 | np.complex128]: ...
+
+#
 def cut_tree(
     Z: onp.ToArray2D,
     n_clusters: onp.ToInt1D | None = None,
     height: onp.ToFloat1D | None = None,
 ) -> onp.Array2D[np.int64]: ...
+
+#
 @overload
 def to_tree(Z: onp.ToArray2D, rd: Literal[False] = False) -> ClusterNode: ...
 @overload
 def to_tree(Z: onp.ToArray2D, rd: Literal[True]) -> tuple[ClusterNode, list[ClusterNode]]: ...
-def optimal_leaf_ordering(
-    Z: onp.ToArray2D,
-    y: onp.ToArrayND,
-    metric: _MetricKind | _MetricCallback = "euclidean",
-) -> _LinkageArray: ...
+
+#
+def optimal_leaf_ordering(Z: onp.ToArray2D, y: onp.ToArrayND, metric: _Metric = "euclidean") -> _LinkageArray: ...
+
+#
 @overload
 def cophenet(Z: onp.ToArray2D, Y: None = None) -> onp.Array1D[np.float64]: ...
 @overload
 def cophenet(Z: onp.ToArray2D, Y: onp.ToArrayND) -> tuple[onp.Array1D[np.float64], onp.Array1D[np.float64]]: ...
+
+#
 def inconsistent(Z: onp.ToArray2D, d: int = 2) -> _LinkageArray: ...
+
+#
 def from_mlab_linkage(Z: onp.ToArray2D) -> _LinkageArray: ...
 def to_mlab_linkage(Z: onp.ToArray2D) -> _LinkageArray: ...
+
+#
 def is_monotonic(Z: onp.ToArray2D) -> bool: ...
 def is_valid_im(R: onp.ToArrayND, warning: bool = False, throw: bool = False, name: str | None = None) -> bool: ...
 def is_valid_linkage(Z: onp.ToArray2D, warning: bool = False, throw: bool = False, name: str | None = None) -> bool: ...
+def is_isomorphic(T1: onp.ToArrayND, T2: onp.ToArrayND) -> bool: ...
+
+#
 def num_obs_linkage(Z: onp.ToArray2D) -> int: ...
+
+#
 def correspond(Z: onp.ToArray2D, Y: onp.ToArrayND) -> bool: ...
+
+#
 def fcluster(
     Z: onp.ToArray2D,
-    t: float | np.floating[Any] | np.integer[Any],
+    t: onp.ToFloat,
     criterion: _ClusterCriterion = "inconsistent",
-    depth: int = 2,
+    depth: opt.JustInt = 2,
     R: onp.ToArrayND | None = None,
     monocrit: onp.ToArrayND | None = None,
 ) -> onp.Array1D[np.int32]: ...
+
+#
 def fclusterdata(
     X: onp.ToArrayND,
-    t: float | np.floating[Any] | np.integer[Any],
+    t: onp.ToFloat,
     criterion: _ClusterCriterion = "inconsistent",
-    metric: _MetricKind | _MetricCallback = "euclidean",
-    depth: int = 2,
+    metric: _Metric = "euclidean",
+    depth: opt.JustInt = 2,
     method: _LinkageMethod = "single",
     R: onp.ToArrayND | None = None,
 ) -> onp.Array1D[np.int32]: ...
+
+#
 def leaves_list(Z: onp.ToArray2D) -> onp.Array1D[np.int32]: ...
+
+#
 def set_link_color_palette(palette: list[str] | tuple[str, ...] | None) -> None: ...
+
+#
 def dendrogram(
     Z: onp.ToArray2D,
     p: int = 30,
-    truncate_mode: Literal["lastp", "level"] | None = None,
+    truncate_mode: _TruncateMode | None = None,
     color_threshold: float | np.floating[Any] | None = None,
     get_leaves: bool = True,
-    orientation: Literal["top", "bottom", "left", "right"] = "top",
+    orientation: _Orientation = "top",
     labels: onp.ToArrayND | None = None,
     count_sort: _SortOrder | bool = False,
     distance_sort: _SortOrder | bool = False,
@@ -173,7 +223,8 @@ def dendrogram(
     ax: _MatplotlibAxes | None = None,
     above_threshold_color: str = "C0",
 ) -> _DendrogramResult: ...
-def is_isomorphic(T1: onp.ToArrayND, T2: onp.ToArrayND) -> bool: ...
+
+#
 def maxdists(Z: onp.ToArray2D) -> onp.Array1D[np.float64]: ...
 def maxinconsts(Z: onp.ToArray2D, R: onp.ToArrayND) -> onp.Array1D[np.float64]: ...
 def maxRstat(Z: onp.ToArray2D, R: onp.ToArrayND, i: int) -> onp.Array1D[np.float64]: ...

diff --git a/scipy-stubs/cluster/vq.pyi b/scipy-stubs/cluster/vq.pyi
@@ -1,83 +1,102 @@
-from typing import Any, Literal, overload
+from typing import Any, Literal, TypeAlias, overload
 from typing_extensions import TypeVar
 
 import numpy as np
 import optype.numpy as onp
-from scipy._typing import ToRNG as Seed  # TODO(jorenham)
+from scipy._typing import ToRNG
 
 __all__ = ["kmeans", "kmeans2", "vq", "whiten"]
 
-_SCT_fc = TypeVar("_SCT_fc", bound=np.inexact[Any])
+_InitMethod: TypeAlias = Literal["random", "points", "++", "matrix"]
+_MissingMethod: TypeAlias = Literal["warn", "raise"]
+
+_Floating: TypeAlias = np.floating[Any]
+_Inexact: TypeAlias = np.inexact[Any]
+
+_InexactT = TypeVar("_InexactT", bound=_Inexact)
 
 ###
 
 class ClusterError(Exception): ...
 
+# TODO(jorenham): Array API support
 @overload
-def whiten(obs: onp.ArrayND[_SCT_fc], check_finite: bool = True) -> onp.Array2D[_SCT_fc]: ...
+def whiten(obs: onp.ArrayND[np.bool_ | np.integer[Any]], check_finite: bool = True) -> onp.Array2D[np.float64]: ...
 @overload
-def whiten(obs: onp.ToFloat2D, check_finite: bool = True) -> onp.Array2D[np.floating[Any]]: ...
-@overload
-def whiten(obs: onp.ToComplex2D, check_finite: bool = True) -> onp.Array2D[np.inexact[Any]]: ...
+def whiten(obs: onp.ArrayND[_InexactT], check_finite: bool = True) -> onp.Array2D[_InexactT]: ...
 
 #
+@overload
+def vq(
+    obs: onp.ToFloat2D,
+    code_book: onp.ToFloat2D,
+    check_finite: bool = True,
+) -> tuple[onp.Array1D[np.int32 | np.intp], onp.Array1D[_Floating]]: ...
+@overload
 def vq(
     obs: onp.ToComplex2D,
     code_book: onp.ToComplex2D,
     check_finite: bool = True,
-) -> tuple[onp.Array1D[np.int32 | np.intp], onp.Array1D[_SCT_fc]]: ...
+) -> tuple[onp.Array1D[np.int32 | np.intp], onp.Array1D[_Inexact]]: ...
 
 #
+@overload
+def py_vq(
+    obs: onp.ToFloat2D,
+    code_book: onp.ToFloat2D,
+    check_finite: bool = True,
+) -> tuple[onp.Array1D[np.intp], onp.Array1D[_Floating]]: ...
+@overload
 def py_vq(
     obs: onp.ToComplex2D,
     code_book: onp.ToComplex2D,
     check_finite: bool = True,
-) -> tuple[onp.Array1D[np.intp], onp.Array1D[_SCT_fc]]: ...
+) -> tuple[onp.Array1D[np.intp], onp.Array1D[_Inexact]]: ...
 
 #
 @overload  # real
 def kmeans(
     obs: onp.ToFloat2D,
     k_or_guess: onp.ToJustInt | onp.ToFloatND,
     iter: int = 20,
-    thresh: float = 1e-05,
+    thresh: float = 1e-5,
     check_finite: bool = True,
     *,
-    seed: Seed | None = None,
-) -> tuple[onp.Array2D[np.floating[Any]], float]: ...
+    rng: ToRNG = None,
+) -> tuple[onp.Array2D[_Floating], float]: ...
 @overload  # complex
 def kmeans(
     obs: onp.ToComplex2D,
     k_or_guess: onp.ToJustInt | onp.ToFloatND,
     iter: int = 20,
-    thresh: float = 1e-05,
+    thresh: float = 1e-5,
     check_finite: bool = True,
     *,
-    seed: Seed | None = None,
-) -> tuple[onp.Array2D[np.inexact[Any]], float]: ...
+    rng: ToRNG = None,
+) -> tuple[onp.Array2D[_Inexact], float]: ...
 
 #
 @overload  # real
 def kmeans2(
     data: onp.ToFloat1D | onp.ToFloat2D,
     k: onp.ToJustInt | onp.ToFloatND,
     iter: int = 10,
-    thresh: float = 1e-05,
-    minit: Literal["random", "points", "++", "matrix"] = "random",
-    missing: Literal["warn", "raise"] = "warn",
+    thresh: float = 1e-5,
+    minit: _InitMethod = "random",
+    missing: _MissingMethod = "warn",
     check_finite: bool = True,
     *,
-    seed: Seed | None = None,
-) -> tuple[onp.Array2D[np.floating[Any]], onp.Array1D[np.int32]]: ...
+    rng: ToRNG = None,
+) -> tuple[onp.Array2D[_Floating], onp.Array1D[np.int32]]: ...
 @overload  # complex
 def kmeans2(
     data: onp.ToComplex1D | onp.ToComplex2D,
     k: onp.ToJustInt | onp.ToFloatND,
     iter: int = 10,
-    thresh: float = 1e-05,
-    minit: Literal["random", "points", "++", "matrix"] = "random",
-    missing: Literal["warn", "raise"] = "warn",
+    thresh: float = 1e-5,
+    minit: _InitMethod = "random",
+    missing: _MissingMethod = "warn",
     check_finite: bool = True,
     *,
-    seed: Seed | None = None,
-) -> tuple[onp.Array2D[np.inexact[Any]], onp.Array1D[np.int32]]: ...
+    rng: ToRNG = None,
+) -> tuple[onp.Array2D[_Inexact], onp.Array1D[np.int32]]: ...