diff --git a/src/safeds/_utils/__init__.py b/src/safeds/_utils/__init__.py new file mode 100644 index 000000000..78dbcf575 --- /dev/null +++ b/src/safeds/_utils/__init__.py @@ -0,0 +1,7 @@ +"""Utilities for Safe-DS.""" + +from ._hashing import _structural_hash + +__all__ = [ + "_structural_hash", +] diff --git a/src/safeds/_utils/_hashing.py b/src/safeds/_utils/_hashing.py new file mode 100644 index 000000000..fd336ebe1 --- /dev/null +++ b/src/safeds/_utils/_hashing.py @@ -0,0 +1,69 @@ +import functools +import operator +import struct +from typing import Any + +import xxhash + + +def _structural_hash(*value: Any) -> int: + """ + Calculate a deterministic hash value, based on the provided values. + + Parameters + ---------- + value + Variable amount of values to hash + + Returns + ------- + hash + Deterministic hash value + """ + return xxhash.xxh3_64(_value_to_bytes(value)).intdigest() + + +def _value_to_bytes(value: Any) -> bytes: + """ + Convert any value to a deterministically hashable representation. + + Parameters + ---------- + value + Object to convert to a byte representation for deterministic structural hashing + + Returns + ------- + bytes + Byte representation of the provided value + """ + if value is None: + return b"\0" + elif isinstance(value, bytes): + return value + elif isinstance(value, bool): + return b"\1" if value else b"\0" + elif isinstance(value, int) and value < 0: + return value.to_bytes(8, signed=True) + elif isinstance(value, int) and value >= 0: + return value.to_bytes(8) + elif isinstance(value, str): + return value.encode("utf-8") + elif isinstance(value, float): + return struct.pack("d", value) + elif isinstance(value, list | tuple): + return functools.reduce(operator.add, [_value_to_bytes(entry) for entry in value], len(value).to_bytes(8)) + elif isinstance(value, frozenset | set): + return functools.reduce( + operator.add, + sorted([_value_to_bytes(entry) for entry in value]), + len(value).to_bytes(8), + ) + elif isinstance(value, dict): + return functools.reduce( + operator.add, + sorted([_value_to_bytes(key) + _value_to_bytes(entry) for key, entry in value.items()]), + len(value).to_bytes(8), + ) + else: + return _value_to_bytes(hash(value)) diff --git a/src/safeds/data/image/containers/_image.py b/src/safeds/data/image/containers/_image.py index e97b56b75..c00e1a13c 100644 --- a/src/safeds/data/image/containers/_image.py +++ b/src/safeds/data/image/containers/_image.py @@ -8,11 +8,11 @@ import torch import torch.nn.functional as func -import xxhash from PIL.Image import open as pil_image_open from torch import Tensor from safeds._config import _get_device +from safeds._utils import _structural_hash if TYPE_CHECKING: from torch.types import Device @@ -119,7 +119,7 @@ def __hash__(self) -> int: hash : int The hash value. 
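# A minimal usage sketch of the _structural_hash utility introduced above (not part of the
# diff). It hashes the byte encoding from _value_to_bytes with xxhash.xxh3_64, so equal values
# give the same digest across processes, unlike built-in hash(), whose str hashing is salted
# per run. Objects without a dedicated branch fall back to hash(value) and are therefore only
# as deterministic as their own __hash__.
from safeds._utils import _structural_hash
from safeds._utils._hashing import _value_to_bytes

# Same inputs, same digest -- in this process and in any other.
assert _structural_hash("a", 1, [1.0, 2.0]) == _structural_hash("a", 1, [1.0, 2.0])

# Lists and tuples share one byte encoding (length prefix + entries), so they hash identically.
assert _structural_hash([1, "2", 3.0]) == _structural_hash((1, "2", 3.0))

# Sets are serialised from their sorted byte representations, so insertion order is irrelevant.
assert _structural_hash({1, 2, 3}) == _structural_hash({3, 2, 1})

# Integers are encoded as 8 bytes, big-endian; negatives use two's complement.
assert _value_to_bytes(-42) == (-42).to_bytes(8, signed=True)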
""" - return xxhash.xxh3_64(self.width.to_bytes(8) + self.height.to_bytes(8) + self.channel.to_bytes(8)).intdigest() + return _structural_hash(self.width, self.height, self.channel) def __sizeof__(self) -> int: """ @@ -301,10 +301,12 @@ def convert_to_grayscale(self) -> Image: """ if self.channel == 4: return Image( - torch.cat([ - func2.rgb_to_grayscale(self._image_tensor[0:3], num_output_channels=3), - self._image_tensor[3].unsqueeze(dim=0), - ]), + torch.cat( + [ + func2.rgb_to_grayscale(self._image_tensor[0:3], num_output_channels=3), + self._image_tensor[3].unsqueeze(dim=0), + ], + ), device=self.device, ) else: @@ -391,10 +393,12 @@ def adjust_brightness(self, factor: float) -> Image: ) if self.channel == 4: return Image( - torch.cat([ - func2.adjust_brightness(self._image_tensor[0:3], factor * 1.0), - self._image_tensor[3].unsqueeze(dim=0), - ]), + torch.cat( + [ + func2.adjust_brightness(self._image_tensor[0:3], factor * 1.0), + self._image_tensor[3].unsqueeze(dim=0), + ], + ), device=self.device, ) else: @@ -462,10 +466,12 @@ def adjust_contrast(self, factor: float) -> Image: ) if self.channel == 4: return Image( - torch.cat([ - func2.adjust_contrast(self._image_tensor[0:3], factor * 1.0), - self._image_tensor[3].unsqueeze(dim=0), - ]), + torch.cat( + [ + func2.adjust_contrast(self._image_tensor[0:3], factor * 1.0), + self._image_tensor[3].unsqueeze(dim=0), + ], + ), device=self.device, ) else: @@ -562,10 +568,12 @@ def sharpen(self, factor: float) -> Image: ) if self.channel == 4: return Image( - torch.cat([ - func2.adjust_sharpness(self._image_tensor[0:3], factor * 1.0), - self._image_tensor[3].unsqueeze(dim=0), - ]), + torch.cat( + [ + func2.adjust_sharpness(self._image_tensor[0:3], factor * 1.0), + self._image_tensor[3].unsqueeze(dim=0), + ], + ), device=self.device, ) else: diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 8eae640e6..41ce0962c 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -10,8 +10,8 @@ import numpy as np import pandas as pd import seaborn as sns -import xxhash +from safeds._utils import _structural_hash from safeds.data.image.containers import Image from safeds.data.tabular.typing import ColumnType from safeds.exceptions import ( @@ -201,7 +201,7 @@ def __hash__(self) -> int: hash : int The hash value. """ - return xxhash.xxh3_64(self.name.encode("utf-8") + self.type.__repr__().encode("utf-8") + self.number_of_rows.to_bytes(8)).intdigest() + return _structural_hash(self.name, self.type.__repr__(), self.number_of_rows) def __iter__(self) -> Iterator[T]: r""" diff --git a/src/safeds/data/tabular/containers/_row.py b/src/safeds/data/tabular/containers/_row.py index 50c670b49..57d095b96 100644 --- a/src/safeds/data/tabular/containers/_row.py +++ b/src/safeds/data/tabular/containers/_row.py @@ -1,14 +1,13 @@ from __future__ import annotations -import sys import functools -import operator +import sys from collections.abc import Callable, Mapping from typing import TYPE_CHECKING, Any import pandas as pd -import xxhash +from safeds._utils import _structural_hash from safeds.data.tabular.typing import ColumnType, Schema from safeds.exceptions import UnknownColumnNameError @@ -227,7 +226,7 @@ def __hash__(self) -> int: hash : int The hash value. 
""" - return xxhash.xxh3_64(hash(self._schema).to_bytes(8) + functools.reduce(operator.add, [xxhash.xxh3_64(str(self.get_value(value))).intdigest().to_bytes(8) for value in self], b"\0")).intdigest() + return _structural_hash(self._schema, [str(self.get_value(value)) for value in self]) def __iter__(self) -> Iterator[Any]: """ diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 53281af15..1f2c43542 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -14,11 +14,11 @@ import pandas as pd import seaborn as sns import torch -import xxhash from pandas import DataFrame from scipy import stats from torch.utils.data import DataLoader, Dataset +from safeds._utils import _structural_hash from safeds.data.image.containers import Image from safeds.data.tabular.typing import ColumnType, Schema from safeds.exceptions import ( @@ -469,7 +469,7 @@ def __hash__(self) -> int: hash : int The hash value. """ - return xxhash.xxh3_64(hash(self._schema).to_bytes(8) + self.number_of_rows.to_bytes(8)).intdigest() + return _structural_hash(self._schema, self.number_of_rows) def __repr__(self) -> str: r""" diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index c5b72e591..ca465a39e 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -5,9 +5,9 @@ import numpy as np import torch -import xxhash from torch.utils.data import DataLoader, Dataset +from safeds._utils import _structural_hash from safeds.data.tabular.containers import Column, Row, Table from safeds.exceptions import ( ColumnIsTargetError, @@ -193,9 +193,7 @@ def __hash__(self) -> int: hash : int The hash value. """ - return xxhash.xxh3_64( - hash(self.target).to_bytes(8) + hash(self.features).to_bytes(8) + Table.__hash__(self).to_bytes(8), - ).intdigest() + return _structural_hash(self.target, self.features, Table.__hash__(self)) def __sizeof__(self) -> int: """ diff --git a/src/safeds/data/tabular/containers/_time_series.py b/src/safeds/data/tabular/containers/_time_series.py index a26836b61..9759e5d01 100644 --- a/src/safeds/data/tabular/containers/_time_series.py +++ b/src/safeds/data/tabular/containers/_time_series.py @@ -7,8 +7,8 @@ import matplotlib.pyplot as plt import pandas as pd import seaborn as sns -import xxhash +from safeds._utils import _structural_hash from safeds.data.image.containers import Image from safeds.data.tabular.containers import Column, Row, Table, TaggedTable from safeds.exceptions import ( @@ -295,12 +295,7 @@ def __hash__(self) -> int: hash : int The hash value. """ - return xxhash.xxh3_64( - hash(self.time).to_bytes(8) - + hash(self.target).to_bytes(8) - + hash(self.features).to_bytes(8) - + Table.__hash__(self).to_bytes(8), - ).intdigest() + return _structural_hash(self.time, self.target, self.features, Table.__hash__(self)) def __sizeof__(self) -> int: """ diff --git a/src/safeds/data/tabular/transformation/_imputer.py b/src/safeds/data/tabular/transformation/_imputer.py index 06e6c6d32..e40280c11 100644 --- a/src/safeds/data/tabular/transformation/_imputer.py +++ b/src/safeds/data/tabular/transformation/_imputer.py @@ -1,5 +1,6 @@ from __future__ import annotations +import sys import warnings from typing import Any @@ -47,9 +48,28 @@ class Constant(ImputerStrategy): The given value to impute missing values. 
""" + def __eq__(self, other: object) -> bool: + if not isinstance(other, Imputer.Strategy.Constant): + return NotImplemented + if self is other: + return True + return self._value == other._value + + __hash__ = ImputerStrategy.__hash__ + def __init__(self, value: Any): self._value = value + def __sizeof__(self) -> int: + """ + Return the complete size of this object. + + Returns + ------- + Size of this object in bytes. + """ + return sys.getsizeof(self._value) + def __str__(self) -> str: return f"Constant({self._value})" @@ -60,6 +80,13 @@ def _augment_imputer(self, imputer: sk_SimpleImputer) -> None: class Mean(ImputerStrategy): """An imputation strategy for imputing missing data with mean values.""" + def __eq__(self, other: object) -> bool: + if not isinstance(other, Imputer.Strategy.Mean): + return NotImplemented + return True + + __hash__ = ImputerStrategy.__hash__ + def __str__(self) -> str: return "Mean" @@ -69,6 +96,13 @@ def _augment_imputer(self, imputer: sk_SimpleImputer) -> None: class Median(ImputerStrategy): """An imputation strategy for imputing missing data with median values.""" + def __eq__(self, other: object) -> bool: + if not isinstance(other, Imputer.Strategy.Median): + return NotImplemented + return True + + __hash__ = ImputerStrategy.__hash__ + def __str__(self) -> str: return "Median" @@ -78,6 +112,13 @@ def _augment_imputer(self, imputer: sk_SimpleImputer) -> None: class Mode(ImputerStrategy): """An imputation strategy for imputing missing data with mode values. The lowest value will be used if there are multiple values with the same highest count.""" + def __eq__(self, other: object) -> bool: + if not isinstance(other, Imputer.Strategy.Mode): + return NotImplemented + return True + + __hash__ = ImputerStrategy.__hash__ + def __str__(self) -> str: return "Mode" diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py index 3502be576..56c5eaf5a 100644 --- a/src/safeds/data/tabular/transformation/_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_table_transformer.py @@ -3,7 +3,7 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING -import xxhash +from safeds._utils import _structural_hash if TYPE_CHECKING: from safeds.data.tabular.containers import Table @@ -21,7 +21,10 @@ def __hash__(self) -> int: hash : int The hash value. """ - return xxhash.xxh3_64(self.__class__.__qualname__.encode("utf-8") + (1 if self.is_fitted() else 0).to_bytes(1)).intdigest() + added = self.get_names_of_added_columns() if self.is_fitted() else [] + changed = self.get_names_of_changed_columns() if self.is_fitted() else [] + removed = self.get_names_of_removed_columns() if self.is_fitted() else [] + return _structural_hash(self.__class__.__qualname__, self.is_fitted(), added, changed, removed) @abstractmethod def fit(self, table: Table, column_names: list[str] | None) -> TableTransformer: diff --git a/src/safeds/data/tabular/typing/_imputer_strategy.py b/src/safeds/data/tabular/typing/_imputer_strategy.py index fd55477c5..ff43099b9 100644 --- a/src/safeds/data/tabular/typing/_imputer_strategy.py +++ b/src/safeds/data/tabular/typing/_imputer_strategy.py @@ -2,6 +2,8 @@ from sklearn.impute import SimpleImputer as sk_SimpleImputer +from safeds._utils import _structural_hash + class ImputerStrategy(ABC): """ @@ -20,3 +22,30 @@ def _augment_imputer(self, imputer: sk_SimpleImputer) -> None: imputer: SimpleImputer The imputer to augment. 
""" + + @abstractmethod + def __eq__(self, other: object) -> bool: + """ + Compare two imputer strategies. + + Parameters + ---------- + other: + other object to compare to + + Returns + ------- + equals: + Whether the two imputer strategies are equal + """ + + def __hash__(self) -> int: + """ + Return a deterministic hash value for this imputer strategy. + + Returns + ------- + hash : int + The hash value. + """ + return _structural_hash(self.__class__.__qualname__) diff --git a/src/safeds/data/tabular/typing/_schema.py b/src/safeds/data/tabular/typing/_schema.py index 3ed0623cd..20cdd3256 100644 --- a/src/safeds/data/tabular/typing/_schema.py +++ b/src/safeds/data/tabular/typing/_schema.py @@ -4,8 +4,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -import xxhash - +from safeds._utils import _structural_hash from safeds.data.tabular.typing import Anything, Integer, Nothing, RealNumber from safeds.data.tabular.typing._column_type import ColumnType from safeds.exceptions import UnknownColumnNameError @@ -83,7 +82,7 @@ def __hash__(self) -> int: """ column_names = self._schema.keys() column_types = map(repr, self._schema.values()) - return xxhash.xxh3_64(str(tuple(zip(column_names, column_types, strict=True)))).intdigest() + return _structural_hash(str(tuple(zip(column_names, column_types, strict=True)))) def __repr__(self) -> str: """ diff --git a/src/safeds/ml/classical/classification/_ada_boost.py b/src/safeds/ml/classical/classification/_ada_boost.py index 8cb6c28d0..289b5ef31 100644 --- a/src/safeds/ml/classical/classification/_ada_boost.py +++ b/src/safeds/ml/classical/classification/_ada_boost.py @@ -4,6 +4,7 @@ from sklearn.ensemble import AdaBoostClassifier as sk_AdaBoostClassifier +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -36,6 +37,15 @@ class AdaBoostClassifier(Classifier): If `maximum_number_of_learners` or `learning_rate` are less than or equal to 0. """ + def __hash__(self) -> int: + return _structural_hash( + Classifier.__hash__(self), + self._target_name, + self._feature_names, + self._learning_rate, + self._maximum_number_of_learners, + ) + def __init__( self, *, diff --git a/src/safeds/ml/classical/classification/_classifier.py b/src/safeds/ml/classical/classification/_classifier.py index b3a7a852b..c5c7e8be3 100644 --- a/src/safeds/ml/classical/classification/_classifier.py +++ b/src/safeds/ml/classical/classification/_classifier.py @@ -3,9 +3,9 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING -import xxhash from sklearn.metrics import accuracy_score as sk_accuracy_score +from safeds._utils import _structural_hash from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import UntaggedTableError @@ -27,7 +27,7 @@ def __hash__(self) -> int: hash : int The hash value. 
""" - return xxhash.xxh3_64(self.__class__.__qualname__.encode("utf-8") + (1 if self.is_fitted() else 0).to_bytes(1)).intdigest() + return _structural_hash(self.__class__.__qualname__, self.is_fitted()) @abstractmethod def fit(self, training_set: TaggedTable) -> Classifier: diff --git a/src/safeds/ml/classical/classification/_decision_tree.py b/src/safeds/ml/classical/classification/_decision_tree.py index 78b7e1559..651a6101b 100644 --- a/src/safeds/ml/classical/classification/_decision_tree.py +++ b/src/safeds/ml/classical/classification/_decision_tree.py @@ -4,6 +4,7 @@ from sklearn.tree import DecisionTreeClassifier as sk_DecisionTreeClassifier +from safeds._utils import _structural_hash from safeds.ml.classical._util_sklearn import fit, predict from ._classifier import Classifier @@ -17,6 +18,9 @@ class DecisionTreeClassifier(Classifier): """Decision tree classification.""" + def __hash__(self) -> int: + return _structural_hash(Classifier.__hash__(self), self._target_name, self._feature_names) + def __init__(self) -> None: # Internal state self._wrapped_classifier: sk_DecisionTreeClassifier | None = None diff --git a/src/safeds/ml/classical/classification/_gradient_boosting.py b/src/safeds/ml/classical/classification/_gradient_boosting.py index 3f22b08ff..83eae4703 100644 --- a/src/safeds/ml/classical/classification/_gradient_boosting.py +++ b/src/safeds/ml/classical/classification/_gradient_boosting.py @@ -4,6 +4,7 @@ from sklearn.ensemble import GradientBoostingClassifier as sk_GradientBoostingClassifier +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -34,6 +35,15 @@ class GradientBoostingClassifier(Classifier): If `number_of_trees` or `learning_rate` is less than or equal to 0. """ + def __hash__(self) -> int: + return _structural_hash( + Classifier.__hash__(self), + self._target_name, + self._feature_names, + self._learning_rate, + self._number_of_trees, + ) + def __init__(self, *, number_of_trees: int = 100, learning_rate: float = 0.1) -> None: # Validation if number_of_trees < 1: diff --git a/src/safeds/ml/classical/classification/_k_nearest_neighbors.py b/src/safeds/ml/classical/classification/_k_nearest_neighbors.py index 8f67ed95b..3202c7615 100644 --- a/src/safeds/ml/classical/classification/_k_nearest_neighbors.py +++ b/src/safeds/ml/classical/classification/_k_nearest_neighbors.py @@ -4,6 +4,7 @@ from sklearn.neighbors import KNeighborsClassifier as sk_KNeighborsClassifier +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, DatasetMissesDataError, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -31,6 +32,14 @@ class KNearestNeighborsClassifier(Classifier): If `number_of_neighbors` is less than 1. 
""" + def __hash__(self) -> int: + return _structural_hash( + Classifier.__hash__(self), + self._target_name, + self._feature_names, + self._number_of_neighbors, + ) + def __init__(self, number_of_neighbors: int) -> None: # Validation if number_of_neighbors < 1: diff --git a/src/safeds/ml/classical/classification/_logistic_regression.py b/src/safeds/ml/classical/classification/_logistic_regression.py index 67630396f..913176cca 100644 --- a/src/safeds/ml/classical/classification/_logistic_regression.py +++ b/src/safeds/ml/classical/classification/_logistic_regression.py @@ -4,6 +4,7 @@ from sklearn.linear_model import LogisticRegression as sk_LogisticRegression +from safeds._utils import _structural_hash from safeds.ml.classical._util_sklearn import fit, predict from ._classifier import Classifier @@ -17,6 +18,9 @@ class LogisticRegressionClassifier(Classifier): """Regularized logistic regression.""" + def __hash__(self) -> int: + return _structural_hash(Classifier.__hash__(self), self._target_name, self._feature_names) + def __init__(self) -> None: # Internal state self._wrapped_classifier: sk_LogisticRegression | None = None diff --git a/src/safeds/ml/classical/classification/_random_forest.py b/src/safeds/ml/classical/classification/_random_forest.py index c237fd5a7..f37e1cf39 100644 --- a/src/safeds/ml/classical/classification/_random_forest.py +++ b/src/safeds/ml/classical/classification/_random_forest.py @@ -4,6 +4,7 @@ from sklearn.ensemble import RandomForestClassifier as sk_RandomForestClassifier +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -29,6 +30,14 @@ class RandomForestClassifier(Classifier): If `number_of_trees` is less than 1. """ + def __hash__(self) -> int: + return _structural_hash( + Classifier.__hash__(self), + self._target_name, + self._feature_names, + self._number_of_trees, + ) + def __init__(self, *, number_of_trees: int = 100) -> None: # Validation if number_of_trees < 1: diff --git a/src/safeds/ml/classical/classification/_support_vector_machine.py b/src/safeds/ml/classical/classification/_support_vector_machine.py index 34bac1bdf..035851a0f 100644 --- a/src/safeds/ml/classical/classification/_support_vector_machine.py +++ b/src/safeds/ml/classical/classification/_support_vector_machine.py @@ -1,10 +1,12 @@ from __future__ import annotations +import sys from abc import ABC, abstractmethod from typing import TYPE_CHECKING from sklearn.svm import SVC as sk_SVC # noqa: N811 +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict from safeds.ml.classical.classification import Classifier @@ -29,6 +31,33 @@ def _get_sklearn_kernel(self) -> object: The kernel of the SupportVectorMachine. """ + @abstractmethod + def __eq__(self, other: object) -> bool: + """ + Compare two kernels. + + Parameters + ---------- + other: + other object to compare to + + Returns + ------- + equals: + Whether the two kernels are equal + """ + + def __hash__(self) -> int: + """ + Return a deterministic hash value for this kernel. + + Returns + ------- + hash : int + The hash value. + """ + return _structural_hash(self.__class__.__qualname__) + class SupportVectorMachineClassifier(Classifier): """ @@ -47,6 +76,9 @@ class SupportVectorMachineClassifier(Classifier): If `c` is less than or equal to 0. 
""" + def __hash__(self) -> int: + return _structural_hash(Classifier.__hash__(self), self._target_name, self._feature_names, self._c, self.kernel) + def __init__(self, *, c: float = 1.0, kernel: SupportVectorMachineKernel | None = None) -> None: # Internal state self._wrapped_classifier: sk_SVC | None = None @@ -96,6 +128,13 @@ def _get_sklearn_kernel(self) -> str: """ return "linear" + def __eq__(self, other: object) -> bool: + if not isinstance(other, SupportVectorMachineClassifier.Kernel.Linear): + return NotImplemented + return True + + __hash__ = SupportVectorMachineKernel.__hash__ + class Polynomial(SupportVectorMachineKernel): def __init__(self, degree: int): if degree < 1: @@ -113,6 +152,24 @@ def _get_sklearn_kernel(self) -> str: """ return "poly" + def __eq__(self, other: object) -> bool: + if not isinstance(other, SupportVectorMachineClassifier.Kernel.Polynomial): + return NotImplemented + return self._degree == other._degree + + def __hash__(self) -> int: + return _structural_hash(SupportVectorMachineKernel.__hash__(self), self._degree) + + def __sizeof__(self) -> int: + """ + Return the complete size of this object. + + Returns + ------- + Size of this object in bytes. + """ + return sys.getsizeof(self._degree) + class Sigmoid(SupportVectorMachineKernel): def _get_sklearn_kernel(self) -> str: """ @@ -125,6 +182,13 @@ def _get_sklearn_kernel(self) -> str: """ return "sigmoid" + def __eq__(self, other: object) -> bool: + if not isinstance(other, SupportVectorMachineClassifier.Kernel.Sigmoid): + return NotImplemented + return True + + __hash__ = SupportVectorMachineKernel.__hash__ + class RadialBasisFunction(SupportVectorMachineKernel): def _get_sklearn_kernel(self) -> str: """ @@ -137,6 +201,13 @@ def _get_sklearn_kernel(self) -> str: """ return "rbf" + def __eq__(self, other: object) -> bool: + if not isinstance(other, SupportVectorMachineClassifier.Kernel.RadialBasisFunction): + return NotImplemented + return True + + __hash__ = SupportVectorMachineKernel.__hash__ + def _get_kernel_name(self) -> str: """ Get the name of the kernel. diff --git a/src/safeds/ml/classical/regression/_ada_boost.py b/src/safeds/ml/classical/regression/_ada_boost.py index ad037e96f..676473f43 100644 --- a/src/safeds/ml/classical/regression/_ada_boost.py +++ b/src/safeds/ml/classical/regression/_ada_boost.py @@ -4,6 +4,7 @@ from sklearn.ensemble import AdaBoostRegressor as sk_AdaBoostRegressor +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -36,6 +37,15 @@ class AdaBoostRegressor(Regressor): If `maximum_number_of_learners` or `learning_rate` are less than or equal to 0. 
""" + def __hash__(self) -> int: + return _structural_hash( + Regressor.__hash__(self), + self._target_name, + self._feature_names, + self._learning_rate, + self._maximum_number_of_learners, + ) + def __init__( self, *, diff --git a/src/safeds/ml/classical/regression/_arima.py b/src/safeds/ml/classical/regression/_arima.py index a9a2705da..1ce412906 100644 --- a/src/safeds/ml/classical/regression/_arima.py +++ b/src/safeds/ml/classical/regression/_arima.py @@ -4,9 +4,9 @@ import itertools import matplotlib.pyplot as plt -import xxhash from statsmodels.tsa.arima.model import ARIMA +from safeds._utils import _structural_hash from safeds.data.image.containers import Image from safeds.data.tabular.containers import Column, Table, TimeSeries from safeds.exceptions import ( @@ -30,11 +30,7 @@ def __hash__(self) -> int: hash: The hash value. """ - return xxhash.xxh3_64( - self.__class__.__qualname__.encode("utf-8") - + (1 if self.is_fitted() else 0).to_bytes(1) - + (bytes((9, 9, 9)) if self._order is None else bytes(self._order)), - ).intdigest() + return _structural_hash(self.__class__.__qualname__, self.is_fitted(), self._order) def __init__(self) -> None: # Internal state diff --git a/src/safeds/ml/classical/regression/_decision_tree.py b/src/safeds/ml/classical/regression/_decision_tree.py index 48bb5a95f..473906552 100644 --- a/src/safeds/ml/classical/regression/_decision_tree.py +++ b/src/safeds/ml/classical/regression/_decision_tree.py @@ -4,6 +4,7 @@ from sklearn.tree import DecisionTreeRegressor as sk_DecisionTreeRegressor +from safeds._utils import _structural_hash from safeds.ml.classical._util_sklearn import fit, predict from ._regressor import Regressor @@ -17,6 +18,9 @@ class DecisionTreeRegressor(Regressor): """Decision tree regression.""" + def __hash__(self) -> int: + return _structural_hash(Regressor.__hash__(self), self._target_name, self._feature_names) + def __init__(self) -> None: # Internal state self._wrapped_regressor: sk_DecisionTreeRegressor | None = None diff --git a/src/safeds/ml/classical/regression/_elastic_net_regression.py b/src/safeds/ml/classical/regression/_elastic_net_regression.py index c7087529f..15f9b6db1 100644 --- a/src/safeds/ml/classical/regression/_elastic_net_regression.py +++ b/src/safeds/ml/classical/regression/_elastic_net_regression.py @@ -6,6 +6,7 @@ from sklearn.linear_model import ElasticNet as sk_ElasticNet +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -34,6 +35,15 @@ class ElasticNetRegressor(Regressor): If `alpha` is negative or `lasso_ratio` is not between 0 and 1. 
""" + def __hash__(self) -> int: + return _structural_hash( + Regressor.__hash__(self), + self._target_name, + self._feature_names, + self._alpha, + self._lasso_ratio, + ) + def __init__(self, *, alpha: float = 1.0, lasso_ratio: float = 0.5) -> None: # Validation if alpha < 0: diff --git a/src/safeds/ml/classical/regression/_gradient_boosting.py b/src/safeds/ml/classical/regression/_gradient_boosting.py index fd47d5a7c..c3905936b 100644 --- a/src/safeds/ml/classical/regression/_gradient_boosting.py +++ b/src/safeds/ml/classical/regression/_gradient_boosting.py @@ -4,6 +4,7 @@ from sklearn.ensemble import GradientBoostingRegressor as sk_GradientBoostingRegressor +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -34,6 +35,15 @@ class GradientBoostingRegressor(Regressor): If `number_of_trees` or `learning_rate` are less than or equal to 0. """ + def __hash__(self) -> int: + return _structural_hash( + Regressor.__hash__(self), + self._target_name, + self._feature_names, + self._learning_rate, + self._number_of_trees, + ) + def __init__(self, *, number_of_trees: int = 100, learning_rate: float = 0.1) -> None: # Validation if number_of_trees < 1: diff --git a/src/safeds/ml/classical/regression/_k_nearest_neighbors.py b/src/safeds/ml/classical/regression/_k_nearest_neighbors.py index d2b8040cf..e4ee60ee1 100644 --- a/src/safeds/ml/classical/regression/_k_nearest_neighbors.py +++ b/src/safeds/ml/classical/regression/_k_nearest_neighbors.py @@ -4,6 +4,7 @@ from sklearn.neighbors import KNeighborsRegressor as sk_KNeighborsRegressor +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, DatasetMissesDataError, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -31,6 +32,14 @@ class KNearestNeighborsRegressor(Regressor): If `number_of_neighbors` is less than 1. """ + def __hash__(self) -> int: + return _structural_hash( + Regressor.__hash__(self), + self._target_name, + self._feature_names, + self._number_of_neighbors, + ) + def __init__(self, number_of_neighbors: int) -> None: # Validation if number_of_neighbors < 1: diff --git a/src/safeds/ml/classical/regression/_lasso_regression.py b/src/safeds/ml/classical/regression/_lasso_regression.py index 6971f183d..1748b63cd 100644 --- a/src/safeds/ml/classical/regression/_lasso_regression.py +++ b/src/safeds/ml/classical/regression/_lasso_regression.py @@ -5,6 +5,7 @@ from sklearn.linear_model import Lasso as sk_Lasso +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -30,6 +31,9 @@ class LassoRegressor(Regressor): If `alpha` is negative. 
""" + def __hash__(self) -> int: + return _structural_hash(Regressor.__hash__(self), self._target_name, self._feature_names, self._alpha) + def __init__(self, *, alpha: float = 1.0) -> None: # Validation if alpha < 0: diff --git a/src/safeds/ml/classical/regression/_linear_regression.py b/src/safeds/ml/classical/regression/_linear_regression.py index a3bbb381e..e577d0db1 100644 --- a/src/safeds/ml/classical/regression/_linear_regression.py +++ b/src/safeds/ml/classical/regression/_linear_regression.py @@ -4,6 +4,7 @@ from sklearn.linear_model import LinearRegression as sk_LinearRegression +from safeds._utils import _structural_hash from safeds.ml.classical._util_sklearn import fit, predict from ._regressor import Regressor @@ -17,6 +18,9 @@ class LinearRegressionRegressor(Regressor): """Linear regression.""" + def __hash__(self) -> int: + return _structural_hash(Regressor.__hash__(self), self._target_name, self._feature_names) + def __init__(self) -> None: # Internal state self._wrapped_regressor: sk_LinearRegression | None = None diff --git a/src/safeds/ml/classical/regression/_random_forest.py b/src/safeds/ml/classical/regression/_random_forest.py index 0709ef92e..d8959a36b 100644 --- a/src/safeds/ml/classical/regression/_random_forest.py +++ b/src/safeds/ml/classical/regression/_random_forest.py @@ -4,6 +4,7 @@ from sklearn.ensemble import RandomForestRegressor as sk_RandomForestRegressor +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -29,6 +30,9 @@ class RandomForestRegressor(Regressor): If `number_of_trees` is less than 1. """ + def __hash__(self) -> int: + return _structural_hash(Regressor.__hash__(self), self._target_name, self._feature_names, self._number_of_trees) + def __init__(self, *, number_of_trees: int = 100) -> None: # Validation if number_of_trees < 1: diff --git a/src/safeds/ml/classical/regression/_regressor.py b/src/safeds/ml/classical/regression/_regressor.py index 54ca497f3..260a9a474 100644 --- a/src/safeds/ml/classical/regression/_regressor.py +++ b/src/safeds/ml/classical/regression/_regressor.py @@ -3,10 +3,10 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING -import xxhash from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error from sklearn.metrics import mean_squared_error as sk_mean_squared_error +from safeds._utils import _structural_hash from safeds.data.tabular.containers import Column, Table, TaggedTable from safeds.exceptions import ColumnLengthMismatchError, UntaggedTableError @@ -26,7 +26,7 @@ def __hash__(self) -> int: hash : int The hash value. """ - return xxhash.xxh3_64(self.__class__.__qualname__.encode("utf-8") + (1 if self.is_fitted() else 0).to_bytes(1)).intdigest() + return _structural_hash(self.__class__.__qualname__, self.is_fitted()) @abstractmethod def fit(self, training_set: TaggedTable) -> Regressor: diff --git a/src/safeds/ml/classical/regression/_ridge_regression.py b/src/safeds/ml/classical/regression/_ridge_regression.py index d267da9c8..a5ac44d03 100644 --- a/src/safeds/ml/classical/regression/_ridge_regression.py +++ b/src/safeds/ml/classical/regression/_ridge_regression.py @@ -5,6 +5,7 @@ from sklearn.linear_model import Ridge as sk_Ridge +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict @@ -31,6 +32,9 @@ class RidgeRegressor(Regressor): If `alpha` is negative. 
""" + def __hash__(self) -> int: + return _structural_hash(Regressor.__hash__(self), self._target_name, self._feature_names, self._alpha) + def __init__(self, *, alpha: float = 1.0) -> None: # Validation if alpha < 0: diff --git a/src/safeds/ml/classical/regression/_support_vector_machine.py b/src/safeds/ml/classical/regression/_support_vector_machine.py index 711d544d7..e9a1f7d4c 100644 --- a/src/safeds/ml/classical/regression/_support_vector_machine.py +++ b/src/safeds/ml/classical/regression/_support_vector_machine.py @@ -1,10 +1,12 @@ from __future__ import annotations +import sys from abc import ABC, abstractmethod from typing import TYPE_CHECKING from sklearn.svm import SVR as sk_SVR # noqa: N811 +from safeds._utils import _structural_hash from safeds.exceptions import ClosedBound, OpenBound, OutOfBoundsError from safeds.ml.classical._util_sklearn import fit, predict from safeds.ml.classical.regression import Regressor @@ -29,6 +31,33 @@ def _get_sklearn_kernel(self) -> object: The kernel of the SupportVectorMachine. """ + @abstractmethod + def __eq__(self, other: object) -> bool: + """ + Compare two kernels. + + Parameters + ---------- + other: + other object to compare to + + Returns + ------- + equals: + Whether the two kernels are equal + """ + + def __hash__(self) -> int: + """ + Return a deterministic hash value for this kernel. + + Returns + ------- + hash : int + The hash value. + """ + return _structural_hash(self.__class__.__qualname__) + class SupportVectorMachineRegressor(Regressor): """ @@ -47,6 +76,9 @@ class SupportVectorMachineRegressor(Regressor): If `c` is less than or equal to 0. """ + def __hash__(self) -> int: + return _structural_hash(Regressor.__hash__(self), self._target_name, self._feature_names, self._c, self.kernel) + def __init__(self, *, c: float = 1.0, kernel: SupportVectorMachineKernel | None = None) -> None: # Internal state self._wrapped_regressor: sk_SVR | None = None @@ -96,6 +128,13 @@ def _get_sklearn_kernel(self) -> str: """ return "linear" + def __eq__(self, other: object) -> bool: + if not isinstance(other, SupportVectorMachineRegressor.Kernel.Linear): + return NotImplemented + return True + + __hash__ = SupportVectorMachineKernel.__hash__ + class Polynomial(SupportVectorMachineKernel): def __init__(self, degree: int): if degree < 1: @@ -113,6 +152,24 @@ def _get_sklearn_kernel(self) -> str: """ return "poly" + def __eq__(self, other: object) -> bool: + if not isinstance(other, SupportVectorMachineRegressor.Kernel.Polynomial): + return NotImplemented + return self._degree == other._degree + + def __hash__(self) -> int: + return _structural_hash(SupportVectorMachineKernel.__hash__(self), self._degree) + + def __sizeof__(self) -> int: + """ + Return the complete size of this object. + + Returns + ------- + Size of this object in bytes. 
+ """ + return sys.getsizeof(self._degree) + class Sigmoid(SupportVectorMachineKernel): def _get_sklearn_kernel(self) -> str: """ @@ -125,6 +182,13 @@ def _get_sklearn_kernel(self) -> str: """ return "sigmoid" + def __eq__(self, other: object) -> bool: + if not isinstance(other, SupportVectorMachineRegressor.Kernel.Sigmoid): + return NotImplemented + return True + + __hash__ = SupportVectorMachineKernel.__hash__ + class RadialBasisFunction(SupportVectorMachineKernel): def _get_sklearn_kernel(self) -> str: """ @@ -137,6 +201,13 @@ def _get_sklearn_kernel(self) -> str: """ return "rbf" + def __eq__(self, other: object) -> bool: + if not isinstance(other, SupportVectorMachineRegressor.Kernel.RadialBasisFunction): + return NotImplemented + return True + + __hash__ = SupportVectorMachineKernel.__hash__ + def _get_kernel_name(self) -> str: """ Get the name of the kernel. diff --git a/tests/safeds/_utils/__init__.py b/tests/safeds/_utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/safeds/_utils/test_hashing.py b/tests/safeds/_utils/test_hashing.py new file mode 100644 index 000000000..ed1d32d85 --- /dev/null +++ b/tests/safeds/_utils/test_hashing.py @@ -0,0 +1,87 @@ +from typing import Any + +import pytest +from safeds._utils._hashing import _structural_hash, _value_to_bytes +from safeds.data.tabular.containers import Table + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (None, 13852660066117729964), + (b"123456789", 14380211418424798930), + (-42, 7489430509543234423), + (42, 13109960438326920571), + (0, 3448420582392008907), + (True, 2767458027849294907), + (False, 13852660066117729964), + ("abc", 13264335307911969754), + (-1.234, 1303859999365793597), + ((1, "2", 3.0), 1269800189614394802), + ([1, "2", 3.0], 1269800189614394802), + ({1, "2", 3.0}, 17310946488773236131), + (frozenset({1, "2", 3.0}), 17310946488773236131), + ({"a": "b", 1: 2}, 17924302838573884393), + (Table({"col1": [1, 2], "col2:": [3, 4]}), 18297321136957342689), + ], + ids=[ + "none", + "bytes", + "int_negative", + "int_positive", + "int_zero", + "boolean_true", + "boolean_false", + "string", + "float", + "tuple", + "list", + "set", + "frozenset", + "dict", + "object_table", + ], +) +def test_structural_hash(value: Any, expected: int) -> None: + assert _structural_hash(value) == expected + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + (None, b"\0"), + (b"123456789", b"123456789"), + (-42, b"\xff\xff\xff\xff\xff\xff\xff\xd6"), + (42, b"\0\0\0\0\0\0\0*"), + (0, b"\0\0\0\0\0\0\0\0"), + (True, b"\1"), + (False, b"\0"), + ("abc", b"abc"), + (-1.234, b"X9\xb4\xc8v\xbe\xf3\xbf"), + ((1, "2", 3.0), b"\0\0\0\0\0\0\0\x03\0\0\0\0\0\0\0\x012\0\0\0\0\0\0\x08@"), + ([1, "2", 3.0], b"\0\0\0\0\0\0\0\x03\0\0\0\0\0\0\0\x012\0\0\0\0\0\0\x08@"), + ({1, "2", 3.0}, b"\0\0\0\0\0\0\0\x03\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\x08@2"), + (frozenset({1, "2", 3.0}), b"\0\0\0\0\0\0\0\x03\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\x08@2"), + ({"a": "b", 1: 2}, b"\0\0\0\0\0\0\0\x02\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\x02ab"), + (Table({"col1": [1, 2], "col2:": [3, 4]}), b"\x04P\xbfS$\xaf\xf4W"), + ], + ids=[ + "none", + "bytes", + "int_negative", + "int_positive", + "int_zero", + "boolean_true", + "boolean_false", + "string", + "float", + "tuple", + "list", + "set", + "frozenset", + "dict", + "object_table", + ], +) +def test_value_to_bytes(value: Any, expected: bytes) -> None: + assert _value_to_bytes(value) == expected diff --git a/tests/safeds/data/tabular/transformation/test_imputer.py 
b/tests/safeds/data/tabular/transformation/test_imputer.py index 8dd3ef8d4..15e71a29d 100644 --- a/tests/safeds/data/tabular/transformation/test_imputer.py +++ b/tests/safeds/data/tabular/transformation/test_imputer.py @@ -1,3 +1,4 @@ +import sys import warnings import pytest @@ -356,3 +357,80 @@ def test_get_names_of_removed_columns(self, strategy: ImputerStrategy) -> None: ) transformer = transformer.fit(table, None) assert transformer.get_names_of_removed_columns() == [] + + +class TestHash: + @pytest.mark.parametrize( + ("strategy1", "strategy2"), + ([(x, y) for x in strategies() for y in strategies() if x.__class__ == y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_should_return_same_hash_for_equal_strategy( + self, + strategy1: ImputerStrategy, + strategy2: ImputerStrategy, + ) -> None: + assert hash(strategy1) == hash(strategy2) + + @pytest.mark.parametrize( + ("strategy1", "strategy2"), + ([(x, y) for x in strategies() for y in strategies() if x.__class__ != y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_should_return_different_hash_for_unequal_strategy( + self, + strategy1: ImputerStrategy, + strategy2: ImputerStrategy, + ) -> None: + assert hash(strategy1) != hash(strategy2) + + +class TestEq: + + @pytest.mark.parametrize( + ("strategy1", "strategy2"), + ([(x, y) for x in strategies() for y in strategies() if x.__class__ == y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_equal_strategy( + self, + strategy1: ImputerStrategy, + strategy2: ImputerStrategy, + ) -> None: + assert strategy1 == strategy2 + + @pytest.mark.parametrize( + "strategy", + ([x for x in strategies() if x.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_equal_identity_strategy( + self, + strategy: ImputerStrategy, + ) -> None: + assert strategy == strategy # noqa: PLR0124 + + @pytest.mark.parametrize( + ("strategy1", "strategy2"), + ([(x, y) for x in strategies() for y in strategies() if x.__class__ != y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_unequal_strategy( + self, + strategy1: ImputerStrategy, + strategy2: ImputerStrategy, + ) -> None: + assert strategy1 != strategy2 + + +class TestSizeof: + @pytest.mark.parametrize( + "strategy", + ([Imputer.Strategy.Constant(1)]), + ids=lambda x: x.__class__.__name__, + ) + def test_sizeof_strategy( + self, + strategy: ImputerStrategy, + ) -> None: + assert sys.getsizeof(strategy) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/classical/classification/test_support_vector_machine.py b/tests/safeds/ml/classical/classification/test_support_vector_machine.py index bb54b7d6d..1e0857d6a 100644 --- a/tests/safeds/ml/classical/classification/test_support_vector_machine.py +++ b/tests/safeds/ml/classical/classification/test_support_vector_machine.py @@ -1,7 +1,30 @@ +import sys + import pytest from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.classification import SupportVectorMachineClassifier +from safeds.ml.classical.classification._support_vector_machine import SupportVectorMachineKernel + + +def kernels() -> list[SupportVectorMachineKernel]: + """ + Return the list of kernels to test. + + After you implemented a new kernel, add it to this list to ensure its `__hash__` and `__eq__` method work as + expected. + + Returns + ------- + kernels : list[SupportVectorMachineKernel] + The list of kernels to test. 
+ """ + return [ + SupportVectorMachineClassifier.Kernel.Linear(), + SupportVectorMachineClassifier.Kernel.Sigmoid(), + SupportVectorMachineClassifier.Kernel.Polynomial(3), + SupportVectorMachineClassifier.Kernel.RadialBasisFunction(), + ] @pytest.fixture() @@ -84,3 +107,62 @@ def test_should_get_kernel_name_invalid_kernel_type(self) -> None: svm = SupportVectorMachineClassifier(c=2) with pytest.raises(TypeError, match="Invalid kernel type."): svm._get_kernel_name() + + @pytest.mark.parametrize( + ("kernel1", "kernel2"), + ([(x, y) for x in kernels() for y in kernels() if x.__class__ == y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_should_return_same_hash_for_equal_kernel( + self, + kernel1: SupportVectorMachineKernel, + kernel2: SupportVectorMachineKernel, + ) -> None: + assert hash(kernel1) == hash(kernel2) + + @pytest.mark.parametrize( + ("kernel1", "kernel2"), + ([(x, y) for x in kernels() for y in kernels() if x.__class__ != y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_should_return_different_hash_for_unequal_kernel( + self, + kernel1: SupportVectorMachineKernel, + kernel2: SupportVectorMachineKernel, + ) -> None: + assert hash(kernel1) != hash(kernel2) + + @pytest.mark.parametrize( + ("kernel1", "kernel2"), + ([(x, y) for x in kernels() for y in kernels() if x.__class__ == y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_equal_kernel( + self, + kernel1: SupportVectorMachineKernel, + kernel2: SupportVectorMachineKernel, + ) -> None: + assert kernel1 == kernel2 + + @pytest.mark.parametrize( + ("kernel1", "kernel2"), + ([(x, y) for x in kernels() for y in kernels() if x.__class__ != y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_unequal_kernel( + self, + kernel1: SupportVectorMachineKernel, + kernel2: SupportVectorMachineKernel, + ) -> None: + assert kernel1 != kernel2 + + @pytest.mark.parametrize( + "kernel", + ([SupportVectorMachineClassifier.Kernel.Polynomial(3)]), + ids=lambda x: x.__class__.__name__, + ) + def test_sizeof_kernel( + self, + kernel: SupportVectorMachineKernel, + ) -> None: + assert sys.getsizeof(kernel) > sys.getsizeof(object()) diff --git a/tests/safeds/ml/classical/regression/test_support_vector_machine.py b/tests/safeds/ml/classical/regression/test_support_vector_machine.py index 54495d0ae..88340be25 100644 --- a/tests/safeds/ml/classical/regression/test_support_vector_machine.py +++ b/tests/safeds/ml/classical/regression/test_support_vector_machine.py @@ -1,7 +1,30 @@ +import sys + import pytest from safeds.data.tabular.containers import Table, TaggedTable from safeds.exceptions import OutOfBoundsError from safeds.ml.classical.regression import SupportVectorMachineRegressor +from safeds.ml.classical.regression._support_vector_machine import SupportVectorMachineKernel + + +def kernels() -> list[SupportVectorMachineKernel]: + """ + Return the list of kernels to test. + + After you implemented a new kernel, add it to this list to ensure its `__hash__` and `__eq__` method work as + expected. + + Returns + ------- + kernels : list[SupportVectorMachineKernel] + The list of kernels to test. 
+ """ + return [ + SupportVectorMachineRegressor.Kernel.Linear(), + SupportVectorMachineRegressor.Kernel.Sigmoid(), + SupportVectorMachineRegressor.Kernel.Polynomial(3), + SupportVectorMachineRegressor.Kernel.RadialBasisFunction(), + ] @pytest.fixture() @@ -84,3 +107,62 @@ def test_should_get_kernel_name_invalid_kernel_type(self) -> None: svm = SupportVectorMachineRegressor(c=2) with pytest.raises(TypeError, match="Invalid kernel type."): svm._get_kernel_name() + + @pytest.mark.parametrize( + ("kernel1", "kernel2"), + ([(x, y) for x in kernels() for y in kernels() if x.__class__ == y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_should_return_same_hash_for_equal_kernel( + self, + kernel1: SupportVectorMachineKernel, + kernel2: SupportVectorMachineKernel, + ) -> None: + assert hash(kernel1) == hash(kernel2) + + @pytest.mark.parametrize( + ("kernel1", "kernel2"), + ([(x, y) for x in kernels() for y in kernels() if x.__class__ != y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_should_return_different_hash_for_unequal_kernel( + self, + kernel1: SupportVectorMachineKernel, + kernel2: SupportVectorMachineKernel, + ) -> None: + assert hash(kernel1) != hash(kernel2) + + @pytest.mark.parametrize( + ("kernel1", "kernel2"), + ([(x, y) for x in kernels() for y in kernels() if x.__class__ == y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_equal_kernel( + self, + kernel1: SupportVectorMachineKernel, + kernel2: SupportVectorMachineKernel, + ) -> None: + assert kernel1 == kernel2 + + @pytest.mark.parametrize( + ("kernel1", "kernel2"), + ([(x, y) for x in kernels() for y in kernels() if x.__class__ != y.__class__]), + ids=lambda x: x.__class__.__name__, + ) + def test_unequal_kernel( + self, + kernel1: SupportVectorMachineKernel, + kernel2: SupportVectorMachineKernel, + ) -> None: + assert kernel1 != kernel2 + + @pytest.mark.parametrize( + "kernel", + ([SupportVectorMachineRegressor.Kernel.Polynomial(3)]), + ids=lambda x: x.__class__.__name__, + ) + def test_sizeof_kernel( + self, + kernel: SupportVectorMachineKernel, + ) -> None: + assert sys.getsizeof(kernel) > sys.getsizeof(object())