diff --git a/src/safeds/ml/classification/_classifier.py b/src/safeds/ml/classification/_classifier.py
index a0423ab09..7e9e9e731 100644
--- a/src/safeds/ml/classification/_classifier.py
+++ b/src/safeds/ml/classification/_classifier.py
@@ -1,6 +1,7 @@
 from abc import ABC, abstractmethod
 
 from safeds.data.tabular.containers import Table, TaggedTable
+from sklearn.metrics import accuracy_score as sk_accuracy_score
 
 
 class Classifier(ABC):
@@ -44,3 +45,25 @@ def predict(self, dataset: Table) -> TaggedTable:
         PredictionError
             If prediction with the given dataset failed.
         """
+
+    def accuracy(self, validation_or_test_set: TaggedTable) -> float:
+        """
+        Predicts the target values for the features in the validation or test set and compares it to the expected
+        results.
+
+        Parameters
+        ----------
+        validation_or_test_set : TaggedTable
+            The validation or test set.
+
+        Returns
+        -------
+        accuracy : float
+            The calculated accuracy score, i.e. the percentage of equal data.
+        """
+
+        expected = validation_or_test_set.target
+        predicted = self.predict(validation_or_test_set.features).target
+
+        # noinspection PyProtectedMember
+        return sk_accuracy_score(expected._data, predicted._data)
diff --git a/src/safeds/ml/classification/metrics/__init__.py b/src/safeds/ml/classification/metrics/__init__.py
deleted file mode 100644
index 92079917a..000000000
--- a/src/safeds/ml/classification/metrics/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from ._module_level_functions import accuracy
diff --git a/src/safeds/ml/classification/metrics/_module_level_functions.py b/src/safeds/ml/classification/metrics/_module_level_functions.py
deleted file mode 100644
index 6d2dd7c51..000000000
--- a/src/safeds/ml/classification/metrics/_module_level_functions.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from safeds.data.tabular.containers import Column
-from sklearn.metrics import accuracy_score
-
-
-def accuracy(actual: Column, expected: Column) -> float:
-    """
-    Compare the expected column and the predicted column and returns the accuracy.
-
-    Parameters
-    ----------
-    actual : Column
-        The column containing estimated values.
-    expected : Column
-        The column containing expected values.
-
-    Returns
-    -------
-    accuracy : float
-        The calculated accuracy score. The percentage of equal data.
-    """
-    return accuracy_score(actual._data, expected._data)
diff --git a/src/safeds/ml/regression/_regressor.py b/src/safeds/ml/regression/_regressor.py
index a32c7f770..6d0ebb9b5 100644
--- a/src/safeds/ml/regression/_regressor.py
+++ b/src/safeds/ml/regression/_regressor.py
@@ -1,6 +1,9 @@
 from abc import ABC, abstractmethod
 
-from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.data.tabular.containers import Column, Table, TaggedTable
+from safeds.exceptions import ColumnLengthMismatchError
+from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error
+from sklearn.metrics import mean_squared_error as sk_mean_squared_error
 
 
 class Regressor(ABC):
@@ -44,3 +47,59 @@ def predict(self, dataset: Table) -> TaggedTable:
         PredictionError
             If prediction with the given dataset failed.
         """
+
+    def mean_squared_error(self, validation_or_test_set: TaggedTable) -> float:
+        """
+        Return the mean squared error, calculated from a given known truth and a column to compare.
+
+        Parameters
+        ----------
+        validation_or_test_set : TaggedTable
+            The validation or test set.
+
+        Returns
+        -------
+        mean_squared_error : float
+            The calculated mean squared error (the average of the distance of each individual row squared).
+        """
+
+        expected = validation_or_test_set.target
+        predicted = self.predict(validation_or_test_set.features).target
+
+        _check_metrics_preconditions(predicted, expected)
+        return sk_mean_squared_error(expected._data, predicted._data)
+
+    def mean_absolute_error(self, validation_or_test_set: TaggedTable) -> float:
+        """
+        Return the mean absolute error, calculated from a given known truth and a column to compare.
+
+        Parameters
+        ----------
+        validation_or_test_set : TaggedTable
+            The validation or test set.
+
+        Returns
+        -------
+        mean_absolute_error : float
+            The calculated mean absolute error (the average of the distance of each individual row).
+        """
+
+        expected = validation_or_test_set.target
+        predicted = self.predict(validation_or_test_set.features).target
+
+        _check_metrics_preconditions(predicted, expected)
+        return sk_mean_absolute_error(expected._data, predicted._data)
+
+
+def _check_metrics_preconditions(actual: Column, expected: Column) -> None:
+    if not actual.type.is_numeric():
+        raise TypeError(f"Column 'actual' is not numerical but {actual.type}.")
+    if not expected.type.is_numeric():
+        raise TypeError(f"Column 'expected' is not numerical but {expected.type}.")
+
+    if actual._data.size != expected._data.size:
+        raise ColumnLengthMismatchError(
+            "\n".join(
+                [f"{column.name}: {column._data.size}" for column in [actual, expected]]
+            )
+        )
diff --git a/src/safeds/ml/regression/metrics/__init__.py b/src/safeds/ml/regression/metrics/__init__.py
deleted file mode 100644
index c823ed790..000000000
--- a/src/safeds/ml/regression/metrics/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from ._module_level_functions import mean_absolute_error, mean_squared_error
diff --git a/src/safeds/ml/regression/metrics/_module_level_functions.py b/src/safeds/ml/regression/metrics/_module_level_functions.py
deleted file mode 100644
index 0fc0d6d27..000000000
--- a/src/safeds/ml/regression/metrics/_module_level_functions.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from safeds.data.tabular.containers import Column
-from safeds.exceptions import ColumnLengthMismatchError
-from sklearn.metrics import mean_absolute_error as mean_absolute_error_sklearn
-from sklearn.metrics import mean_squared_error as mean_squared_error_sklearn
-
-
-def mean_squared_error(actual: Column, expected: Column) -> float:
-    """
-    Return the mean squared error, calculated from a given known truth and a column to compare.
-
-    Parameters
-    ----------
-    actual : Column
-        The column containing estimated values.
-    expected : Column
-        The column containing ground truth.
-
-    Returns
-    -------
-    mean_squared_error : float
-        The calculated mean squared error (the average of the distance of each individual row squared).
-    """
-    _check_metrics_preconditions(actual, expected)
-    return mean_squared_error_sklearn(expected._data.tolist(), actual._data.tolist())
-
-
-def mean_absolute_error(actual: Column, expected: Column) -> float:
-    """
-    Return the mean absolute error, calculated from a given known truth and a column to compare.
-
-    Parameters
-    ----------
-    actual: Column
-        The column containing estimated values.
-    expected: Column
-        The column containing ground truth.
-
-    Returns
-    -------
-    mean_absolute_error : float
-        The calculated mean absolute error (the average of the distance of each individual row).
-    """
-    _check_metrics_preconditions(actual, expected)
-    return mean_absolute_error_sklearn(expected._data.tolist(), actual._data.tolist())
-
-
-def _check_metrics_preconditions(actual: Column, expected: Column) -> None:
-    if not actual.type.is_numeric():
-        raise TypeError(f"Column 'actual' is not numerical but {actual.type}.")
-    if not expected.type.is_numeric():
-        raise TypeError(f"Column 'expected' is not numerical but {expected.type}.")
-
-    if actual._data.size != expected._data.size:
-        raise ColumnLengthMismatchError(
-            "\n".join(
-                [f"{column.name}: {column._data.size}" for column in [actual, expected]]
-            )
-        )
diff --git a/tests/safeds/ml/classification/metrics/__init__.py b/tests/safeds/ml/classification/_classifier/__init__.py
similarity index 100%
rename from tests/safeds/ml/classification/metrics/__init__.py
rename to tests/safeds/ml/classification/_classifier/__init__.py
diff --git a/tests/safeds/ml/classification/_classifier/_dummy_classifier.py b/tests/safeds/ml/classification/_classifier/_dummy_classifier.py
new file mode 100644
index 000000000..60bb7859a
--- /dev/null
+++ b/tests/safeds/ml/classification/_classifier/_dummy_classifier.py
@@ -0,0 +1,26 @@
+from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.ml.classification import Classifier
+
+
+class DummyClassifier(Classifier):
+    """
+    Dummy classifier to test metrics.
+
+    Metrics methods expect a `TaggedTable` as input with two columns:
+
+    - `predicted`: The predicted targets.
+    - `expected`: The correct targets.
+
+    `target_name` must be set to `"expected"`.
+    """
+
+    def fit(self, training_set: TaggedTable) -> None:
+        pass
+
+    def predict(self, dataset: Table) -> TaggedTable:
+        # Needed until https://github.com/Safe-DS/Stdlib/issues/75 is fixed
+        predicted = dataset.get_column("predicted")
+        feature = predicted.rename("feature")
+        dataset = Table.from_columns([feature, predicted])
+
+        return TaggedTable(dataset, target_name="predicted")
diff --git a/tests/safeds/ml/classification/_classifier/test_accuracy.py b/tests/safeds/ml/classification/_classifier/test_accuracy.py
new file mode 100644
index 000000000..ccb01bc32
--- /dev/null
+++ b/tests/safeds/ml/classification/_classifier/test_accuracy.py
@@ -0,0 +1,20 @@
+import pandas as pd
+from safeds.data.tabular.containers import Column, Table, TaggedTable
+
+from ._dummy_classifier import DummyClassifier
+
+
+def test_accuracy() -> None:
+    c1 = Column(pd.Series(data=[1, 2, 3, 4]), "predicted")
+    c2 = Column(pd.Series(data=[1, 2, 3, 3]), "expected")
+    table = TaggedTable(Table.from_columns([c1, c2]), target_name="expected")
+
+    assert DummyClassifier().accuracy(table) == 0.75
+
+
+def test_accuracy_different_types() -> None:
+    c1 = Column(pd.Series(data=["1", "2", "3", "4"]), "predicted")
+    c2 = Column(pd.Series(data=[1, 2, 3, 3]), "expected")
+    table = TaggedTable(Table.from_columns([c1, c2]), target_name="expected")
+
+    assert DummyClassifier().accuracy(table) == 0.0
diff --git a/tests/safeds/ml/classification/metrics/_accuracy/test_accuracy.py b/tests/safeds/ml/classification/metrics/_accuracy/test_accuracy.py
deleted file mode 100644
index 34aabf144..000000000
--- a/tests/safeds/ml/classification/metrics/_accuracy/test_accuracy.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import pandas as pd
-from safeds.data.tabular.containers import Column
-from safeds.ml.classification.metrics import accuracy
-
-
-def test_accuracy() -> None:
-    c1 = Column(pd.Series(data=[1, 2, 3, 4]), "TestColumn1")
-    c2 = Column(pd.Series(data=[1, 2, 3, 3]), "TestColumn2")
-    assert accuracy(c1, c2) == 0.75
-
-
-def test_accuracy_different_types() -> None:
-    c1 = Column(pd.Series(data=["1", "2", "3", "4"]), "TestColumn1")
-    c2 = Column(pd.Series(data=[1, 2, 3, 3]), "TestColumn2")
-    assert accuracy(c1, c2) == 0.0
diff --git a/tests/safeds/ml/classification/metrics/_accuracy/__init__.py b/tests/safeds/ml/regression/_regressor/__init__.py
similarity index 100%
rename from tests/safeds/ml/classification/metrics/_accuracy/__init__.py
rename to tests/safeds/ml/regression/_regressor/__init__.py
diff --git a/tests/safeds/ml/regression/_regressor/_dummy_regressor.py b/tests/safeds/ml/regression/_regressor/_dummy_regressor.py
new file mode 100644
index 000000000..866a6ab6c
--- /dev/null
+++ b/tests/safeds/ml/regression/_regressor/_dummy_regressor.py
@@ -0,0 +1,26 @@
+from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.ml.regression import Regressor
+
+
+class DummyRegressor(Regressor):
+    """
+    Dummy regressor to test metrics.
+
+    Metrics methods expect a `TaggedTable` as input with two columns:
+
+    - `predicted`: The predicted targets.
+    - `expected`: The correct targets.
+
+    `target_name` must be set to `"expected"`.
+    """
+
+    def fit(self, training_set: TaggedTable) -> None:
+        pass
+
+    def predict(self, dataset: Table) -> TaggedTable:
+        # Needed until https://github.com/Safe-DS/Stdlib/issues/75 is fixed
+        predicted = dataset.get_column("predicted")
+        feature = predicted.rename("feature")
+        dataset = Table.from_columns([feature, predicted])
+
+        return TaggedTable(dataset, target_name="predicted")
diff --git a/tests/safeds/ml/regression/metrics/test_check_metrics_preconditions.py b/tests/safeds/ml/regression/_regressor/test_check_metrics_preconditions.py
similarity index 87%
rename from tests/safeds/ml/regression/metrics/test_check_metrics_preconditions.py
rename to tests/safeds/ml/regression/_regressor/test_check_metrics_preconditions.py
index 924deaff5..d12ac6da6 100644
--- a/tests/safeds/ml/regression/metrics/test_check_metrics_preconditions.py
+++ b/tests/safeds/ml/regression/_regressor/test_check_metrics_preconditions.py
@@ -2,9 +2,9 @@
 import pytest
 from safeds.data.tabular.containers import Column
 from safeds.exceptions import ColumnLengthMismatchError
-from safeds.ml.regression.metrics._module_level_functions import (
-    _check_metrics_preconditions,
-)
+
+# noinspection PyProtectedMember
+from safeds.ml.regression._regressor import _check_metrics_preconditions
 
 
 @pytest.mark.parametrize(
diff --git a/tests/safeds/ml/regression/_regressor/test_mean_absolute_error.py b/tests/safeds/ml/regression/_regressor/test_mean_absolute_error.py
new file mode 100644
index 000000000..adb752264
--- /dev/null
+++ b/tests/safeds/ml/regression/_regressor/test_mean_absolute_error.py
@@ -0,0 +1,26 @@
+import pytest
+from safeds.data.tabular.containers import Column, Table, TaggedTable
+
+from ._dummy_regressor import DummyRegressor
+
+
+@pytest.mark.parametrize(
+    "predicted, expected, result",
+    [
+        ([1, 2], [1, 2], 0),
+        ([0, 0], [1, 1], 1),
+        ([1, 1, 1], [2, 2, 11], 4),
+        ([0, 0, 0], [10, 2, 18], 10),
+        ([0.5, 0.5], [1.5, 1.5], 1),
+    ],
+)
+def test_mean_absolute_error_valid(
+    predicted: list[float], expected: list[float], result: float
+) -> None:
+    predicted_column = Column(predicted, "predicted")
+    expected_column = Column(expected, "expected")
+    table = TaggedTable(
+        Table.from_columns([predicted_column, expected_column]), target_name="expected"
+    )
+
+    assert DummyRegressor().mean_absolute_error(table) == result
diff --git a/tests/safeds/ml/regression/_regressor/test_mean_squared_error.py b/tests/safeds/ml/regression/_regressor/test_mean_squared_error.py
new file mode 100644
index 000000000..27c85e2de
--- /dev/null
+++ b/tests/safeds/ml/regression/_regressor/test_mean_squared_error.py
@@ -0,0 +1,20 @@
+import pytest
+from safeds.data.tabular.containers import Column, Table, TaggedTable
+
+from ._dummy_regressor import DummyRegressor
+
+
+@pytest.mark.parametrize(
+    "predicted, expected, result",
+    [([1, 2], [1, 2], 0), ([0, 0], [1, 1], 1), ([1, 1, 1], [2, 2, 11], 34)],
+)
+def test_mean_squared_error_valid(
+    predicted: list[float], expected: list[float], result: float
+) -> None:
+    predicted_column = Column(predicted, "predicted")
+    expected_column = Column(expected, "expected")
+    table = TaggedTable(
+        Table.from_columns([predicted_column, expected_column]), target_name="expected"
+    )
+
+    assert DummyRegressor().mean_squared_error(table) == result
diff --git a/tests/safeds/ml/regression/metrics/test_mean_absolute_error.py b/tests/safeds/ml/regression/metrics/test_mean_absolute_error.py
deleted file mode 100644
index e97367762..000000000
--- a/tests/safeds/ml/regression/metrics/test_mean_absolute_error.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import pandas as pd
-import pytest
-from safeds.data.tabular.containers import Column
-from safeds.ml.regression.metrics import mean_absolute_error
-
-
-@pytest.mark.parametrize(
-    "actual, expected, result",
-    [
-        ([1, 2], [1, 2], 0),
-        ([0, 0], [1, 1], 1),
-        ([1, 1, 1], [2, 2, 11], 4),
-        ([0, 0, 0], [10, 2, 18], 10),
-        ([0.5, 0.5], [1.5, 1.5], 1),
-    ],
-)
-def test_mean_absolute_error_valid(
-    actual: list[float], expected: list[float], result: float
-) -> None:
-    actual_column: Column = Column(pd.Series(actual), "actual")
-    expected_column: Column = Column(pd.Series(expected), "expected")
-    assert mean_absolute_error(actual_column, expected_column) == result
diff --git a/tests/safeds/ml/regression/metrics/test_mean_squared_error.py b/tests/safeds/ml/regression/metrics/test_mean_squared_error.py
deleted file mode 100644
index 19bd72c23..000000000
--- a/tests/safeds/ml/regression/metrics/test_mean_squared_error.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import pandas as pd
-import pytest
-from safeds.data.tabular.containers import Column
-from safeds.ml.regression.metrics import mean_squared_error
-
-
-@pytest.mark.parametrize(
-    "actual, expected, result",
-    [([1, 2], [1, 2], 0), ([0, 0], [1, 1], 1), ([1, 1, 1], [2, 2, 11], 34)],
-)
-def test_mean_squared_error_valid(
-    actual: list[float], expected: list[float], result: float
-) -> None:
-    actual_column: Column = Column(pd.Series(actual), "actual")
-    expected_column: Column = Column(pd.Series(expected), "expected")
-    assert mean_squared_error(actual_column, expected_column) == result
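
Usage sketch for the new `Classifier.accuracy` instance method. It mirrors the DummyClassifier helper above (a stand-in whose predict() simply echoes a "predicted" column), so the `EchoClassifier` name and the sample values are illustrative assumptions, not part of the library; any concrete `Classifier` whose `predict` yields a `TaggedTable` is evaluated the same way.

    import pandas as pd
    from safeds.data.tabular.containers import Column, Table, TaggedTable
    from safeds.ml.classification import Classifier


    class EchoClassifier(Classifier):
        """Illustrative stand-in (same trick as DummyClassifier): predict() echoes the 'predicted' column."""

        def fit(self, training_set: TaggedTable) -> None:
            pass

        def predict(self, dataset: Table) -> TaggedTable:
            predicted = dataset.get_column("predicted")
            feature = predicted.rename("feature")
            return TaggedTable(Table.from_columns([feature, predicted]), target_name="predicted")


    # Build a small validation set: three of the four expected targets match the predictions.
    predicted = Column(pd.Series(data=[1, 2, 3, 4]), "predicted")
    expected = Column(pd.Series(data=[1, 2, 3, 3]), "expected")
    validation_set = TaggedTable(Table.from_columns([predicted, expected]), target_name="expected")

    print(EchoClassifier().accuracy(validation_set))  # 0.75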
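
The regressor metrics follow the same pattern; a sketch under the same assumptions (the `EchoRegressor` helper is hypothetical, modeled on DummyRegressor above, and the values come from the parametrized test cases):

    from safeds.data.tabular.containers import Column, Table, TaggedTable
    from safeds.ml.regression import Regressor


    class EchoRegressor(Regressor):
        """Illustrative stand-in: predict() echoes the 'predicted' column, like DummyRegressor."""

        def fit(self, training_set: TaggedTable) -> None:
            pass

        def predict(self, dataset: Table) -> TaggedTable:
            predicted = dataset.get_column("predicted")
            feature = predicted.rename("feature")
            return TaggedTable(Table.from_columns([feature, predicted]), target_name="predicted")


    # Both columns are numeric and equally long, so _check_metrics_preconditions passes.
    predicted = Column([1, 1, 1], "predicted")
    expected = Column([2, 2, 11], "expected")
    test_set = TaggedTable(Table.from_columns([predicted, expected]), target_name="expected")

    print(EchoRegressor().mean_absolute_error(test_set))  # 4.0  = (1 + 1 + 10) / 3
    print(EchoRegressor().mean_squared_error(test_set))   # 34.0 = (1 + 1 + 100) / 3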