feat: metrics as methods of models #77

Merged · 3 commits · Mar 24, 2023
23 changes: 23 additions & 0 deletions src/safeds/ml/classification/_classifier.py
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod

from safeds.data.tabular.containers import Table, TaggedTable
from sklearn.metrics import accuracy_score as sk_accuracy_score


class Classifier(ABC):
@@ -44,3 +45,25 @@ def predict(self, dataset: Table) -> TaggedTable:
PredictionError
If prediction with the given dataset failed.
"""

def accuracy(self, validation_or_test_set: TaggedTable) -> float:
"""
Predict the target values for the features in the validation or test set and compare them to the expected
results.

Parameters
----------
validation_or_test_set : TaggedTable
The validation or test set.

Returns
-------
accuracy : float
The calculated accuracy score, i.e. the fraction of predictions that match the expected target values.
"""

expected = validation_or_test_set.target
predicted = self.predict(validation_or_test_set.features).target

# noinspection PyProtectedMember
return sk_accuracy_score(expected._data, predicted._data)
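For context, a minimal usage sketch of the new accuracy method, modelled on the DummyClassifier test helper added later in this PR (the import path is only illustrative; any concrete Classifier exposes the method the same way):

import pandas as pd
from safeds.data.tabular.containers import Column, Table, TaggedTable

# Illustrative import: DummyClassifier is the test helper defined further down in this PR.
from tests.safeds.ml.classification._classifier._dummy_classifier import DummyClassifier

# The validation set tags the "expected" column as the target; the remaining
# "predicted" column becomes the feature that DummyClassifier echoes back.
predicted = Column(pd.Series(data=[1, 2, 3, 4]), "predicted")
expected = Column(pd.Series(data=[1, 2, 3, 3]), "expected")
validation_set = TaggedTable(Table.from_columns([predicted, expected]), target_name="expected")

# Three of the four predictions match the expected targets, so this prints 0.75.
print(DummyClassifier().accuracy(validation_set))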
1 change: 0 additions & 1 deletion src/safeds/ml/classification/metrics/__init__.py

This file was deleted.

21 changes: 0 additions & 21 deletions src/safeds/ml/classification/metrics/_module_level_functions.py

This file was deleted.

61 changes: 60 additions & 1 deletion src/safeds/ml/regression/_regressor.py
@@ -1,6 +1,9 @@
from abc import ABC, abstractmethod

-from safeds.data.tabular.containers import Table, TaggedTable
+from safeds.data.tabular.containers import Column, Table, TaggedTable
from safeds.exceptions import ColumnLengthMismatchError
from sklearn.metrics import mean_absolute_error as sk_mean_absolute_error
from sklearn.metrics import mean_squared_error as sk_mean_squared_error


class Regressor(ABC):
@@ -44,3 +47,59 @@ def predict(self, dataset: Table) -> TaggedTable:
PredictionError
If prediction with the given dataset failed.
"""

def mean_squared_error(self, validation_or_test_set: TaggedTable) -> float:
"""
Return the mean squared error, calculated by comparing the predicted target values with the expected ones in the given validation or test set.

Parameters
----------
validation_or_test_set : TaggedTable
The validation or test set.

Returns
-------
mean_squared_error : float
The calculated mean squared error (the average of the squared differences between predicted and expected target values).
"""

expected = validation_or_test_set.target
predicted = self.predict(validation_or_test_set.features).target

_check_metrics_preconditions(predicted, expected)
return sk_mean_squared_error(expected._data, predicted._data)

def mean_absolute_error(self, validation_or_test_set: TaggedTable) -> float:
"""
Return the mean absolute error, calculated by comparing the predicted target values with the expected ones in the given validation or test set.

Parameters
----------
validation_or_test_set : TaggedTable
The validation or test set.

Returns
-------
mean_absolute_error : float
The calculated mean absolute error (the average of the absolute differences between predicted and expected target values).
"""

expected = validation_or_test_set.target
predicted = self.predict(validation_or_test_set.features).target

_check_metrics_preconditions(predicted, expected)
return sk_mean_absolute_error(expected._data, predicted._data)


def _check_metrics_preconditions(actual: Column, expected: Column) -> None:
if not actual.type.is_numeric():
raise TypeError(f"Column 'actual' is not numerical but {actual.type}.")
if not expected.type.is_numeric():
raise TypeError(f"Column 'expected' is not numerical but {expected.type}.")

if actual._data.size != expected._data.size:
raise ColumnLengthMismatchError(
"\n".join(
[f"{column.name}: {column._data.size}" for column in [actual, expected]]
)
)
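A minimal usage sketch of the two regression metrics, again based on the DummyRegressor test helper added later in this PR (import path illustrative):

from safeds.data.tabular.containers import Column, Table, TaggedTable

# Illustrative import: DummyRegressor is the test helper defined further down in this PR.
from tests.safeds.ml.regression._regressor._dummy_regressor import DummyRegressor

predicted = Column([1, 1, 1], "predicted")
expected = Column([2, 2, 11], "expected")
validation_set = TaggedTable(Table.from_columns([predicted, expected]), target_name="expected")

regressor = DummyRegressor()
print(regressor.mean_squared_error(validation_set))   # (1 + 1 + 100) / 3 = 34.0
print(regressor.mean_absolute_error(validation_set))  # (1 + 1 + 10) / 3 = 4.0

Both methods run _check_metrics_preconditions first, so non-numeric targets raise TypeError and targets of different lengths raise ColumnLengthMismatchError before scikit-learn is called.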
1 change: 0 additions & 1 deletion src/safeds/ml/regression/metrics/__init__.py

This file was deleted.

58 changes: 0 additions & 58 deletions src/safeds/ml/regression/metrics/_module_level_functions.py

This file was deleted.

26 changes: 26 additions & 0 deletions tests/safeds/ml/classification/_classifier/_dummy_classifier.py
@@ -0,0 +1,26 @@
from safeds.data.tabular.containers import Table, TaggedTable
from safeds.ml.classification import Classifier


class DummyClassifier(Classifier):
"""
Dummy classifier to test metrics.

Metrics methods expect a `TaggedTable` as input with two columns:

- `predicted`: The predicted targets.
- `expected`: The correct targets.

`target_name` must be set to `"expected"`.
"""

def fit(self, training_set: TaggedTable) -> None:
pass

def predict(self, dataset: Table) -> TaggedTable:
# Needed until https://github.com/Safe-DS/Stdlib/issues/75 is fixed
predicted = dataset.get_column("predicted")
feature = predicted.rename("feature")
dataset = Table.from_columns([feature, predicted])

return TaggedTable(dataset, target_name="predicted")
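A short sketch of the behaviour this workaround produces (values illustrative): predict simply echoes the "predicted" column back as the target, which keeps the metric tests below deterministic.

import pandas as pd
from safeds.data.tabular.containers import Column, Table

# Uses the DummyClassifier defined above.
features = Table.from_columns([Column(pd.Series(data=[1, 2, 3]), "predicted")])
tagged = DummyClassifier().predict(features)

# The returned TaggedTable uses the echoed "predicted" column as its target,
# which is exactly what Classifier.accuracy compares against the expected targets.
print(tagged.target)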
20 changes: 20 additions & 0 deletions tests/safeds/ml/classification/_classifier/test_accuracy.py
@@ -0,0 +1,20 @@
import pandas as pd
from safeds.data.tabular.containers import Column, Table, TaggedTable

from ._dummy_classifier import DummyClassifier


def test_accuracy() -> None:
c1 = Column(pd.Series(data=[1, 2, 3, 4]), "predicted")
c2 = Column(pd.Series(data=[1, 2, 3, 3]), "expected")
table = TaggedTable(Table.from_columns([c1, c2]), target_name="expected")

assert DummyClassifier().accuracy(table) == 0.75


def test_accuracy_different_types() -> None:
c1 = Column(pd.Series(data=["1", "2", "3", "4"]), "predicted")
c2 = Column(pd.Series(data=[1, 2, 3, 3]), "expected")
table = TaggedTable(Table.from_columns([c1, c2]), target_name="expected")

assert DummyClassifier().accuracy(table) == 0.0

This file was deleted.

26 changes: 26 additions & 0 deletions tests/safeds/ml/regression/_regressor/_dummy_regressor.py
@@ -0,0 +1,26 @@
from safeds.data.tabular.containers import Table, TaggedTable
from safeds.ml.regression import Regressor


class DummyRegressor(Regressor):
"""
Dummy regressor to test metrics.

Metrics methods expect a `TaggedTable` as input with two columns:

- `predicted`: The predicted targets.
- `expected`: The correct targets.

`target_name` must be set to `"expected"`.
"""

def fit(self, training_set: TaggedTable) -> None:
pass

def predict(self, dataset: Table) -> TaggedTable:
# Needed until https://github.com/Safe-DS/Stdlib/issues/75 is fixed
predicted = dataset.get_column("predicted")
feature = predicted.rename("feature")
dataset = Table.from_columns([feature, predicted])

return TaggedTable(dataset, target_name="predicted")
@@ -2,9 +2,9 @@
import pytest
from safeds.data.tabular.containers import Column
from safeds.exceptions import ColumnLengthMismatchError
-from safeds.ml.regression.metrics._module_level_functions import (
-    _check_metrics_preconditions,
-)
+# noinspection PyProtectedMember
+from safeds.ml.regression._regressor import _check_metrics_preconditions


@pytest.mark.parametrize(
@@ -0,0 +1,26 @@
import pytest
from safeds.data.tabular.containers import Column, Table, TaggedTable

from ._dummy_regressor import DummyRegressor


@pytest.mark.parametrize(
"predicted, expected, result",
[
([1, 2], [1, 2], 0),
([0, 0], [1, 1], 1),
([1, 1, 1], [2, 2, 11], 4),
([0, 0, 0], [10, 2, 18], 10),
([0.5, 0.5], [1.5, 1.5], 1),
],
)
def test_mean_absolute_error_valid(
predicted: list[float], expected: list[float], result: float
) -> None:
predicted_column = Column(predicted, "predicted")
expected_column = Column(expected, "expected")
table = TaggedTable(
Table.from_columns([predicted_column, expected_column]), target_name="expected"
)

assert DummyRegressor().mean_absolute_error(table) == result
@@ -0,0 +1,20 @@
import pytest
from safeds.data.tabular.containers import Column, Table, TaggedTable

from ._dummy_regressor import DummyRegressor


@pytest.mark.parametrize(
"predicted, expected, result",
[([1, 2], [1, 2], 0), ([0, 0], [1, 1], 1), ([1, 1, 1], [2, 2, 11], 34)],
)
def test_mean_squared_error_valid(
predicted: list[float], expected: list[float], result: float
) -> None:
predicted_column = Column(predicted, "predicted")
expected_column = Column(expected, "expected")
table = TaggedTable(
Table.from_columns([predicted_column, expected_column]), target_name="expected"
)

assert DummyRegressor().mean_squared_error(table) == result
22 changes: 0 additions & 22 deletions tests/safeds/ml/regression/metrics/test_mean_absolute_error.py

This file was deleted.

16 changes: 0 additions & 16 deletions tests/safeds/ml/regression/metrics/test_mean_squared_error.py

This file was deleted.