Skip to content

Commit

Permalink
feat: hyperparameter optimization for RNNs and CNNs (#923)
Browse files Browse the repository at this point in the history
Closes #912 

### Summary of Changes

New Features:
- fit_by_exhaustive_search now also works for RNNs and CNNs
- ClassificationMetrics and RegressionMetrics now also work for
TimeSeriesDatasets, even when the predictions are of type list.
- Raise an error when classifying time series data with continuous = True,
as we don't currently support this use case (since classification
and continuous predictions each require the number of neurons in the last
layer to be set differently, according to either the number of classes
or the number of values to predict; the output would have to be
2-dimensional)

---------

Co-authored-by: megalinter-bot <[email protected]>
  • Loading branch information
sibre28 and megalinter-bot authored Aug 28, 2024
1 parent ccb60c3 commit b1e8933
Show file tree
Hide file tree
Showing 5 changed files with 1,015 additions and 76 deletions.
2 changes: 1 addition & 1 deletion src/safeds/data/labeled/containers/_time_series_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def _into_dataloader_with_window(
batch_size:
The size of data batches that should be loaded at one time.
continuous:
Whether or not to continue the forecast in the steps before forecast horizon.
Whether to continue the forecast in the steps before forecast horizon.
Raises
------
Expand Down
42 changes: 29 additions & 13 deletions src/safeds/ml/metrics/_classification_metrics.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any
from typing import Any

from safeds.data.labeled.containers import TabularDataset
from safeds.data.tabular.containers import Table
from safeds.data.labeled.containers import TabularDataset, TimeSeriesDataset
from safeds.data.tabular.containers import Column, Table
from safeds.exceptions import ColumnLengthMismatchError

if TYPE_CHECKING:
from safeds.data.tabular.containers import Column


class ClassificationMetrics(ABC):
"""A collection of classification metrics."""
Expand All @@ -18,7 +15,11 @@ class ClassificationMetrics(ABC):
def __init__(self) -> None: ...

@staticmethod
def summarize(predicted: Column | TabularDataset, expected: Column | TabularDataset, positive_class: Any) -> Table:
def summarize(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
positive_class: Any,
) -> Table:
"""
Summarize classification metrics on the given data.
Expand Down Expand Up @@ -53,7 +54,10 @@ def summarize(predicted: Column | TabularDataset, expected: Column | TabularData
)

@staticmethod
def accuracy(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def accuracy(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the accuracy on the given data.
Expand Down Expand Up @@ -87,7 +91,11 @@ def accuracy(predicted: Column | TabularDataset, expected: Column | TabularDatas
return 0.0 # Types are not compatible, so no prediction can be correct

@staticmethod
def f1_score(predicted: Column | TabularDataset, expected: Column | TabularDataset, positive_class: Any) -> float:
def f1_score(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
positive_class: Any,
) -> float:
"""
Compute the F₁ score on the given data.
Expand Down Expand Up @@ -122,7 +130,11 @@ def f1_score(predicted: Column | TabularDataset, expected: Column | TabularDatas
return 2 * true_positives / (2 * true_positives + false_positives + false_negatives)

@staticmethod
def precision(predicted: Column | TabularDataset, expected: Column | TabularDataset, positive_class: Any) -> float:
def precision(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
positive_class: Any,
) -> float:
"""
Compute the precision on the given data.
Expand Down Expand Up @@ -156,7 +168,11 @@ def precision(predicted: Column | TabularDataset, expected: Column | TabularData
return true_positives / predicted_positives

@staticmethod
def recall(predicted: Column | TabularDataset, expected: Column | TabularDataset, positive_class: Any) -> float:
def recall(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
positive_class: Any,
) -> float:
"""
Compute the recall on the given data.
Expand Down Expand Up @@ -190,9 +206,9 @@ def recall(predicted: Column | TabularDataset, expected: Column | TabularDataset
return true_positives / actual_positives


def _extract_target(column_or_dataset: Column | TabularDataset) -> Column:
def _extract_target(column_or_dataset: Column | TabularDataset | TimeSeriesDataset) -> Column:
"""Extract the target column from the given column or dataset."""
if isinstance(column_or_dataset, TabularDataset):
if isinstance(column_or_dataset, TabularDataset | TimeSeriesDataset):
return column_or_dataset.target
else:
return column_or_dataset
Expand Down
92 changes: 83 additions & 9 deletions src/safeds/ml/metrics/_regression_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from abc import ABC, abstractmethod

from safeds.data.labeled.containers import TabularDataset
from safeds.data.labeled.containers import TabularDataset, TimeSeriesDataset
from safeds.data.tabular.containers import Column, Table
from safeds.exceptions import ColumnLengthMismatchError

Expand All @@ -14,7 +14,10 @@ class RegressionMetrics(ABC):
def __init__(self) -> None: ...

@staticmethod
def summarize(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> Table:
def summarize(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> Table:
"""
Summarize regression metrics on the given data.
Expand Down Expand Up @@ -57,7 +60,10 @@ def summarize(predicted: Column | TabularDataset, expected: Column | TabularData
)

@staticmethod
def coefficient_of_determination(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def coefficient_of_determination(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the coefficient of determination (R²) on the given data.
Expand Down Expand Up @@ -92,6 +98,20 @@ def coefficient_of_determination(predicted: Column | TabularDataset, expected: C
predicted = _extract_target(predicted)
_check_equal_length(predicted, expected)

# For TimeSeries Predictions, where the output is a list of values.
# Expected results are internally converted to a column containing multiple Columns for each prediction window
# Currently only used in fit_by_exhaustive_search, where prediction metrics have to be calculated internally.
if isinstance(expected.get_value(0), Column):
sum_of_coefficient_of_determination = 0.0
for i in range(expected.row_count):
predicted_row_as_col: Column = Column("predicted", predicted[i])
expected_row_as_col = expected.get_value(i)
sum_of_coefficient_of_determination += RegressionMetrics.coefficient_of_determination(
predicted_row_as_col,
expected_row_as_col,
)
return sum_of_coefficient_of_determination / expected.row_count

residual_sum_of_squares = (expected._series - predicted._series).pow(2).sum()
total_sum_of_squares = (expected._series - expected._series.mean()).pow(2).sum()

Expand All @@ -104,7 +124,10 @@ def coefficient_of_determination(predicted: Column | TabularDataset, expected: C
return 1 - residual_sum_of_squares / total_sum_of_squares

@staticmethod
def mean_absolute_error(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def mean_absolute_error(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the mean absolute error (MAE) on the given data.
Expand All @@ -131,10 +154,27 @@ def mean_absolute_error(predicted: Column | TabularDataset, expected: Column | T
if expected.row_count == 0:
return 0.0 # Everything was predicted correctly (since there is nothing to predict)

# For TimeSeries Predictions, where the output is a list of values.
# Expected results are internally converted to a column containing multiple Columns for each prediction window
# Currently only used in fit_by_exhaustive_search, where prediction metrics have to be calculated internally.
if isinstance(expected.get_value(0), Column):
sum_of_mean_absolute_errors = 0.0
for i in range(expected.row_count):
predicted_row_as_col: Column = Column("predicted", predicted[i])
expected_row_as_col = expected.get_value(i)
sum_of_mean_absolute_errors += RegressionMetrics.mean_absolute_error(
predicted_row_as_col,
expected_row_as_col,
)
return sum_of_mean_absolute_errors / expected.row_count

return (expected._series - predicted._series).abs().mean()

@staticmethod
def mean_directional_accuracy(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def mean_directional_accuracy(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the mean directional accuracy (MDA) on the given data.
Expand Down Expand Up @@ -172,7 +212,10 @@ def mean_directional_accuracy(predicted: Column | TabularDataset, expected: Colu
return predicted_directions.eq(expected_directions).mean()

@staticmethod
def mean_squared_error(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def mean_squared_error(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the mean squared error (MSE) on the given data.
Expand Down Expand Up @@ -201,10 +244,27 @@ def mean_squared_error(predicted: Column | TabularDataset, expected: Column | Ta
if expected.row_count == 0:
return 0.0 # Everything was predicted correctly (since there is nothing to predict)

# For TimeSeries Predictions, where the output is a list of values.
# Expected results are internally converted to a column containing multiple Columns for each prediction window
# Currently only used in fit_by_exhaustive_search, where prediction metrics have to be calculated internally.
if isinstance(expected.get_value(0), Column):
sum_of_mean_squared_errors = 0.0
for i in range(expected.row_count):
predicted_row_as_col: Column = Column("predicted", predicted[i])
expected_row_as_col = expected.get_value(i)
sum_of_mean_squared_errors += RegressionMetrics.mean_squared_error(
predicted_row_as_col,
expected_row_as_col,
)
return sum_of_mean_squared_errors / expected.row_count

return (expected._series - predicted._series).pow(2).mean()

@staticmethod
def median_absolute_deviation(predicted: Column | TabularDataset, expected: Column | TabularDataset) -> float:
def median_absolute_deviation(
predicted: Column | TabularDataset | TimeSeriesDataset,
expected: Column | TabularDataset | TimeSeriesDataset,
) -> float:
"""
Compute the median absolute deviation (MAD) on the given data.
Expand All @@ -231,12 +291,26 @@ def median_absolute_deviation(predicted: Column | TabularDataset, expected: Colu
if expected.row_count == 0:
return 0.0

# For TimeSeries Predictions, where the output is a list of values.
# Expected results are internally converted to a column containing multiple Columns for each prediction window
# Currently only used in fit_by_exhaustive_search, where prediction metrics have to be calculated internally.
if isinstance(expected.get_value(0), Column):
sum_of_median_absolute_deviation = 0.0
for i in range(expected.row_count):
predicted_row_as_col: Column = Column("predicted", predicted[i])
expected_row_as_col = expected.get_value(i)
sum_of_median_absolute_deviation += RegressionMetrics.median_absolute_deviation(
predicted_row_as_col,
expected_row_as_col,
)
return sum_of_median_absolute_deviation / expected.row_count

return (expected._series - predicted._series).abs().median()


def _extract_target(column_or_dataset: Column | TabularDataset) -> Column:
def _extract_target(column_or_dataset: Column | TabularDataset | TimeSeriesDataset) -> Column:
"""Extract the target column from the given column or dataset."""
if isinstance(column_or_dataset, TabularDataset):
if isinstance(column_or_dataset, TabularDataset | TimeSeriesDataset):
return column_or_dataset.target
else:
return column_or_dataset
Expand Down
Loading

0 comments on commit b1e8933

Please sign in to comment.