diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 446a2359b..cb5b05e84 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -2418,7 +2418,7 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): # def _into_dataloader(self, batch_size: int) -> DataLoader: """ - Return a Dataloader for the data stored in this table, used for training neural networks. + Return a Dataloader for the data stored in this table, used for predicting with neural networks. The original table is not modified. diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 035a4373d..1e55f6961 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,8 +3,8 @@ import sys from typing import TYPE_CHECKING -import numpy as np import torch +from torch import Tensor from torch.utils.data import DataLoader, Dataset from safeds._utils import _structural_hash @@ -876,7 +876,7 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg feature_names=self.features.column_names, ) - def _into_dataloader(self, batch_size: int) -> DataLoader: + def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> DataLoader: """ Return a Dataloader for the data stored in this table, used for training neural networks. @@ -893,24 +893,35 @@ def _into_dataloader(self, batch_size: int) -> DataLoader: The DataLoader. """ - feature_rows = self.features.to_rows() - all_rows = [] - for row in feature_rows: - new_item = [] - for column_name in row: - new_item.append(row.get_value(column_name)) - all_rows.append(new_item.copy()) - return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size) + if num_of_classes <= 2: + return DataLoader( + dataset=_CustomDataset( + torch.Tensor(self.features._data.values), + torch.Tensor(self.target._data).unsqueeze(dim=-1), + ), + batch_size=batch_size, + shuffle=True, + ) + else: + return DataLoader( + dataset=_CustomDataset( + torch.Tensor(self.features._data.values), + torch.nn.functional.one_hot(torch.LongTensor(self.target._data), num_classes=num_of_classes), + ), + batch_size=batch_size, + shuffle=True, + ) class _CustomDataset(Dataset): - def __init__(self, features: np.array, target: np.array): - self.X = torch.from_numpy(features.astype(np.float32)) - self.Y = torch.from_numpy(target.astype(np.float32)) - self.len = self.X.shape[0] + + def __init__(self, features: Tensor, target: Tensor): + self.X = features.to(torch.float32) + self.Y = target.to(torch.float32) + self.len = self.X.size(dim=0) def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]: - return self.X[item], self.Y[item].unsqueeze(-1) + return self.X[item], self.Y[item] def __len__(self) -> int: return self.len diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 5f8fa74ee..7ba77798f 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -27,10 +27,12 @@ DatasetContainsTargetError, DatasetMissesDataError, DatasetMissesFeaturesError, + InputSizeError, LearningError, ModelNotFittedError, NonTimeSeriesError, PredictionError, + TestTrainDataMismatchError, UntaggedTableError, ) @@ -57,10 +59,12 @@ "DatasetContainsTargetError", "DatasetMissesDataError", "DatasetMissesFeaturesError", + "InputSizeError", "LearningError", "ModelNotFittedError", "NonTimeSeriesError", "PredictionError", + "TestTrainDataMismatchError", "UntaggedTableError", # Other "Bound", diff --git a/src/safeds/exceptions/_ml.py b/src/safeds/exceptions/_ml.py index 933d52a75..4512bd34f 100644 --- a/src/safeds/exceptions/_ml.py +++ b/src/safeds/exceptions/_ml.py @@ -68,6 +68,24 @@ def __init__(self, reason: str): super().__init__(f"Error occurred while predicting: {reason}") +class TestTrainDataMismatchError(Exception): + """Raised when the columns of the table passed to the predict method do not match with the feature columns of the training data.""" + + def __init__(self) -> None: + super().__init__( + "The column names in the test table do not match with the feature columns names of the training data.", + ) + + +class InputSizeError(Exception): + """Raised when the amount of features being passed to a network does not match with its input size.""" + + def __init__(self, table_size: int, input_layer_size: int) -> None: + super().__init__( + f"The amount of columns being passed to the network({table_size}) does not match with its input size({input_layer_size}). Consider changing the number of neurons in the first layer or reformatting the table.", + ) + + class UntaggedTableError(Exception): """Raised when an untagged table is used instead of a TaggedTable in a regression or classification.""" diff --git a/src/safeds/ml/nn/__init__.py b/src/safeds/ml/nn/__init__.py index 53b1f98d4..9481e591e 100644 --- a/src/safeds/ml/nn/__init__.py +++ b/src/safeds/ml/nn/__init__.py @@ -1,10 +1,10 @@ """Classes for classification tasks.""" -from ._fnn_layer import FNNLayer +from ._forward_layer import ForwardLayer from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor __all__ = [ - "FNNLayer", + "ForwardLayer", "NeuralNetworkClassifier", "NeuralNetworkRegressor", ] diff --git a/src/safeds/ml/nn/_fnn_layer.py b/src/safeds/ml/nn/_forward_layer.py similarity index 83% rename from src/safeds/ml/nn/_fnn_layer.py rename to src/safeds/ml/nn/_forward_layer.py index a74df8ff3..8164f9e6c 100644 --- a/src/safeds/ml/nn/_fnn_layer.py +++ b/src/safeds/ml/nn/_forward_layer.py @@ -1,6 +1,7 @@ -from torch import nn +from torch import Tensor, nn from safeds.exceptions import ClosedBound, OutOfBoundsError +from safeds.ml.nn._layer import Layer class _InternalLayer(nn.Module): @@ -17,11 +18,11 @@ def __init__(self, input_size: int, output_size: int, activation_function: str): case _: raise ValueError("Unknown Activation Function: " + activation_function) - def forward(self, x: float) -> float: + def forward(self, x: Tensor) -> Tensor: return self._fn(self._layer(x)) -class FNNLayer: +class ForwardLayer(Layer): def __init__(self, output_size: int, input_size: int | None = None): """ Create a FNN Layer. @@ -49,6 +50,18 @@ def __init__(self, output_size: int, input_size: int | None = None): def _get_internal_layer(self, activation_function: str) -> _InternalLayer: return _InternalLayer(self._input_size, self._output_size, activation_function) + @property + def input_size(self) -> int: + """ + Get the input_size of this layer. + + Returns + ------- + result : + The amount of values being passed into this layer. + """ + return self._input_size + @property def output_size(self) -> int: """ diff --git a/src/safeds/ml/nn/_layer.py b/src/safeds/ml/nn/_layer.py new file mode 100644 index 000000000..a2ac00d87 --- /dev/null +++ b/src/safeds/ml/nn/_layer.py @@ -0,0 +1,27 @@ +from abc import ABC, abstractmethod + +from torch import nn + + +class Layer(ABC): + @abstractmethod + def __init__(self) -> None: + pass # pragma: no cover + + @abstractmethod + def _get_internal_layer(self, activation_function: str) -> nn.Module: + pass # pragma: no cover + + @property + @abstractmethod + def input_size(self) -> int: + pass # pragma: no cover + + @property + @abstractmethod + def output_size(self) -> int: + pass # pragma: no cover + + @abstractmethod + def _set_input_size(self, input_size: int) -> None: + pass # pragma: no cover diff --git a/src/safeds/ml/nn/_model.py b/src/safeds/ml/nn/_model.py index 2eaece27f..aa1690ab5 100644 --- a/src/safeds/ml/nn/_model.py +++ b/src/safeds/ml/nn/_model.py @@ -6,21 +6,32 @@ from torch import Tensor, nn from safeds.data.tabular.containers import Column, Table, TaggedTable -from safeds.exceptions import ClosedBound, ModelNotFittedError, OutOfBoundsError -from safeds.ml.nn._fnn_layer import FNNLayer +from safeds.exceptions import ( + ClosedBound, + InputSizeError, + ModelNotFittedError, + OutOfBoundsError, + TestTrainDataMismatchError, +) +from safeds.ml.nn._layer import Layer class NeuralNetworkRegressor: - def __init__(self, layers: list): - self._model = _PytorchModel(layers, is_for_classification=False) + def __init__(self, layers: list[Layer]): + self._model = _InternalModel(layers, is_for_classification=False) + self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False + self._feature_names: None | list[str] = None + self._total_number_of_batches_done = 0 + self._total_number_of_epochs_done = 0 def fit( self, train_data: TaggedTable, epoch_size: int = 25, batch_size: int = 1, + learning_rate: float = 0.001, callback_on_batch_completion: Callable[[int, float], None] | None = None, callback_on_epoch_completion: Callable[[int, float], None] | None = None, ) -> Self: @@ -37,6 +48,8 @@ def fit( The number of times the training cycle should be done. batch_size The size of data batches that should be loaded at one time. + learning_rate + The learning rate of the neural network. callback_on_batch_completion Function used to view metrics while training. Gets called after a batch is completed with the index of the last batch and the overall loss average. callback_on_epoch_completion @@ -57,33 +70,44 @@ def fit( raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) + if train_data.features.number_of_columns is not self._input_size: + raise InputSizeError(train_data.features.number_of_columns, self._input_size) + copied_model = copy.deepcopy(self) + + copied_model._feature_names = train_data.features.column_names copied_model._batch_size = batch_size - dataloader = train_data._into_dataloader(copied_model._batch_size) + + dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, 1) loss_fn = nn.MSELoss() - optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=0.05) - loss_sum = 0.0 - number_of_batches_done = 0 - for epoch in range(epoch_size): - for x, y in dataloader: + optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate) + for _ in range(epoch_size): + loss_sum = 0.0 + amount_of_loss_values_calculated = 0 + for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) loss = loss_fn(pred, y) loss_sum += loss.item() + amount_of_loss_values_calculated += 1 loss.backward() optimizer.step() - number_of_batches_done += 1 + copied_model._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( - number_of_batches_done, - loss_sum / (number_of_batches_done * batch_size), + copied_model._total_number_of_batches_done, + loss_sum / amount_of_loss_values_calculated, ) + copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: - callback_on_epoch_completion(epoch + 1, loss_sum / (number_of_batches_done * batch_size)) + callback_on_epoch_completion( + copied_model._total_number_of_epochs_done, + loss_sum / amount_of_loss_values_calculated, + ) copied_model._is_fitted = True copied_model._model.eval() return copied_model @@ -111,13 +135,16 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError + if not (sorted(test_data.column_names)).__eq__( + sorted(self._feature_names) if self._feature_names is not None else None, + ): + raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] with torch.no_grad(): for x in dataloader: elem = self._model(x) - for item in range(len(elem)): - predictions.append(elem[item].item()) + predictions += elem.squeeze(dim=1).tolist() return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") @property @@ -134,17 +161,22 @@ def is_fitted(self) -> bool: class NeuralNetworkClassifier: - def __init__(self, layers: list[FNNLayer]): - self._model = _PytorchModel(layers, is_for_classification=True) + def __init__(self, layers: list[Layer]): + self._model = _InternalModel(layers, is_for_classification=True) + self._input_size = self._model.input_size self._batch_size = 1 self._is_fitted = False - self._is_multi_class = layers[-1].output_size > 1 + self._num_of_classes = layers[-1].output_size + self._feature_names: None | list[str] = None + self._total_number_of_batches_done = 0 + self._total_number_of_epochs_done = 0 def fit( self, train_data: TaggedTable, epoch_size: int = 25, batch_size: int = 1, + learning_rate: float = 0.001, callback_on_batch_completion: Callable[[int, float], None] | None = None, callback_on_epoch_completion: Callable[[int, float], None] | None = None, ) -> Self: @@ -161,6 +193,8 @@ def fit( The number of times the training cycle should be done. batch_size The size of data batches that should be loaded at one time. + learning_rate + The learning rate of the neural network. callback_on_batch_completion Function used to view metrics while training. Gets called after a batch is completed with the index of the last batch and the overall loss average. callback_on_epoch_completion @@ -181,51 +215,47 @@ def fit( raise OutOfBoundsError(actual=epoch_size, name="epoch_size", lower_bound=ClosedBound(1)) if batch_size < 1: raise OutOfBoundsError(actual=batch_size, name="batch_size", lower_bound=ClosedBound(1)) + if train_data.features.number_of_columns is not self._input_size: + raise InputSizeError(train_data.features.number_of_columns, self._input_size) + copied_model = copy.deepcopy(self) + + copied_model._feature_names = train_data.features.column_names copied_model._batch_size = batch_size - dataloader = train_data._into_dataloader(copied_model._batch_size) - if self._is_multi_class: + dataloader = train_data._into_dataloader_with_classes(copied_model._batch_size, copied_model._num_of_classes) + + if copied_model._num_of_classes > 1: loss_fn = nn.CrossEntropyLoss() else: loss_fn = nn.BCELoss() - optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=0.05) - loss_sum = 0.0 - number_of_batches_done = 0 - for epoch in range(epoch_size): - for x, y in dataloader: + optimizer = torch.optim.SGD(copied_model._model.parameters(), lr=learning_rate) + for _ in range(epoch_size): + loss_sum = 0.0 + amount_of_loss_values_calculated = 0 + for x, y in iter(dataloader): optimizer.zero_grad() pred = copied_model._model(x) - if self._is_multi_class: - pred_size = Tensor.size(pred, dim=1) - predictions_for_all_items_of_batch = [] - for value in range(len(y)): - list_of_probabilities_for_each_category = [] - class_index = y[value].item() - for index in range(pred_size): - if index is int(class_index): - list_of_probabilities_for_each_category.append(1.0) - else: - list_of_probabilities_for_each_category.append(0.0) - predictions_for_all_items_of_batch.append(list_of_probabilities_for_each_category.copy()) - - y_reshaped_as_tensor_to_fit_format_of_pred = torch.tensor(predictions_for_all_items_of_batch) - - loss = loss_fn(pred, y_reshaped_as_tensor_to_fit_format_of_pred) - else: - loss = loss_fn(pred, y) + + loss = loss_fn(pred, y) loss_sum += loss.item() + amount_of_loss_values_calculated += 1 loss.backward() optimizer.step() - number_of_batches_done += 1 + + copied_model._total_number_of_batches_done += 1 if callback_on_batch_completion is not None: callback_on_batch_completion( - number_of_batches_done, - loss_sum / (number_of_batches_done * batch_size), + copied_model._total_number_of_batches_done, + loss_sum / amount_of_loss_values_calculated, ) + copied_model._total_number_of_epochs_done += 1 if callback_on_epoch_completion is not None: - callback_on_epoch_completion(epoch + 1, loss_sum / (number_of_batches_done * batch_size)) + callback_on_epoch_completion( + copied_model._total_number_of_epochs_done, + loss_sum / amount_of_loss_values_calculated, + ) copied_model._is_fitted = True copied_model._model.eval() return copied_model @@ -253,27 +283,23 @@ def predict(self, test_data: Table) -> TaggedTable: """ if not self._is_fitted: raise ModelNotFittedError + if not (sorted(test_data.column_names)).__eq__( + sorted(self._feature_names) if self._feature_names is not None else None, + ): + raise TestTrainDataMismatchError dataloader = test_data._into_dataloader(self._batch_size) predictions = [] with torch.no_grad(): for x in dataloader: elem = self._model(x) - for item in range(len(elem)): - if not self._is_multi_class: - if elem[item].item() < 0.5: - predicted_class = 0 # pragma: no cover - else: # pragma: no cover - predicted_class = 1 # pragma: no cover - predictions.append(predicted_class) + if self._num_of_classes > 1: + predictions += torch.argmax(elem, dim=1).tolist() + else: + p = elem.squeeze().round().tolist() + if isinstance(p, float): + predictions.append(p) else: - values = elem[item].tolist() - highest_value = 0 - category_of_highest_value = 0 - for index in range(len(values)): - if values[index] > highest_value: - highest_value = values[index] - category_of_highest_value = index - predictions.append(category_of_highest_value) + predictions += p return test_data.add_column(Column("prediction", predictions)).tag_columns("prediction") @property @@ -289,14 +315,14 @@ def is_fitted(self) -> bool: return self._is_fitted -class _PytorchModel(nn.Module): - def __init__(self, fnn_layers: list[FNNLayer], is_for_classification: bool) -> None: +class _InternalModel(nn.Module): + def __init__(self, layers: list[Layer], is_for_classification: bool) -> None: super().__init__() - self._layer_list = fnn_layers + self._layer_list = layers internal_layers = [] previous_output_size = None - for layer in fnn_layers: + for layer in layers: if previous_output_size is not None: layer._set_input_size(previous_output_size) internal_layers.append(layer._get_internal_layer(activation_function="relu")) @@ -304,13 +330,17 @@ def __init__(self, fnn_layers: list[FNNLayer], is_for_classification: bool) -> N if is_for_classification: internal_layers.pop() - if fnn_layers[-1].output_size > 2: - internal_layers.append(fnn_layers[-1]._get_internal_layer(activation_function="softmax")) + if layers[-1].output_size > 2: + internal_layers.append(layers[-1]._get_internal_layer(activation_function="softmax")) else: - internal_layers.append(fnn_layers[-1]._get_internal_layer(activation_function="sigmoid")) - self._pytorch_layers = nn.ModuleList(internal_layers) + internal_layers.append(layers[-1]._get_internal_layer(activation_function="sigmoid")) + self._pytorch_layers = nn.Sequential(*internal_layers) + + @property + def input_size(self) -> int: + return self._layer_list[0].input_size - def forward(self, x: float) -> float: + def forward(self, x: Tensor) -> Tensor: for layer in self._pytorch_layers: x = layer(x) return x diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py index bcef1bd1d..fd9584cba 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_into_dataloader.py @@ -11,7 +11,7 @@ "A": [1, 4], "B": [2, 5], "C": [3, 6], - "T": [0, 0], + "T": [0, 1], }, "T", ["A", "B", "C"], @@ -27,5 +27,5 @@ def test_should_create_dataloader( feature_names: list[str] | None, ) -> None: tagged_table = Table.from_dict(data).tag_columns(target_name, feature_names) - data_loader = tagged_table._into_dataloader(1) + data_loader = tagged_table._into_dataloader_with_classes(1, 2) assert isinstance(data_loader, DataLoader) diff --git a/tests/safeds/ml/nn/test_fnn_layer.py b/tests/safeds/ml/nn/test_forward_layer.py similarity index 66% rename from tests/safeds/ml/nn/test_fnn_layer.py rename to tests/safeds/ml/nn/test_forward_layer.py index e75488bc8..5d29022d4 100644 --- a/tests/safeds/ml/nn/test_fnn_layer.py +++ b/tests/safeds/ml/nn/test_forward_layer.py @@ -1,6 +1,6 @@ import pytest from safeds.exceptions import OutOfBoundsError -from safeds.ml.nn import FNNLayer +from safeds.ml.nn import ForwardLayer @pytest.mark.parametrize( @@ -15,7 +15,19 @@ def test_should_raise_if_input_size_out_of_bounds(input_size: int) -> None: OutOfBoundsError, match=rf"input_size \(={input_size}\) is not inside \[1, \u221e\)\.", ): - FNNLayer(output_size=1, input_size=input_size) + ForwardLayer(output_size=1, input_size=input_size) + + +@pytest.mark.parametrize( + "input_size", + [ + 1, + 20, + ], + ids=["one", "twenty"], +) +def test_should_raise_if_input_size_doesnt_match(input_size: int) -> None: + assert ForwardLayer(output_size=1, input_size=input_size).input_size == input_size @pytest.mark.parametrize( @@ -30,7 +42,7 @@ def test_should_raise_if_unknown_activation_function_is_passed(activation_functi ValueError, match=rf"Unknown Activation Function: {activation_function}", ): - FNNLayer(output_size=1, input_size=1)._get_internal_layer(activation_function) + ForwardLayer(output_size=1, input_size=1)._get_internal_layer(activation_function) @pytest.mark.parametrize( @@ -45,7 +57,7 @@ def test_should_raise_if_output_size_out_of_bounds(output_size: int) -> None: OutOfBoundsError, match=rf"output_size \(={output_size}\) is not inside \[1, \u221e\)\.", ): - FNNLayer(output_size=output_size, input_size=1) + ForwardLayer(output_size=output_size, input_size=1) @pytest.mark.parametrize( @@ -57,4 +69,4 @@ def test_should_raise_if_output_size_out_of_bounds(output_size: int) -> None: ids=["one", "twenty"], ) def test_should_raise_if_output_size_doesnt_match(output_size: int) -> None: - assert FNNLayer(output_size=output_size, input_size=1).output_size == output_size + assert ForwardLayer(output_size=output_size, input_size=1).output_size == output_size diff --git a/tests/safeds/ml/nn/test_model.py b/tests/safeds/ml/nn/test_model.py index 939978aee..435af1912 100644 --- a/tests/safeds/ml/nn/test_model.py +++ b/tests/safeds/ml/nn/test_model.py @@ -1,7 +1,7 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import ModelNotFittedError, OutOfBoundsError -from safeds.ml.nn import FNNLayer, NeuralNetworkClassifier, NeuralNetworkRegressor +from safeds.exceptions import InputSizeError, ModelNotFittedError, OutOfBoundsError, TestTrainDataMismatchError +from safeds.ml.nn import ForwardLayer, NeuralNetworkClassifier, NeuralNetworkRegressor class TestClassificationModel: @@ -17,7 +17,7 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None OutOfBoundsError, match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkClassifier([FNNLayer(1, 1)]).fit( + NeuralNetworkClassifier([ForwardLayer(1, 1)]).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), epoch_size=epoch_size, ) @@ -34,45 +34,66 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None OutOfBoundsError, match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]).fit( + NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), batch_size=batch_size, ) def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: fitted_model = NeuralNetworkClassifier( - [FNNLayer(input_size=1, output_size=8), FNNLayer(output_size=1)], + [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], ).fit( Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), ) assert isinstance(fitted_model, NeuralNetworkClassifier) - def test_should_raise_if_predict_function_returns_wrong_datatype(self) -> None: + @pytest.mark.parametrize( + "batch_size", + [ + 1, + 2, + ], + ids=["one", "two"], + ) + def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None: fitted_model = NeuralNetworkClassifier( - [FNNLayer(input_size=1, output_size=8), FNNLayer(output_size=1)], + [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=1)], ).fit( - Table.from_dict({"a": [1, 0], "b": [0, 1]}).tag_columns("a"), + Table.from_dict({"a": [1, 0, 1, 0, 1, 0], "b": [0, 1, 0, 12, 3, 3]}).tag_columns("a"), + batch_size=batch_size, ) predictions = fitted_model.predict(Table.from_dict({"b": [1, 0]})) assert isinstance(predictions, TaggedTable) - def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_classification(self) -> None: + @pytest.mark.parametrize( + "batch_size", + [ + 1, + 2, + ], + ids=["one", "two"], + ) + def test_should_raise_if_predict_function_returns_wrong_datatype_for_multiclass_classification( + self, + batch_size: int, + ) -> None: fitted_model = NeuralNetworkClassifier( - [FNNLayer(input_size=1, output_size=8), FNNLayer(output_size=3)], + [ForwardLayer(input_size=1, output_size=8), ForwardLayer(output_size=3)], ).fit( Table.from_dict({"a": [0, 1, 2], "b": [0, 15, 51]}).tag_columns("a"), + batch_size=batch_size, ) - predictions = fitted_model.predict(Table.from_dict({"b": [1]})) + predictions = fitted_model.predict(Table.from_dict({"b": [1, 4, 124]})) assert isinstance(predictions, TaggedTable) def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): - NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]).predict( + NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(self) -> None: - model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), @@ -80,15 +101,37 @@ def test_should_raise_if_is_fitted_is_set_correctly_for_binary_classification(se assert model.is_fitted def test_should_raise_if_is_fitted_is_set_correctly_for_multiclass_classification(self) -> None: - model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1), FNNLayer(output_size=3)]) + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), ) assert model.is_fitted + def test_should_raise_if_test_and_train_data_mismatch(self) -> None: + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) + model = model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), + ) + with pytest.raises( + TestTrainDataMismatchError, + match="The column names in the test table do not match with the feature columns names of the training data.", + ): + model.predict( + Table.from_dict({"a": [1], "c": [2]}), + ) + + def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) + with pytest.raises( + InputSizeError, + ): + model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}).tag_columns("a"), + ) + def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: - model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) class Test: self.was_called = False @@ -106,7 +149,7 @@ def callback_was_called(self) -> bool: assert obj.callback_was_called() is True def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: - model = NeuralNetworkClassifier([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkClassifier([ForwardLayer(input_size=1, output_size=1)]) class Test: self.was_called = False @@ -137,7 +180,7 @@ def test_should_raise_if_epoch_size_out_of_bounds(self, epoch_size: int) -> None OutOfBoundsError, match=rf"epoch_size \(={epoch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]).fit( + NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), epoch_size=epoch_size, ) @@ -154,40 +197,80 @@ def test_should_raise_if_batch_size_out_of_bounds(self, batch_size: int) -> None OutOfBoundsError, match=rf"batch_size \(={batch_size}\) is not inside \[1, \u221e\)\.", ): - NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]).fit( + NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), batch_size=batch_size, ) - def test_should_raise_if_fit_function_returns_wrong_datatype(self) -> None: - fitted_model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]).fit( - Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), + @pytest.mark.parametrize( + "batch_size", + [ + 1, + 2, + ], + ids=["one", "two"], + ) + def test_should_raise_if_fit_function_returns_wrong_datatype(self, batch_size: int) -> None: + fitted_model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( + Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}).tag_columns("a"), + batch_size=batch_size, ) assert isinstance(fitted_model, NeuralNetworkRegressor) - def test_should_raise_if_predict_function_returns_wrong_datatype(self) -> None: - fitted_model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]).fit( - Table.from_dict({"a": [1], "b": [2]}).tag_columns("a"), + @pytest.mark.parametrize( + "batch_size", + [ + 1, + 2, + ], + ids=["one", "two"], + ) + def test_should_raise_if_predict_function_returns_wrong_datatype(self, batch_size: int) -> None: + fitted_model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).fit( + Table.from_dict({"a": [1, 0, 1], "b": [2, 3, 4]}).tag_columns("a"), + batch_size=batch_size, ) - predictions = fitted_model.predict(Table.from_dict({"b": [1]})) + predictions = fitted_model.predict(Table.from_dict({"b": [5, 6, 7]})) assert isinstance(predictions, TaggedTable) def test_should_raise_if_model_has_not_been_fitted(self) -> None: with pytest.raises(ModelNotFittedError, match="The model has not been fitted yet."): - NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]).predict( + NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]).predict( Table.from_dict({"a": [1]}), ) def test_should_raise_if_is_fitted_is_set_correctly(self) -> None: - model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) assert not model.is_fitted model = model.fit( Table.from_dict({"a": [1], "b": [0]}).tag_columns("a"), ) assert model.is_fitted + def test_should_raise_if_test_and_train_data_mismatch(self) -> None: + model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) + model = model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5]}).tag_columns("a"), + ) + with pytest.raises( + TestTrainDataMismatchError, + match="The column names in the test table do not match with the feature columns names of the training data.", + ): + model.predict( + Table.from_dict({"a": [1], "c": [2]}), + ) + + def test_should_raise_if_table_size_and_input_size_mismatch(self) -> None: + model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1), ForwardLayer(output_size=3)]) + with pytest.raises( + InputSizeError, + ): + model.fit( + Table.from_dict({"a": [1, 0, 2], "b": [0, 15, 5], "c": [3, 33, 333]}).tag_columns("a"), + ) + def test_should_raise_if_fit_doesnt_batch_callback(self) -> None: - model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) class Test: self.was_called = False @@ -205,7 +288,7 @@ def callback_was_called(self) -> bool: assert obj.callback_was_called() is True def test_should_raise_if_fit_doesnt_epoch_callback(self) -> None: - model = NeuralNetworkRegressor([FNNLayer(input_size=1, output_size=1)]) + model = NeuralNetworkRegressor([ForwardLayer(input_size=1, output_size=1)]) class Test: self.was_called = False