Skip to content

Commit

Permalink
perf: improve performance of model & forward layer (#616)
Browse files Browse the repository at this point in the history
Closes #610 

### Summary of Changes

Fixed some bugs and improved the performance of some methods. There are
still some changes to be made, but it is helpful to merge this now, as
@Marsmaennchen221 and @Gerhardsa0 partly depend on it.

---------

Co-authored-by: Alexander Gréus <[email protected]>
Co-authored-by: megalinter-bot <[email protected]>
Co-authored-by: Alexander <[email protected]>
Co-authored-by: WinPlay02 <[email protected]>
  • Loading branch information
5 people authored Apr 17, 2024
1 parent 1ed2d56 commit e856cd5
Show file tree
Hide file tree
Showing 11 changed files with 327 additions and 129 deletions.
2 changes: 1 addition & 1 deletion src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2418,7 +2418,7 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): #

def _into_dataloader(self, batch_size: int) -> DataLoader:
"""
Return a Dataloader for the data stored in this table, used for training neural networks.
Return a Dataloader for the data stored in this table, used for predicting with neural networks.
The original table is not modified.
Expand Down
41 changes: 26 additions & 15 deletions src/safeds/data/tabular/containers/_tagged_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import sys
from typing import TYPE_CHECKING

import numpy as np
import torch
from torch import Tensor
from torch.utils.data import DataLoader, Dataset

from safeds._utils import _structural_hash
Expand Down Expand Up @@ -876,7 +876,7 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg
feature_names=self.features.column_names,
)

def _into_dataloader(self, batch_size: int) -> DataLoader:
def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> DataLoader:
"""
Return a Dataloader for the data stored in this table, used for training neural networks.
Expand All @@ -893,24 +893,35 @@ def _into_dataloader(self, batch_size: int) -> DataLoader:
The DataLoader.
"""
feature_rows = self.features.to_rows()
all_rows = []
for row in feature_rows:
new_item = []
for column_name in row:
new_item.append(row.get_value(column_name))
all_rows.append(new_item.copy())
return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size)
if num_of_classes <= 2:
return DataLoader(
dataset=_CustomDataset(
torch.Tensor(self.features._data.values),
torch.Tensor(self.target._data).unsqueeze(dim=-1),
),
batch_size=batch_size,
shuffle=True,
)
else:
return DataLoader(
dataset=_CustomDataset(
torch.Tensor(self.features._data.values),
torch.nn.functional.one_hot(torch.LongTensor(self.target._data), num_classes=num_of_classes),
),
batch_size=batch_size,
shuffle=True,
)


class _CustomDataset(Dataset):
def __init__(self, features: np.array, target: np.array):
self.X = torch.from_numpy(features.astype(np.float32))
self.Y = torch.from_numpy(target.astype(np.float32))
self.len = self.X.shape[0]

def __init__(self, features: Tensor, target: Tensor):
self.X = features.to(torch.float32)
self.Y = target.to(torch.float32)
self.len = self.X.size(dim=0)

def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]:
return self.X[item], self.Y[item].unsqueeze(-1)
return self.X[item], self.Y[item]

def __len__(self) -> int:
return self.len
4 changes: 4 additions & 0 deletions src/safeds/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@
DatasetContainsTargetError,
DatasetMissesDataError,
DatasetMissesFeaturesError,
InputSizeError,
LearningError,
ModelNotFittedError,
NonTimeSeriesError,
PredictionError,
TestTrainDataMismatchError,
UntaggedTableError,
)

Expand All @@ -57,10 +59,12 @@
"DatasetContainsTargetError",
"DatasetMissesDataError",
"DatasetMissesFeaturesError",
"InputSizeError",
"LearningError",
"ModelNotFittedError",
"NonTimeSeriesError",
"PredictionError",
"TestTrainDataMismatchError",
"UntaggedTableError",
# Other
"Bound",
Expand Down
18 changes: 18 additions & 0 deletions src/safeds/exceptions/_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,24 @@ def __init__(self, reason: str):
super().__init__(f"Error occurred while predicting: {reason}")


class TestTrainDataMismatchError(Exception):
    """
    Raised when the table passed to the predict method does not fit the training data.

    More precisely: the columns of the table passed to the predict method do not match with the feature columns of
    the training data.
    """

    def __init__(self) -> None:
        # The message is fixed; this error carries no per-instance details.
        super().__init__(
            "The column names in the test table do not match with the feature columns names of the training data.",
        )


class InputSizeError(Exception):
    """
    Raised when the amount of features being passed to a network does not match with its input size.

    Parameters
    ----------
    table_size:
        The amount of columns being passed to the network.
    input_layer_size:
        The input size of the network.
    """

    def __init__(self, table_size: int, input_layer_size: int) -> None:
        # Both sizes are embedded in the message so the mismatch is visible at a glance.
        super().__init__(
            f"The amount of columns being passed to the network({table_size}) does not match with its input size({input_layer_size}). Consider changing the number of neurons in the first layer or reformatting the table.",
        )


class UntaggedTableError(Exception):
"""Raised when an untagged table is used instead of a TaggedTable in a regression or classification."""

Expand Down
4 changes: 2 additions & 2 deletions src/safeds/ml/nn/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""Classes for classification tasks."""

from ._fnn_layer import FNNLayer
from ._forward_layer import ForwardLayer
from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor

__all__ = [
"FNNLayer",
"ForwardLayer",
"NeuralNetworkClassifier",
"NeuralNetworkRegressor",
]
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from torch import nn
from torch import Tensor, nn

from safeds.exceptions import ClosedBound, OutOfBoundsError
from safeds.ml.nn._layer import Layer


class _InternalLayer(nn.Module):
Expand All @@ -17,11 +18,11 @@ def __init__(self, input_size: int, output_size: int, activation_function: str):
case _:
raise ValueError("Unknown Activation Function: " + activation_function)

def forward(self, x: float) -> float:
def forward(self, x: Tensor) -> Tensor:
return self._fn(self._layer(x))


class FNNLayer:
class ForwardLayer(Layer):
def __init__(self, output_size: int, input_size: int | None = None):
"""
Create a FNN Layer.
Expand Down Expand Up @@ -49,6 +50,18 @@ def __init__(self, output_size: int, input_size: int | None = None):
def _get_internal_layer(self, activation_function: str) -> _InternalLayer:
    """
    Create the internal torch module for this layer.

    Parameters
    ----------
    activation_function:
        Name of the activation function; it is forwarded unchanged to `_InternalLayer`.

    Returns
    -------
    layer:
        A new `_InternalLayer` built from this layer's input size, output size and the given activation function.
    """
    return _InternalLayer(self._input_size, self._output_size, activation_function)

@property
def input_size(self) -> int:
    """
    Get the input_size of this layer.

    Returns
    -------
    result :
        The amount of values being passed into this layer.
    """
    return self._input_size

@property
def output_size(self) -> int:
"""
Expand Down
27 changes: 27 additions & 0 deletions src/safeds/ml/nn/_layer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from abc import ABC, abstractmethod

from torch import nn


class Layer(ABC):
    """Abstract interface of a neural network layer; every member below must be provided by subclasses."""

    @abstractmethod
    def __init__(self) -> None:
        pass  # pragma: no cover

    @abstractmethod
    def _get_internal_layer(self, activation_function: str) -> nn.Module:
        """Return the `nn.Module` for this layer, given the name of an activation function."""
        pass  # pragma: no cover

    @property
    @abstractmethod
    def input_size(self) -> int:
        """Get the input size of this layer."""
        pass  # pragma: no cover

    @property
    @abstractmethod
    def output_size(self) -> int:
        """Get the output size of this layer."""
        pass  # pragma: no cover

    @abstractmethod
    def _set_input_size(self, input_size: int) -> None:
        """Set the input size of this layer."""
        pass  # pragma: no cover
Loading

0 comments on commit e856cd5

Please sign in to comment.