From b18a06dce090a1bb9b6e3c858b83cd8b6277e280 Mon Sep 17 00:00:00 2001
From: Lars Reimann <mail@larsreimann.com>
Date: Tue, 28 Mar 2023 16:06:31 +0200
Subject: [PATCH] feat: improve transformers for tabular data (#108)

Closes #61.
Closes #90.

### Summary of Changes

* Common superclasses `TableTransformer` and
`InvertibleTableTransformer`
* Common interface for `fit`, `transform`, `fit_transform`,
`inverse_transform`
* Return new transformer when calling `fit`
* More thorough tests

---------

Co-authored-by: lars-reimann <lars-reimann@users.noreply.github.com>
---
 .../data/tabular/transformation/__init__.py   |   2 +-
 .../data/tabular/transformation/_imputer.py   | 131 +++++++------
 .../tabular/transformation/_label_encoder.py  | 155 +++++++--------
 .../transformation/_one_hot_encoder.py        | 162 ++++++++--------
 .../transformation/_table_transformer.py      |  99 ++++++++++
 .../transformation/_imputer/__init__.py       |   0
 .../transformation/_imputer/test_imputer.py   |  69 -------
 .../transformation/_label_encoder/__init__.py |   0
 .../_label_encoder/test_fit_transform.py      |  15 --
 .../_label_encoder/test_inverse_transform.py  |  28 ---
 .../_label_encoder/test_transform.py          |  28 ---
 .../_one_hot_encoder/__init__.py              |   0
 .../_one_hot_encoder/test_fit_transform.py    |  32 ----
 .../test_inverse_transform.py                 |  36 ----
 .../_one_hot_encoder/test_transform.py        |  20 --
 .../tabular/transformation/test_imputer.py    | 175 +++++++++++++++++
 .../transformation/test_label_encoder.py      | 168 +++++++++++++++++
 .../transformation/test_one_hot_encoder.py    | 176 ++++++++++++++++++
 18 files changed, 843 insertions(+), 453 deletions(-)
 create mode 100644 src/safeds/data/tabular/transformation/_table_transformer.py
 delete mode 100644 tests/safeds/data/tabular/transformation/_imputer/__init__.py
 delete mode 100644 tests/safeds/data/tabular/transformation/_imputer/test_imputer.py
 delete mode 100644 tests/safeds/data/tabular/transformation/_label_encoder/__init__.py
 delete mode 100644 tests/safeds/data/tabular/transformation/_label_encoder/test_fit_transform.py
 delete mode 100644 tests/safeds/data/tabular/transformation/_label_encoder/test_inverse_transform.py
 delete mode 100644 tests/safeds/data/tabular/transformation/_label_encoder/test_transform.py
 delete mode 100644 tests/safeds/data/tabular/transformation/_one_hot_encoder/__init__.py
 delete mode 100644 tests/safeds/data/tabular/transformation/_one_hot_encoder/test_fit_transform.py
 delete mode 100644 tests/safeds/data/tabular/transformation/_one_hot_encoder/test_inverse_transform.py
 delete mode 100644 tests/safeds/data/tabular/transformation/_one_hot_encoder/test_transform.py
 create mode 100644 tests/safeds/data/tabular/transformation/test_imputer.py
 create mode 100644 tests/safeds/data/tabular/transformation/test_label_encoder.py
 create mode 100644 tests/safeds/data/tabular/transformation/test_one_hot_encoder.py

diff --git a/src/safeds/data/tabular/transformation/__init__.py b/src/safeds/data/tabular/transformation/__init__.py
index 17e046538..58931d053 100644
--- a/src/safeds/data/tabular/transformation/__init__.py
+++ b/src/safeds/data/tabular/transformation/__init__.py
@@ -1,3 +1,3 @@
-from ._imputer import Imputer
+from ._imputer import Imputer, ImputerStrategy
 from ._label_encoder import LabelEncoder
 from ._one_hot_encoder import OneHotEncoder
diff --git a/src/safeds/data/tabular/transformation/_imputer.py b/src/safeds/data/tabular/transformation/_imputer.py
index 45862778f..4a23cca35 100644
--- a/src/safeds/data/tabular/transformation/_imputer.py
+++ b/src/safeds/data/tabular/transformation/_imputer.py
@@ -5,17 +5,18 @@
 
 import pandas as pd
 from safeds.data.tabular.containers import Table
-from sklearn.impute import SimpleImputer
+from safeds.data.tabular.transformation._table_transformer import TableTransformer
+from safeds.exceptions import NotFittedError, UnknownColumnNameError
+from sklearn.impute import SimpleImputer as sk_SimpleImputer
 
 
 class ImputerStrategy(ABC):
     @abstractmethod
-    def _augment_imputer(self, imputer: SimpleImputer) -> None:
+    def _augment_imputer(self, imputer: sk_SimpleImputer) -> None:
         pass
 
 
-# noinspection PyProtectedMember
-class Imputer:
+class Imputer(TableTransformer):
     """
     Impute the data for a given Table.
 
@@ -39,7 +40,10 @@ class Constant(ImputerStrategy):
             def __init__(self, value: Any):
                 self._value = value
 
-            def _augment_imputer(self, imputer: SimpleImputer) -> None:
+            def __str__(self) -> str:
+                return f"Constant({self._value})"
+
+            def _augment_imputer(self, imputer: sk_SimpleImputer) -> None:
                 imputer.strategy = "constant"
                 imputer.fill_value = self._value
 
@@ -48,7 +52,10 @@ class Mean(ImputerStrategy):
             An imputation strategy for imputing missing data with mean values.
             """
 
-            def _augment_imputer(self, imputer: SimpleImputer) -> None:
+            def __str__(self) -> str:
+                return "Mean"
+
+            def _augment_imputer(self, imputer: sk_SimpleImputer) -> None:
                 imputer.strategy = "mean"
 
         class Median(ImputerStrategy):
@@ -56,7 +63,10 @@ class Median(ImputerStrategy):
             An imputation strategy for imputing missing data with median values.
             """
 
-            def _augment_imputer(self, imputer: SimpleImputer) -> None:
+            def __str__(self) -> str:
+                return "Median"
+
+            def _augment_imputer(self, imputer: sk_SimpleImputer) -> None:
                 imputer.strategy = "median"
 
         class Mode(ImputerStrategy):
@@ -64,81 +74,90 @@ class Mode(ImputerStrategy):
             An imputation strategy for imputing missing data with mode values.
             """
 
-            def _augment_imputer(self, imputer: SimpleImputer) -> None:
+            def __str__(self) -> str:
+                return "Mode"
+
+            def _augment_imputer(self, imputer: sk_SimpleImputer) -> None:
                 imputer.strategy = "most_frequent"
 
     def __init__(self, strategy: ImputerStrategy):
-        self._imp = SimpleImputer()
-        strategy._augment_imputer(self._imp)
-        self._column_names: list[str] = []
+        self._strategy = strategy
 
-    def fit(self, table: Table, column_names: Optional[list[str]] = None) -> None:
+        self._wrapped_transformer: Optional[sk_SimpleImputer] = None
+        self._column_names: Optional[list[str]] = None
+
+    # noinspection PyProtectedMember
+    def fit(self, table: Table, column_names: Optional[list[str]] = None) -> Imputer:
         """
-        Fit the imputer on the dataset.
+        Learn a transformation for a set of columns in a table.
 
         Parameters
         ----------
         table : Table
-            The table used to learn the imputation values.
+            The table used to fit the transformer.
         column_names : Optional[list[str]]
-            An optional list of column names, if the imputer is only supposed to run on specific columns.
+            The list of columns from the table used to fit the transformer. If `None`, all columns are used.
+
+        Returns
+        -------
+        fitted_transformer : TableTransformer
+            The fitted transformer.
         """
         if column_names is None:
-            column_names = table.schema.get_column_names()
+            column_names = table.get_column_names()
+        else:
+            missing_columns = set(column_names) - set(table.get_column_names())
+            if len(missing_columns) > 0:
+                raise UnknownColumnNameError(list(missing_columns))
 
-        if self._imp.strategy == "most_frequent":
+        if isinstance(self._strategy, Imputer.Strategy.Mode):
             for name in column_names:
-                if 1 < len(table.get_column(name).mode()):
-                    raise IndexError(
-                        "There are multiple frequent values in a column given for the Imputer"
-                    )
+                if len(table.get_column(name).mode()) > 1:
+                    raise IndexError("There are multiple most frequent values in a column given for the Imputer")
+
+        indices = [table.schema._get_column_index_by_name(name) for name in column_names]
+
+        wrapped_transformer = sk_SimpleImputer()
+        self._strategy._augment_imputer(wrapped_transformer)
+        wrapped_transformer.fit(table._data[indices])
+
+        result = Imputer(self._strategy)
+        result._wrapped_transformer = wrapped_transformer
+        result._column_names = column_names
 
-        self._column_names = column_names
-        indices = [
-            table.schema._get_column_index_by_name(name) for name in self._column_names
-        ]
-        self._imp.fit(table._data[indices])
+        return result
 
+    # noinspection PyProtectedMember
     def transform(self, table: Table) -> Table:
         """
-        Impute the missing values on the dataset.
+        Apply the learned transformation to a table.
 
         Parameters
         ----------
         table : Table
-            The dataset to be imputed.
+            The table to which the learned transformation is applied.
 
         Returns
         -------
-        table : Table
-            The dataset with missing values imputed by the given strategy.
-        """
-        data = table._data.copy()
-        indices = [
-            table.schema._get_column_index_by_name(name) for name in self._column_names
-        ]
-        data[indices] = pd.DataFrame(
-            self._imp.transform(data[indices]), columns=indices
-        )
-        return Table(data, table.schema)
+        transformed_table : Table
+            The transformed table.
 
-    def fit_transform(
-        self, table: Table, column_names: Optional[list[str]] = None
-    ) -> Table:
+        Raises
+        ----------
+        NotFittedError
+            If the transformer has not been fitted yet.
         """
-        Fit the imputer on the dataset and impute the missing values.
 
-        Parameters
-        ----------
-        table : Table
-            The table used to learn the imputation values.
-        column_names : Optional[list[str]]
-            An optional list of column names, if the imputer is only supposed to run on specific columns.
+        # Transformer has not been fitted yet
+        if self._wrapped_transformer is None or self._column_names is None:
+            raise NotFittedError()
 
-        Returns
-        -------
-        table : Table
-            The dataset with missing values imputed by the given strategy.
-        """
-        self.fit(table, column_names)
-        return self.transform(table)
+        # Input table does not contain all columns used to fit the transformer
+        missing_columns = set(self._column_names) - set(table.get_column_names())
+        if len(missing_columns) > 0:
+            raise UnknownColumnNameError(list(missing_columns))
+
+        data = table._data.copy()
+        indices = [table.schema._get_column_index_by_name(name) for name in self._column_names]
+        data[indices] = pd.DataFrame(self._wrapped_transformer.transform(data[indices]), columns=indices)
+        return Table(data, table.schema)
diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py
index ad28adeed..561627f47 100644
--- a/src/safeds/data/tabular/transformation/_label_encoder.py
+++ b/src/safeds/data/tabular/transformation/_label_encoder.py
@@ -1,12 +1,14 @@
 from __future__ import annotations
 
 import warnings
-from typing import Any
+from typing import Any, Optional
 
-import pandas
 from safeds.data.tabular.containers import Table
-from safeds.exceptions import LearningError, NotFittedError
-from sklearn import exceptions, preprocessing
+from safeds.data.tabular.transformation._table_transformer import (
+    InvertibleTableTransformer,
+)
+from safeds.exceptions import NotFittedError, UnknownColumnNameError
+from sklearn.preprocessing import OrdinalEncoder as sk_OrdinalEncoder
 
 
 def warn(*_: Any, **__: Any) -> None:
@@ -17,133 +19,108 @@ def warn(*_: Any, **__: Any) -> None:
 
 
 # noinspection PyProtectedMember
-
-
-class LabelEncoder:
+class LabelEncoder(InvertibleTableTransformer):
     """
     The LabelEncoder encodes one or more given columns into labels.
     """
 
     def __init__(self) -> None:
-        self._is_fitted = 0
-        self._le = preprocessing.LabelEncoder()
+        self._wrapped_transformer: Optional[sk_OrdinalEncoder] = None
+        self._column_names: Optional[list[str]] = None
 
-    def fit(self, table: Table, column: str) -> None:
+    def fit(self, table: Table, column_names: Optional[list[str]] = None) -> LabelEncoder:
         """
-        Fit the label encoder with the values in the table.
+        Learn a transformation for a set of columns in a table.
 
         Parameters
         ----------
         table : Table
-            The table containing the data used to fit the label encoder.
-        column : str
-            The list of columns supposed to be label-encoded.
+            The table used to fit the transformer.
+        column_names : Optional[list[str]]
+            The list of columns from the table used to fit the transformer. If `None`, all columns are used.
 
         Returns
         -------
-        None
-            This function does not return any value. It updates the internal state of the label encoder object.
-
-        Raises
-        -------
-        LearningError
-            If the model fitting was unsuccessful.
+        fitted_transformer : TableTransformer
+            The fitted transformer.
         """
-        try:
-            self._le.fit(table.keep_only_columns([column])._data)
-        except exceptions.NotFittedError as exc:
-            raise LearningError("") from exc
+        if column_names is None:
+            column_names = table.get_column_names()
+        else:
+            missing_columns = set(column_names) - set(table.get_column_names())
+            if len(missing_columns) > 0:
+                raise UnknownColumnNameError(list(missing_columns))
+
+        indices = [table.schema._get_column_index_by_name(name) for name in column_names]
+
+        wrapped_transformer = sk_OrdinalEncoder()
+        wrapped_transformer.fit(table._data[indices])
 
-    def transform(self, table: Table, column: str) -> Table:
+        result = LabelEncoder()
+        result._wrapped_transformer = wrapped_transformer
+        result._column_names = column_names
+
+        return result
+
+    def transform(self, table: Table) -> Table:
         """
-        Transform the given table to a normalized encoded table.
+        Apply the learned transformation to a table.
 
         Parameters
         ----------
         table : Table
-                The table with target values.
-        column : str
-                The name of the column.
+            The table to which the learned transformation is applied.
 
         Returns
         -------
-        result : Table
-            Table with normalized encodings.
+        transformed_table : Table
+            The transformed table.
 
         Raises
-        ------
+        ----------
         NotFittedError
-            If the Model wasn't fitted before transforming.
+            If the transformer has not been fitted yet.
         """
-        p_df = table._data
-        p_df.columns = table.schema.get_column_names()
-        try:
-            p_df[column] = self._le.transform(p_df[column])
-            return Table(p_df)
-        except Exception as exc:
-            raise NotFittedError from exc
-
-    def fit_transform(self, table: Table, columns: list[str]) -> Table:
-        """
-        Label-encode the table with the label encoder.
 
-        Parameters
-        ----------
-        table : Table
-            The table to be transformed.
-        columns : list[str]
-            The list of column names to be encoded.
+        # Transformer has not been fitted yet
+        if self._wrapped_transformer is None or self._column_names is None:
+            raise NotFittedError()
 
-        Returns
-        -------
-        table : Table
-            The label-encoded table.
+        # Input table does not contain all columns used to fit the transformer
+        missing_columns = set(self._column_names) - set(table.get_column_names())
+        if len(missing_columns) > 0:
+            raise UnknownColumnNameError(list(missing_columns))
 
-        Raises
-        -------
-        NotFittedError
-            If the encoder wasn't fitted before transforming.
+        data = table._data.copy()
+        data.columns = table.get_column_names()
+        data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names])
+        return Table(data)
 
+    def inverse_transform(self, transformed_table: Table) -> Table:
         """
-        p_df = table._data
-        p_df.columns = table.schema.get_column_names()
-        try:
-            for col in columns:
-                # Fit the LabelEncoder on the Column
-                self._le.fit(p_df[col])
-
-                # transform the column using the trained Label Encoder
-                p_df[col] = self._le.transform(p_df[col])
-            return Table(pandas.DataFrame(p_df))
-        except exceptions.NotFittedError as exc:
-            raise NotFittedError from exc
-
-    def inverse_transform(self, table: Table, column: str) -> Table:
-        """
-        Inverse-transform the table back to its original encodings.
+        Undo the learned transformation.
 
         Parameters
         ----------
-        table : Table
-            The table to be inverse-transformed.
-        column : str
-            The column to be inverse-transformed.
+        transformed_table : Table
+            The table to be transformed back to the original version.
 
         Returns
         -------
         table : Table
-            The inverse-transformed table.
+            The original table.
 
         Raises
-        -------
+        ----------
         NotFittedError
-            If the encoder wasn't fitted before transforming.
+            If the transformer has not been fitted yet.
         """
 
-        try:
-            p_df = table._data
-            p_df.columns = table.schema.get_column_names()
-            p_df[column] = self._le.inverse_transform(p_df[column])
-            return Table(p_df)
-        except exceptions.NotFittedError as exc:
-            raise NotFittedError from exc
+        # Transformer has not been fitted yet
+        if self._wrapped_transformer is None or self._column_names is None:
+            raise NotFittedError()
+
+        data = transformed_table._data.copy()
+        data.columns = transformed_table.get_column_names()
+        data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names])
+        return Table(data)
diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py
index a3e58d8ad..033f81004 100644
--- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py
+++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py
@@ -1,127 +1,131 @@
+from __future__ import annotations
+
+from typing import Optional
+
 import pandas as pd
 from safeds.data.tabular.containers import Table
-from safeds.exceptions import LearningError, NotFittedError
-from sklearn import exceptions
-from sklearn.preprocessing import OneHotEncoder as OHE_sklearn
+from safeds.data.tabular.transformation._table_transformer import (
+    InvertibleTableTransformer,
+)
+from safeds.exceptions import NotFittedError, UnknownColumnNameError
+from sklearn.preprocessing import OneHotEncoder as sk_OneHotEncoder
 
 
-class OneHotEncoder:
+class OneHotEncoder(InvertibleTableTransformer):
     """
     The OneHotEncoder encodes categorical columns to numerical features [0,1] that represent the existence for each value.
     """
 
     def __init__(self) -> None:
-        self._encoder = OHE_sklearn()
+        self._wrapped_transformer: Optional[sk_OneHotEncoder] = None
+        self._column_names: Optional[list[str]] = None
 
-    def fit(self, table: Table, columns: list[str]) -> None:
+    # noinspection PyProtectedMember
+    def fit(self, table: Table, column_names: Optional[list[str]] = None) -> OneHotEncoder:
         """
-        Fit the encoder to a table.
+        Learn a transformation for a set of columns in a table.
 
         Parameters
         ----------
         table : Table
-            The table used to fit the encoder.
-        columns : list[str]:
-            The list of columns from the table used to fit the encoder.
+            The table used to fit the transformer.
+        column_names : Optional[list[str]]
+            The list of columns from the table used to fit the transformer. If `None`, all columns are used.
 
-        Raises
-        ----------
-        LearningError
-            If there was an error during fitting.
+        Returns
+        -------
+        fitted_transformer : TableTransformer
+            The fitted transformer.
         """
-        try:
-            table_k_columns = table.keep_only_columns(column_names=columns)
-            df = table_k_columns._data
-            df.columns = table_k_columns.schema.get_column_names()
-            self._encoder.fit(df)
-        except exceptions.NotFittedError as exc:
-            raise LearningError("") from exc
+        if column_names is None:
+            column_names = table.get_column_names()
+        else:
+            missing_columns = set(column_names) - set(table.get_column_names())
+            if len(missing_columns) > 0:
+                raise UnknownColumnNameError(list(missing_columns))
+
+        data = table._data.copy()
+        data.columns = table.get_column_names()
+
+        wrapped_transformer = sk_OneHotEncoder()
+        wrapped_transformer.fit(data[column_names])
+
+        result = OneHotEncoder()
+        result._wrapped_transformer = wrapped_transformer
+        result._column_names = column_names
 
+        return result
+
+    # noinspection PyProtectedMember
     def transform(self, table: Table) -> Table:
         """
-        Transform the data with the trained encoder.
+        Apply the learned transformation to a table.
 
         Parameters
         ----------
         table : Table
-            The data to be transformed.
+            The table to which the learned transformation is applied.
 
         Returns
-        ----------
-        table : Table
+        -------
+        transformed_table : Table
             The transformed table.
 
         Raises
         ----------
         NotFittedError
-            If the encoder wasn't fitted before transforming.
-        """
-        try:
-            table_k_columns = table.keep_only_columns(self._encoder.feature_names_in_)
-            df_k_columns = table_k_columns._data
-            df_k_columns.columns = table_k_columns.schema.get_column_names()
-            df_new = pd.DataFrame(self._encoder.transform(df_k_columns).toarray())
-            df_new.columns = self._encoder.get_feature_names_out()
-            df_concat = table._data.copy()
-            df_concat.columns = table.schema.get_column_names()
-            data_new = pd.concat([df_concat, df_new], axis=1).drop(
-                self._encoder.feature_names_in_, axis=1
-            )
-            return Table(data_new)
-        except Exception as exc:
-            raise NotFittedError from exc
-
-    def fit_transform(self, table: Table, columns: list[str]) -> Table:
+            If the transformer has not been fitted yet.
         """
-        Fit and transform data with a OneHotEncoder.
 
-        Parameters
-        ----------
-        table : Table
-            The table used to fit the encoder and subsequently to be transformed
-        columns : list[str]:
-            The list of columns from the table used to fit the encoder and subsequently to be transformed.
+        # Transformer has not been fitted yet
+        if self._wrapped_transformer is None or self._column_names is None:
+            raise NotFittedError()
 
-        Returns
-        ----------
-        table : Table
-            The transformed table.
+        # Input table does not contain all columns used to fit the transformer
+        missing_columns = set(self._column_names) - set(table.get_column_names())
+        if len(missing_columns) > 0:
+            raise UnknownColumnNameError(list(missing_columns))
 
-        """
-        self.fit(table, columns)
-        return self.transform(table)
+        original = table._data.copy()
+        original.columns = table.schema.get_column_names()
+
+        one_hot_encoded = pd.DataFrame(self._wrapped_transformer.transform(original[self._column_names]).toarray())
+        one_hot_encoded.columns = self._wrapped_transformer.get_feature_names_out()
+
+        unchanged = original.drop(self._column_names, axis=1)
 
-    def inverse_transform(self, table: Table) -> Table:
+        return Table(pd.concat([unchanged, one_hot_encoded], axis=1))
+
+    # noinspection PyProtectedMember
+    def inverse_transform(self, transformed_table: Table) -> Table:
         """
-        Reset a transformed table to its original state.
+        Undo the learned transformation.
 
         Parameters
         ----------
-        table : Table
-            The table to be inverse-transformed.
+        transformed_table : Table
+            The table to be transformed back to the original version.
 
         Returns
-        ----------
+        -------
         table : Table
-            The inverse-transformed table.
+            The original table.
 
         Raises
         ----------
         NotFittedError
-            If the encoder wasn't fitted before transforming.
-
+            If the transformer has not been fitted yet.
         """
-        try:
-            data = self._encoder.inverse_transform(
-                table.keep_only_columns(self._encoder.get_feature_names_out())._data
-            )
-            df = pd.DataFrame(data)
-            df.columns = self._encoder.feature_names_in_
-            new_table = Table(df)
-            for col in table.drop_columns(
-                self._encoder.get_feature_names_out()
-            ).to_columns():
-                new_table = new_table.add_column(col)
-            return new_table
-        except exceptions.NotFittedError as exc:
-            raise NotFittedError from exc
+        # Transformer has not been fitted yet
+        if self._wrapped_transformer is None or self._column_names is None:
+            raise NotFittedError()
+
+        data = transformed_table._data.copy()
+        data.columns = transformed_table.get_column_names()
+
+        # Decode only the one-hot columns; untouched columns must not reach the wrapped encoder
+        encoded_names = self._wrapped_transformer.get_feature_names_out()
+        decoded_values = self._wrapped_transformer.inverse_transform(data[encoded_names])
+        decoded = pd.DataFrame(decoded_values, columns=self._column_names)
+        unchanged = data.drop(encoded_names, axis=1)
+        return Table(pd.concat([unchanged, decoded], axis=1))
diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py
new file mode 100644
index 000000000..b2844de22
--- /dev/null
+++ b/src/safeds/data/tabular/transformation/_table_transformer.py
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from safeds.data.tabular.containers import Table
+
+
+class TableTransformer(ABC):
+    """
+    A `TableTransformer` learns a transformation for a set of columns in a `Table` and can then apply the learned
+    transformation to another `Table` with the same columns.
+    """
+
+    @abstractmethod
+    def fit(self, table: Table, column_names: Optional[list[str]] = None) -> TableTransformer:
+        """
+        Learn a transformation for a set of columns in a table.
+
+        Parameters
+        ----------
+        table : Table
+            The table used to fit the transformer.
+        column_names : Optional[list[str]]
+            The list of columns from the table used to fit the transformer. If `None`, all columns are used.
+
+        Returns
+        -------
+        fitted_transformer : TableTransformer
+            The fitted transformer.
+        """
+
+    @abstractmethod
+    def transform(self, table: Table) -> Table:
+        """
+        Apply the learned transformation to a table.
+
+        Parameters
+        ----------
+        table : Table
+            The table to which the learned transformation is applied.
+
+        Returns
+        -------
+        transformed_table : Table
+            The transformed table.
+
+        Raises
+        ----------
+        NotFittedError
+            If the transformer has not been fitted yet.
+        """
+
+    def fit_transform(self, table: Table, column_names: Optional[list[str]] = None) -> Table:
+        """
+        Learn a transformation for a set of columns in a table and apply the learned transformation to the same table.
+        If you also need the fitted transformer, use `fit` and `transform` separately.
+
+        Parameters
+        ----------
+        table : Table
+            The table used to fit the transformer. The transformer is then applied to this table.
+        column_names : Optional[list[str]]
+            The list of columns from the table used to fit the transformer. If `None`, all columns are used.
+
+        Returns
+        -------
+        transformed_table : Table
+            The transformed table.
+        """
+        return self.fit(table, column_names).transform(table)
+
+
+class InvertibleTableTransformer(TableTransformer):
+    """
+    An `InvertibleTableTransformer` is a `TableTransformer` that can also undo the learned transformation after it has
+    been applied.
+    """
+
+    @abstractmethod
+    def inverse_transform(self, transformed_table: Table) -> Table:
+        """
+        Undo the learned transformation.
+
+        Parameters
+        ----------
+        transformed_table : Table
+            The table to be transformed back to the original version.
+
+        Returns
+        -------
+        table : Table
+            The original table.
+
+        Raises
+        ----------
+        NotFittedError
+            If the transformer has not been fitted yet.
+        """
diff --git a/tests/safeds/data/tabular/transformation/_imputer/__init__.py b/tests/safeds/data/tabular/transformation/_imputer/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tests/safeds/data/tabular/transformation/_imputer/test_imputer.py b/tests/safeds/data/tabular/transformation/_imputer/test_imputer.py
deleted file mode 100644
index 663823ef7..000000000
--- a/tests/safeds/data/tabular/transformation/_imputer/test_imputer.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import numpy as np
-import pandas as pd
-import pytest
-from safeds.data.tabular.containers import Table
-from safeds.data.tabular.transformation import Imputer
-
-
-def test_imputer_mean() -> None:
-    table = Table(pd.DataFrame(data={"col1": [np.nan, 2, 3, 4, 5]}))
-    column = table.get_column("col1")
-    imp = Imputer(Imputer.Strategy.Mean())
-    new_table = imp.fit_transform(table)
-
-    assert new_table.get_column("col1")._data[0] == column.mean()
-
-
-def test_imputer_median() -> None:
-    table = Table(pd.DataFrame(data={"col1": [np.nan, 2, 3, 4, 5]}))
-    column = table.get_column("col1")
-    imp = Imputer(Imputer.Strategy.Median())
-    new_table = imp.fit_transform(table)
-
-    assert new_table.get_column("col1")._data[0] == column.median()
-
-
-def test_imputer_mode() -> None:
-    table = Table(pd.DataFrame(data={"col1": [np.nan, 2, 2, 4, 5]}))
-    column = table.get_column("col1")
-    imp = Imputer(Imputer.Strategy.Mode())
-    new_table = imp.fit_transform(table)
-
-    assert new_table.get_column("col1")._data[0] == column.mode()[0]
-
-
-def test_imputer_mode_invalid() -> None:
-    table = Table(pd.DataFrame(data={"col1": [np.nan, 2, 3, 4, 5]}))
-    imp = Imputer(Imputer.Strategy.Mode())
-    with pytest.raises(IndexError):
-        imp.fit_transform(table)
-
-
-def test_imputer_constant() -> None:
-    table = Table(pd.DataFrame(data={"col1": [np.nan, 2, 3, 4, 5]}))
-    imp = Imputer(Imputer.Strategy.Constant(0))
-    new_table = imp.fit_transform(table)
-
-    assert new_table.get_column("col1")._data[0] == 0
-
-
-def test_imputer_specific_column() -> None:
-    table = Table(
-        pd.DataFrame(data={"col1": [np.nan, 2, 3, 4, 5], "col2": [np.nan, 2, 3, 4, 5]})
-    )
-    imp = Imputer(Imputer.Strategy.Constant(0))
-    new_table = imp.fit_transform(table, ["col1"])
-
-    assert new_table.get_column("col1")._data[0] == 0
-    assert np.isnan(new_table.get_column("col2")._data[0])
-
-
-def test_imputer_all_columns() -> None:
-    table = Table(
-        pd.DataFrame(data={"col1": [np.nan, 2, 3, 4, 5], "col2": [np.nan, 2, 3, 4, 5]})
-    )
-    imp = Imputer(Imputer.Strategy.Constant(0))
-    new_table = imp.fit_transform(table)
-
-    assert new_table.get_column("col1")._data[0] == 0
-    assert new_table.get_column("col2")._data[0] == 0
diff --git a/tests/safeds/data/tabular/transformation/_label_encoder/__init__.py b/tests/safeds/data/tabular/transformation/_label_encoder/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tests/safeds/data/tabular/transformation/_label_encoder/test_fit_transform.py b/tests/safeds/data/tabular/transformation/_label_encoder/test_fit_transform.py
deleted file mode 100644
index d83366407..000000000
--- a/tests/safeds/data/tabular/transformation/_label_encoder/test_fit_transform.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import pandas as pd
-from safeds.data.tabular.containers import Table
-from safeds.data.tabular.transformation import LabelEncoder
-
-
-def test_fit_transform_valid() -> None:
-    test_table = Table(
-        pd.DataFrame({"cities": ["paris", "paris", "tokyo", "amsterdam"]})
-    )
-    le = LabelEncoder()
-    test_table = le.fit_transform(test_table, ["cities"])
-    assert test_table.schema.has_column("cities")
-    assert test_table.to_columns()[0].get_value(0) == 1
-    assert test_table.to_columns()[0].get_value(2) == 2
-    assert test_table.to_columns()[0].get_value(3) == 0
diff --git a/tests/safeds/data/tabular/transformation/_label_encoder/test_inverse_transform.py b/tests/safeds/data/tabular/transformation/_label_encoder/test_inverse_transform.py
deleted file mode 100644
index 63ab545a5..000000000
--- a/tests/safeds/data/tabular/transformation/_label_encoder/test_inverse_transform.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import pandas as pd
-import pytest
-from safeds.data.tabular.containers import Table
-from safeds.data.tabular.transformation import LabelEncoder
-from safeds.exceptions import NotFittedError
-
-
-def test_inverse_transform_valid() -> None:
-    test_table = Table(
-        pd.DataFrame({"cities": ["paris", "paris", "tokyo", "amsterdam"]})
-    )
-    le = LabelEncoder()
-    test_table = le.fit_transform(test_table, ["cities"])
-    test_table = le.inverse_transform(test_table, "cities")
-    assert test_table.schema.has_column("cities")
-    assert test_table.to_columns()[0].get_value(0) == "paris"
-    assert test_table.to_columns()[0].get_value(2) == "tokyo"
-    assert test_table.to_columns()[0].get_value(3) == "amsterdam"
-
-
-def test_inverse_transform_invalid() -> None:
-    test_table = Table(
-        pd.DataFrame({"cities": ["paris", "paris", "tokyo", "amsterdam"]})
-    )
-    le = LabelEncoder()
-    # le.fit(test_table) removed to force NotFittedError
-    with pytest.raises(NotFittedError):
-        le.inverse_transform(test_table, "cities")
diff --git a/tests/safeds/data/tabular/transformation/_label_encoder/test_transform.py b/tests/safeds/data/tabular/transformation/_label_encoder/test_transform.py
deleted file mode 100644
index 1bed487ee..000000000
--- a/tests/safeds/data/tabular/transformation/_label_encoder/test_transform.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import pandas as pd
-import pytest
-from safeds.data.tabular.containers import Table
-from safeds.data.tabular.transformation import LabelEncoder
-from safeds.exceptions import NotFittedError
-
-
-def test_transform_valid() -> None:
-    test_table = Table(
-        pd.DataFrame({"cities": ["paris", "paris", "tokyo", "amsterdam"]})
-    )
-    le = LabelEncoder()
-    le.fit(test_table, "cities")
-    test_table = le.transform(test_table, "cities")
-    assert test_table.schema.has_column("cities")
-    assert test_table.to_columns()[0].get_value(0) == 1
-    assert test_table.to_columns()[0].get_value(2) == 2
-    assert test_table.to_columns()[0].get_value(3) == 0
-
-
-def test_transform_invalid() -> None:
-    test_table = Table(
-        pd.DataFrame({"cities": ["paris", "paris", "tokyo", "amsterdam"]})
-    )
-    le = LabelEncoder()
-    # le.fit(test_table) removed to force NotFittedError
-    with pytest.raises(NotFittedError):
-        le.transform(test_table, "cities")
diff --git a/tests/safeds/data/tabular/transformation/_one_hot_encoder/__init__.py b/tests/safeds/data/tabular/transformation/_one_hot_encoder/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tests/safeds/data/tabular/transformation/_one_hot_encoder/test_fit_transform.py b/tests/safeds/data/tabular/transformation/_one_hot_encoder/test_fit_transform.py
deleted file mode 100644
index ef6239c8b..000000000
--- a/tests/safeds/data/tabular/transformation/_one_hot_encoder/test_fit_transform.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import pandas as pd
-from safeds.data.tabular.containers import Table
-from safeds.data.tabular.transformation import OneHotEncoder
-
-
-def test_fit_transform() -> None:
-    table = Table(
-        pd.DataFrame(
-            data={
-                "col1": ["A", "B", "C", "A"],
-                "col2": ["Test1", "Test1", "Test3", "Test1"],
-                "col3": [1, 2, 3, 4],
-            }
-        )
-    )
-    ohe = OneHotEncoder()
-    table_ohe = ohe.fit_transform(table, ["col1", "col2"])
-    assert table_ohe.count_columns() == 6
-    assert table_ohe.get_row(0).get_value("col1_A") == 1
-    assert table_ohe.get_row(1).get_value("col1_B") == 1
-    assert table_ohe.get_row(2).get_value("col1_C") == 1
-    assert table_ohe.get_row(3).get_value("col1_A") == 1
-    assert table_ohe.get_row(0).get_value("col2_Test1") == 1
-    assert table_ohe.get_row(1).get_value("col2_Test1") == 1
-    assert table_ohe.get_row(2).get_value("col2_Test3") == 1
-    assert table_ohe.get_row(3).get_value("col2_Test1") == 1
-    assert table_ohe.get_column("col1_A").sum() == 2
-    assert table_ohe.get_column("col1_B").sum() == 1
-    assert table_ohe.get_column("col1_C").sum() == 1
-    assert table_ohe.get_column("col2_Test1").sum() == 3
-    assert table_ohe.get_column("col2_Test3").sum() == 1
-    assert table_ohe.get_column("col3").sum() == 10
diff --git a/tests/safeds/data/tabular/transformation/_one_hot_encoder/test_inverse_transform.py b/tests/safeds/data/tabular/transformation/_one_hot_encoder/test_inverse_transform.py
deleted file mode 100644
index f7b4de2a0..000000000
--- a/tests/safeds/data/tabular/transformation/_one_hot_encoder/test_inverse_transform.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import pandas as pd
-import pytest
-from safeds.data.tabular.containers import Table
-from safeds.data.tabular.transformation import OneHotEncoder
-from safeds.exceptions import NotFittedError
-
-
-def test_fit_transform() -> None:
-    table = Table(
-        pd.DataFrame(
-            data={
-                "col1": ["A", "B", "C", "A"],
-                "col2": ["Test1", "Test1", "Test3", "Test1"],
-                "col3": [1, 2, 3, 4],
-            }
-        )
-    )
-    ohe = OneHotEncoder()
-    table_ohe = ohe.fit_transform(table, ["col1", "col2"])
-    table_old = ohe.inverse_transform(table_ohe)
-    assert table_old == table
-
-
-def test_fit_transform_invalid() -> None:
-    table = Table(
-        pd.DataFrame(
-            data={
-                "col1": ["A", "B", "C", "A"],
-                "col2": ["Test1", "Test1", "Test3", "Test1"],
-                "col3": [1, 2, 3, 4],
-            }
-        )
-    )
-    ohe = OneHotEncoder()
-    with pytest.raises(NotFittedError):
-        ohe.inverse_transform(table)
diff --git a/tests/safeds/data/tabular/transformation/_one_hot_encoder/test_transform.py b/tests/safeds/data/tabular/transformation/_one_hot_encoder/test_transform.py
deleted file mode 100644
index 1a1405dce..000000000
--- a/tests/safeds/data/tabular/transformation/_one_hot_encoder/test_transform.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import pandas as pd
-import pytest
-from safeds.data.tabular.containers import Table
-from safeds.data.tabular.transformation import OneHotEncoder
-from safeds.exceptions import NotFittedError
-
-
-def test_transform_invalid() -> None:
-    table = Table(
-        pd.DataFrame(
-            data={
-                "col1": ["A", "B", "C", "A"],
-                "col2": ["Test1", "Test1", "Test3", "Test1"],
-                "col3": [1, 2, 3, 4],
-            }
-        )
-    )
-    ohe = OneHotEncoder()
-    with pytest.raises(NotFittedError):
-        ohe.transform(table)
diff --git a/tests/safeds/data/tabular/transformation/test_imputer.py b/tests/safeds/data/tabular/transformation/test_imputer.py
new file mode 100644
index 000000000..84d039571
--- /dev/null
+++ b/tests/safeds/data/tabular/transformation/test_imputer.py
@@ -0,0 +1,175 @@
+from typing import Optional
+
+import pytest
+from safeds.data.tabular.containers import Column, Table
+from safeds.data.tabular.transformation import Imputer, ImputerStrategy
+from safeds.exceptions import NotFittedError, UnknownColumnNameError
+
+
+class TestFit:
+    """Tests for `Imputer.fit`."""
+
+    def test_should_raise_if_column_not_found(self) -> None:
+        """Fitting on a column name that is missing from the table raises `UnknownColumnNameError`."""
+        training_table = Table.from_columns(
+            [Column("a", [1, 3, None])],
+        )
+
+        with pytest.raises(UnknownColumnNameError):
+            Imputer(Imputer.Strategy.Constant(0)).fit(training_table, ["b"])
+
+    def test_should_not_change_original_transformer(self) -> None:
+        """Calling `fit` leaves both private attributes of the receiver at `None`."""
+        training_table = Table.from_columns(
+            [Column("a", [1, 3, None])],
+        )
+
+        original_imputer = Imputer(Imputer.Strategy.Constant(0))
+        original_imputer.fit(training_table)  # the returned transformer is ignored on purpose
+
+        assert original_imputer._wrapped_transformer is None
+        assert original_imputer._column_names is None
+
+
+class TestTransform:
+    """Tests for `Imputer.transform`."""
+
+    def test_should_raise_if_column_not_found(self) -> None:
+        """Transforming a table that lacks the fitted column raises `UnknownColumnNameError`."""
+        # Fit on a table whose only column is "a" ...
+        training_table = Table.from_columns(
+            [Column("a", [1, 3, None])],
+        )
+        fitted_imputer = Imputer(Imputer.Strategy.Constant(0)).fit(training_table)
+
+        # ... then transform a table whose only column is "b".
+        mismatched_table = Table.from_columns(
+            [Column("b", [1, 3, None])],
+        )
+
+        with pytest.raises(UnknownColumnNameError):
+            fitted_imputer.transform(mismatched_table)
+
+    def test_should_raise_if_not_fitted(self) -> None:
+        """Calling `transform` without a prior `fit` raises `NotFittedError`."""
+        input_table = Table.from_columns(
+            [Column("a", [1, 3, None])],
+        )
+
+        # `fit` is never called, so the imputer stays unfitted.
+        unfitted_imputer = Imputer(Imputer.Strategy.Constant(0))
+
+        with pytest.raises(NotFittedError):
+            unfitted_imputer.transform(input_table)
+
+
+class TestFitTransform:
+    """Tests for `Imputer.fit_transform`."""
+
+    @pytest.mark.parametrize(
+        ("table", "column_names", "strategy", "expected"),
+        [
+            (
+                Table.from_columns(
+                    [
+                        Column("a", [1.0, 3.0, None]),
+                    ]
+                ),
+                None,
+                Imputer.Strategy.Constant(0.0),
+                Table.from_columns(
+                    [
+                        Column("a", [1.0, 3.0, 0.0]),
+                    ]
+                ),
+            ),
+            (
+                Table.from_columns(
+                    [
+                        Column("a", [1.0, 3.0, None]),
+                    ]
+                ),
+                None,
+                Imputer.Strategy.Mean(),
+                Table.from_columns(
+                    [
+                        Column("a", [1.0, 3.0, 2.0]),
+                    ]
+                ),
+            ),
+            (
+                Table.from_columns(
+                    [
+                        Column("a", [1.0, 3.0, 1.0, None]),
+                    ]
+                ),
+                None,
+                Imputer.Strategy.Median(),
+                Table.from_columns(
+                    [
+                        Column("a", [1.0, 3.0, 1.0, 1.0]),
+                    ]
+                ),
+            ),
+            (
+                Table.from_columns(
+                    [
+                        Column("a", [1.0, 3.0, 3.0, None]),
+                    ]
+                ),
+                None,
+                Imputer.Strategy.Mode(),
+                Table.from_columns(
+                    [
+                        Column("a", [1.0, 3.0, 3.0, 3.0]),
+                    ]
+                ),
+            ),
+            (
+                Table.from_columns(
+                    [
+                        Column("a", [1.0, 3.0, None]),
+                        Column("b", [1.0, 3.0, None]),
+                    ]
+                ),
+                ["a"],
+                Imputer.Strategy.Constant(0.0),
+                Table.from_columns(
+                    [
+                        Column("a", [1.0, 3.0, 0.0]),
+                        Column("b", [1.0, 3.0, None]),
+                    ]
+                ),
+            ),
+        ],
+        ids=["constant", "mean", "median", "mode", "constant_on_selected_columns"],
+    )
+    def test_should_return_transformed_table(
+        self, table: Table, column_names: Optional[list[str]], strategy: ImputerStrategy, expected: Table
+    ) -> None:
+        """`fit_transform` returns a table equal to the expected one for each strategy."""
+        assert Imputer(strategy).fit_transform(table, column_names) == expected
+
+    def test_should_raise_if_strategy_is_mode_but_multiple_values_are_most_frequent(self) -> None:
+        """The mode strategy raises an `IndexError` for a column whose non-missing values each occur once."""
+        table = Table.from_columns(
+            [Column("a", [1, 2, 3, None])],
+        )
+
+        with pytest.raises(IndexError):
+            Imputer(Imputer.Strategy.Mode()).fit_transform(table)
+
+    def test_should_not_change_original_table(self) -> None:
+        """The table passed to `fit_transform` still equals a freshly built copy of its data afterwards."""
+        table = Table.from_columns(
+            [Column("a", [1, None, None])],
+        )
+
+        # The return value is irrelevant here; only `table` itself is inspected.
+        Imputer(strategy=Imputer.Strategy.Constant(1)).fit_transform(table)
+
+        expected = Table.from_columns(
+            [Column("a", [1, None, None])],
+        )
+
+        assert table == expected
diff --git a/tests/safeds/data/tabular/transformation/test_label_encoder.py b/tests/safeds/data/tabular/transformation/test_label_encoder.py
new file mode 100644
index 000000000..e29e74742
--- /dev/null
+++ b/tests/safeds/data/tabular/transformation/test_label_encoder.py
@@ -0,0 +1,168 @@
+from typing import Optional
+
+import pytest
+from safeds.data.tabular.containers import Column, Table
+from safeds.data.tabular.transformation import LabelEncoder
+from safeds.exceptions import NotFittedError, UnknownColumnNameError
+
+
+class TestFit:
+    """Tests for `LabelEncoder.fit`."""
+
+    def test_should_raise_if_column_not_found(self) -> None:
+        """Fitting on a column name that is missing from the table raises `UnknownColumnNameError`."""
+        training_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        with pytest.raises(UnknownColumnNameError):
+            LabelEncoder().fit(training_table, ["col2"])
+
+    def test_should_not_change_original_transformer(self) -> None:
+        """Calling `fit` leaves both private attributes of the receiver at `None`."""
+        training_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        original_encoder = LabelEncoder()
+        original_encoder.fit(training_table)  # the returned transformer is ignored on purpose
+
+        assert original_encoder._wrapped_transformer is None
+        assert original_encoder._column_names is None
+
+
+class TestTransform:
+    """Tests for `LabelEncoder.transform`."""
+
+    def test_should_raise_if_column_not_found(self) -> None:
+        """Transforming a table that lacks the fitted column raises `UnknownColumnNameError`."""
+        # Fit on a table whose only column is "col1" ...
+        training_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+        fitted_encoder = LabelEncoder().fit(training_table)
+
+        # ... then transform a table whose only column is "col2".
+        mismatched_table = Table.from_columns(
+            [Column("col2", ["a", "b", "c"])],
+        )
+
+        with pytest.raises(UnknownColumnNameError):
+            fitted_encoder.transform(mismatched_table)
+
+    def test_should_raise_if_not_fitted(self) -> None:
+        """Calling `transform` without a prior `fit` raises `NotFittedError`."""
+        input_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        # `fit` is never called, so the encoder stays unfitted.
+        unfitted_encoder = LabelEncoder()
+
+        with pytest.raises(NotFittedError):
+            unfitted_encoder.transform(input_table)
+
+
+class TestFitTransform:
+    """Tests for `LabelEncoder.fit_transform`."""
+
+    @pytest.mark.parametrize(
+        ("table", "column_names", "expected"),
+        [
+            # No column names given: the only column is expected to come back encoded.
+            (
+                Table.from_columns(
+                    [Column("col1", ["a", "b", "b", "c"])],
+                ),
+                None,
+                Table.from_columns(
+                    [Column("col1", [0.0, 1.0, 1.0, 2.0])],
+                ),
+            ),
+            # Only "col1" is selected: "col2" is expected to come back unchanged.
+            (
+                Table.from_columns(
+                    [
+                        Column("col1", ["a", "b", "b", "c"]),
+                        Column("col2", ["a", "b", "b", "c"]),
+                    ]
+                ),
+                ["col1"],
+                Table.from_columns(
+                    [
+                        Column("col1", [0.0, 1.0, 1.0, 2.0]),
+                        Column("col2", ["a", "b", "b", "c"]),
+                    ]
+                ),
+            ),
+        ],
+    )
+    def test_should_return_transformed_table(
+        self, table: Table, column_names: Optional[list[str]], expected: Table
+    ) -> None:
+        """`fit_transform` returns a table equal to the expected one for each case."""
+        actual = LabelEncoder().fit_transform(table, column_names)
+
+        assert actual == expected
+
+    def test_should_not_change_original_table(self) -> None:
+        """The table passed to `fit_transform` still equals a freshly built copy of its data afterwards."""
+        original_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        LabelEncoder().fit_transform(original_table)
+
+        untouched_copy = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        assert original_table == untouched_copy
+
+
+class TestInverseTransform:
+    """Tests for `LabelEncoder.inverse_transform`."""
+
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_columns(
+                [Column("col1", ["a", "b", "b", "c"])],
+            ),
+        ],
+    )
+    def test_should_return_original_table(self, table: Table) -> None:
+        """Transforming and then inverse-transforming a table yields a table equal to the input."""
+        fitted_encoder = LabelEncoder().fit(table)
+        encoded_table = fitted_encoder.transform(table)
+        round_tripped_table = fitted_encoder.inverse_transform(encoded_table)
+
+        assert round_tripped_table == table
+
+    def test_should_not_change_transformed_table(self) -> None:
+        """`inverse_transform` leaves the table it receives as input unmodified."""
+        original_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        fitted_encoder = LabelEncoder().fit(original_table)
+        encoded_table = fitted_encoder.transform(original_table)
+        fitted_encoder.inverse_transform(encoded_table)  # result ignored; only the input is inspected
+
+        expected = Table.from_columns(
+            [Column("col1", [0.0, 1.0, 2.0])],
+        )
+
+        assert encoded_table == expected
+
+    def test_should_raise_if_not_fitted(self) -> None:
+        """Calling `inverse_transform` without a prior `fit` raises `NotFittedError`."""
+        encoded_table = Table.from_columns(
+            [Column("col1", [0.0, 1.0, 1.0, 2.0])],
+        )
+
+        # `fit` is never called, so the encoder stays unfitted.
+        unfitted_encoder = LabelEncoder()
+
+        with pytest.raises(NotFittedError):
+            unfitted_encoder.inverse_transform(encoded_table)
diff --git a/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py
new file mode 100644
index 000000000..061d6d6b9
--- /dev/null
+++ b/tests/safeds/data/tabular/transformation/test_one_hot_encoder.py
@@ -0,0 +1,176 @@
+from typing import Optional
+
+import pytest
+from safeds.data.tabular.containers import Column, Table
+from safeds.data.tabular.transformation import OneHotEncoder
+from safeds.exceptions import NotFittedError, UnknownColumnNameError
+
+
+class TestFit:
+    """Tests for `OneHotEncoder.fit`."""
+
+    def test_should_raise_if_column_not_found(self) -> None:
+        """Fitting on a column name that is missing from the table raises `UnknownColumnNameError`."""
+        training_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        with pytest.raises(UnknownColumnNameError):
+            OneHotEncoder().fit(training_table, ["col2"])
+
+    def test_should_not_change_original_transformer(self) -> None:
+        """Calling `fit` leaves both private attributes of the receiver at `None`."""
+        training_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        original_encoder = OneHotEncoder()
+        original_encoder.fit(training_table)  # the returned transformer is ignored on purpose
+
+        assert original_encoder._wrapped_transformer is None
+        assert original_encoder._column_names is None
+
+
+class TestTransform:
+    """Tests for `OneHotEncoder.transform`."""
+
+    def test_should_raise_if_column_not_found(self) -> None:
+        """Transforming a table that lacks the fitted column raises `UnknownColumnNameError`."""
+        # Fit on a table whose only column is "col1" ...
+        training_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+        fitted_encoder = OneHotEncoder().fit(training_table)
+
+        # ... then transform a table whose only column is "col2".
+        mismatched_table = Table.from_columns(
+            [Column("col2", ["a", "b", "c"])],
+        )
+
+        with pytest.raises(UnknownColumnNameError):
+            fitted_encoder.transform(mismatched_table)
+
+    def test_should_raise_if_not_fitted(self) -> None:
+        """Calling `transform` without a prior `fit` raises `NotFittedError`."""
+        input_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        # `fit` is never called, so the encoder stays unfitted.
+        unfitted_encoder = OneHotEncoder()
+
+        with pytest.raises(NotFittedError):
+            unfitted_encoder.transform(input_table)
+
+
+class TestFitTransform:
+    """Tests for `OneHotEncoder.fit_transform`."""
+
+    @pytest.mark.parametrize(
+        ("table", "column_names", "expected"),
+        [
+            # No column names given: "col1" is expected to be replaced by one column per distinct value.
+            (
+                Table.from_columns(
+                    [Column("col1", ["a", "b", "b", "c"])],
+                ),
+                None,
+                Table.from_columns(
+                    [
+                        Column("col1_a", [1.0, 0.0, 0.0, 0.0]),
+                        Column("col1_b", [0.0, 1.0, 1.0, 0.0]),
+                        Column("col1_c", [0.0, 0.0, 0.0, 1.0]),
+                    ]
+                ),
+            ),
+            # Only "col1" is selected: "col2" is expected unchanged, followed by the new columns.
+            (
+                Table.from_columns(
+                    [
+                        Column("col1", ["a", "b", "b", "c"]),
+                        Column("col2", ["a", "b", "b", "c"]),
+                    ]
+                ),
+                ["col1"],
+                Table.from_columns(
+                    [
+                        Column("col2", ["a", "b", "b", "c"]),
+                        Column("col1_a", [1.0, 0.0, 0.0, 0.0]),
+                        Column("col1_b", [0.0, 1.0, 1.0, 0.0]),
+                        Column("col1_c", [0.0, 0.0, 0.0, 1.0]),
+                    ]
+                ),
+            ),
+        ],
+    )
+    def test_should_return_transformed_table(
+        self, table: Table, column_names: Optional[list[str]], expected: Table
+    ) -> None:
+        """`fit_transform` returns a table equal to the expected one for each case."""
+        assert OneHotEncoder().fit_transform(table, column_names) == expected
+
+    def test_should_not_change_original_table(self) -> None:
+        """The table passed to `fit_transform` still equals a freshly built copy of its data afterwards."""
+        original_table = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        OneHotEncoder().fit_transform(original_table)
+
+        untouched_copy = Table.from_columns(
+            [Column("col1", ["a", "b", "c"])],
+        )
+
+        assert original_table == untouched_copy
+
+
+class TestInverseTransform:
+    """Tests for `OneHotEncoder.inverse_transform`."""
+
+    @pytest.mark.parametrize(
+        "table",
+        [
+            Table.from_columns(
+                [Column("col1", ["a", "b", "b", "c"])],
+            ),
+        ],
+    )
+    def test_should_return_original_table(self, table: Table) -> None:
+        """Transforming and then inverse-transforming a table yields a table equal to the input."""
+        fitted_encoder = OneHotEncoder().fit(table)
+
+        assert fitted_encoder.inverse_transform(fitted_encoder.transform(table)) == table
+
+    def test_should_not_change_transformed_table(self) -> None:
+        """`inverse_transform` leaves the table it receives as input unmodified."""
+        original_table = Table.from_columns(
+            [Column("col1", ["a", "b", "b", "c"])],
+        )
+
+        fitted_encoder = OneHotEncoder().fit(original_table)
+        encoded_table = fitted_encoder.transform(original_table)
+        fitted_encoder.inverse_transform(encoded_table)  # result ignored; only the input is inspected
+
+        expected = Table.from_columns(
+            [
+                Column("col1_a", [1.0, 0.0, 0.0, 0.0]),
+                Column("col1_b", [0.0, 1.0, 1.0, 0.0]),
+                Column("col1_c", [0.0, 0.0, 0.0, 1.0]),
+            ]
+        )
+
+        assert encoded_table == expected
+
+    def test_should_raise_if_not_fitted(self) -> None:
+        """Calling `inverse_transform` without a prior `fit` raises `NotFittedError`."""
+        encoded_table = Table.from_columns(
+            [
+                Column("a", [1.0, 0.0, 0.0, 0.0]),
+                Column("b", [0.0, 1.0, 1.0, 0.0]),
+                Column("c", [0.0, 0.0, 0.0, 1.0]),
+            ]
+        )
+        unfitted_encoder = OneHotEncoder()  # `fit` is never called
+
+        with pytest.raises(NotFittedError):
+            unfitted_encoder.inverse_transform(encoded_table)