From 40fb7566307b4c015f3acae7bb94f4e937977e07 Mon Sep 17 00:00:00 2001 From: patrikguempel Date: Sat, 6 May 2023 10:10:31 +0200 Subject: [PATCH 1/2] feat: Added Column#transform (#270) Closes #255 . ### Summary of Changes Users are now able to call Column#transform to apply a function to every data point within the column Co-authored-by: 92090487-PhilipGutberlet@useres.noreply.github.com --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Co-authored-by: Lars Reimann --- src/safeds/data/tabular/containers/_column.py | 27 +++++++++++++++++ .../containers/_column/test_transform.py | 29 +++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_column/test_transform.py diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index 3851ac291..52704edf8 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -24,6 +24,7 @@ from collections.abc import Callable, Iterator T = TypeVar("T") +R = TypeVar("R") class Column(Sequence[T]): @@ -300,6 +301,8 @@ def rename(self, new_name: str) -> Column: """ Return a new column with a new name. + This column is not modified. + Parameters ---------- new_name : str @@ -312,6 +315,30 @@ def rename(self, new_name: str) -> Column: """ return Column._from_pandas_series(self._data.rename(new_name), self._type) + def transform(self, transformer: Callable[[T], R]) -> Column[R]: + """ + Apply a transform method to every data point. + + This column is not modified. + + Parameters + ---------- + transformer : Callable[[T], R] + Function that will be applied to all data points. + + Returns + ------- + transformed_column: Column + The transformed column. 
+ + Examples + -------- + >>> from safeds.data.tabular.containers import Column + >>> price = Column("price", [4.99, 5.99, 2.49]) + >>> sale = price.transform(lambda amount: amount * 0.8) + """ + return Column(self.name, self._data.apply(transformer, convert_dtype=True)) + # ------------------------------------------------------------------------------------------------------------------ # Statistics # ------------------------------------------------------------------------------------------------------------------ diff --git a/tests/safeds/data/tabular/containers/_column/test_transform.py b/tests/safeds/data/tabular/containers/_column/test_transform.py new file mode 100644 index 000000000..8e35ab2c0 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_column/test_transform.py @@ -0,0 +1,29 @@ +import pytest +from safeds.data.tabular.containers import Column + + +@pytest.mark.parametrize( + ("column", "expected"), + [ + (Column("test", []), Column("test", [])), + (Column("test", [1, 2]), Column("test", [2, 3])), + (Column("test", [-0.5, 0, 4]), Column("test", [0.5, 1, 5])), + ], + ids=["empty", "integers", "floats"], +) +def test_should_transform_column(column: Column, expected: Column) -> None: + assert column.transform(lambda it: it + 1) == expected + + +@pytest.mark.parametrize( + ("column", "original"), + [ + (Column("test", []), Column("test", [])), + (Column("test", [1, 2]), Column("test", [1, 2])), + (Column("test", [-0.5, 0, 4]), Column("test", [-0.5, 0, 4])), + ], + ids=["empty", "integers", "floats"], +) +def test_should_not_change_original_column(column: Column, original: Column) -> None: + column.transform(lambda it: it + 1) + assert column == original From dcf2e6c0040385af724af7cb224bdc8d2cd48750 Mon Sep 17 00:00:00 2001 From: Philip Gutberlet <92990487+PhilipGutberlet@users.noreply.github.com> Date: Sat, 6 May 2023 10:37:05 +0200 Subject: [PATCH 2/2] docs: Highlighting the fact, that certain methods do not work in place (#279) Closes #189. 
### Summary of Changes Highlighting the fact that fitting methods of Classifier, Regressor, TableTransformer and certain methods of the `Table` class do not work in place. --------- Co-authored-by: Lars Reimann Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> --- docs/CHANGELOG.md | 2 +- src/safeds/data/tabular/containers/_table.py | 62 +++++++++++++++++++ .../data/tabular/transformation/_imputer.py | 4 ++ .../tabular/transformation/_label_encoder.py | 6 ++ .../transformation/_one_hot_encoder.py | 26 +++++--- .../transformation/_table_transformer.py | 8 ++- 6 files changed, 97 insertions(+), 11 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index ff6378e68..8b34868b6 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -3,7 +3,7 @@ ### Features -* `OneHotEncoder.inverse_transform` now maintains the column order from the original table ([#195](https://github.com/Safe-DS/Stdlib/issues/195)) ([3ec0041](https://github.com/Safe-DS/Stdlib/commit/3ec0041669ffe97640f96db345f3f43720d5c3f7)), closes [#109](https://github.com/Safe-DS/Stdlib/issues/109) [#109](https://github.com/Safe-DS/Stdlib/issues/109) +* `OneHotEncoder.inverse_transform` now maintains the column order from the original table ([#195](https://github.com/Safe-DS/Stdlib/issues/195)) ([3ec0041](https://github.com/Safe-DS/Stdlib/commit/3ec0041669ffe97640f96db345f3f43720d5c3f7)), closes [#109](https://github.com/Safe-DS/Stdlib/issues/109) * add `plot_` prefix back to plotting methods ([#212](https://github.com/Safe-DS/Stdlib/issues/212)) ([e50c3b0](https://github.com/Safe-DS/Stdlib/commit/e50c3b0118825e33eef0e2a7073673603e316ee5)), closes [#211](https://github.com/Safe-DS/Stdlib/issues/211) * adjust `Column`, `Schema` and `Table` to changes in `Row` ([#216](https://github.com/Safe-DS/Stdlib/issues/216)) ([ca3eebb](https://github.com/Safe-DS/Stdlib/commit/ca3eebbe2166f08d76cdcb89a012518ae8ff8e4e)) * back `Row` by a `polars.DataFrame` 
([#214](https://github.com/Safe-DS/Stdlib/issues/214)) ([62ca34d](https://github.com/Safe-DS/Stdlib/commit/62ca34dd399da8b4e34b89bad408311707b53f41)), closes [#196](https://github.com/Safe-DS/Stdlib/issues/196) [#149](https://github.com/Safe-DS/Stdlib/issues/149) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 015afe946..437203119 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -63,6 +63,8 @@ def from_csv_file(path: str | Path) -> Table: """ Read data from a CSV file into a table. + This table is not modified. + Parameters ---------- path : str | Path @@ -90,6 +92,8 @@ def from_excel_file(path: str | Path) -> Table: """ Read data from an Excel file into a table. + This table is not modified. + Parameters ---------- path : str | Path @@ -117,6 +121,8 @@ def from_json_file(path: str | Path) -> Table: """ Read data from a JSON file into a table. + This table is not modified. + Parameters ---------- path : str | Path @@ -144,6 +150,8 @@ def from_dict(data: dict[str, list[Any]]) -> Table: """ Create a table from a dictionary that maps column names to column values. + This table is not modified. + Parameters ---------- data : dict[str, list[Any]] @@ -180,6 +188,8 @@ def from_columns(columns: list[Column]) -> Table: """ Return a table created from a list of columns. + This table is not modified. + Parameters ---------- columns : list[Column] @@ -211,6 +221,8 @@ def from_rows(rows: list[Row]) -> Table: """ Return a table created from a list of rows. + This table is not modified. + Parameters ---------- rows : list[Row] @@ -248,6 +260,8 @@ def _from_pandas_dataframe(data: pd.DataFrame, schema: Schema | None = None) -> """ Create a table from a `pandas.DataFrame`. + This table is not modified. + Parameters ---------- data : pd.DataFrame @@ -483,6 +497,8 @@ def summary(self) -> Table: """ Return a table with a number of statistical key values. 
+ This table is not modified. + Returns ------- result : Table @@ -528,6 +544,8 @@ def add_column(self, column: Column) -> Table: """ Return the original table with the provided column attached at the end. + This table is not modified. + Returns ------- result : Table @@ -557,6 +575,8 @@ def add_columns(self, columns: list[Column] | Table) -> Table: """ Add multiple columns to the table. + This table is not modified. + Parameters ---------- columns : list[Column] or Table @@ -592,6 +612,8 @@ def add_row(self, row: Row) -> Table: """ Add a row to the table. + This table is not modified. + Parameters ---------- row : Row @@ -614,6 +636,8 @@ def add_rows(self, rows: list[Row] | Table) -> Table: """ Add multiple rows to a table. + This table is not modified. + Parameters ---------- rows : list[Row] or Table @@ -641,6 +665,8 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table: """ Return a table with rows filtered by Callable (e.g. lambda function). + This table is not modified. + Parameters ---------- query : lambda function @@ -662,6 +688,8 @@ def keep_only_columns(self, column_names: list[str]) -> Table: """ Return a table with only the given column(s). + This table is not modified. + Parameters ---------- column_names : list[str] @@ -692,6 +720,8 @@ def remove_columns(self, column_names: list[str]) -> Table: """ Return a table without the given column(s). + This table is not modified. + Parameters ---------- column_names : list[str] @@ -722,6 +752,8 @@ def remove_columns_with_missing_values(self) -> Table: """ Return a table without the columns that contain missing values. + This table is not modified. + Returns ------- table : Table @@ -733,6 +765,8 @@ def remove_columns_with_non_numerical_values(self) -> Table: """ Return a table without the columns that contain non-numerical values. + This table is not modified. 
+ Returns ------- table : Table @@ -745,6 +779,8 @@ def remove_duplicate_rows(self) -> Table: """ Return a copy of the table with every duplicate row removed. + This table is not modified. + Returns ------- result : Table @@ -758,6 +794,8 @@ def remove_rows_with_missing_values(self) -> Table: """ Return a table without the rows that contain missing values. + This table is not modified. + Returns ------- table : Table @@ -775,6 +813,8 @@ def remove_rows_with_outliers(self) -> Table: Missing values are not considered outliers. They are also ignored during the calculation of the standard deviation. + This table is not modified. + Returns ------- new_table : Table @@ -792,6 +832,8 @@ def rename_column(self, old_name: str, new_name: str) -> Table: """ Rename a single column. + This table is not modified. + Parameters ---------- old_name : str @@ -826,6 +868,8 @@ def replace_column(self, old_column_name: str, new_column: Column) -> Table: """ Return a copy of the table with the specified old column replaced by a new column. Keeps the order of columns. + This table is not modified. + Parameters ---------- old_column_name : str @@ -874,6 +918,8 @@ def shuffle_rows(self) -> Table: """ Shuffle the table randomly. + This table is not modified. + Returns ------- result : Table @@ -893,6 +939,8 @@ def slice_rows( """ Slice a part of the table into a new table. + This table is not modified. + Parameters ---------- start : int @@ -942,6 +990,8 @@ def sort_columns( If no comparator is given, the columns will be sorted alphabetically by their name. + This table is not modified. + Parameters ---------- comparator : Callable[[Column, Column], int] @@ -967,6 +1017,8 @@ def sort_rows(self, comparator: Callable[[Row, Row], int]) -> Table: * If `row1` should be ordered after `row2`, the function should return a positive number. * If the original order of `row1` and `row2` should be kept, the function should return 0. + This table is not modified. 
+ Parameters ---------- comparator : Callable[[Row, Row], int] @@ -985,6 +1037,8 @@ def split(self, percentage_in_first: float) -> tuple[Table, Table]: """ Split the table into two new tables. + This table is not modified. + Parameters ---------- percentage_in_first : float @@ -1009,6 +1063,8 @@ def tag_columns(self, target_name: str, feature_names: list[str] | None = None) """ Mark the columns of the table as target column or feature columns. The original table is not modified. + This table is not modified. + Parameters ---------- target_name : str @@ -1029,6 +1085,8 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tabl """ Transform provided column by calling provided transformer. + This table is not modified. + Returns ------- result : Table @@ -1050,6 +1108,8 @@ def transform_table(self, transformer: TableTransformer) -> Table: """ Apply a learned transformation onto this table. + This table is not modified. + Parameters ---------- transformer : TableTransformer @@ -1084,6 +1144,8 @@ def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> Ta """ Invert the transformation applied by the given transformer. + This table is not modified. + Parameters ---------- transformer : InvertibleTableTransformer diff --git a/src/safeds/data/tabular/transformation/_imputer.py b/src/safeds/data/tabular/transformation/_imputer.py index 6a890f97a..e3affedf0 100644 --- a/src/safeds/data/tabular/transformation/_imputer.py +++ b/src/safeds/data/tabular/transformation/_imputer.py @@ -94,6 +94,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> Imputer: """ Learn a transformation for a set of columns in a table. + This transformer is not modified. + Parameters ---------- table : Table @@ -133,6 +135,8 @@ def transform(self, table: Table) -> Table: """ Apply the learned transformation to a table. + The table is not modified. 
+ Parameters ---------- table : Table diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index 2dd4fe6db..304c18321 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -21,6 +21,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> LabelEncoder: """ Learn a transformation for a set of columns in a table. + This transformer is not modified. + Parameters ---------- table : Table @@ -53,6 +55,8 @@ def transform(self, table: Table) -> Table: """ Apply the learned transformation to a table. + The table is not modified. + Parameters ---------- table : Table @@ -86,6 +90,8 @@ def inverse_transform(self, transformed_table: Table) -> Table: """ Undo the learned transformation. + The table is not modified. + Parameters ---------- transformed_table : Table diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index 69c9f0d4e..4af51065f 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -22,6 +22,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: """ Learn a transformation for a set of columns in a table. + This transformer is not modified. + Parameters ---------- table : Table @@ -61,6 +63,8 @@ def transform(self, table: Table) -> Table: """ Apply the learned transformation to a table. + The table is not modified. + Parameters ---------- table : Table @@ -114,6 +118,8 @@ def inverse_transform(self, transformed_table: Table) -> Table: """ Undo the learned transformation. + The table is not modified. 
+ Parameters ---------- transformed_table : Table @@ -146,15 +152,17 @@ def inverse_transform(self, transformed_table: Table) -> Table: res = Table(pd.concat([unchanged, decoded], axis=1)) column_names = [ - name - if name not in [value for value_list in list(self._column_names.values()) for value in value_list] - else list(self._column_names.keys())[ - [ - list(self._column_names.values()).index(value) - for value in list(self._column_names.values()) - if name in value - ][0] - ] + ( + name + if name not in [value for value_list in list(self._column_names.values()) for value in value_list] + else list(self._column_names.keys())[ + [ + list(self._column_names.values()).index(value) + for value in list(self._column_names.values()) + if name in value + ][0] + ] + ) for name in transformed_table.column_names ] res = res.sort_columns(lambda col1, col2: column_names.index(col1.name) - column_names.index(col2.name)) diff --git a/src/safeds/data/tabular/transformation/_table_transformer.py b/src/safeds/data/tabular/transformation/_table_transformer.py index ba431d0bc..9e881f357 100644 --- a/src/safeds/data/tabular/transformation/_table_transformer.py +++ b/src/safeds/data/tabular/transformation/_table_transformer.py @@ -15,6 +15,8 @@ def fit(self, table: Table, column_names: list[str] | None) -> TableTransformer: """ Learn a transformation for a set of columns in a table. + This transformer is not modified. + Parameters ---------- table : Table @@ -33,6 +35,8 @@ def transform(self, table: Table) -> Table: """ Apply the learned transformation to a table. + The table is not modified. + Parameters ---------- table : Table @@ -64,7 +68,7 @@ def fit_and_transform(self, table: Table, column_names: list[str] | None = None) """ Learn a transformation for a set of columns in a table and apply the learned transformation to the same table. - If you also need the fitted transformer, use `fit` and `transform` separately. + The table is not modified. 
If you also need the fitted transformer, use `fit` and `transform` separately. Parameters ---------- @@ -89,6 +93,8 @@ def inverse_transform(self, transformed_table: Table) -> Table: """ Undo the learned transformation. + The table is not modified. + Parameters ---------- transformed_table : Table