From a1cdaef259e9f889f8fb0355ef69d4666faa9c94 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Sat, 4 May 2024 22:58:45 +0200 Subject: [PATCH] feat: `Table.remove_rows` (#720) Closes #698 ### Summary of Changes Add a method `Table.remove_rows`. It's the same as `Table.filter_rows` with a negated query. --- src/safeds/data/tabular/containers/_table.py | 49 +++++++++++++++++-- .../containers/_table/test_filter_rows.py | 21 ++++---- .../containers/_table/test_remove_rows.py | 38 ++++++++++++++ 3 files changed, 93 insertions(+), 15 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_table/test_remove_rows.py diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 368f6c782..9cc1e0db5 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -1126,19 +1126,24 @@ def add_rows(self, rows: list[Row] | Table) -> Table: def filter_rows(self, query: Callable[[Row], bool]) -> Table: """ - Return a new table with rows filtered by Callable (e.g. lambda function). + Return a new table containing only the rows that satisfy the query. The original table is not modified. Parameters ---------- query: - A Callable that is applied to all rows. + A callable that returns True if a row should be included in the new table. Returns ------- table: - A table containing only the rows filtered by the query. + A table containing only the rows that satisfy the query. + + See Also + -------- + remove_rows: + Remove rows that satisfy a query. Examples -------- @@ -1337,6 +1342,44 @@ def remove_columns_with_non_numerical_values(self) -> Table: """ return Table.from_columns([column for column in self.to_columns() if column.type.is_numeric()]) + def remove_rows(self, query: Callable[[Row], bool]) -> Table: + """ + Return a new table without the rows that satisfy the query. + + The original table is not modified. 
+ + Parameters + ---------- + query: + A callable that returns True if the row should be removed. + + Returns + ------- + table: + A table without the rows that satisfy the query. + + See Also + -------- + filter_rows: + Create a table containing only the rows that satisfy a query. + + Examples + -------- + >>> from safeds.data.tabular.containers import Table + >>> table = Table.from_dict({"a": [1, 3], "b": [2, 4]}) + >>> table.remove_rows(lambda x: x["a"] < 2) + a b + 0 3 4 + """ + import pandas as pd + + rows: list[Row] = [row for row in self.to_rows() if not query(row)] + if len(rows) == 0: + result_table = Table._from_pandas_dataframe(pd.DataFrame(), self._schema) + else: + result_table = self.from_rows(rows) + return result_table + def remove_duplicate_rows(self) -> Table: """ Return a new table with every duplicate row removed. diff --git a/tests/safeds/data/tabular/containers/_table/test_filter_rows.py b/tests/safeds/data/tabular/containers/_table/test_filter_rows.py index ad8c4f45e..75b08e480 100644 --- a/tests/safeds/data/tabular/containers/_table/test_filter_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_filter_rows.py @@ -8,10 +8,10 @@ ("table1", "filter_column", "filter_value", "table2"), [ ( - Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), + Table(), "col1", 1, - Table({"col1": [1, 1], "col2": [1, 4]}), + Table._from_pandas_dataframe(pd.DataFrame(), Schema({})), ), ( Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}), @@ -20,22 +20,19 @@ Table._from_pandas_dataframe(pd.DataFrame(), Schema({"col1": Integer(), "col2": Integer()})), ), ( - Table(), + Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), "col1", 1, - Table._from_pandas_dataframe(pd.DataFrame(), Schema({})), + Table({"col1": [1, 1], "col2": [1, 4]}), ), ], - ids=["filter for col1 = 1", "no finding", "empty table"], + ids=[ + "empty table", + "no matches", + "matches", + ], ) def test_should_filter_rows(table1: Table, filter_column: str, filter_value: ColumnType, table2: Table) -> None: 
table1 = table1.filter_rows(lambda row: row.get_value(filter_column) == filter_value) assert table1.schema == table2.schema assert table2 == table1 - - -# noinspection PyTypeChecker -def test_should_raise_error_if_column_type_invalid() -> None: - table = Table({"col1": [1, 2, 3], "col2": [1, 1, 4]}) - with pytest.raises(TypeError, match=r"'Series' object is not callable"): - table.filter_rows(table.get_column("col1")._data > table.get_column("col2")._data) diff --git a/tests/safeds/data/tabular/containers/_table/test_remove_rows.py b/tests/safeds/data/tabular/containers/_table/test_remove_rows.py new file mode 100644 index 000000000..35150dd43 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/test_remove_rows.py @@ -0,0 +1,38 @@ +import pandas as pd +import pytest +from safeds.data.tabular.containers import Table +from safeds.data.tabular.typing import ColumnType, Schema + + +@pytest.mark.parametrize( + ("table1", "remove_column", "remove_value", "table2"), + [ + ( + Table(), + "col1", + 1, + Table._from_pandas_dataframe(pd.DataFrame(), Schema({})), + ), + ( + Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}), + "col1", + 1, + Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}), + ), + ( + Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), + "col1", + 1, + Table({"col1": [2], "col2": [2]}), + ), + ], + ids=[ + "empty table", + "no match", + "matches", + ], +) +def test_should_remove_rows(table1: Table, remove_column: str, remove_value: ColumnType, table2: Table) -> None: + table1 = table1.remove_rows(lambda row: row.get_value(remove_column) == remove_value) + assert table1.schema == table2.schema + assert table2 == table1