Skip to content

Commit

Permalink
feat: Table.remove_rows (#720)
Browse files Browse the repository at this point in the history
Closes #698

### Summary of Changes

Add a method `Table.remove_rows`. It's the same as `Table.filter_rows`
with a negated query.
  • Loading branch information
lars-reimann authored May 4, 2024
1 parent f6c379e commit a1cdaef
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 15 deletions.
49 changes: 46 additions & 3 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,19 +1126,24 @@ def add_rows(self, rows: list[Row] | Table) -> Table:

def filter_rows(self, query: Callable[[Row], bool]) -> Table:
"""
Return a new table with rows filtered by Callable (e.g. lambda function).
Return a new table containing only the rows that satisfy the query.
The original table is not modified.
Parameters
----------
query:
A Callable that is applied to all rows.
A callable that returns True if a row should be included in the new table.
Returns
-------
table:
A table containing only the rows filtered by the query.
A table containing only the rows that satisfy the query.
See Also
--------
remove_rows:
Remove rows that satisfy a query.
Examples
--------
Expand Down Expand Up @@ -1337,6 +1342,44 @@ def remove_columns_with_non_numerical_values(self) -> Table:
"""
return Table.from_columns([column for column in self.to_columns() if column.type.is_numeric()])

def remove_rows(self, query: Callable[[Row], bool]) -> Table:
    """
    Return a new table that omits every row for which the query returns True.

    The original table is not modified.

    Parameters
    ----------
    query:
        A callable that receives a row and returns True if that row should be removed.

    Returns
    -------
    table:
        A table without the rows that satisfy the query.

    See Also
    --------
    filter_rows:
        Create a table containing only the rows that satisfy a query.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Table
    >>> table = Table.from_dict({"a": [1, 3], "b": [2, 4]})
    >>> table.remove_rows(lambda x: x["a"] < 2)
       a  b
    0  3  4
    """
    import pandas as pd

    # Collect the rows the query does NOT match, in their original order.
    kept: list[Row] = []
    for candidate in self.to_rows():
        if query(candidate):
            continue
        kept.append(candidate)

    # No rows left: build the result from an empty data frame, passing along this table's schema.
    if not kept:
        return Table._from_pandas_dataframe(pd.DataFrame(), self._schema)

    return self.from_rows(kept)

def remove_duplicate_rows(self) -> Table:
"""
Return a new table with every duplicate row removed.
Expand Down
21 changes: 9 additions & 12 deletions tests/safeds/data/tabular/containers/_table/test_filter_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
("table1", "filter_column", "filter_value", "table2"),
[
(
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table(),
"col1",
1,
Table({"col1": [1, 1], "col2": [1, 4]}),
Table._from_pandas_dataframe(pd.DataFrame(), Schema({})),
),
(
Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}),
Expand All @@ -20,22 +20,19 @@
Table._from_pandas_dataframe(pd.DataFrame(), Schema({"col1": Integer(), "col2": Integer()})),
),
(
Table(),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
"col1",
1,
Table._from_pandas_dataframe(pd.DataFrame(), Schema({})),
Table({"col1": [1, 1], "col2": [1, 4]}),
),
],
ids=["filter for col1 = 1", "no finding", "empty table"],
ids=[
"empty table",
"no matches",
"matches",
],
)
def test_should_filter_rows(table1: Table, filter_column: str, filter_value: ColumnType, table2: Table) -> None:
    """Filter `table1` on `filter_column == filter_value` and compare schema and contents with `table2`."""
    filtered = table1.filter_rows(lambda r: r.get_value(filter_column) == filter_value)

    # The schema is compared separately from the table contents.
    assert filtered.schema == table2.schema
    assert table2 == filtered


# noinspection PyTypeChecker
def test_should_raise_error_if_column_type_invalid() -> None:
    """Passing the result of comparing two columns' `_data` as the query must raise a TypeError."""
    table = Table({"col1": [1, 2, 3], "col2": [1, 1, 4]})

    with pytest.raises(TypeError, match=r"'Series' object is not callable"):
        # The comparison result is not a callable, so using it as a query is expected to fail.
        not_a_callable = table.get_column("col1")._data > table.get_column("col2")._data
        table.filter_rows(not_a_callable)
38 changes: 38 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_remove_rows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pandas as pd
import pytest
from safeds.data.tabular.containers import Table
from safeds.data.tabular.typing import ColumnType, Schema


@pytest.mark.parametrize(
    ("table1", "remove_column", "remove_value", "table2"),
    [
        pytest.param(
            Table(),
            "col1",
            1,
            Table._from_pandas_dataframe(pd.DataFrame(), Schema({})),
            id="empty table",
        ),
        pytest.param(
            Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}),
            "col1",
            1,
            Table({"col1": [3, 2, 4], "col2": [1, 2, 4]}),
            id="no match",
        ),
        pytest.param(
            Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
            "col1",
            1,
            Table({"col1": [2], "col2": [2]}),
            id="matches",
        ),
    ],
)
def test_should_remove_rows(table1: Table, remove_column: str, remove_value: ColumnType, table2: Table) -> None:
    """Remove the rows of `table1` where `remove_column == remove_value` and compare the result with `table2`."""
    remaining = table1.remove_rows(lambda r: r.get_value(remove_column) == remove_value)

    # The schema is compared separately from the table contents.
    assert remaining.schema == table2.schema
    assert table2 == remaining

0 comments on commit a1cdaef

Please sign in to comment.