Skip to content

Commit

Permalink
perf: treat Tables specially when calling add_rows (#606)
Browse files Browse the repository at this point in the history
Partially fixes #575

### Summary of Changes

Add special case to `add_rows` when adding a `Table`. 
The table is no longer split into a list of rows; instead, it is added
directly to the `DataFrame`.
In special cases, this may improve performance by up to 1000x.

---------

Co-authored-by: Lars Reimann <[email protected]>
  • Loading branch information
WinPlay02 and lars-reimann authored Apr 3, 2024
1 parent 98bccd0 commit e555b85
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
18 changes: 17 additions & 1 deletion src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1057,7 +1057,23 @@ def add_rows(self, rows: list[Row] | Table) -> Table:
2 5 6
"""
if isinstance(rows, Table):
rows = rows.to_rows()
if rows.number_of_rows == 0:
return self
if self.number_of_columns == 0:
return rows
different_column_names = set(self.column_names) - set(rows.column_names)
if len(different_column_names) > 0:
raise UnknownColumnNameError(
sorted(
different_column_names,
key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__,
),
)

new_df = pd.concat([self._data, rows._data]).infer_objects()
new_df.columns = self.column_names
schema = Schema._merge_multiple_schemas([self.schema, rows.schema])
return Table._from_pandas_dataframe(new_df, schema)

if len(rows) == 0:
return self
Expand Down
14 changes: 12 additions & 2 deletions tests/safeds/data/tabular/containers/_table/test_add_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,13 @@
[Row({"col1": "d", "col2": 6}), Row({"col1": "e", "col2": 8})],
Table({"col1": ["d", "e"], "col2": [6, 8]}),
),
(
Table({"col1": ["a", "b", "c"], "col2": [1, 2, 4]}),
[],
Table({"col1": ["a", "b", "c"], "col2": [1, 2, 4]}),
)
],
ids=["Rows with string and integer values", "different schema", "empty"],
ids=["Rows with string and integer values", "different schema", "empty", "add empty"],
)
def test_should_add_rows(table1: Table, rows: list[Row], table2: Table) -> None:
table1 = table1.add_rows(rows)
Expand Down Expand Up @@ -80,8 +85,13 @@ def test_should_add_rows_from_table(table1: Table, table2: Table, expected: Tabl
[Row({"col1": 2, "col3": 4}), Row({"colA": 5, "col2": "Hallo"})],
r"Could not find column\(s\) 'col1, col2'",
),
(
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table({"col1": [2, 5], "col3": [4, "Hallo"]}),
r"Could not find column\(s\) 'col2'",
),
],
ids=["column names do not match", "multiple columns missing"],
ids=["column names do not match", "multiple columns missing", "column missing from other table"],
)
def test_should_raise_error_if_row_column_names_invalid(table: Table, rows: list[Row], expected_error_msg: str) -> None:
with pytest.raises(UnknownColumnNameError, match=expected_error_msg):
Expand Down

0 comments on commit e555b85

Please sign in to comment.