Skip to content

Commit

Permalink
feat: join (#870)
Browse files Browse the repository at this point in the history
Closes #745 

### Summary of Changes

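Implemented a `join` method for `Table` that joins two tables on one or more key columns, supporting the `inner`, `left`, and `outer` join modes.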

<!-- Please provide a summary of changes in this pull request, ensuring
all changes are explained. -->

---------

Co-authored-by: grefrathc <[email protected]>
Co-authored-by: megalinter-bot <[email protected]>
Co-authored-by: Lars Reimann <[email protected]>
  • Loading branch information
4 people authored Jul 19, 2024
1 parent 9f5992a commit 5764441
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 0 deletions.
59 changes: 59 additions & 0 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1708,6 +1708,65 @@ def inverse_transform_table(self, fitted_transformer: InvertibleTableTransformer
"""
return fitted_transformer.inverse_transform(self)

def join(
    self,
    right_table: Table,
    left_names: str | list[str],
    right_names: str | list[str],
    *,
    mode: Literal["inner", "left", "outer"] = "inner",
) -> Table:
    """
    Join a table with the current table and return the result.

    Parameters
    ----------
    right_table:
        The other table which is to be joined to the current table.
    left_names:
        Name or list of names of columns from the current table on which to join right_table.
    right_names:
        Name or list of names of columns from right_table on which to join the current table.
    mode:
        Specify which type of join you want to use. Options include 'inner', 'left', 'outer'.

    Returns
    -------
    new_table:
        The table with the joined table.

    Raises
    ------
    ColumnNotFoundError
        If a name in `left_names` does not exist in the current table or a name in `right_names` does not exist
        in `right_table`.
    ValueError
        If `left_names` and `right_names` do not contain the same number of column names.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Table
    >>> table1 = Table({"a": [1, 2], "b": [3, 4]})
    >>> table2 = Table({"d": [1, 5], "e": [5, 6]})
    >>> table1.join(table2, "a", "d", mode="left")
    +-----+-----+------+
    |   a |   b |    e |
    | --- | --- |  --- |
    | i64 | i64 |  i64 |
    +==================+
    |   1 |   3 |    5 |
    |   2 |   4 | null |
    +-----+-----+------+
    """
    # Validation
    _check_columns_exist(self, left_names)
    _check_columns_exist(right_table, right_names)

    # A bare string is a single column name. Wrap it in a list so that `len` counts columns rather than the
    # characters of the name (otherwise e.g. "id" vs. "user_id" would be rejected as a length mismatch).
    left_on = [left_names] if isinstance(left_names, str) else list(left_names)
    right_on = [right_names] if isinstance(right_names, str) else list(right_names)

    if len(left_on) != len(right_on):
        raise ValueError("The number of columns to join on must be the same in both tables.")

    # Implementation
    return self._from_polars_lazy_frame(
        self._lazy_frame.join(
            right_table._lazy_frame,
            left_on=left_on,
            right_on=right_on,
            how=mode,
        ),
    )

def transform_table(self, fitted_transformer: TableTransformer) -> Table:
"""
Return a new table transformed by a **fitted** transformer.
Expand Down
91 changes: 91 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_join.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from typing import Literal

import pytest
from safeds.data.tabular.containers import Table
from safeds.exceptions import ColumnNotFoundError


@pytest.mark.parametrize(
    ("table_left", "table_right", "left_names", "right_names", "mode", "table_expected"),
    [
        # Outer join on a single key: unmatched rows from both sides are kept and padded with nulls.
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "outer",
            Table({"a": [1, None, 2], "b": [3, None, 4], "d": [1, 5, None], "e": [5, 6, None]}),
        ),
        # Left join on a single key: every left row is kept, the right key column is dropped.
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "left",
            Table({"a": [1, 2], "b": [3, 4], "e": [5, None]}),
        ),
        # Inner join on a single key: only the matching row remains.
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["d"],
            "inner",
            Table({"a": [1], "b": [3], "e": [5]}),
        ),
        # Inner join on two keys at once.
        (
            Table({"a": [1, 2], "b": [3, 4], "c": [5, 6]}),
            Table({"d": [1, 5], "e": [5, 6], "g": [7, 9]}),
            ["a", "c"],
            ["d", "e"],
            "inner",
            Table({"a": [1], "b": [3], "c": [5], "g": [7]}),
        ),
        # Inner join without any matching key values: the result has columns but no rows.
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["b"],
            ["e"],
            "inner",
            Table({"a": [], "b": [], "d": []}),
        ),
    ],
)
def test_should_join_two_tables(
    table_left: Table,
    table_right: Table,
    left_names: list[str],
    right_names: list[str],
    mode: Literal["inner", "left", "outer"],
    table_expected: Table,
) -> None:
    """Check that joining two tables in the given mode produces the expected table."""
    joined = table_left.join(table_right, left_names, right_names, mode=mode)

    assert joined == table_expected


def test_should_raise_if_columns_are_mismatched() -> None:
    """Check that joining on a different number of left and right columns raises a ValueError."""
    table_left = Table({"a": [1, 2], "b": [3, 4]})
    table_right = Table({"d": [1, 5], "e": [5, 6]})
    left_names = ["a"]
    right_names = ["d", "e"]

    # `match` is a regular expression, so the trailing dot is escaped to match a literal "." only.
    with pytest.raises(
        ValueError,
        match=r"The number of columns to join on must be the same in both tables\.",
    ):
        table_left.join(table_right, left_names, right_names)


@pytest.mark.parametrize(
    ("table_left", "table_right", "left_names", "right_names"),
    [
        # "c" is not a column of the left table.
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["c"],
            ["d"],
        ),
        # "f" is not a column of the right table.
        (
            Table({"a": [1, 2], "b": [3, 4]}),
            Table({"d": [1, 5], "e": [5, 6]}),
            ["a"],
            ["f"],
        ),
    ],
    ids=[
        "wrong_left_name",
        "wrong_right_name",
    ],
)
def test_should_raise_if_columns_are_missing(
    table_left: Table,
    table_right: Table,
    left_names: list[str],
    right_names: list[str],
) -> None:
    """Check that joining on a column missing from either table raises a ColumnNotFoundError."""
    with pytest.raises(ColumnNotFoundError):
        table_left.join(
            table_right,
            left_names=left_names,
            right_names=right_names,
        )

0 comments on commit 5764441

Please sign in to comment.