Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: check that methods of table can handle an empty table #314

Merged
merged 83 commits into from
Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from 78 commits
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
fa24585
Added tests for empty table add_column and add_columns
patrikguempel May 12, 2023
8fecfd4
hotfix table eq, check column names on table without rows
patrikguempel May 12, 2023
f3abb76
add check for completely empty table
patrikguempel May 12, 2023
99d2b9a
added code in parametrized test to add row in empty table
jxnior01 May 12, 2023
4843b1e
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
jxnior01 May 12, 2023
716221b
empty table now support adding any row
patrikguempel May 12, 2023
b0a93e7
added add_rows for empty tables
patrikguempel May 12, 2023
b0c9014
renamed test
jxnior01 May 19, 2023
aa4b4e2
added support to read empty file to get an empty table using from_csv…
patrikguempel May 19, 2023
1236c81
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 19, 2023
8be21a3
added test for empty table in test_has_column.py
patrikguempel May 19, 2023
ab5dd0f
created tests for empty table within test_inverse_transform_table.py,…
patrikguempel May 19, 2023
2d4ff5b
created tests for empty table for each plot
patrikguempel May 19, 2023
1c993d0
created tests for empty table for test_remove_columns.py, test_remove…
patrikguempel May 19, 2023
50f198d
created tests for empty table for test_rename.py, test_replace_column…
patrikguempel May 19, 2023
c55fb9e
fixes in from json file and from csv file regarding empty file
patrikguempel May 19, 2023
967aee1
added parametrized tests for empty tables
jxnior01 May 19, 2023
b341715
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
jxnior01 May 19, 2023
1692251
added parametrized tests for test_to_csv_file.py
jxnior01 May 19, 2023
22deaef
added tests for sort_rows and split regarding empty table,
patrikguempel May 19, 2023
514ca49
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 19, 2023
abd27b9
added empty table test in test_str.py
patrikguempel May 19, 2023
62647ac
empty summary for empty table
patrikguempel May 19, 2023
33356e9
added parametrized tests for test_transform_column.py
jxnior01 May 19, 2023
1ad7b0f
added parametrized tests for test_to_json_file.py
jxnior01 May 19, 2023
0362f04
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
jxnior01 May 19, 2023
c11299f
undone path test remove
patrikguempel May 19, 2023
9a892a9
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 19, 2023
27217b2
rewrote test_to_columns.py, added empty table check in the process
patrikguempel May 19, 2023
b65531d
added parametrized tests for test_to_json_file.py
jxnior01 May 19, 2023
6f14390
added parametrized tests for test_to_excel_file.py
jxnior01 May 19, 2023
455b087
added empty table check for test_to_rows.py
patrikguempel May 19, 2023
4452347
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 19, 2023
9689b6d
added empty table check for test_transform_table.py
patrikguempel May 19, 2023
39f73d4
Merge branch 'main' into 123-check-that-methods-of-table-can-handle-a…
patrikguempel May 19, 2023
295b31e
added annotations for test_keep_only_columns.py and test_rename.py
jxnior01 May 19, 2023
75552e9
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
jxnior01 May 19, 2023
dff9623
added annotations for test_remove_columns.py
jxnior01 May 19, 2023
22bf39d
sugar for linter
patrikguempel May 19, 2023
a5f94b1
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 19, 2023
ed8c73b
json file will now read {} as empty; linter doesn't like empty json files
patrikguempel May 19, 2023
799bee0
path fixed
patrikguempel May 19, 2023
9921342
style: apply automated linter fixes
megalinter-bot May 19, 2023
aaecc76
fixed code coverage
patrikguempel May 19, 2023
6b64718
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 19, 2023
021f158
Merge branch 'main' into 123-check-that-methods-of-table-can-handle-a…
patrikguempel May 19, 2023
968d46c
fixed code coverage
patrikguempel May 19, 2023
6df249f
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 19, 2023
98d8b4c
fixed code coverage. part 3
patrikguempel May 19, 2023
9aa9a8b
semantical fix
patrikguempel May 19, 2023
83bf00a
Update src/safeds/data/tabular/containers/_table.py
patrikguempel May 24, 2023
0314518
Update src/safeds/data/tabular/containers/_table.py
patrikguempel May 25, 2023
294f637
Update src/safeds/data/tabular/containers/_table.py
patrikguempel May 25, 2023
671ddbc
Update src/safeds/data/tabular/containers/_table.py
patrikguempel May 25, 2023
cd0f95e
Update src/safeds/data/tabular/containers/_table.py
patrikguempel May 26, 2023
09bcab4
Update src/safeds/data/tabular/containers/_table.py
patrikguempel May 26, 2023
e3d3156
- removed docstring
patrikguempel May 26, 2023
9f756e1
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 26, 2023
7f71b41
Merge branch 'main' of https://github.com/Safe-DS/Stdlib into 123-che…
patrikguempel May 26, 2023
6411a8b
fixed error message for empty table regarding scatter plots
patrikguempel May 26, 2023
e1b153b
linter sugar
patrikguempel May 26, 2023
48fa111
style: apply automated linter fixes
megalinter-bot May 26, 2023
1ad970e
fixed error messages and
patrikguempel May 26, 2023
7931810
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 26, 2023
b34718d
fixed codecov
patrikguempel May 26, 2023
5000c5a
style: apply automated linter fixes
megalinter-bot May 26, 2023
fc0c3cf
Merge branch 'main' into 123-check-that-methods-of-table-can-handle-a…
patrikguempel May 26, 2023
20237a2
Merge branch 'main' into 123-check-that-methods-of-table-can-handle-a…
patrikguempel May 26, 2023
9d16661
fixed that the original table is overwritten in table#remove_rows and…
patrikguempel May 26, 2023
7730e4b
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 26, 2023
df63986
style: apply automated linter fixes
megalinter-bot May 26, 2023
f0d50f7
Merge branch 'main' of https://github.com/Safe-DS/Stdlib into 123-che…
patrikguempel May 26, 2023
da34b22
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 26, 2023
1fd81ae
fixed test remove outliers
patrikguempel May 26, 2023
7f72438
style: apply automated linter fixes
megalinter-bot May 26, 2023
a92aeee
fixed tests
patrikguempel May 26, 2023
9670a63
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel May 26, 2023
64dbbcf
style: apply automated linter fixes
megalinter-bot May 26, 2023
5142bdb
Merge branch 'main' into 123-check-that-methods-of-table-can-handle-a…
zzril Jun 5, 2023
23b16f7
added missing tests
patrikguempel Jun 6, 2023
f9d7c46
Merge remote-tracking branch 'origin/123-check-that-methods-of-table-…
patrikguempel Jun 6, 2023
e3c4251
style: apply automated linter fixes
megalinter-bot Jun 6, 2023
f22ad59
style: apply automated linter fixes
megalinter-bot Jun 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 97 additions & 17 deletions src/safeds/data/tabular/containers/_table.py
patrikguempel marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,14 @@ def from_csv_file(path: str | Path) -> Table:
path = Path(path)
if path.suffix != ".csv":
raise WrongFileExtensionError(path, ".csv")
try:
if path.exists():
with path.open() as f:
if f.read().replace("\n", "") == "":
return Table()

return Table._from_pandas_dataframe(pd.read_csv(path))
except FileNotFoundError as exception:
raise FileNotFoundError(f'File "{path}" does not exist') from exception
else:
raise FileNotFoundError(f'File "{path}" does not exist')

@staticmethod
def from_excel_file(path: str | Path) -> Table:
Expand Down Expand Up @@ -164,10 +168,14 @@ def from_json_file(path: str | Path) -> Table:
path = Path(path)
if path.suffix != ".json":
raise WrongFileExtensionError(path, ".json")
try:
if path.exists():
with path.open() as f:
if f.read().replace("\n", "") in ("", "{}"):
return Table()

return Table._from_pandas_dataframe(pd.read_json(path))
except FileNotFoundError as exception:
raise FileNotFoundError(f'File "{path}" does not exist') from exception
else:
raise FileNotFoundError(f'File "{path}" does not exist')

@staticmethod
def from_dict(data: dict[str, list[Any]]) -> Table:
Expand Down Expand Up @@ -351,6 +359,8 @@ def __eq__(self, other: Any) -> bool:
return self.column_names == other.column_names
table1 = self.sort_columns()
table2 = other.sort_columns()
if table1.number_of_rows == 0 and table2.number_of_rows == 0:
return table1.column_names == table2.column_names
return table1._schema == table2._schema and table1._data.equals(table2._data)

def __repr__(self) -> str:
Expand Down Expand Up @@ -528,6 +538,44 @@ def summary(self) -> Table:
result : Table
The table with statistics.
"""
if self.number_of_columns == 0:
return Table(
{
"metrics": [
"maximum",
"minimum",
"mean",
"mode",
"median",
"sum",
"variance",
"standard deviation",
"idness",
"stability",
],
},
)
elif self.number_of_rows == 0:
table = Table(
{
"metrics": [
"maximum",
"minimum",
"mean",
"mode",
"median",
"sum",
"variance",
"standard deviation",
"idness",
"stability",
],
},
)
for name in self.column_names:
table = table.add_column(Column(name, ["-", "-", "-", "-", "-", "-", "-", "-", "-", "-"]))
return table

columns = self.to_columns()
result = pd.DataFrame()
statistics = {}
Expand Down Expand Up @@ -587,7 +635,7 @@ def add_column(self, column: Column) -> Table:
if self.has_column(column.name):
raise DuplicateColumnNameError(column.name)

if column._data.size != self.number_of_rows:
if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0:
raise ColumnSizeError(str(self.number_of_rows), str(column._data.size))

result = self._data.copy()
Expand Down Expand Up @@ -626,7 +674,7 @@ def add_columns(self, columns: list[Column] | Table) -> Table:
if column.name in result.columns:
raise DuplicateColumnNameError(column.name)

if column._data.size != self.number_of_rows:
if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0:
raise ColumnSizeError(str(self.number_of_rows), str(column._data.size))

result[column.name] = column._data
Expand All @@ -637,6 +685,7 @@ def add_row(self, row: Row) -> Table:
Add a row to the table.

This table is not modified.
If the table happens to be empty beforehand, respective features will be added automatically.

Parameters
----------
Expand All @@ -653,12 +702,27 @@ def add_row(self, row: Row) -> Table:
SchemaMismatchError
If the schema of the row does not match the table schema.
"""
if self._schema != row.schema:
int_columns = []
result = self.remove_columns([]) # clone
if result.number_of_rows == 0:
int_columns = list(filter(lambda name: isinstance(row[name], int | np.int64), row.column_names))
if result.number_of_columns == 0:
for column in row.column_names:
result._data[column] = Column(column, [])
result._schema = Schema._from_pandas_dataframe(result._data)
elif result.column_names != row.column_names:
raise SchemaMismatchError
elif result._schema != row.schema:
raise SchemaMismatchError

new_df = pd.concat([self._data, row._data]).infer_objects()
new_df.columns = self.column_names
return Table._from_pandas_dataframe(new_df)
new_df = pd.concat([result._data, row._data]).infer_objects()
new_df.columns = result.column_names
result = Table._from_pandas_dataframe(new_df)

for column in int_columns:
result = result.replace_column(column, result.get_column(column).transform(lambda it: int(it)))

return result

def add_rows(self, rows: list[Row] | Table) -> Table:
"""
Expand All @@ -683,16 +747,30 @@ def add_rows(self, rows: list[Row] | Table) -> Table:
"""
patrikguempel marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(rows, Table):
rows = rows.to_rows()
result = self._data
int_columns = []
result = self.remove_columns([]) # clone
for row in rows:
if self._schema != row.schema:
if result.number_of_rows == 0:
int_columns = list(filter(lambda name: isinstance(row[name], int | np.int64), row.column_names))
if result.number_of_columns == 0:
for column in row.column_names:
result._data[column] = Column(column, [])
result._schema = Schema._from_pandas_dataframe(result._data)
elif result.column_names != row.column_names:
raise SchemaMismatchError
elif result._schema != row.schema:
raise SchemaMismatchError

row_frames = (row._data for row in rows)

result = pd.concat([result, *row_frames]).infer_objects()
result.columns = self.column_names
return Table._from_pandas_dataframe(result)
new_df = pd.concat([result._data, *row_frames]).infer_objects()
new_df.columns = result.column_names
result = Table._from_pandas_dataframe(new_df)

for column in int_columns:
result = result.replace_column(column, result.get_column(column).transform(lambda it: int(it)))

return result

def filter_rows(self, query: Callable[[Row], bool]) -> Table:
"""
Expand Down Expand Up @@ -1092,6 +1170,8 @@ def split(self, percentage_in_first: float) -> tuple[Table, Table]:
"""
if percentage_in_first < 0 or percentage_in_first > 1:
raise ValueError("The given percentage is not between 0 and 1")
if self.number_of_rows == 0:
return Table(), Table()
return (
self.slice_rows(0, round(percentage_in_first * self.number_of_rows)),
self.slice_rows(round(percentage_in_first * self.number_of_rows)),
Expand Down
Binary file added tests/resources/empty_excel_file.xlsx
Binary file not shown.
Empty file.
1 change: 1 addition & 0 deletions tests/resources/emptytable.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 12 additions & 2 deletions tests/safeds/data/tabular/containers/_table/test_add_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,22 @@
Column("col3", [0, -1, -2]),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2]}),
),
(
Table({}),
Column("col3", []),
Table({"col3": []}),
),
(
Table({}),
Column("col3", [1]),
Table({"col3": [1]}),
),
],
ids=["String", "Integer"],
ids=["String", "Integer", "empty with empty column", "empty with filled column"],
)
def test_should_add_column(table1: Table, column: Column, expected: Table) -> None:
table1 = table1.add_column(column)
assert table1.schema == expected.schema
# assert table1.schema == expected.schema
Marsmaennchen221 marked this conversation as resolved.
Show resolved Hide resolved
assert table1 == expected


Expand Down
14 changes: 12 additions & 2 deletions tests/safeds/data/tabular/containers/_table/test_add_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,22 @@
[Column("col3", [0, -1, -2]), Column("col4", ["a", "b", "c"])],
Table({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2], "col4": ["a", "b", "c"]}),
),
(
Table({}),
[Column("col3", []), Column("col4", [])],
Table({"col3": [], "col4": []}),
),
(
Table({}),
[Column("col3", [1]), Column("col4", [2])],
Table({"col3": [1], "col4": [2]}),
),
],
ids=["add 2 columns"],
ids=["add 2 columns", "empty with empty column", "empty with filled column"],
)
def test_should_add_columns(table1: Table, columns: list[Column], expected: Table) -> None:
patrikguempel marked this conversation as resolved.
Show resolved Hide resolved
table1 = table1.add_columns(columns)
assert table1.schema == expected.schema
# assert table1.schema == expected.schema
Marsmaennchen221 marked this conversation as resolved.
Show resolved Hide resolved
assert table1 == expected


Expand Down
23 changes: 16 additions & 7 deletions tests/safeds/data/tabular/containers/_table/test_add_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,30 @@


@pytest.mark.parametrize(
("table", "row"),
("table", "row", "expected"),
[
(Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Row({"col1": 5, "col2": 6})),
(
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Row({"col1": 5, "col2": 6}),
Table({"col1": [1, 2, 1, 5], "col2": [1, 2, 4, 6]}),
),
(Table({"col2": [], "col4": []}), Row({"col2": 5, "col4": 6}), Table({"col2": [5], "col4": [6]})),
(Table(), Row({"col2": 5, "col4": 6}), Table({"col2": [5], "col4": [6]})),
],
ids=["added row"],
ids=["added row", "added row to empty column", "empty row to empty table"],
patrikguempel marked this conversation as resolved.
Show resolved Hide resolved
)
def test_should_add_row(table: Table, row: Row) -> None:
def test_should_add_row(table: Table, row: Row, expected: Table) -> None:
table = table.add_row(row)
assert table.number_of_rows == 4
assert table.get_row(3) == row
assert table.schema == row._schema
assert table == expected


def test_should_raise_error_if_row_schema_invalid() -> None:
table1 = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
row = Row({"col1": 5, "col2": "Hallo"})
with raises(SchemaMismatchError, match=r"Failed because at least two schemas didn't match."):
table1.add_row(row)


def test_should_raise_schema_mismatch() -> None:
with raises(SchemaMismatchError, match=r"Failed because at least two schemas didn't match."):
Table({"a": [], "b": []}).add_row(Row({"beer": None, "rips": None}))
15 changes: 14 additions & 1 deletion tests/safeds/data/tabular/containers/_table/test_add_rows.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
from _pytest.python_api import raises
from safeds.data.tabular.containers import Row, Table
from safeds.exceptions import SchemaMismatchError

Expand All @@ -11,8 +12,13 @@
[Row({"col1": "d", "col2": 6}), Row({"col1": "e", "col2": 8})],
Table({"col1": ["a", "b", "c", "d", "e"], "col2": [1, 2, 4, 6, 8]}),
),
(
Table(),
[Row({"col1": "d", "col2": 6}), Row({"col1": "e", "col2": 8})],
Table({"col1": ["d", "e"], "col2": [6, 8]}),
),
],
ids=["Rows with string and integer values"],
ids=["Rows with string and integer values", "empty"],
)
def test_should_add_rows(table1: Table, rows: list[Row], table2: Table) -> None:
patrikguempel marked this conversation as resolved.
Show resolved Hide resolved
table1 = table1.add_rows(rows)
Expand Down Expand Up @@ -42,3 +48,10 @@ def test_should_raise_error_if_row_schema_invalid() -> None:
row = [Row({"col1": 2, "col2": 4}), Row({"col1": 5, "col2": "Hallo"})]
with pytest.raises(SchemaMismatchError, match=r"Failed because at least two schemas didn't match."):
table1.add_rows(row)


def test_should_raise_schema_mismatch() -> None:
with raises(SchemaMismatchError, match=r"Failed because at least two schemas didn't match."):
Table({"a": [], "b": []}).add_rows([Row({"a": None, "b": None}), Row({"beer": None, "rips": None})])
with raises(SchemaMismatchError, match=r"Failed because at least two schemas didn't match."):
Table({"a": [], "b": []}).add_rows([Row({"beer": None, "rips": None}), Row({"a": None, "b": None})])
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
("table", "expected"),
[
(Table({"col1": [1], "col2": [1]}), ["col1", "col2"]),
(Table({"col": [], "gg": []}), ["col", "gg"]),
(Table(), []),
],
ids=["Integer", "empty"],
ids=["Integer", "rowless", "empty"],
)
def test_should_compare_column_names(table: Table, expected: list) -> None:
assert table.column_names == expected
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,14 @@
1,
Table._from_pandas_dataframe(pd.DataFrame(), Schema({"col1": Integer(), "col2": Integer()})),
),
(
Table(),
"col1",
1,
Table._from_pandas_dataframe(pd.DataFrame(), Schema({})),
),
],
ids=["filter for col1 = 1", "empty table"],
ids=["filter for col1 = 1", "no finding", "empty table"],
)
def test_should_filter_rows(table1: Table, filter_column: str, filter_value: ColumnType, table2: Table) -> None:
table1 = table1.filter_rows(lambda row: row.get_value(filter_column) == filter_value)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,19 @@
from tests.helpers import resolve_resource_path


@pytest.mark.parametrize("path", ["table.csv", Path("table.csv")], ids=["by String", "by path"])
def test_should_create_table_from_csv_file(path: str | Path) -> None:
table1 = Table.from_csv_file(resolve_resource_path(path))
table2 = Table({"A": [1], "B": [2]})
assert table1.schema == table2.schema
assert table1 == table2
@pytest.mark.parametrize(
("path", "expected"),
[
("table.csv", Table({"A": [1], "B": [2]})),
(Path("table.csv"), Table({"A": [1], "B": [2]})),
("emptytable.csv", Table()),
],
ids=["by String", "by path", "empty"],
)
def test_should_create_table_from_csv_file(path: str | Path, expected: Table) -> None:
table = Table.from_csv_file(resolve_resource_path(path))
assert table.schema == expected.schema
assert table == expected


@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
},
),
),
(resolve_resource_path("./empty_excel_file.xlsx"), Table()),
],
ids=["string path", "object path"],
ids=["string path", "object path", "empty file"],
)
def test_should_create_table_from_excel_file(path: str | Path, expected: Table) -> None:
table = Table.from_excel_file(path)
Expand Down
Loading