From dbbe0e3a6ebd2f22c386c13a3cc53184e531f837 Mon Sep 17 00:00:00 2001 From: Alex Senger <91055000+alex-senger@users.noreply.github.com> Date: Fri, 23 Jun 2023 16:23:03 +0200 Subject: [PATCH] feat: improve `table.summary`. Catch `ValueError` thrown by `column.stability` (#390) Closes #320 ### Summary of Changes The stability of a column with only null values should just be skipped and displayed in the returned table as a minus: - --------- Co-authored-by: patrikguempel <128832338+patrikguempel@users.noreply.github.com> Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> --- src/safeds/data/tabular/containers/_column.py | 6 +++++ src/safeds/data/tabular/containers/_table.py | 2 +- .../containers/_column/test_stability.py | 10 +++++++-- .../tabular/containers/_table/test_summary.py | 22 ++++++++++++++++++- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py index ae42ace06..a4f198f06 100644 --- a/src/safeds/data/tabular/containers/_column.py +++ b/src/safeds/data/tabular/containers/_column.py @@ -504,6 +504,8 @@ def stability(self) -> float: \frac{\text{number of occurrences of most common non-null value}}{\text{number of non-null values}} $$ + The stability cannot be calculated for a column with only null values. + Returns ------- stability : float @@ -516,6 +518,10 @@ def stability(self) -> float: """ if self._data.size == 0: raise ColumnSizeError("> 0", "0") + + if self.all(lambda x: x is None): + raise ValueError("Stability cannot be calculated for a column with only null values.") + return self._data.value_counts()[self.mode()[0]] / self._data.count() def standard_deviation(self) -> float: diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index b62423cab..e07117b3c 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -600,7 +600,7 @@ def summary(self) -> Table: for function in statistics.values(): try: values.append(str(function())) - except NonNumericColumnError: + except (NonNumericColumnError, ValueError): values.append("-") result = pd.concat([result, pd.DataFrame(values)], axis=1) diff --git a/tests/safeds/data/tabular/containers/_column/test_stability.py b/tests/safeds/data/tabular/containers/_column/test_stability.py index d1bb893d2..7de0050d9 100644 --- a/tests/safeds/data/tabular/containers/_column/test_stability.py +++ b/tests/safeds/data/tabular/containers/_column/test_stability.py @@ -25,7 +25,13 @@ def test_should_return_stability_of_column(values: list[Any], expected: float) - assert column.stability() == expected -def test_should_raise_if_column_is_empty() -> None: +def test_should_raise_column_size_error_if_column_is_empty() -> None: column: Column[Any] = Column("A", []) - with pytest.raises(ColumnSizeError): + with pytest.raises(ColumnSizeError, match="Expected a column of size > 0 but got column of size 0."): + column.stability() + + +def test_should_raise_value_error_if_column_contains_only_none() -> None: + column: Column[Any] = Column("A", [None, None]) + with pytest.raises(ValueError, match="Stability cannot be calculated for a column with only null values."): column.stability() diff --git a/tests/safeds/data/tabular/containers/_table/test_summary.py b/tests/safeds/data/tabular/containers/_table/test_summary.py index 460bc287c..f4abaa988 100644 --- a/tests/safeds/data/tabular/containers/_table/test_summary.py +++ b/tests/safeds/data/tabular/containers/_table/test_summary.py @@ -112,8 +112,28 @@ }, ), ), + ( + Table({"col": [None, None]}), + Table( + { + "metrics": [ + "maximum", + "minimum", + "mean", + "mode", + "median", + "sum", + "variance", + "standard deviation", + "idness", + "stability", + ], + "col": ["-", "-", "-", "[]", "-", "-", "-", "-", "0.0", "-"], + }, + ), + ), ], - ids=["Column of integers and Column of characters", "empty", "empty with columns"], + ids=["Column of integers and Column of characters", "empty", "empty with columns", "Column of None"], ) def test_should_make_summary(table: Table, expected: Table) -> None: assert expected.schema == table.summary().schema