diff --git a/Runtime/safe-ds/safeds/data/tabular/_column.py b/Runtime/safe-ds/safeds/data/tabular/_column.py index c19433ed7..3d9c7ac85 100644 --- a/Runtime/safe-ds/safeds/data/tabular/_column.py +++ b/Runtime/safe-ds/safeds/data/tabular/_column.py @@ -331,10 +331,10 @@ def mode(self) -> Any: Returns ------- - mode : - The mode value. + List : + Returns a list with the most common values. """ - return self._column._data.mode()[0] + return self._column._data.mode().tolist() def median(self) -> float: """ @@ -443,7 +443,7 @@ def stability(self) -> float: if self._column._data.size == 0: raise ColumnSizeError("> 0", "0") return ( - self._column._data.value_counts()[self._column.statistics.mode()] + self._column._data.value_counts()[self._column.statistics.mode()[0]] / self._column._data.count() ) diff --git a/Runtime/safe-ds/safeds/data/tabular/transformation/_imputer.py b/Runtime/safe-ds/safeds/data/tabular/transformation/_imputer.py index 2e4e8d8ea..2fcb76570 100644 --- a/Runtime/safe-ds/safeds/data/tabular/transformation/_imputer.py +++ b/Runtime/safe-ds/safeds/data/tabular/transformation/_imputer.py @@ -4,7 +4,7 @@ from typing import Any, Optional import pandas as pd -from safeds.data.tabular import Table +from safeds.data.tabular import ColumnStatistics, Table from sklearn.impute import SimpleImputer @@ -86,6 +86,13 @@ def fit(self, table: Table, column_names: Optional[list[str]] = None) -> None: if column_names is None: column_names = table.schema.get_column_names() + if self._imp.strategy == "most_frequent": + for name in column_names: + if 1 < len(ColumnStatistics(table.get_column(name)).mode()): + raise IndexError( + "There are multiple frequent values in a column given for the Imputer" + ) + self._column_names = column_names indices = [ table.schema._get_column_index_by_name(name) for name in self._column_names diff --git a/Runtime/safe-ds/tests/data/tabular/_column/test_mode.py b/Runtime/safe-ds/tests/data/tabular/_column/test_mode.py index fd6fc1bd2..721af5073 100644 --- a/Runtime/safe-ds/tests/data/tabular/_column/test_mode.py +++ b/Runtime/safe-ds/tests/data/tabular/_column/test_mode.py @@ -5,10 +5,16 @@ def test_mode_valid() -> None: table = Table(pd.DataFrame(data={"col1": [1, 2, 3, 4, 3]})) column = table.get_column("col1") - assert column.statistics.mode() == 3 + assert column.statistics.mode() == [3] def test_mode_valid_str() -> None: table = Table(pd.DataFrame(data={"col1": ["1", "2", "3", "4", "3"]})) column = table.get_column("col1") - assert column.statistics.mode() == "3" + assert column.statistics.mode() == ["3"] + + +def test_mode_valid_list() -> None: + table = Table(pd.DataFrame(data={"col1": ["1", "4", "3", "4", "3"]})) + column = table.get_column("col1") + assert column.statistics.mode() == ["3", "4"] diff --git a/Runtime/safe-ds/tests/data/tabular/_table/test_summary.py b/Runtime/safe-ds/tests/data/tabular/_table/test_summary.py index 823a47c14..b7b4e8cd2 100644 --- a/Runtime/safe-ds/tests/data/tabular/_table/test_summary.py +++ b/Runtime/safe-ds/tests/data/tabular/_table/test_summary.py @@ -25,7 +25,7 @@ def test_summary() -> None: "2", "1", str(4.0 / 3), - "1", + "[1]", "1.0", "4", str(1.0 / 3), @@ -38,7 +38,7 @@ def test_summary() -> None: "-", "-", "-", - "a", + "['a', 'b', 'c']", "-", "-", "-", @@ -50,5 +50,4 @@ def test_summary() -> None: } ) ) - assert truth == table.summary() diff --git a/Runtime/safe-ds/tests/data/tabular/transformation/_imputer/test_imputer.py b/Runtime/safe-ds/tests/data/tabular/transformation/_imputer/test_imputer.py index ba56e9338..6fe2e0723 100644 --- a/Runtime/safe-ds/tests/data/tabular/transformation/_imputer/test_imputer.py +++ b/Runtime/safe-ds/tests/data/tabular/transformation/_imputer/test_imputer.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd +import pytest from safeds.data.tabular import Table from safeds.data.tabular.transformation import Imputer @@ -23,12 +24,19 @@ def test_imputer_median() -> None: def test_imputer_mode() -> None: - table = Table(pd.DataFrame(data={"col1": [np.nan, 2, 3, 4, 5]})) + table = Table(pd.DataFrame(data={"col1": [np.nan, 2, 2, 4, 5]})) column = table.get_column("col1") imp = Imputer(Imputer.Strategy.Mode()) new_table = imp.fit_transform(table) - assert new_table.get_column("col1")._data[0] == column.statistics.mode() + assert new_table.get_column("col1")._data[0] == column.statistics.mode()[0] + + +def test_imputer_mode_invalid() -> None: + table = Table(pd.DataFrame(data={"col1": [np.nan, 2, 3, 4, 5]})) + imp = Imputer(Imputer.Strategy.Mode()) + with pytest.raises(IndexError): + imp.fit_transform(table) def test_imputer_constant() -> None: