Skip to content

Commit

Permalink
Added an error to Imputer_Mode when there are multiple most frequent values in a column
Browse files (browse the repository at this point in the history)
  • Loading branch information
Gerhardsa0 committed Jan 31, 2023
1 parent 848252b commit 133d95c
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from typing import Any, Optional

import pandas as pd
from safeds.data.tabular import Table
from sklearn.impute import SimpleImputer

from safeds.data.tabular import Table, ColumnStatistics


class ImputerStrategy(ABC):
@abstractmethod
Expand Down Expand Up @@ -86,6 +87,11 @@ def fit(self, table: Table, column_names: Optional[list[str]] = None) -> None:
if column_names is None:
column_names = table.schema.get_column_names()

if self._imp.strategy == "most_frequent":
for name in column_names:
if 1 < len(ColumnStatistics(table.get_column(name)).mode()):
raise IndexError("There are multiple frequent values in a column given for the Imputer")

self._column_names = column_names
indices = [
table.schema._get_column_index_by_name(name) for name in self._column_names
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import numpy as np
import pandas as pd
import pytest

from safeds.data.tabular import Table
from safeds.data.tabular.transformation import Imputer

Expand Down Expand Up @@ -31,6 +33,13 @@ def test_imputer_mode() -> None:
assert new_table.get_column("col1")._data[0] == column.statistics.mode()[0]


def test_imputer_mode_invalid() -> None:
    """Check that mode imputation is rejected when the column has several modes.

    The fixture column holds one missing entry followed by four distinct
    values, so more than one value qualifies as "most frequent". The test
    expects ``fit_transform`` to raise ``IndexError`` for such input.
    """
    # Each of 2, 3, 4 and 5 appears exactly once: there is no unique mode.
    frame = pd.DataFrame(data={"col1": [np.nan, 2, 3, 4, 5]})
    ambiguous_table = Table(frame)
    mode_imputer = Imputer(Imputer.Strategy.Mode())

    # Only the fit/transform call itself is allowed to raise.
    with pytest.raises(IndexError):
        mode_imputer.fit_transform(ambiguous_table)


def test_imputer_constant() -> None:
table = Table(pd.DataFrame(data={"col1": [np.nan, 2, 3, 4, 5]}))
imp = Imputer(Imputer.Strategy.Constant(0))
Expand Down

0 comments on commit 133d95c

Please sign in to comment.