Skip to content

Commit

Permalink
feat: convert between a dict and a Table (#198)
Browse files Browse the repository at this point in the history
Closes #197.

### Summary of Changes

Added two new methods to `Table`:
* `from_dict` can create a `Table` from a `dict`
* `to_dict` can convert a `Table` to a `dict`

---------

Co-authored-by: megalinter-bot <[email protected]>
  • Loading branch information
lars-reimann and megalinter-bot authored Apr 17, 2023
1 parent 459ab75 commit 2a5089e
Show file tree
Hide file tree
Showing 62 changed files with 676 additions and 612 deletions.
49 changes: 48 additions & 1 deletion src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,42 @@ def from_json_file(path: str) -> Table:
except FileNotFoundError as exception:
raise FileNotFoundError(f'File "{path}" does not exist') from exception

@staticmethod
def from_dict(data: dict[str, list[Any]]) -> Table:
    """
    Build a table out of a dictionary whose keys are column names and whose values are the column values.

    Parameters
    ----------
    data : dict[str, list[Any]]
        Mapping from column name to the list of values in that column.

    Returns
    -------
    table : Table
        The table built from the given data.

    Raises
    ------
    ColumnLengthMismatchError
        If the value lists do not all have the same length.
    """
    # Validation: every column has to be as long as the first one. The error
    # message lists the length of each column, one "name: length" pair per line.
    lengths = [len(values) for values in data.values()]
    if any(length != lengths[0] for length in lengths):
        raise ColumnLengthMismatchError(
            "\n".join(f"{name}: {length}" for name, length in zip(data, lengths)),
        )

    # Implementation: insert the columns one by one, in the dictionary's iteration order.
    frame: DataFrame = pd.DataFrame()
    for name, values in data.items():
        frame[name] = values
    return Table(frame)

@staticmethod
def from_columns(columns: list[Column]) -> Table:
"""
Expand All @@ -137,7 +173,7 @@ def from_columns(columns: list[Column]) -> Table:
for column in columns:
if column._data.size != columns[0]._data.size:
raise ColumnLengthMismatchError(
"\n".join([f"{column.name}: {column._data.size}" for column in columns]),
"\n".join(f"{column.name}: {column._data.size}" for column in columns),
)
dataframe[column.name] = column._data

Expand Down Expand Up @@ -1103,6 +1139,17 @@ def to_json_file(self, path: str) -> None:
data_to_json.columns = self._schema.get_column_names()
data_to_json.to_json(path)

def to_dict(self) -> dict[str, list[Any]]:
    """
    Convert the table into a dictionary keyed by column name.

    Returns
    -------
    data : dict[str, list[Any]]
        Dictionary that maps each column name to the list of values in that column.
    """
    result: dict[str, list[Any]] = {}
    for name in self.get_column_names():
        # Iterating the column yields its values; materialize them as a plain list.
        result[name] = list(self.get_column(name))
    return result

def to_columns(self) -> list[Column]:
"""
Return a list of the columns.
Expand Down
3 changes: 3 additions & 0 deletions src/safeds/data/tabular/exceptions/_exceptions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from __future__ import annotations


class UnknownColumnNameError(KeyError):
"""
Exception raised for trying to access an invalid column name.
Expand Down
9 changes: 4 additions & 5 deletions tests/safeds/data/tabular/containers/_column/test_boxplot.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,29 @@
import _pytest
import matplotlib.pyplot as plt
import pandas as pd
import pytest
from safeds.data.tabular.containers import Table
from safeds.data.tabular.exceptions import NonNumericColumnError


def test_boxplot_complex() -> None:
with pytest.raises(TypeError): # noqa: PT012
table = Table(pd.DataFrame(data={"A": [1, 2, complex(1, -2)]}))
table = Table.from_dict({"A": [1, 2, complex(1, -2)]})
table.get_column("A").boxplot()


def test_boxplot_non_numeric() -> None:
table = Table(pd.DataFrame(data={"A": [1, 2, "A"]}))
table = Table.from_dict({"A": [1, 2, "A"]})
with pytest.raises(NonNumericColumnError):
table.get_column("A").boxplot()


def test_boxplot_float(monkeypatch: _pytest.monkeypatch) -> None:
monkeypatch.setattr(plt, "show", lambda: None)
table = Table(pd.DataFrame(data={"A": [1, 2, 3.5]}))
table = Table.from_dict({"A": [1, 2, 3.5]})
table.get_column("A").boxplot()


def test_boxplot_int(monkeypatch: _pytest.monkeypatch) -> None:
monkeypatch.setattr(plt, "show", lambda: None)
table = Table(pd.DataFrame(data={"A": [1, 2, 3]}))
table = Table.from_dict({"A": [1, 2, 3]})
table.get_column("A").boxplot()
9 changes: 4 additions & 5 deletions tests/safeds/data/tabular/containers/_column/test_column.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import pandas as pd
from safeds.data.tabular.containers import Column


def test_from_columns() -> None:
column1 = Column("A", pd.Series([1, 4]))
column2 = Column("B", pd.Series([2, 5]))
column1 = Column("A", [1, 4])
column2 = Column("B", [2, 5])

assert column1._type == column2._type


def test_from_columns_negative() -> None:
column1 = Column("A", pd.Series([1, 4]))
column2 = Column("B", pd.Series(["2", "5"]))
column1 = Column("A", [1, 4])
column2 = Column("B", ["2", "5"])

assert column1._type != column2._type
Original file line number Diff line number Diff line change
@@ -1,32 +1,31 @@
import pandas as pd
from safeds.data.tabular.containers import Column


def test_column_property_all_positive() -> None:
column = Column("col1", pd.Series([1, 1, 1]))
column = Column("col1", [1, 1, 1])
assert column.all(lambda value: value == 1)


def test_column_property_all_negative() -> None:
column = Column("col1", pd.Series([1, 2, 1]))
column = Column("col1", [1, 2, 1])
assert not column.all(lambda value: value == 1)


def test_column_property_any_positive() -> None:
column = Column("col1", pd.Series([1, 2, 1]))
column = Column("col1", [1, 2, 1])
assert column.any(lambda value: value == 1)


def test_column_property_any_negative() -> None:
column = Column("col1", pd.Series([1, 2, 1]))
column = Column("col1", [1, 2, 1])
assert not column.any(lambda value: value == 3)


def test_column_property_none_positive() -> None:
column = Column("col1", pd.Series([1, 2, 1]))
column = Column("col1", [1, 2, 1])
assert column.none(lambda value: value == 3)


def test_column_property_none_negative() -> None:
column = Column("col1", pd.Series([1, 2, 1]))
column = Column("col1", [1, 2, 1])
assert not column.none(lambda value: value == 1)
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
import pandas as pd
import pytest
from safeds.data.tabular.containers import Column
from safeds.data.tabular.exceptions import ColumnLengthMismatchError, NonNumericColumnError


def test_correlation_with() -> None:
column1 = Column("A", pd.Series([1, 2, 3, 4]))
column2 = Column("B", pd.Series([2, 3, 4, 5]))
column1 = Column("A", [1, 2, 3, 4])
column2 = Column("B", [2, 3, 4, 5])
actual_corr = column1.correlation_with(column2)
expected_corr = column1._data.corr(column2._data)
assert actual_corr == expected_corr


def test_correlation_with_raises_if_column_is_not_numeric() -> None:
column1 = Column("A", pd.Series([1, 2, 3, 4]))
column2 = Column("B", pd.Series(["a", "b", "c", "d"]))
column1 = Column("A", [1, 2, 3, 4])
column2 = Column("B", ["a", "b", "c", "d"])
with pytest.raises(NonNumericColumnError):
column1.correlation_with(column2)


def test_correlation_with_raises_if_column_lengths_differ() -> None:
column1 = Column("A", pd.Series([1, 2, 3, 4]))
column2 = Column("B", pd.Series([2]))
column1 = Column("A", [1, 2, 3, 4])
column2 = Column("B", [2])
with pytest.raises(ColumnLengthMismatchError):
column1.correlation_with(column2)
12 changes: 9 additions & 3 deletions tests/safeds/data/tabular/containers/_column/test_count.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import pytest
from safeds.data.tabular.containers import Column


def test_count_valid() -> None:
column = Column("col1", [1, 2, 3, 4, 5])
assert column.count() == 5
@pytest.mark.parametrize(
("column", "expected"),
[
(Column("col1", [1, 2, 3, 4, 5]), 5),
],
)
def test_count_valid(column: Column, expected: int) -> None:
assert column.count() == expected
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import numpy as np
import pandas as pd
from safeds.data.tabular.containers import Table


def test_count_null_values_valid() -> None:
table = Table(pd.DataFrame(data={"col1": [1, 2, 3, 4, 5], "col2": [None, None, 1, np.nan, np.nan]}))
empty_table = Table(pd.DataFrame(data={"col1": []}))
table = Table.from_dict({"col1": [1, 2, 3, 4, 5], "col2": [None, None, 1, np.nan, np.nan]})
empty_table = Table.from_dict({"col1": []})
column1 = table.get_column("col1")
column2 = table.get_column("col2")
empty_column = empty_table.get_column("col1")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import typing
from __future__ import annotations

from typing import TYPE_CHECKING

import pytest
from safeds.data.tabular.containers import Column

if TYPE_CHECKING:
from typing import Any


@pytest.mark.parametrize(
("values", "unique_values"),
[([1, 1, 2, 3], [1, 2, 3]), (["a", "b", "b", "c"], ["a", "b", "c"]), ([], [])],
)
def test_get_unique_values(values: list[typing.Any], unique_values: list[typing.Any]) -> None:
def test_get_unique_values(values: list[Any], unique_values: list[Any]) -> None:
column: Column = Column("", values)
extracted_unique_values: list[typing.Any] = column.get_unique_values()
extracted_unique_values = column.get_unique_values()

assert extracted_unique_values == unique_values
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import pandas as pd
import pytest
from safeds.data.tabular.containers import Column
from safeds.data.tabular.exceptions import IndexOutOfBoundsError


def test_get_value_valid() -> None:
column = Column("testColumn", pd.Series([0, "1"]))
column = Column("testColumn", [0, "1"])
assert column.get_value(0) == 0
assert column.get_value(1) == "1"


def test_get_value_invalid() -> None:
column = Column("testColumn", pd.Series([0, "1"]))
column = Column("testColumn", [0, "1"])
with pytest.raises(IndexOutOfBoundsError):
column.get_value(-1)

Expand Down
5 changes: 2 additions & 3 deletions tests/safeds/data/tabular/containers/_column/test_getitem.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
import pandas as pd
import pytest
from safeds.data.tabular.containers import Column
from safeds.data.tabular.exceptions import IndexOutOfBoundsError


def test_getitem_valid() -> None:
column = Column("testColumn", pd.Series([0, "1"]))
column = Column("testColumn", [0, "1"])
assert column[0] == 0
assert column[1] == "1"


# noinspection PyStatementEffect
def test_getitem_invalid() -> None:
column = Column("testColumn", pd.Series([0, "1"]))
column = Column("testColumn", [0, "1"])
with pytest.raises(IndexOutOfBoundsError):
column[-1]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import numpy as np
import pandas as pd
import pytest
from safeds.data.tabular.containers import Column

Expand All @@ -15,7 +13,7 @@
)
def test_has_missing_values(values: list, expected: bool) -> None:
if len(values) == 0:
column = Column("A", pd.Series(values, dtype=np.dtype("float64")))
column = Column("A", values)
else:
column = Column("A", pd.Series(values))
column = Column("A", values)
assert column.has_missing_values() == expected
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import _pytest
import matplotlib.pyplot as plt
import pandas as pd
from safeds.data.tabular.containers import Table


def test_histogram(monkeypatch: _pytest.monkeypatch) -> None:
monkeypatch.setattr(plt, "show", lambda: None)
table = Table(pd.DataFrame(data={"A": [1, 2, 3]}))
table = Table.from_dict({"A": [1, 2, 3]})
table.get_column("A").histogram()
5 changes: 2 additions & 3 deletions tests/safeds/data/tabular/containers/_column/test_idness.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import pandas as pd
import pytest
from safeds.data.tabular.containers import Column
from safeds.data.tabular.exceptions import ColumnSizeError
Expand All @@ -9,12 +8,12 @@
[(["A", "B"], 1), (["A", "A", "A", "B"], 0.5)],
)
def test_idness_valid(values: list[str], result: float) -> None:
column: Column = Column("test_idness_valid", pd.Series(values))
column = Column("test_idness_valid", values)
idness = column.idness()
assert idness == result


def test_idness_invalid() -> None:
column = Column("test_idness_invalid", pd.Series([], dtype=int))
column = Column("test_idness_invalid", [])
with pytest.raises(ColumnSizeError):
column.idness()
5 changes: 2 additions & 3 deletions tests/safeds/data/tabular/containers/_column/test_maximum.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import pandas as pd
import pytest
from safeds.data.tabular.containers import Table
from safeds.data.tabular.exceptions import NonNumericColumnError


def test_maximum_invalid() -> None:
table = Table(pd.DataFrame(data={"col1": ["col1_1", 2]}))
table = Table.from_dict({"col1": ["col1_1", 2]})
column = table.get_column("col1")
with pytest.raises(NonNumericColumnError):
column.maximum()


def test_maximum_valid() -> None:
table = Table(pd.DataFrame(data={"col1": [1, 2, 3, 4]}))
table = Table.from_dict({"col1": [1, 2, 3, 4]})
column = table.get_column("col1")
assert column.maximum() == 4
5 changes: 2 additions & 3 deletions tests/safeds/data/tabular/containers/_column/test_mean.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import pandas as pd
import pytest
from safeds.data.tabular.containers import Table
from safeds.data.tabular.exceptions import NonNumericColumnError


def test_mean_invalid() -> None:
table = Table(pd.DataFrame(data={"col1": ["col1_1", 2]}))
table = Table.from_dict({"col1": ["col1_1", 2]})
column = table.get_column("col1")
with pytest.raises(NonNumericColumnError):
column.mean()


def test_mean_valid() -> None:
table = Table(pd.DataFrame(data={"col1": [1, 2, 3, 4]}))
table = Table.from_dict({"col1": [1, 2, 3, 4]})
column = table.get_column("col1")
assert column.mean() == 2.5
Loading

0 comments on commit 2a5089e

Please sign in to comment.