Skip to content

Commit

Permalink
feat: improve error handling of table (#308)
Browse files Browse the repository at this point in the history
Closes #147.

### Summary of Changes

feat: Added `WrongFileExtensionError`

* Validate inputs of a function as the first step
* Raise an appropriate exception with an appropriate message if issues
are found
* Document in the docstring that the exception is raised and under which
condition
* Test that the correct exception with the correct message is raised


Co-authored-by: Marsmaennchen221
<[email protected]>

---------

Co-authored-by: Alexander Gréus <[email protected]>
Co-authored-by: megalinter-bot <[email protected]>
Co-authored-by: Alexander <[email protected]>
Co-authored-by: Philip Gutberlet <[email protected]>
  • Loading branch information
5 people authored May 25, 2023
1 parent 5c7a662 commit ef87cc4
Show file tree
Hide file tree
Showing 34 changed files with 390 additions and 111 deletions.
125 changes: 98 additions & 27 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
NonNumericColumnError,
SchemaMismatchError,
UnknownColumnNameError,
WrongFileExtensionError,
)

from ._column import Column
Expand Down Expand Up @@ -92,9 +93,12 @@ def from_csv_file(path: str | Path) -> Table:
------
FileNotFoundError
If the specified file does not exist.
ValueError
If the file could not be read.
WrongFileExtensionError
If the file is not a csv file.
"""
path = Path(path)
if path.suffix != ".csv":
raise WrongFileExtensionError(path, ".csv")
try:
return Table._from_pandas_dataframe(pd.read_csv(path))
except FileNotFoundError as exception:
Expand All @@ -105,6 +109,8 @@ def from_excel_file(path: str | Path) -> Table:
"""
Read data from an Excel file into a table.
Valid file extensions are `.xls`, `.xlsx`, `.xlsm`, `.xlsb`, `.odf`, `.ods` and `.odt`.
Parameters
----------
path : str | Path
Expand All @@ -119,9 +125,13 @@ def from_excel_file(path: str | Path) -> Table:
------
FileNotFoundError
If the specified file does not exist.
ValueError
If the file could not be read.
WrongFileExtensionError
If the file is not an Excel file.
"""
path = Path(path)
excel_extensions = [".xls", ".xlsx", ".xlsm", ".xlsb", ".odf", ".ods", ".odt"]
if path.suffix not in excel_extensions:
raise WrongFileExtensionError(path, excel_extensions)
try:
return Table._from_pandas_dataframe(
pd.read_excel(path, engine="openpyxl", usecols=lambda colname: "Unnamed" not in colname),
Expand All @@ -148,9 +158,12 @@ def from_json_file(path: str | Path) -> Table:
------
FileNotFoundError
If the specified file does not exist.
ValueError
If the file could not be read.
WrongFileExtensionError
If the file is not a JSON file.
"""
path = Path(path)
if path.suffix != ".json":
raise WrongFileExtensionError(path, ".json")
try:
return Table._from_pandas_dataframe(pd.read_json(path))
except FileNotFoundError as exception:
Expand Down Expand Up @@ -197,14 +210,20 @@ def from_columns(columns: list[Column]) -> Table:
------
ColumnLengthMismatchError
If any of the column sizes does not match with the others.
DuplicateColumnNameError
If multiple columns have the same name.
"""
dataframe: DataFrame = pd.DataFrame()
column_names = []

for column in columns:
if column._data.size != columns[0]._data.size:
raise ColumnLengthMismatchError(
"\n".join(f"{column.name}: {column._data.size}" for column in columns),
)
if column.name in column_names:
raise DuplicateColumnNameError(column.name)
column_names.append(column.name)
dataframe[column.name] = column._data

return Table._from_pandas_dataframe(dataframe)
Expand Down Expand Up @@ -328,6 +347,8 @@ def __eq__(self, other: Any) -> bool:
return NotImplemented
if self is other:
return True
if self.number_of_rows == 0 and other.number_of_rows == 0:
return self.column_names == other.column_names
table1 = self.sort_columns()
table2 = other.sort_columns()
return table1._schema == table2._schema and table1._data.equals(table2._data)
Expand Down Expand Up @@ -463,7 +484,7 @@ def get_column_type(self, column_name: str) -> ColumnType:
Raises
------
ColumnNameError
UnknownColumnNameError
If the specified target column name does not exist.
"""
return self._schema.get_column_type(column_name)
Expand Down Expand Up @@ -627,6 +648,10 @@ def add_row(self, row: Row) -> Table:
table : Table
A new table with the added row at the end.
Raises
------
SchemaMismatchError
If the schema of the row does not match the table schema.
"""
if self._schema != row.schema:
raise SchemaMismatchError
Expand All @@ -650,6 +675,11 @@ def add_rows(self, rows: list[Row] | Table) -> Table:
-------
result : Table
A new table which combines the original table and the given rows.
Raises
------
SchemaMismatchError
If the schema of one of the rows does not match the table schema.
"""
if isinstance(rows, Table):
rows = rows.to_rows()
Expand Down Expand Up @@ -705,7 +735,7 @@ def keep_only_columns(self, column_names: list[str]) -> Table:
Raises
------
ColumnNameError
UnknownColumnNameError
If any of the given columns does not exist.
"""
invalid_columns = []
Expand Down Expand Up @@ -737,7 +767,7 @@ def remove_columns(self, column_names: list[str]) -> Table:
Raises
------
ColumnNameError
UnknownColumnNameError
If any of the given columns does not exist.
"""
invalid_columns = []
Expand Down Expand Up @@ -851,7 +881,7 @@ def rename_column(self, old_name: str, new_name: str) -> Table:
Raises
------
ColumnNameError
UnknownColumnNameError
If the specified old target column name does not exist.
DuplicateColumnNameError
If the specified new target column name already exists.
Expand Down Expand Up @@ -960,7 +990,7 @@ def slice_rows(
Raises
------
ValueError
IndexOutOfBoundsError
If the index is out of bounds.
"""
if start is None:
Expand All @@ -969,8 +999,10 @@ def slice_rows(
if end is None:
end = self.number_of_rows

if start < 0 or end < 0 or start >= self.number_of_rows or end > self.number_of_rows or end < start:
raise ValueError("The given index is out of bounds")
if end < start:
raise IndexOutOfBoundsError(slice(start, end))
if start < 0 or end < 0 or start > self.number_of_rows or end > self.number_of_rows:
raise IndexOutOfBoundsError(start if start < 0 or start > self.number_of_rows else end)

new_df = self._data.iloc[start:end:step]
new_df.columns = self._schema.column_names
Expand Down Expand Up @@ -1053,10 +1085,13 @@ def split(self, percentage_in_first: float) -> tuple[Table, Table]:
A tuple containing the two resulting tables. The first table has the specified size, the second table
contains the rest of the data.
Raises
------
ValueError
If `percentage_in_first` is not between 0 and 1.
"""
if percentage_in_first <= 0 or percentage_in_first >= 1:
raise ValueError("the given percentage is not in range")
if percentage_in_first < 0 or percentage_in_first > 1:
raise ValueError("The given percentage is not between 0 and 1")
return (
self.slice_rows(0, round(percentage_in_first * self.number_of_rows)),
self.slice_rows(round(percentage_in_first * self.number_of_rows)),
Expand All @@ -1079,6 +1114,13 @@ def tag_columns(self, target_name: str, feature_names: list[str] | None = None)
-------
tagged_table : TaggedTable
A new tagged table with the given target and feature names.
Raises
------
ValueError
If the target column is also a feature column.
ValueError
If no feature columns are specified.
"""
from ._tagged_table import TaggedTable

Expand Down Expand Up @@ -1241,10 +1283,11 @@ def plot_lineplot(self, x_column_name: str, y_column_name: str) -> Image:
UnknownColumnNameError
If either of the columns does not exist.
"""
if not self.has_column(x_column_name):
raise UnknownColumnNameError([x_column_name])
if not self.has_column(y_column_name):
raise UnknownColumnNameError([y_column_name])
if not self.has_column(x_column_name) or not self.has_column(y_column_name):
raise UnknownColumnNameError(
([x_column_name] if not self.has_column(x_column_name) else [])
+ ([y_column_name] if not self.has_column(y_column_name) else []),
)

fig = plt.figure()
ax = sns.lineplot(
Expand Down Expand Up @@ -1288,10 +1331,11 @@ def plot_scatterplot(self, x_column_name: str, y_column_name: str) -> Image:
UnknownColumnNameError
If either of the columns does not exist.
"""
if not self.has_column(x_column_name):
raise UnknownColumnNameError([x_column_name])
if not self.has_column(y_column_name):
raise UnknownColumnNameError([y_column_name])
if not self.has_column(x_column_name) or not self.has_column(y_column_name):
raise UnknownColumnNameError(
([x_column_name] if not self.has_column(x_column_name) else [])
+ ([y_column_name] if not self.has_column(y_column_name) else []),
)

fig = plt.figure()
ax = sns.scatterplot(
Expand Down Expand Up @@ -1399,8 +1443,16 @@ def to_csv_file(self, path: str | Path) -> None:
----------
path : str | Path
The path to the output file.
Raises
------
WrongFileExtensionError
If the file is not a csv file.
"""
Path(path).parent.mkdir(parents=True, exist_ok=True)
path = Path(path)
if path.suffix != ".csv":
raise WrongFileExtensionError(path, ".csv")
path.parent.mkdir(parents=True, exist_ok=True)
data_to_csv = self._data.copy()
data_to_csv.columns = self._schema.column_names
data_to_csv.to_csv(path, index=False)
Expand All @@ -1409,19 +1461,30 @@ def to_excel_file(self, path: str | Path) -> None:
"""
Write the data from the table into an Excel file.
Valid file extensions are `.xls`, `.xlsx`, `.xlsm`, `.xlsb`, `.odf`, `.ods` and `.odt`.
If the file and/or the directories do not exist, they will be created. If the file already exists, it will be
overwritten.
Parameters
----------
path : str | Path
The path to the output file.
Raises
------
WrongFileExtensionError
If the file is not an Excel file.
"""
path = Path(path)
excel_extensions = [".xls", ".xlsx", ".xlsm", ".xlsb", ".odf", ".ods", ".odt"]
if path.suffix not in excel_extensions:
raise WrongFileExtensionError(path, excel_extensions)

# Create Excel metadata in the file
tmp_table_file = openpyxl.Workbook()
tmp_table_file.save(path)

Path(path).parent.mkdir(parents=True, exist_ok=True)
path.parent.mkdir(parents=True, exist_ok=True)
data_to_excel = self._data.copy()
data_to_excel.columns = self._schema.column_names
data_to_excel.to_excel(path)
Expand All @@ -1437,8 +1500,16 @@ def to_json_file(self, path: str | Path) -> None:
----------
path : str | Path
The path to the output file.
Raises
------
WrongFileExtensionError
If the file is not a JSON file.
"""
Path(path).parent.mkdir(parents=True, exist_ok=True)
path = Path(path)
if path.suffix != ".json":
raise WrongFileExtensionError(path, ".json")
path.parent.mkdir(parents=True, exist_ok=True)
data_to_json = self._data.copy()
data_to_json.columns = self._schema.column_names
data_to_json.to_json(path)
Expand Down
2 changes: 1 addition & 1 deletion src/safeds/data/tabular/typing/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def get_column_type(self, column_name: str) -> ColumnType:
Raises
------
ColumnNameError
UnknownColumnNameError
If the specified column name does not exist.
Examples
Expand Down
2 changes: 2 additions & 0 deletions src/safeds/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
TransformerNotFittedError,
UnknownColumnNameError,
ValueNotPresentWhenFittedError,
WrongFileExtensionError,
)
from safeds.exceptions._ml import (
DatasetContainsTargetError,
Expand All @@ -31,6 +32,7 @@
"TransformerNotFittedError",
"UnknownColumnNameError",
"ValueNotPresentWhenFittedError",
"WrongFileExtensionError",
# ML exceptions
"DatasetContainsTargetError",
"DatasetMissesFeaturesError",
Expand Down
22 changes: 21 additions & 1 deletion src/safeds/exceptions/_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from pathlib import Path


class UnknownColumnNameError(KeyError):
"""
Expand Down Expand Up @@ -47,7 +52,10 @@ class IndexOutOfBoundsError(IndexError):
"""

def __init__(self, index: int | slice):
super().__init__(f"There is no element at index '{index}'.")
if isinstance(index, int):
super().__init__(f"There is no element at index '{index}'.")
else:
super().__init__(f"There is no element in the range [{index.start}, {index.stop}]")


class ColumnSizeError(Exception):
Expand Down Expand Up @@ -92,3 +100,15 @@ class ValueNotPresentWhenFittedError(Exception):

def __init__(self, value: str, column: str) -> None:
super().__init__(f"Value not present in the table the transformer was fitted on: \n{value} in column {column}.")


class WrongFileExtensionError(Exception):
    """
    Exception raised when a file does not have an expected file extension.

    Parameters
    ----------
    file : str | Path
        The offending file; it is interpolated into the error message.
    file_extension : str | list[str]
        The expected extension, or a list of accepted extensions; it is interpolated into the error message as-is.
    """

    def __init__(self, file: str | Path, file_extension: str | list[str]) -> None:
        # Build the message first so the call to the base class stays short.
        message = (
            f"The file {file} has a wrong file extension. Please provide a file with the following extension(s):"
            f" {file_extension}"
        )
        super().__init__(message)
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ def test_should_add_column(table1: Table, column: Column, expected: Table) -> No

def test_should_raise_error_if_column_name_exists() -> None:
table1 = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
with pytest.raises(DuplicateColumnNameError):
with pytest.raises(DuplicateColumnNameError, match=r"Column 'col1' already exists."):
table1.add_column(Column("col1", ["a", "b", "c"]))


def test_should_raise_error_if_column_size_invalid() -> None:
table1 = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
with pytest.raises(ColumnSizeError):
with pytest.raises(ColumnSizeError, match=r"Expected a column of size 3 but got column of size 4."):
table1.add_column(Column("col3", ["a", "b", "c", "d"]))
Loading

0 comments on commit ef87cc4

Please sign in to comment.