diff --git a/poetry.lock b/poetry.lock
index 95999ca74..9eb14e49a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
 
 [[package]]
 name = "anyio"
@@ -617,6 +617,18 @@ files = [
     {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
 ]
 
+[[package]]
+name = "et-xmlfile"
+version = "1.1.0"
+description = "An implementation of lxml.xmlfile for the standard library"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"},
+    {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"},
+]
+
 [[package]]
 name = "exceptiongroup"
 version = "1.1.1"
@@ -1936,6 +1948,21 @@ files = [
     {file = "numpy-1.24.2.tar.gz", hash = "sha256:003a9f530e880cb2cd177cba1af7220b9aa42def9c4afc2a2fc3ee6be7eb2b22"},
 ]
 
+[[package]]
+name = "openpyxl"
+version = "3.1.2"
+description = "A Python library to read/write Excel 2010 xlsx/xlsm files"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "openpyxl-3.1.2-py2.py3-none-any.whl", hash = "sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5"},
+    {file = "openpyxl-3.1.2.tar.gz", hash = "sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184"},
+]
+
+[package.dependencies]
+et-xmlfile = "*"
+
 [[package]]
 name = "packaging"
 version = "23.0"
@@ -3272,4 +3299,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "b68a97800116202f4d49e76bb6d02224897e69c04827626f0605ac086c479e1b"
+content-hash = "b85a191f5e4d210385b0bba9aca6296350f32987c0017b3a48a9c3c350a6b0cb"
diff --git a/pyproject.toml b/pyproject.toml
index c7441ad37..26c3f3385 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ pandas = "^2.0.0"
 pillow = "^9.5.0"
 scikit-learn = "^1.2.0"
 seaborn = "^0.12.2"
+openpyxl = "^3.1.2"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.2.1"
diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
index 1ee389aa1..015afe946 100644
--- a/src/safeds/data/tabular/containers/_table.py
+++ b/src/safeds/data/tabular/containers/_table.py
@@ -7,6 +7,7 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
+import openpyxl
 import pandas as pd
 import seaborn as sns
 from pandas import DataFrame
@@ -84,6 +85,33 @@ def from_csv_file(path: str | Path) -> Table:
         except FileNotFoundError as exception:
             raise FileNotFoundError(f'File "{path}" does not exist') from exception
 
+    @staticmethod
+    def from_excel_file(path: str | Path) -> Table:
+        """
+        Read data from an Excel file into a table.
+
+        Parameters
+        ----------
+        path : str | Path
+            The path to the Excel file.
+
+        Returns
+        -------
+        table : Table
+            The table created from the Excel file.
+
+        Raises
+        ------
+        FileNotFoundError
+            If the specified file does not exist.
+        ValueError
+            If the file could not be read.
+        """
+        try:
+            return Table(pd.read_excel(path, engine="openpyxl", usecols=lambda colname: "Unnamed" not in colname))
+        except FileNotFoundError as exception:
+            raise FileNotFoundError(f'File "{path}" does not exist') from exception
+
     @staticmethod
     def from_json_file(path: str | Path) -> Table:
         """
@@ -1242,6 +1270,29 @@ def to_csv_file(self, path: str | Path) -> None:
         data_to_csv.columns = self._schema.column_names
         data_to_csv.to_csv(path, index=False)
 
+    def to_excel_file(self, path: str | Path) -> None:
+        """
+        Write the data from the table into an Excel file.
+
+        If the file and/or the directories do not exist, they will be created. If the file already exists, it will be
+        overwritten.
+
+        Parameters
+        ----------
+        path : str | Path
+            The path to the output file.
+        """
+        # Create missing parent directories first; saving the workbook below fails if they do not exist
+        Path(path).parent.mkdir(parents=True, exist_ok=True)
+
+        # Create Excel metadata in the file
+        tmp_table_file = openpyxl.Workbook()
+        tmp_table_file.save(path)
+
+        data_to_excel = self._data.copy()
+        data_to_excel.columns = self._schema.column_names
+        data_to_excel.to_excel(path)
+
     def to_json_file(self, path: str | Path) -> None:
         """
         Write the data from the table into a JSON file.
diff --git a/tests/resources/dummy_excel_file.xlsx b/tests/resources/dummy_excel_file.xlsx
new file mode 100644
index 000000000..bf39ce52b
Binary files /dev/null and b/tests/resources/dummy_excel_file.xlsx differ
diff --git a/tests/safeds/data/tabular/containers/_table/test_from_excel_file.py b/tests/safeds/data/tabular/containers/_table/test_from_excel_file.py
new file mode 100644
index 000000000..6a890b18e
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_table/test_from_excel_file.py
@@ -0,0 +1,40 @@
+from pathlib import Path
+
+import pytest
+from safeds.data.tabular.containers import Table
+
+from tests.helpers import resolve_resource_path
+
+
+@pytest.mark.parametrize(
+    ("path", "expected"),
+    [
+        (
+            resolve_resource_path("./dummy_excel_file.xlsx"),
+            Table.from_dict(
+                {
+                    "A": [1],
+                    "B": [2],
+                },
+            ),
+        ),
+        (
+            Path(resolve_resource_path("./dummy_excel_file.xlsx")),
+            Table.from_dict(
+                {
+                    "A": [1],
+                    "B": [2],
+                },
+            ),
+        ),
+    ],
+    ids=["string path", "object path"],
+)
+def test_should_create_table_from_excel_file(path: str | Path, expected: Table) -> None:
+    table = Table.from_excel_file(path)
+    assert table == expected
+
+
+def test_should_raise_if_file_not_found() -> None:
+    with pytest.raises(FileNotFoundError):
+        Table.from_excel_file(resolve_resource_path("test_table_from_excel_file_invalid.xls"))
diff --git a/tests/safeds/data/tabular/containers/_table/test_to_excel_file.py b/tests/safeds/data/tabular/containers/_table/test_to_excel_file.py
new file mode 100644
index 000000000..d3f44f1e8
--- /dev/null
+++ b/tests/safeds/data/tabular/containers/_table/test_to_excel_file.py
@@ -0,0 +1,26 @@
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+
+from safeds.data.tabular.containers import Table
+
+
+def test_should_create_excel_file_from_table_by_str() -> None:
+    table = Table.from_dict({"col1": ["col1_1"], "col2": ["col2_1"]})
+    with NamedTemporaryFile(suffix=".xlsx") as tmp_table_file:
+        tmp_table_file.close()
+        with Path(tmp_table_file.name).open("w", encoding="utf-8") as tmp_file:
+            table.to_excel_file(tmp_file.name)
+        with Path(tmp_table_file.name).open("r", encoding="utf-8") as tmp_file:
+            table_r = Table.from_excel_file(tmp_file.name)
+    assert table == table_r
+
+
+def test_should_create_excel_file_from_table_by_path() -> None:
+    table = Table.from_dict({"col1": ["col1_1"], "col2": ["col2_1"]})
+    with NamedTemporaryFile(suffix=".xlsx") as tmp_table_file:
+        tmp_table_file.close()
+        with Path(tmp_table_file.name).open("w", encoding="utf-8") as tmp_file:
+            table.to_excel_file(Path(tmp_file.name))
+        with Path(tmp_table_file.name).open("r", encoding="utf-8") as tmp_file:
+            table_r = Table.from_excel_file(Path(tmp_file.name))
+    assert table == table_r