From b1cb91e517a788b8445b48472f977ea8ef19019a Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Sun, 4 Aug 2024 12:14:05 +0400 Subject: [PATCH] feat(python): Support passing `Worksheet` objects to the `write_excel` method (#18031) --- py-polars/polars/dataframe/frame.py | 48 +++++++++++++++---- .../polars/io/spreadsheet/_write_utils.py | 30 ++++++++++-- py-polars/tests/unit/io/test_spreadsheet.py | 44 +++++++++++++---- 3 files changed, 102 insertions(+), 20 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 90f78be3fa7e..d8bfec28aefa 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -124,7 +124,7 @@ import torch from great_tables import GT from hvplot.plotting.core import hvPlotTabularPolars - from xlsxwriter import Workbook + from xlsxwriter import Workbook, Worksheet from polars import DataType, Expr, LazyFrame, Series from polars._typing import ( @@ -2802,8 +2802,8 @@ def write_avro( def write_excel( self, - workbook: Workbook | IO[bytes] | Path | str | None = None, - worksheet: str | None = None, + workbook: str | Workbook | IO[bytes] | Path | None = None, + worksheet: str | Worksheet | None = None, *, position: tuple[int, int] | str = "A1", table_style: str | dict[str, Any] | None = None, @@ -2838,14 +2838,15 @@ def write_excel( Parameters ---------- - workbook : Workbook + workbook : {str, Workbook} String name or path of the workbook to create, BytesIO object to write into, or an open `xlsxwriter.Workbook` object that has not been closed. If None, writes to a `dataframe.xlsx` workbook in the working directory. - worksheet : str - Name of target worksheet; if None, writes to "Sheet1" when creating a new - workbook (note that writing to an existing workbook requires a valid - existing -or new- worksheet name). + worksheet : {str, Worksheet} + Name of target worksheet or an `xlsxwriter.Worksheet` object (in which + case `workbook` must be the parent `xlsxwriter.Workbook` object); if None, + writes to "Sheet1" when creating a new workbook (note that writing to an + existing workbook requires a valid existing -or new- worksheet name). position : {str, tuple} Table position in Excel notation (eg: "A1"), or a (row,col) integer tuple. table_style : {str, dict} @@ -3154,6 +3155,37 @@ def write_excel( ... hide_gridlines=True, ... sheet_zoom=125, ... ) + + Create and reference a Worksheet object directly, adding a basic chart. + Taking advantage of structured references to set chart series values and + categories is strongly recommended so that you do not have to calculate + cell positions with respect to the frame data and worksheet: + + >>> with Workbook("basic_chart.xlsx") as wb: # doctest: +SKIP + ... # create worksheet object and write frame data to it + ... ws = wb.add_worksheet("demo") + ... df.write_excel( + ... workbook=wb, + ... worksheet=ws, + ... table_name="DataTable", + ... table_style="Table Style Medium 26", + ... hide_gridlines=True, + ... ) + ... # create chart object, point to the written table + ... # data using structured references, and style it + ... chart = wb.add_chart({"type": "column"}) + ... chart.set_title({"name": "Example Chart"}) + ... chart.set_legend({"none": True}) + ... chart.set_style(38) + ... chart.add_series( + ... { # note the use of structured references + ... "values": "=DataTable[points]", + ... "categories": "=DataTable[id]", + ... "data_labels": {"value": True}, + ... } + ... ) + ... # add chart to the worksheet + ... ws.insert_chart("D1", chart) """ # noqa: W505 from polars.io.spreadsheet._write_utils import ( _unpack_multi_column_dict, diff --git a/py-polars/polars/io/spreadsheet/_write_utils.py b/py-polars/polars/io/spreadsheet/_write_utils.py index 6509eaa54390..8e089ede8ee0 100644 --- a/py-polars/polars/io/spreadsheet/_write_utils.py +++ b/py-polars/polars/io/spreadsheet/_write_utils.py @@ -522,15 +522,36 @@ def _xl_setup_table_options( return table_style, table_options +def _xl_worksheet_in_workbook( + wb: Workbook, ws: Worksheet, *, return_worksheet: bool = False +) -> bool | Worksheet: + if any(ws is sheet for sheet in wb.worksheets()): + return ws if return_worksheet else True + msg = f"the given workbook object {wb.filename!r} is not the parent of worksheet {ws.name!r}" + raise ValueError(msg) + + def _xl_setup_workbook( - workbook: Workbook | BytesIO | Path | str | None, worksheet: str | None = None + workbook: Workbook | BytesIO | Path | str | None, + worksheet: str | Worksheet | None = None, ) -> tuple[Workbook, Worksheet, bool]: """Establish the target excel workbook and worksheet.""" from xlsxwriter import Workbook + from xlsxwriter.worksheet import Worksheet if isinstance(workbook, Workbook): wb, can_close = workbook, False - ws = wb.get_worksheet_by_name(name=worksheet) + ws = ( + worksheet + if ( + isinstance(worksheet, Worksheet) + and _xl_worksheet_in_workbook(wb, worksheet) + ) + else wb.get_worksheet_by_name(name=worksheet) + ) + elif isinstance(worksheet, Worksheet): + msg = f"worksheet object requires the parent workbook object; found workbook={workbook!r}" + raise TypeError(msg) else: workbook_options = { "nan_inf_to_errors": True, @@ -550,7 +571,10 @@ def _xl_setup_workbook( ws, can_close = None, True if ws is None: - ws = wb.add_worksheet(name=worksheet) + if isinstance(worksheet, Worksheet): + ws = _xl_worksheet_in_workbook(wb, worksheet, return_worksheet=True) + else: + ws = wb.add_worksheet(name=worksheet) return wb, ws, can_close diff --git a/py-polars/tests/unit/io/test_spreadsheet.py b/py-polars/tests/unit/io/test_spreadsheet.py index 10280a7f23be..66c23ce44c1a 100644 --- a/py-polars/tests/unit/io/test_spreadsheet.py +++ b/py-polars/tests/unit/io/test_spreadsheet.py @@ -775,21 +775,19 @@ def test_excel_sparklines(engine: ExcelSpreadsheetEngine) -> None: def test_excel_write_multiple_tables() -> None: from xlsxwriter import Workbook - # note: checks that empty tables don't error on write - df1 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64}) - df2 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64}) - df3 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64}) - df4 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64}) + # note: also checks that empty tables don't error on write + df = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64}) + # write multiple frames to multiple worksheets xls = BytesIO() with Workbook(xls) as wb: - df1.write_excel(workbook=wb, worksheet="sheet1", position="A1") - df2.write_excel(workbook=wb, worksheet="sheet1", position="A6") - df3.write_excel(workbook=wb, worksheet="sheet2", position="A1") + df.write_excel(workbook=wb, worksheet="sheet1", position="A1") + df.write_excel(workbook=wb, worksheet="sheet1", position="A6") + df.write_excel(workbook=wb, worksheet="sheet2", position="A1") # validate integration of externally-added formats fmt = wb.add_format({"bg_color": "#ffff00"}) - df4.write_excel( + df.write_excel( workbook=wb, worksheet="sheet3", position="A1", @@ -811,6 +809,34 @@ def test_excel_write_multiple_tables() -> None: assert pl.read_excel(xls, sheet_name="sheet3").rows() == [] +def test_excel_write_worksheet_object() -> None: + # write to worksheet object + from xlsxwriter import Workbook + + df = pl.DataFrame({"colx": ["aaa", "bbb", "ccc"], "coly": [-1234, 0, 5678]}) + + with Workbook(xls := BytesIO()) as wb: + ws = wb.add_worksheet("frame_data") + df.write_excel(wb, worksheet=ws) + ws.hide_zero() + + assert_frame_equal(df, pl.read_excel(xls, sheet_name="frame_data")) + + with pytest.raises( # noqa: SIM117 + ValueError, + match="the given workbook object .* is not the parent of worksheet 'frame_data'", + ): + with Workbook(BytesIO()) as wb: + df.write_excel(wb, worksheet=ws) + + with pytest.raises( # noqa: SIM117 + TypeError, + match="worksheet object requires the parent workbook object; found workbook=None", + ): + with Workbook(BytesIO()) as wb: + df.write_excel(None, worksheet=ws) + + def test_excel_freeze_panes() -> None: from xlsxwriter import Workbook