From 8371d18218f854fce4c22104eb78eee9c14c62d8 Mon Sep 17 00:00:00 2001 From: Thomas Guillet Date: Wed, 20 Sep 2023 18:57:32 +0200 Subject: [PATCH] BUG: manage raw ods file without cell cache (#55219) * BUG: manage raw ods file without cell cache * fixup! BUG: manage raw ods file without cell cache --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/io/excel/_odfreader.py | 12 +----------- .../tests/io/data/excel/test_unempty_cells.ods | Bin 0 -> 2307 bytes pandas/tests/io/excel/test_odf.py | 11 +++++++++++ 4 files changed, 13 insertions(+), 11 deletions(-) create mode 100644 pandas/tests/io/data/excel/test_unempty_cells.ods diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index da2e30edc80ea..b97198c36891c 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -223,6 +223,7 @@ Bug fixes - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`) - Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`) - Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`) +- Bug in :meth:`pandas.read_excel` with a ODS file without cached formatted cell for float values (:issue:`55219`) Categorical ^^^^^^^^^^^ diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 8016dbbaf7f42..277f64f636731 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -150,7 +150,7 @@ def get_sheet_data( max_row_len = len(table_row) row_repeat = self._get_row_repeat(sheet_row) - if self._is_empty_row(sheet_row): + if len(table_row) == 0: empty_rows += row_repeat else: # add blank rows to our table @@ -182,16 +182,6 @@ def _get_column_repeat(self, cell) -> int: return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1)) - def _is_empty_row(self, row) -> bool: - """ - Helper function to find empty rows - """ - for column in row.childNodes: - if len(column.childNodes) > 0: - return False - - return True - def _get_cell_value(self, cell) -> Scalar | NaTType: from odf.namespaces import OFFICENS diff --git a/pandas/tests/io/data/excel/test_unempty_cells.ods b/pandas/tests/io/data/excel/test_unempty_cells.ods new file mode 100644 index 0000000000000000000000000000000000000000..52458753bae061138b3e8d65d403f1eab8b68086 GIT binary patch literal 2307 zcmZ`*dpMKrAK$DwrRL4FlBCjB^hGM0eX48Xf<5YQwnNrOPd;N1yV{c#vPS%XCM z!=T+s*Dx3|>K{(YpPc4?0#LVz%SsFYkYBr>u`^LOw>H(pq48J`42i4}i1R5)Y{IuF z%bes2W1^APVT;Fd64`w4kc8uC=~-74$BsFYVmVljEqZ5B7lvb3sPm(c)~ua~I}uKe zecd5ZpODcDV=IE1fw6q!*mFV|Xnea{<5uTjvu`2(UJu*1kG3Z(@4w8ew#!3(%edS6 zqqK&Z*R~}%Rw~Njm^y2Fx~+n0d&i;NYGRu#IwAj&Ey_DaDNmQs1zqq2trT1!Wy*?W z$(~6S#_4-F?bd6BW_a-n_zuupnLfYU~?W+ zx)~po>ofsxbt~lgSmSQfwHbaiLv0G`A-IY2gmJ$4O5b5=`GS>GG$swlQ*3URO?xb5 zty_tYlFWxraKbHoKey4RwGV*L(=DWxemmRK43Ap%RxP8cU-a(&k5rbG=X2?C-L&xt zw*Da@5_G!Sw0AG0p3>{Jf(m7nkJj=s{Xr1y3=&J8KnGlg_YYr9j=5()IVuc&%27p} z*g<{d+;Cu-kZuq`RMk_@E%H)x>Ob%p6S1cwvftbEV4nLao-H#RIuk?66Q@`j%Z ziORAvytH$bacAnLIEd<=ozvay&Jgf9$b6tuePYdPZBs;LRffwMYNkSz%@%67IB;BW z`@4VR&C40tH&il;zgH=88;P+wwflH}#yC#fA2=3DJQ{RwYj;?mxN#PiYgM}#oM-GRW7f@V;c^TA%_VtoZo)*2gIL}fJM-xV&pYZ?4qjTbw;Z`Dg_R8G z5r%uBq*iKvs;bGu=O4VSKYxTHTyS}gnh0mZhB8fqG^Ljz)e*UI2ZA}F$N=75P;GaY zF4nC>XKNmq5;~c1(2wAPrH-8~ulvUHVU^Kw6J=C88)oO^fKM-Vy4ViUFE_YZ;S-xZOlg@R zL0I)t1f7cPVB!iqW1DqdteUSnc58}T)Jc@^*o?vmqS;aw@9RDL5}rl={?`-_W*Djr z(+b`b`#>kT9qCc2+59P;joy;~mVZ=dT;g%@dtAYcD>$oRmK_v1>;KRt;&*){r?jCi z@QFtcWo9q|Xe^)z#1X#4!zS9a+e8-!l~`n$T9s;rQt zZgNR?Ka^;6VyZ}r2KbQjR$s@Vj+K;=lr+R`8anJe-q)lM&x*ZsB_ha}4>!N(?Iv9s zZ8cXQM!BP#SoO^q!wRu;SZy_Qo;EX>bTr_l_zEE?Bv1?r`u}k*I?z8ysL0FK!+jm~ z?}7b43;+-cv=dGLHNH1eY)rg$3J1~BqJN5?`L_|Z(Pr0CFi~=@o9;&3#-?4z0l~kt WFbWA4IXD0S7G27s7hy`QJ^c-V>i0YV literal 0 HcmV?d00001 diff --git a/pandas/tests/io/excel/test_odf.py b/pandas/tests/io/excel/test_odf.py index 25079b235d332..9d49718bec357 100644 --- a/pandas/tests/io/excel/test_odf.py +++ b/pandas/tests/io/excel/test_odf.py @@ -48,3 +48,14 @@ def test_read_newlines_between_xml_elements_table(): result = pd.read_excel("test_newlines.ods") tm.assert_frame_equal(result, expected) + + +def test_read_unempty_cells(): + expected = pd.DataFrame( + [1, np.nan, 3, np.nan, 5], + columns=["Column 1"], + ) + + result = pd.read_excel("test_unempty_cells.ods") + + tm.assert_frame_equal(result, expected)