Skip to content

Commit

Permalink
Difference between calamine and openpyxl readers fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
MdRiyazulIslam committed Jul 5, 2024
1 parent dcb5494 commit 5bca4b1
Showing 1 changed file with 18 additions and 14 deletions.
32 changes: 18 additions & 14 deletions pandas/io/excel/_calamine.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,8 +10,10 @@
 TYPE_CHECKING,
 Any,
 Union,
+cast,
 )

+from pandas._typing import Scalar
 from pandas.compat._optional import import_optional_dependency
 from pandas.util._decorators import doc

Expand All @@ -28,13 +30,11 @@

 from pandas._typing import (
 FilePath,
-NaTType,
 ReadBuffer,
-Scalar,
 StorageOptions,
 )

-_CellValue = Union[int, float, str, bool, time, date, datetime, timedelta]
+_CellValueT = Union[int, float, str, bool, time, date, datetime, timedelta]


 class CalamineReader(BaseExcelReader["CalamineWorkbook"]):
Expand Down Expand Up @@ -75,8 +75,7 @@ def load_workbook(
 from python_calamine import load_workbook

 return load_workbook(
-filepath_or_buffer,
-**engine_kwargs,
+filepath_or_buffer, **engine_kwargs # type: ignore[arg-type]
 )

 @property
Expand All @@ -99,26 +98,31 @@ def get_sheet_by_index(self, index: int) -> CalamineSheet:

 def get_sheet_data(
 self, sheet: CalamineSheet, file_rows_needed: int | None = None
-) -> list[list[Scalar | NaTType | time]]:
-def _convert_cell(value: _CellValue) -> Scalar | NaTType | time:
+) -> list[list[Scalar]]:
+def _convert_cell(value: _CellValueT) -> Scalar:
+# Avoid explicit conversion to pd.Timestamp and pd.Timedelta
 if isinstance(value, float):
 val = int(value)
 if val == value:
 return val
 else:
 return value
 elif isinstance(value, date):
-return pd.Timestamp(value)
+return value
 elif isinstance(value, timedelta):
-return pd.Timedelta(value)
-elif isinstance(value, time):
 return value
+elif isinstance(value, time):
+# cast needed here because Scalar doesn't include datetime.time
+return cast(Scalar, value)

 return value

-rows: list[list[_CellValue]] = sheet.to_python(
-skip_empty_area=False, nrows=file_rows_needed
-)
-data = [[_convert_cell(cell) for cell in row] for row in rows]
+rows: list[list[_CellValueT]] = sheet.to_python(skip_empty_area=False)
+data: list[list[Scalar]] = []
+
+for row in rows:
+data.append([_convert_cell(cell) for cell in row])
+if file_rows_needed is not None and len(data) >= file_rows_needed:
+break

 return data

0 comments on commit 5bca4b1

Please sign in to comment.