Skip to content

Commit

Permalink
feat: iterator over rust range
Browse files Browse the repository at this point in the history
  • Loading branch information
dimastbk committed Nov 29, 2023
1 parent 9da3d7c commit 5068152
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 4 deletions.
6 changes: 4 additions & 2 deletions python/python_calamine/_python_calamine.pyi
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from __future__ import annotations

import enum
import typing
from datetime import date, datetime, time, timedelta
from os import PathLike
from typing import Protocol

ValueT = int | float | str | bool | time | date | datetime | timedelta

class ReadBuffer(Protocol):
# Structural (Protocol) type: any object that provides `seek` and `read`
# with the signatures declared below satisfies it.
class ReadBuffer(typing.Protocol):
def seek(self) -> int: ...
def read(self) -> bytes: ...

Expand Down Expand Up @@ -57,6 +57,8 @@ class CalamineSheet:
By default, calamine skips empty rows/cols before data.
To suppress this behaviour, set `skip_empty_area` to `False`.
"""
def iter_rows(self) -> typing.Iterator[list[ValueT]]:
"""Return the sheet's data as an iterator of lists, one list per row."""

class CalamineWorkbook:
path: str | None
Expand Down
2 changes: 1 addition & 1 deletion src/types/cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use pyo3::prelude::*;
/// https://learn.microsoft.com/en-us/office/troubleshoot/excel/1900-and-1904-date-system
static EXCEL_1900_1904_DIFF: f64 = 1462.0;

#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum CellValue {
Int(i64),
Float(f64),
Expand Down
49 changes: 48 additions & 1 deletion src/types/sheet.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::fmt::Display;

use calamine::{DataType, Range, SheetType, SheetVisible};
use calamine::{DataType, Range, Rows, SheetType, SheetVisible};
use pyo3::class::basic::CompareOp;
use pyo3::prelude::*;

Expand Down Expand Up @@ -191,4 +191,51 @@ impl CalamineSheet {
.map(|row| row.iter().map(|x| x.into()).collect())
.collect())
}

fn iter_rows(&self) -> CalamineCellIterator {
    // Hand this sheet's cell range to the iterator constructor; the rows are
    // then produced one at a time through the iterator's `__next__`.
    let sheet_range = &self.range;
    CalamineCellIterator::from_range(sheet_range)
}
}

/// Python iterator over the rows of a sheet.
///
/// Yields one blank row (a list of empty strings, one per column of the range)
/// for every row that precedes the first data row, followed by the data rows
/// themselves.
///
/// The iterator owns a converted snapshot of the rows. An earlier design kept
/// a `Rows<'static, _>` obtained by transmuting a borrow of the sheet's range;
/// nothing kept the sheet alive for the iterator, so Python code such as
/// `list(workbook.get_sheet_by_index(0).iter_rows())` could read the range
/// after the temporary sheet object had been freed. Owning the data removes
/// the `unsafe` and that lifetime hazard, at the cost of converting the whole
/// range when the iterator is created. A zero-copy variant would require the
/// sheet and the iterator to share ownership of the range.
#[pyclass]
pub struct CalamineCellIterator {
    /// Blank rows still to be yielded before the first data row.
    leading_empty_rows: u32,
    /// Template for a blank row: one empty string per column of the range.
    empty_row: Vec<CellValue>,
    /// Owned, already converted data rows, consumed front to back.
    rows: std::vec::IntoIter<Vec<CellValue>>,
}

impl CalamineCellIterator {
    /// Builds an iterator from `range` without retaining any borrow of it.
    ///
    /// An empty range (`range.start()` is `None`) produces an iterator that
    /// yields nothing instead of panicking.
    fn from_range(range: &Range<DataType>) -> CalamineCellIterator {
        let data_rows: Rows<'_, DataType> = range.rows();
        let converted: Vec<Vec<CellValue>> = data_rows
            .map(|row| row.iter().map(|cell| cell.into()).collect())
            .collect();

        CalamineCellIterator {
            // Row index of the first data cell == number of blank rows above it.
            leading_empty_rows: range.start().map_or(0, |(row, _)| row),
            empty_row: vec![CellValue::String(String::new()); range.width()],
            rows: converted.into_iter(),
        }
    }
}

#[pymethods]
impl CalamineCellIterator {
    /// The iterator is its own iterable, as the Python iterator protocol expects.
    fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
        slf
    }

    /// Returns the next row, or `None` (Python `StopIteration`) when exhausted.
    fn __next__(mut slf: PyRefMut<'_, Self>) -> Option<Vec<CellValue>> {
        // Emit the blank rows above the data area first.
        if slf.leading_empty_rows > 0 {
            slf.leading_empty_rows -= 1;
            return Some(slf.empty_row.clone());
        }
        slf.rows.next()
    }
}
24 changes: 24 additions & 0 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,30 @@ def test_xlsx_read():
assert [] == reader.get_sheet_by_index(1).to_python(skip_empty_area=False)


def test_xlsx_iter_rows():
    """`iter_rows()` on the first sheet yields a blank leading row, then the data row."""
    expected_names = ["Sheet1", "Sheet2", "Sheet3"]

    blank_row = ["", "", "", "", "", "", "", "", "", ""]
    values_row = [
        "String",
        1,
        1.1,
        True,
        False,
        date(2010, 10, 10),
        datetime(2010, 10, 10, 10, 10, 10),
        time(10, 10, 10),
        timedelta(hours=10, minutes=10, seconds=10, microseconds=100000),
        timedelta(hours=255, minutes=10, seconds=10),
    ]
    expected_rows = [blank_row, values_row]

    workbook = CalamineWorkbook.from_object(PATH / "base.xlsx")

    assert expected_names == workbook.sheet_names
    # Keep the call chained: the sheet object is a temporary here, exactly as
    # a caller would typically write it.
    assert expected_rows == list(workbook.get_sheet_by_index(0).iter_rows())


def test_nrows():
reader = CalamineWorkbook.from_object(PATH / "base.xlsx")
sheet = reader.get_sheet_by_name("Sheet3")
Expand Down

0 comments on commit 5068152

Please sign in to comment.