Skip to content

Commit

Permalink
Add manual iterator implementations for custom vec iterables (#1107)
Browse files Browse the repository at this point in the history
* Add manual iterator implementations for custom vec iterables

This adds manual `Iterator` implementors for the custom vec `Iterable`
pyclasses, which were previously using the Python default iterator
implementation that attempts calls to `__getitem__` with successive
integers until an `IndexError` is raised.

Doing this means that we no longer have any overhead from converting
Python-space integer indices to Rust-space ones, because we just store
a pointer to the backing vec internally and yield from that using an
index we track ourselves.

* Add release note

* Remove `__len__` from custom iterators

This matches the Python behaviour for the implicit sequence-protocol
iterators; they have only `__length_hint__` and not `__len__`.

* Add reversed iterators

* Add explicit tests

* Fixup release note
  • Loading branch information
jakelishman authored Feb 21, 2024
1 parent a9d8686 commit bf3f703
Show file tree
Hide file tree
Showing 4 changed files with 189 additions and 3 deletions.
10 changes: 10 additions & 0 deletions releasenotes/notes/custom-iterators-dce8557a8f87e8c0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
features:
- |
Rustworkx functions that return custom iterable objects, such as
:meth:`.PyDiGraph.node_indices`, now each have an associated custom
iterator and reversed-iterator object for these. This provides a speedup
of approximately 40% for iterating through the custom iterables.
These types are not directly nameable or constructable from Python space, and other than the
performance improvement, the behavior should largely not be noticable from Python space.
2 changes: 2 additions & 0 deletions rustworkx/rustworkx.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,8 @@ class _RustworkxCustomVecIter(Generic[_T_co], Sequence[_T_co], ABC):
def __ne__(self, other: object) -> bool: ...
def __setstate__(self, state: Sequence[_T_co]) -> None: ...
def __array__(self, _dt: np.dtype | None = ...) -> np.ndarray: ...
def __iter__(self) -> Iterator[_T_co]: ...
def __reversed__(self) -> Iterator[_T_co]: ...

class _RustworkxCustomHashMapIter(Generic[_S, _T_co], Mapping[_S, _T_co], ABC):
def __init__(self) -> None: ...
Expand Down
136 changes: 133 additions & 3 deletions src/iterators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
// There are two useful macros to quickly define a new custom return type:
//
// :`custom_vec_iter_impl` holds a `Vec<T>` and can be used as a
// read-only sequence/list. To use it, you should specify the name of the new type,
// the name of the vector that holds the data, the type `T` and a docstring.
// read-only sequence/list. To use it, you should specify the name of the new type for the
// iterable, a name for that new type's iterator, a name for the new type's reversed iterator, the
// name of the vector that holds the data, the type `T` and a docstring.
//
// e.g `custom_vec_iter_impl!(MyReadOnlyType, data, (usize, f64), "Docs");`
// defines a new type named `MyReadOnlyType` that holds a vector called `data`
Expand Down Expand Up @@ -473,7 +474,7 @@ impl PyConvertToPyArray for Vec<(usize, usize, PyObject)> {
}

macro_rules! custom_vec_iter_impl {
($name:ident, $data:ident, $T:ty, $doc:literal) => {
($name:ident, $iter:ident, $reversed:ident, $data:ident, $T:ty, $doc:literal) => {
#[doc = $doc]
#[pyclass(module = "rustworkx", sequence)]
#[derive(Clone)]
Expand Down Expand Up @@ -580,6 +581,20 @@ macro_rules! custom_vec_iter_impl {
}
}

fn __iter__(self_: Py<Self>, py: Python) -> $iter {
$iter {
inner: Some(self_.clone_ref(py)),
index: 0,
}
}

fn __reversed__(self_: Py<Self>, py: Python) -> $reversed {
$reversed {
inner: Some(self_.clone_ref(py)),
index: 0,
}
}

fn __array__(&self, py: Python, _dt: Option<&PyArrayDescr>) -> PyResult<PyObject> {
// Note: we accept the dtype argument on the signature but
// effictively do nothing with it to let Numpy handle the conversion itself
Expand All @@ -594,11 +609,114 @@ macro_rules! custom_vec_iter_impl {
PyGCProtocol::__clear__(self)
}
}

#[doc = concat!("Custom iterator class for :class:`.", stringify!($name), "`")]
// No module because this isn't constructable from Python space, and is only exposed as an
// implementation detail.
#[pyclass]
pub struct $iter {
inner: Option<Py<$name>>,
index: usize,
}

#[pymethods]
impl $iter {
fn __next__(&mut self, py: Python) -> Option<Py<PyAny>> {
let data = self.inner.as_ref().unwrap().borrow(py);
if self.index < data.$data.len() {
let out = data.$data[self.index].clone().into_py(py);
self.index += 1;
Some(out)
} else {
None
}
}

fn __iter__(self_: Py<Self>) -> Py<Self> {
// Python iterators typically just return themselves from this, though in principle
// we could return a separate object that iterates starting from the same point.
self_
}

fn __length_hint__(&self, py: Python) -> usize {
self.inner
.as_ref()
.unwrap()
.borrow(py)
.$data
.len()
.saturating_sub(self.index)
}

fn __traverse__(&self, vis: PyVisit) -> Result<(), PyTraverseError> {
if let Some(obj) = self.inner.as_ref() {
vis.call(obj)?
}
Ok(())
}

fn __clear__(&mut self) {
self.inner = None;
}
}

#[doc = concat!("Custom reversed iterator class for :class:`.", stringify!($name), "`")]
// No module because this isn't constructable from Python space, and is only exposed as an
// implementation detail.
#[pyclass]
pub struct $reversed {
inner: Option<Py<$name>>,
index: usize,
}

#[pymethods]
impl $reversed {
fn __next__(&mut self, py: Python) -> Option<Py<PyAny>> {
let data = self.inner.as_ref().unwrap().borrow(py);
let len = data.$data.len();
if self.index < len {
let out = data.$data[len - self.index - 1].clone().into_py(py);
self.index += 1;
Some(out)
} else {
None
}
}

fn __iter__(self_: Py<Self>) -> Py<Self> {
// Python iterators typically just return themselves from this, though in principle
// we could return a separate object that iterates starting from the same point.
self_
}

fn __length_hint__(&self, py: Python) -> usize {
self.inner
.as_ref()
.unwrap()
.borrow(py)
.$data
.len()
.saturating_sub(self.index)
}

fn __traverse__(&self, vis: PyVisit) -> Result<(), PyTraverseError> {
if let Some(obj) = self.inner.as_ref() {
vis.call(obj)?
}
Ok(())
}

fn __clear__(&mut self) {
self.inner = None;
}
}
};
}

custom_vec_iter_impl!(
BFSSuccessors,
BFSSuccessorsIter,
BFSSuccessorsRev,
bfs_successors,
(PyObject, Vec<PyObject>),
"A custom class for the return from :func:`rustworkx.bfs_successors`
Expand Down Expand Up @@ -651,6 +769,8 @@ impl PyGCProtocol for BFSSuccessors {

custom_vec_iter_impl!(
BFSPredecessors,
BFSPredecessorsIter,
BFSPredecessorsRev,
bfs_predecessors,
(PyObject, Vec<PyObject>),
"A custom class for the return from :func:`rustworkx.bfs_predecessors`
Expand Down Expand Up @@ -703,6 +823,8 @@ impl PyGCProtocol for BFSPredecessors {

custom_vec_iter_impl!(
NodeIndices,
NodeIndicesIter,
NodeIndicesRev,
nodes,
usize,
"A custom class for the return of node indices
Expand Down Expand Up @@ -735,6 +857,8 @@ impl PyGCProtocol for NodeIndices {}

custom_vec_iter_impl!(
EdgeList,
EdgeListIter,
EdgeListRev,
edges,
(usize, usize),
"A custom class for the return of edge lists
Expand Down Expand Up @@ -773,6 +897,8 @@ impl PyGCProtocol for EdgeList {}

custom_vec_iter_impl!(
WeightedEdgeList,
WeightedEdgeListIter,
WeightedEdgeListRev,
edges,
(usize, usize, PyObject),
"A custom class for the return of edge lists with weights
Expand Down Expand Up @@ -823,6 +949,8 @@ impl PyGCProtocol for WeightedEdgeList {

custom_vec_iter_impl!(
EdgeIndices,
EdgeIndicesIter,
EdgeIndicesRev,
edges,
usize,
"A custom class for the return of edge indices
Expand Down Expand Up @@ -875,6 +1003,8 @@ impl PyDisplay for EdgeList {

custom_vec_iter_impl!(
Chains,
ChainsIter,
ChainsRev,
chains,
EdgeList,
"A custom class for the return of a list of list of edges.
Expand Down
44 changes: 44 additions & 0 deletions tests/test_custom_return_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,16 @@ def test_slices(self):
slice_return = successors[0:3:2]
self.assertEqual([("a", ["c", "b"])], slice_return)

def test_iter(self):
self.dag.add_child(self.node_a, "c", "edge")
successors = rustworkx.bfs_successors(self.dag, 0)
self.assertEqual(list(successors), list(iter(successors)))

def test_reversed(self):
self.dag.add_child(self.node_a, "c", "edge")
successors = rustworkx.bfs_successors(self.dag, 0)
self.assertEqual(list(successors)[::-1], list(reversed(successors)))


class TestNodeIndicesComparisons(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -174,6 +184,10 @@ def test_slices_negatives(self):
self.assertEqual([2, 3], slice_return)
self.assertEqual([], indices[-1:-2])

def test_iter(self):
indices = self.dag.node_indices()
self.assertEqual(list(indices), list(iter(indices)))

def test_reversed(self):
indices = self.dag.node_indices()
reversed_slice = indices[::-1]
Expand Down Expand Up @@ -352,6 +366,14 @@ def test_slices(self):
slice_return = edges[0:-1]
self.assertEqual([0, 1], slice_return)

def test_iter(self):
indices = self.dag.edge_indices()
self.assertEqual(list(indices), list(iter(indices)))

def test_reversed(self):
indices = self.dag.edge_indices()
self.assertEqual(list(indices)[::-1], list(reversed(indices)))


class TestEdgeListComparisons(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -425,6 +447,14 @@ def test_numpy_conversion():
np.asarray(res, dtype=np.uintp), np.array([[0, 1], [0, 2], [0, 3], [0, 4]])
)

def test_iter(self):
edges = self.dag.edge_list()
self.assertEqual(list(edges), list(iter(edges)))

def test_reversed(self):
edges = self.dag.edge_list()
self.assertEqual(list(edges)[::-1], list(reversed(edges)))


class TestWeightedEdgeListComparisons(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -500,6 +530,14 @@ def test_numpy_conversion(self):
np.asarray(self.dag.weighted_edge_list()), np.array([(0, 1, "Edgy")], dtype=object)
)

def test_iter(self):
edges = self.dag.weighted_edge_list()
self.assertEqual(list(edges), list(iter(edges)))

def test_reversed(self):
edges = self.dag.weighted_edge_list()
self.assertEqual(list(edges)[::-1], list(reversed(edges)))


class TestPathMapping(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -1387,6 +1425,12 @@ def test_numpy_conversion(self):
# this test assumes the array is 1-dimensional which avoids issues with jagged arrays
self.assertTrue(np.asarray(self.chains).shape, (1,))

def test_iter(self):
self.assertEqual(list(self.chains), list(iter(self.chains)))

def test_reversed(self):
self.assertEqual(list(self.chains)[::-1], list(reversed(self.chains)))


class TestProductNodeMap(unittest.TestCase):
def setUp(self):
Expand Down

0 comments on commit bf3f703

Please sign in to comment.