From 5d53c5960770b36992030f80e8a3ac31c3e44791 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Sat, 2 Mar 2024 20:42:37 +0000 Subject: [PATCH 1/2] Optimise data transfer in `TopologicalSorter` This optimises the creation of the `list` objects returned from `TopologicalSorter.get_ready` (by avoiding a temporary `Vec`) and allows `done` to accept a single index, avoiding the need for the user to allocate many small lists in several real-world uses. --- ...toposort-done-single-5d5db2c654e5b498.yaml | 7 + rustworkx/rustworkx.pyi | 2 +- src/toposort.rs | 138 ++++++++++-------- tests/digraph/test_toposort.py | 22 +++ 4 files changed, 107 insertions(+), 62 deletions(-) create mode 100644 releasenotes/notes/toposort-done-single-5d5db2c654e5b498.yaml diff --git a/releasenotes/notes/toposort-done-single-5d5db2c654e5b498.yaml b/releasenotes/notes/toposort-done-single-5d5db2c654e5b498.yaml new file mode 100644 index 000000000..bd9107a70 --- /dev/null +++ b/releasenotes/notes/toposort-done-single-5d5db2c654e5b498.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + :meth:`.TopologicalSorter.done` now accepts single integers, in addition to lists of integers. + This can be a sizeable performance improvement for algorithms that iterate through nodes, and + only conditionally mark them as `done`; there is no longer a need to allocate a temporary Python + array. diff --git a/rustworkx/rustworkx.pyi b/rustworkx/rustworkx.pyi index 6cb67efd9..16cdbab21 100644 --- a/rustworkx/rustworkx.pyi +++ b/rustworkx/rustworkx.pyi @@ -295,7 +295,7 @@ class TopologicalSorter: ) -> None: ... def is_active(self) -> bool: ... def get_ready(self) -> list[int]: ... - def done(self, nodes: Sequence[int]) -> None: ... + def done(self, nodes: int | Sequence[int]) -> None: ... # isomorpism diff --git a/src/toposort.rs b/src/toposort.rs index 4e9107273..2b8911372 100644 --- a/src/toposort.rs +++ b/src/toposort.rs @@ -17,6 +17,7 @@ use hashbrown::HashMap; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use pyo3::types::PyList; use pyo3::Python; use petgraph::stable_graph::NodeIndex; @@ -84,7 +85,8 @@ pub struct TopologicalSorter { node2state: HashMap, num_passed_out: usize, num_finished: usize, - reverse: bool, + in_dir: petgraph::Direction, + out_dir: petgraph::Direction, } #[pymethods] @@ -105,7 +107,7 @@ impl TopologicalSorter { } } - let (in_dir, _) = traversal_directions(reverse); + let (in_dir, out_dir) = traversal_directions(reverse); let mut predecessor_count = HashMap::new(); let ready_nodes = if let Some(initial) = initial { let dag = &dag.borrow(py); @@ -145,7 +147,8 @@ impl TopologicalSorter { node2state: HashMap::new(), num_passed_out: 0, num_finished: 0, - reverse, + in_dir, + out_dir, }) } @@ -167,16 +170,15 @@ impl TopologicalSorter { /// /// :returns: A list of node indices of all the ready nodes. /// :rtype: List - fn get_ready(&mut self) -> Vec { - let mut out = Vec::with_capacity(self.ready_nodes.len()); - for nx in &self.ready_nodes { - out.push(nx.index()); - self.node2state.insert(*nx, NodeState::Ready); - } - - self.ready_nodes.clear(); - self.num_passed_out += out.len(); - out + fn get_ready<'py>(&mut self, py: Python<'py>) -> Bound<'py, PyList> { + self.num_passed_out += self.ready_nodes.len(); + PyList::new_bound( + py, + self.ready_nodes.drain(..).map(|nx| { + self.node2state.insert(nx, NodeState::Ready); + nx.index() + }), + ) } /// Marks a set of nodes returned by "get_ready" as processed. @@ -184,7 +186,8 @@ impl TopologicalSorter { /// This method unblocks any successor of each node in *nodes* for being returned /// in the future by a call to "get_ready". /// - /// :param list nodes: A list of node indices to mark as done. + /// :param nodes: A node index or list of node indices to mark as done. + /// :type nodes: int | list[int] /// /// :raises `ValueError`: If any node in *nodes* has already been marked as /// processed by a previous call to this method or node has not yet been returned @@ -193,60 +196,73 @@ impl TopologicalSorter { /// of the nodes given to :meth:`done`. This can only happen if the ``initial`` nodes had /// even a partial topological ordering amongst themselves, which is not a valid /// starting input. - fn done(&mut self, py: Python, nodes: Vec) -> PyResult<()> { - let dag = &self.dag.borrow(py); - let (in_dir, out_dir) = traversal_directions(self.reverse); - for node in nodes { - let node = NodeIndex::new(node); - match self.node2state.get_mut(&node) { - None => { - return Err(PyValueError::new_err(format!( - "node {} was not passed out (still not ready).", - node.index() - ))); - } - Some(NodeState::Done) => { - return Err(PyValueError::new_err(format!( - "node {} was already marked done.", - node.index() - ))); - } - Some(state) => { - debug_assert_eq!(*state, NodeState::Ready); - *state = NodeState::Done; - } + fn done(&mut self, nodes: &Bound) -> PyResult<()> { + if let Ok(node) = nodes.extract::() { + self.done_single(nodes.py(), NodeIndex::new(node)) + } else if let Ok(nodes) = nodes.downcast::() { + for node in nodes { + self.done_single(nodes.py(), NodeIndex::new(node.extract()?))? + } + Ok(()) + } else { + for node in nodes.iter()? { + self.done_single(nodes.py(), NodeIndex::new(node?.extract()?))? + } + Ok(()) + } + } +} + +impl TopologicalSorter { + #[inline(always)] + fn done_single(&mut self, py: Python, node: NodeIndex) -> PyResult<()> { + let dag = self.dag.borrow(py); + match self.node2state.get_mut(&node) { + None => { + return Err(PyValueError::new_err(format!( + "node {} was not passed out (still not ready).", + node.index() + ))); + } + Some(NodeState::Done) => { + return Err(PyValueError::new_err(format!( + "node {} was already marked done.", + node.index() + ))); + } + Some(state) => { + debug_assert_eq!(*state, NodeState::Ready); + *state = NodeState::Done; } + } - for succ in dag.graph.neighbors_directed(node, out_dir) { - match self.predecessor_count.entry(succ) { - Entry::Occupied(mut entry) => { - let in_degree = entry.get_mut(); - if *in_degree == 0 { - return Err(PyValueError::new_err( - "at least one initial node is reachable from another", - )); - } else if *in_degree == 1 { - self.ready_nodes.push(succ); - entry.remove_entry(); - } else { - *in_degree -= 1; - } + for succ in dag.graph.neighbors_directed(node, self.out_dir) { + match self.predecessor_count.entry(succ) { + Entry::Occupied(mut entry) => { + let in_degree = entry.get_mut(); + if *in_degree == 0 { + return Err(PyValueError::new_err( + "at least one initial node is reachable from another", + )); + } else if *in_degree == 1 { + self.ready_nodes.push(succ); + entry.remove_entry(); + } else { + *in_degree -= 1; } - Entry::Vacant(entry) => { - let in_degree = dag.graph.neighbors_directed(succ, in_dir).count() - 1; - - if in_degree == 0 { - self.ready_nodes.push(succ); - } else { - entry.insert(in_degree); - } + } + Entry::Vacant(entry) => { + let in_degree = dag.graph.neighbors_directed(succ, self.in_dir).count() - 1; + + if in_degree == 0 { + self.ready_nodes.push(succ); + } else { + entry.insert(in_degree); } } } - - self.num_finished += 1; } - + self.num_finished += 1; Ok(()) } } diff --git a/tests/digraph/test_toposort.py b/tests/digraph/test_toposort.py index ce499844d..b54515f74 100644 --- a/tests/digraph/test_toposort.py +++ b/tests/digraph/test_toposort.py @@ -45,6 +45,28 @@ def test_topo_sort(self): nodes = sorter.get_ready() self.assertEqual(nodes, []) + def test_topo_sort_single_indices(self): + sorter = rustworkx.TopologicalSorter(self.graph) + nodes = sorter.get_ready() + self.assertEqual(set(nodes), {0, 1}) + sorter.done(0) + sorter.done(1) + nodes = sorter.get_ready() + self.assertEqual(set(nodes), {2}) + sorter.done(2) + nodes = sorter.get_ready() + self.assertEqual(set(nodes), {3, 4}) + + # Try not calling 'done' on everything, and going a little out-of-order. + sorter.done(3) + self.assertEqual(set(sorter.get_ready()), {5}) + sorter.done(5) + self.assertEqual(set(sorter.get_ready()), set()) + self.assertTrue(sorter.is_active()) + sorter.done(4) + self.assertEqual(set(sorter.get_ready()), set()) + self.assertFalse(sorter.is_active()) + def test_topo_sort_do_not_emit_if_node_has_undone_preds(self): sorter = rustworkx.TopologicalSorter(self.graph) nodes = sorter.get_ready() From becd5bcd705ab5878c2612e9f5500b1f3c996e2c Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Fri, 5 Apr 2024 18:50:38 +0100 Subject: [PATCH 2/2] Allow `TopologicalSorter` without error checking For representative examples in Qiskit, this reduces the runtime of interacting with the topological sorter by around 15-20%. --- .../toposort-check-args-1378bab51e4172a3.yaml | 8 +++ rustworkx/rustworkx.pyi | 1 + src/toposort.rs | 64 +++++++++++-------- 3 files changed, 47 insertions(+), 26 deletions(-) create mode 100644 releasenotes/notes/toposort-check-args-1378bab51e4172a3.yaml diff --git a/releasenotes/notes/toposort-check-args-1378bab51e4172a3.yaml b/releasenotes/notes/toposort-check-args-1378bab51e4172a3.yaml new file mode 100644 index 000000000..e491bc568 --- /dev/null +++ b/releasenotes/notes/toposort-check-args-1378bab51e4172a3.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + :class:`.TopologicalSorter` now has a ``check_args`` keyword argument, which can be set to + ``False`` to disable the runtime detection of invalid arguments to + :meth:`~.TopologicalSorter.done`. This provides a memory and runtime improvement to the online + sorter, at the cost that the results will be undefined and likely meaningless if invalid values + are given. diff --git a/rustworkx/rustworkx.pyi b/rustworkx/rustworkx.pyi index 16cdbab21..32f2e26d3 100644 --- a/rustworkx/rustworkx.pyi +++ b/rustworkx/rustworkx.pyi @@ -292,6 +292,7 @@ class TopologicalSorter: *, reverse: bool = ..., initial: Iterable[int] | None = ..., + check_args: bool = ..., ) -> None: ... def is_active(self) -> bool: ... def get_ready(self) -> list[int]: ... diff --git a/src/toposort.rs b/src/toposort.rs index 2b8911372..cc518d814 100644 --- a/src/toposort.rs +++ b/src/toposort.rs @@ -77,12 +77,17 @@ enum NodeState { /// It is a :exc:`ValueError` to give an `initial` set where the nodes have even a partial /// topological order between themselves, though this might not appear until some call /// to :meth:`done`. +/// :param bool check_args: If ``True`` (the default), then all arguments to :meth:`done` are +/// checked for validity, and a :exc:`ValueError` is raised if any were not ready, already +/// done, or not indices of the circuit. If ``False``, the tracking for this is disabled, +/// which can provide a meaningful performance and memory improvement, but the results will +/// be undefined if invalid values are given. #[pyclass(module = "rustworkx")] pub struct TopologicalSorter { dag: Py, ready_nodes: Vec, predecessor_count: HashMap, - node2state: HashMap, + node2state: Option>, num_passed_out: usize, num_finished: usize, in_dir: petgraph::Direction, @@ -92,13 +97,14 @@ pub struct TopologicalSorter { #[pymethods] impl TopologicalSorter { #[new] - #[pyo3(signature=(dag, /, check_cycle=true, *, reverse=false, initial=None))] + #[pyo3(signature=(dag, /, check_cycle=true, *, reverse=false, initial=None, check_args=true))] fn new( py: Python, dag: Py, check_cycle: bool, reverse: bool, initial: Option<&Bound>, + check_args: bool, ) -> PyResult { { let dag = &dag.borrow(py); @@ -144,7 +150,7 @@ impl TopologicalSorter { dag, ready_nodes, predecessor_count, - node2state: HashMap::new(), + node2state: check_args.then(HashMap::new), num_passed_out: 0, num_finished: 0, in_dir, @@ -172,13 +178,17 @@ impl TopologicalSorter { /// :rtype: List fn get_ready<'py>(&mut self, py: Python<'py>) -> Bound<'py, PyList> { self.num_passed_out += self.ready_nodes.len(); - PyList::new_bound( - py, - self.ready_nodes.drain(..).map(|nx| { - self.node2state.insert(nx, NodeState::Ready); - nx.index() - }), - ) + if let Some(node2state) = self.node2state.as_mut() { + PyList::new_bound( + py, + self.ready_nodes.drain(..).map(|nx| { + node2state.insert(nx, NodeState::Ready); + nx.index() + }), + ) + } else { + PyList::new_bound(py, self.ready_nodes.drain(..).map(|nx| nx.index())) + } } /// Marks a set of nodes returned by "get_ready" as processed. @@ -217,22 +227,24 @@ impl TopologicalSorter { #[inline(always)] fn done_single(&mut self, py: Python, node: NodeIndex) -> PyResult<()> { let dag = self.dag.borrow(py); - match self.node2state.get_mut(&node) { - None => { - return Err(PyValueError::new_err(format!( - "node {} was not passed out (still not ready).", - node.index() - ))); - } - Some(NodeState::Done) => { - return Err(PyValueError::new_err(format!( - "node {} was already marked done.", - node.index() - ))); - } - Some(state) => { - debug_assert_eq!(*state, NodeState::Ready); - *state = NodeState::Done; + if let Some(node2state) = self.node2state.as_mut() { + match node2state.get_mut(&node) { + None => { + return Err(PyValueError::new_err(format!( + "node {} was not passed out (still not ready).", + node.index() + ))); + } + Some(NodeState::Done) => { + return Err(PyValueError::new_err(format!( + "node {} was already marked done.", + node.index() + ))); + } + Some(state) => { + debug_assert_eq!(*state, NodeState::Ready); + *state = NodeState::Done; + } } }