Skip to content

Commit

Permalink
feat: Efficient LCA algorithm for the hierarchy (#138)
Browse files Browse the repository at this point in the history
Implement the lowest-common-ancestor algorithm on hierarchies, using the
"binary lifting" algorithm.
This is needed for the mermaid rendering, but it's also a useful
algorithm to have around. (There's already been talk of using it in
hugr).

Precomputation time: `O(n log d)`
`lca(a,b)` query time: `O(log d)`
`is_ancestor(a,b)` query time: `O(1)`
Memory: `O(n log d)`

where n=#nodes and d=max depth.

There's also a constant time algorithm based on RMQ, but it doesn't tend
to be much faster than this one. This impl is much simpler.
  • Loading branch information
aborgna-q authored Jul 2, 2024
1 parent 88b7af9 commit a5be34e
Show file tree
Hide file tree
Showing 3 changed files with 290 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ rand = { version = "0.8.5", optional = true }
petgraph = { version = "0.6.3", optional = true }
delegate = "0.12.0"
context-iterators = "0.2.0"
itertools = "0.13.0"

[features]
pyo3 = ["dep:pyo3"]
Expand Down
2 changes: 2 additions & 0 deletions src/algorithms.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
mod convex;
mod dominators;
mod lca;
mod post_order;
mod toposort;

pub use convex::{ConvexChecker, TopoConvexChecker};
pub use dominators::{dominators, dominators_filtered, DominatorTree};
pub use lca::{lca, LCA};
pub use post_order::{postorder, postorder_filtered, PostOrder};
pub use toposort::{toposort, toposort_filtered, TopoSort};
287 changes: 287 additions & 0 deletions src/algorithms/lca.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
//! Lowest common ancestor algorithm on hierarchy forests.
use crate::{Hierarchy, NodeIndex, PortView, UnmanagedDenseMap};

/// Constructs a data structure that allows efficient queries of the lowest
/// common ancestor of two nodes in a hierarchy forest.
///
/// Given two nodes `a` and `b`, the lowest common ancestor is the node that is
/// an ancestor of both `a` and `b` and has the greatest depth.
///
/// This is a convenience wrapper that forwards both arguments to
/// [`LCA::new`] and returns the resulting structure.
///
/// This algorithm is based on binary lifting. The precomputation takes
/// `O(n log n)` time, where `n` is the number of nodes in the hierarchy.
/// Each [`LCA::lca`] query takes `O(log n)` time.
pub fn lca(graph: impl PortView, hierarchy: &Hierarchy) -> LCA {
LCA::new(graph, hierarchy)
}

/// A precomputed data structure for lowest common ancestor queries between
/// nodes in a hierarchy.
///
/// The tables are filled by [`LCA::new`] during a depth-first traversal of
/// every tree in the hierarchy. `first_visit` and `last_visit` draw their
/// values from one shared, monotonically increasing counter, so the interval
/// `[first_visit, last_visit]` of a node encloses the intervals of all of its
/// descendants.
#[derive(Debug, Default, Clone)]
pub struct LCA {
/// For each node, stores the timestamp of the first visit in a depth-first
/// traversal of the hierarchy (taken when the node is entered).
first_visit: UnmanagedDenseMap<NodeIndex, usize>,
/// For each node, stores the timestamp of the last visit in a depth-first
/// traversal of the hierarchy (taken once all its descendants are done).
last_visit: UnmanagedDenseMap<NodeIndex, usize>,
/// For each node, stores the 1st, 2nd, 4th, 8th, ... ancestor.
///
/// Entry `i` is the `2^i`-th ancestor of the node, and the list stops at
/// the last such ancestor that exists. Roots are never assigned a list, so
/// they keep the map's default value.
climb_nodes: UnmanagedDenseMap<NodeIndex, Vec<NodeIndex>>,
}

impl LCA {
/// Builds the lowest common ancestor tables for every tree of `hierarchy`.
///
/// Each node of `graph` that `hierarchy` reports as a root seeds one
/// iterative depth-first traversal. Entering a node records its first-visit
/// timestamp and its list of `2^i`-th ancestors; leaving it records its
/// last-visit timestamp.
///
/// Complexity: O(n log n)
pub fn new(graph: impl PortView, hierarchy: &Hierarchy) -> Self {
    let node_bound = graph.node_capacity();
    let mut table = LCA {
        first_visit: UnmanagedDenseMap::with_capacity(node_bound),
        last_visit: UnmanagedDenseMap::with_capacity(node_bound),
        climb_nodes: UnmanagedDenseMap::with_capacity(node_bound),
    };

    // One clock is shared by entry and exit events, so the intervals of
    // nested subtrees are nested too.
    let mut clock = 0;
    // Explicit DFS stack, reused across trees.
    let mut pending = Vec::new();
    for candidate in graph.nodes_iter() {
        // Every traversal drains the stack before the next one starts.
        debug_assert!(pending.is_empty());

        // Only roots seed a traversal; for any other node the stack stays
        // empty and the loop below does nothing.
        if hierarchy.is_root(candidate) {
            pending.push(DFSState::Visit {
                node: candidate,
                parent: None,
            });
        }

        while let Some(step) = pending.pop() {
            let (node, parent) = match step {
                DFSState::Finish { node } => {
                    // All descendants are done: close the node's interval.
                    table.last_visit[node] = clock;
                    clock += 1;
                    continue;
                }
                DFSState::Visit { node, parent } => (node, parent),
            };

            table.first_visit[node] = clock;
            clock += 1;

            // Collect the 1st, 2nd, 4th, 8th, ... ancestors of `node`.
            // The 2^(i+1)-th ancestor of `node` is the 2^i-th ancestor of
            // its own 2^i-th ancestor, whose list is already complete
            // because ancestors are entered before their descendants.
            let mut ancestors: Vec<NodeIndex> = Vec::new();
            let mut cursor = parent;
            while let Some(ancestor) = cursor {
                // `ancestors.len()` is the exponent `i` of the entry about
                // to be pushed.
                cursor = table.climb_nodes[ancestor].get(ancestors.len()).copied();
                ancestors.push(ancestor);
            }
            if !ancestors.is_empty() {
                table.climb_nodes[node] = ancestors;
            }

            // The exit marker goes below the children on the stack, so it
            // is popped only after the whole subtree has been processed.
            pending.push(DFSState::Finish { node });
            for child in hierarchy.children(node) {
                pending.push(DFSState::Visit {
                    node: child,
                    parent: Some(node),
                });
            }
        }
    }

    table
}

/// Tells whether `a` is an ancestor of `b` in the hierarchy.
///
/// A node counts as its own ancestor, so this returns `true` when `a` and
/// `b` are the same node.
///
/// The check compares the depth-first visit interval of `a` against the
/// one of `b`: `a` is an ancestor exactly when its interval encloses `b`'s.
///
/// Complexity: O(1)
pub fn is_ancestor(&self, a: NodeIndex, b: NodeIndex) -> bool {
    let entered_no_later = self.first_visit[a] <= self.first_visit[b];
    let left_no_earlier = self.last_visit[b] <= self.last_visit[a];
    entered_no_later && left_no_earlier
}

/// Finds the root of the tree that contains `node`.
///
/// Repeatedly jumps to the farthest recorded ancestor of the current node
/// until a node with no recorded ancestors is reached.
///
/// Complexity: O(log n)
pub fn root(&self, node: NodeIndex) -> NodeIndex {
    let mut current = node;
    loop {
        match self.climb_nodes[current].last() {
            // The last entry is the highest power-of-two ancestor.
            Some(&farthest) => current = farthest,
            // No ancestors recorded: `current` is the root.
            None => return current,
        }
    }
}

/// Given two nodes, returns the lowest common ancestor in the hierarchy.
///
/// If one of the nodes is an ancestor of the other (or both are the same
/// node), that node is returned.
///
/// If the nodes are not in the same tree, returns `None`.
///
/// Complexity: O(log n)
pub fn lca(&self, a: NodeIndex, b: NodeIndex) -> Option<NodeIndex> {
    if self.is_ancestor(a, b) {
        return Some(a);
    }
    if self.is_ancestor(b, a) {
        return Some(b);
    }
    // The nodes are in different trees.
    if self.root(a) != self.root(b) {
        return None;
    }

    // Search the ancestors of `a` to find the lowest common ancestor with `b`.
    // We start by finding an ancestor of `a` that is not an ancestor of `b`,
    // but has an ancestor in its climb nodes. We call this node `u`.
    //
    // Once we find this, we can do binary search on the nodes to find the LCA.

    // Invariant: `u` is an ancestor of `a` (or `a`), but not an ancestor of `b`.
    //
    // Find a `u` where the last ancestor is an ancestor of `b`.
    // The chain starts at `a` and keeps jumping to the farthest recorded
    // ancestor; `take_while` stops at the first node that is an ancestor of
    // `b`, so `last()` yields the node just below that jump.
    let mut u = itertools::iterate(Some(a), |u| {
        u.and_then(|u| self.climb_nodes[u].last().copied())
    })
    .take_while(|u| u.is_some_and(|u| !self.is_ancestor(u, b)))
    .last()??;

    // Invariant: The 2^i ancestor of `u` is an ancestor of `b`.
    //
    // Do a binary search on the ancestors to find the LCA.
    // On each iteration, we start by knowing that the LCA is in the `2^i` ancestors of `u`.
    // We then decrement `i` and update `u` if needed.
    let mut i = self.climb_nodes[u].len() - 1;
    while i > 0 {
        i -= 1;
        // The updated 2^i ancestor of `u`. This is the middle point of the binary search.
        let v = self.climb_nodes[u][i];
        if !self.is_ancestor(v, b) {
            // The 2^i ancestor of `u` is not an ancestor of `b` so the LCA must be between
            // the 2^i and 2^{i+1} ancestor of `u`. Hence update `u` to the 2^i ancestor.
            u = v;
            // Ensure `i` is within bounds of the new `u`'s table.
            //
            // This must clamp downwards (`min`), never upwards: the 2^i
            // ancestor of the new `u` is the 2^{i+1} ancestor of the old
            // one, which is already known to be an ancestor of `b`, so the
            // search can continue from `i`. Raising `i` back to the top of
            // the table would restart the search after every jump and cost
            // O(log^2 n) per query.
            i = i.min(self.climb_nodes[u].len() - 1);
        }
    }

    // `u` is the highest ancestor of `a` that is not an ancestor of `b`,
    // so its parent is the lowest common ancestor.
    Some(self.climb_nodes[u][0])
}
}

/// States for the depth-first search ran during the precomputation of the
/// LCA data structure.
///
/// The traversal keeps these on an explicit stack instead of recursing: a
/// `Visit` entry is pushed for each node to enter, and a matching `Finish`
/// entry is pushed beneath the node's children so that it is popped once
/// the whole subtree has been handled.
#[derive(Debug, Clone, Copy, Hash)]
enum DFSState {
/// Visit a node for the first time.
///
/// `parent` is the node's parent in the hierarchy, or `None` when the node
/// is the root that started the traversal.
Visit {
node: NodeIndex,
parent: Option<NodeIndex>,
},
/// Return from visiting a node.
Finish { node: NodeIndex },
}

#[cfg(test)]
mod test {
    use crate::{PortGraph, PortMut};

    use super::*;
    use rstest::{fixture, rstest};

    /// A small forest used by the tests: one deep tree rooted at node 0, one
    /// shallow tree rooted at node 11, and two isolated nodes (14 and 15).
    #[fixture]
    fn test_hierarchy() -> (PortGraph, Hierarchy) {
        let mut graph = PortGraph::with_capacity(16, 0);
        for _ in 0..16 {
            graph.add_node(0, 0);
        }

        let mut hier = Hierarchy::with_capacity(16);

        let edges = [
            // 0 -> {
            //     1 -> {
            //         3 -> 4 -> 5 -> 6,
            //         7,
            //     },
            //     2 -> 8 -> {9, 10},
            // }
            (0, 1),
            (0, 2),
            (1, 3),
            (3, 4),
            (4, 5),
            (5, 6),
            (1, 7),
            (2, 8),
            (8, 9),
            (8, 10),
            // 11 -> {12, 13}
            (11, 12),
            (11, 13),
            // 14 and 15 are independent nodes.
        ];
        for (parent, node) in edges {
            hier.push_child(NodeIndex::new(node), NodeIndex::new(parent))
                .unwrap();
        }

        (graph, hier)
    }

    #[rstest]
    fn lca(test_hierarchy: (PortGraph, Hierarchy)) {
        let lca = LCA::new(&test_hierarchy.0, &test_hierarchy.1);

        // Little helper to convert node indexes.
        let n = NodeIndex::new;

        assert_eq!(lca.lca(n(5), n(10)), Some(n(0)));
        assert_eq!(lca.lca(n(10), n(5)), Some(n(0)));
        assert_eq!(lca.lca(n(6), n(10)), Some(n(0)));
        assert_eq!(lca.lca(n(10), n(6)), Some(n(0)));

        // Test the roots
        for node in 0..=10 {
            assert_eq!(lca.root(n(node)), n(0));
        }
        for node in 11..=13 {
            assert_eq!(lca.root(n(node)), n(11));
        }
        for node in 14..=15 {
            assert_eq!(lca.root(n(node)), n(node));
        }

        // Test the ancestor relation, including the reflexive case and
        // nodes in different branches or different trees.
        assert!(lca.is_ancestor(n(0), n(6)));
        assert!(lca.is_ancestor(n(1), n(7)));
        assert!(lca.is_ancestor(n(8), n(10)));
        assert!(lca.is_ancestor(n(14), n(14)));
        assert!(!lca.is_ancestor(n(6), n(0)));
        assert!(!lca.is_ancestor(n(1), n(8)));
        assert!(!lca.is_ancestor(n(3), n(7)));
        assert!(!lca.is_ancestor(n(0), n(11)));
        assert!(!lca.is_ancestor(n(11), n(0)));

        // Test the lowest common ancestors
        assert_eq!(lca.lca(n(0), n(0)), Some(n(0)));
        assert_eq!(lca.lca(n(0), n(1)), Some(n(0)));
        assert_eq!(lca.lca(n(0), n(9)), Some(n(0)));
        assert_eq!(lca.lca(n(1), n(0)), Some(n(0)));
        assert_eq!(lca.lca(n(9), n(0)), Some(n(0)));
        assert_eq!(lca.lca(n(0), n(11)), None);
        assert_eq!(lca.lca(n(0), n(12)), None);
        assert_eq!(lca.lca(n(0), n(14)), None);
        assert_eq!(lca.lca(n(11), n(0)), None);
        assert_eq!(lca.lca(n(12), n(0)), None);
        assert_eq!(lca.lca(n(14), n(0)), None);

        assert_eq!(lca.lca(n(14), n(14)), Some(n(14)));
        assert_eq!(lca.lca(n(14), n(15)), None);

        assert_eq!(lca.lca(n(1), n(2)), Some(n(0)));
        assert_eq!(lca.lca(n(7), n(8)), Some(n(0)));
        assert_eq!(lca.lca(n(7), n(10)), Some(n(0)));
        assert_eq!(lca.lca(n(10), n(7)), Some(n(0)));
        assert_eq!(lca.lca(n(5), n(9)), Some(n(0)));
        assert_eq!(lca.lca(n(9), n(5)), Some(n(0)));
        assert_eq!(lca.lca(n(6), n(9)), Some(n(0)));
        assert_eq!(lca.lca(n(9), n(6)), Some(n(0)));

        assert_eq!(lca.lca(n(2), n(10)), Some(n(2)));
        assert_eq!(lca.lca(n(10), n(2)), Some(n(2)));

        // The answer is an interior node that is neither the root nor one
        // of the queried nodes. These are the cases that make the binary
        // search climb through intermediate ancestors.
        assert_eq!(lca.lca(n(6), n(7)), Some(n(1)));
        assert_eq!(lca.lca(n(7), n(6)), Some(n(1)));
        assert_eq!(lca.lca(n(4), n(7)), Some(n(1)));
        assert_eq!(lca.lca(n(7), n(4)), Some(n(1)));
        assert_eq!(lca.lca(n(9), n(10)), Some(n(8)));
        assert_eq!(lca.lca(n(10), n(9)), Some(n(8)));
        assert_eq!(lca.lca(n(12), n(13)), Some(n(11)));
        assert_eq!(lca.lca(n(13), n(12)), Some(n(11)));

        // One node is a strict ancestor of the other, deep in the tree.
        assert_eq!(lca.lca(n(3), n(6)), Some(n(3)));
        assert_eq!(lca.lca(n(6), n(3)), Some(n(3)));

        assert_eq!(lca.lca(n(9), n(12)), None);
    }
}

0 comments on commit a5be34e

Please sign in to comment.