revset_graph: place new heads as close to fork point as possible

The idea is simple. New heads are ignored until the node dependency resolution gets stuck. Then, only the heads that will unblock the visit are queued. Closes jj-vcs#242
yuja · Jul 23, 2023 · 564dc27 · 564dc27
1 parent 0fc9f65
commit 564dc27
Show file tree

Hide file tree

Showing 4 changed files with 149 additions and 31 deletions.
diff --git a/lib/src/revset_graph.rs b/lib/src/revset_graph.rs
@@ -15,6 +15,7 @@
 #![allow(missing_docs)]
 
 use std::collections::{HashMap, HashSet};
+use std::mem;
 
 use crate::backend::CommitId;
 
@@ -107,6 +108,10 @@ impl Iterator for ReverseRevsetGraphIterator {
 /// branches will be visited. At merge point, the second (or the last) ancestor
 /// branch will be visited first. This is practically [the same as Git][Git].
 ///
+/// The branch containing the first commit in the input iterator will be emitted
+/// first. It is often the working-copy ancestor branch. The other head branches
+/// won't be enqueued eagerly, and will be emitted as late as possible.
+///
 /// [Git]: https://github.blog/2022-08-30-gits-database-internals-ii-commit-history-queries/#topological-sorting
 #[derive(Clone, Debug)]
 pub struct TopoGroupedRevsetGraphIterator<I> {
@@ -117,6 +122,8 @@ pub struct TopoGroupedRevsetGraphIterator<I> {
     emittable_ids: Vec<CommitId>,
     /// List of new head nodes found while processing unpopulated nodes.
     new_head_ids: Vec<CommitId>,
+    /// Set of nodes which may be ancestors of `new_head_ids`.
+    blocked_ids: HashSet<CommitId>,
 }
 
 #[derive(Clone, Debug, Default)]
@@ -139,6 +146,7 @@ where
             nodes: HashMap::new(),
             emittable_ids: Vec::new(),
             new_head_ids: Vec::new(),
+            blocked_ids: HashSet::new(),
         }
     }
 
@@ -169,6 +177,29 @@ where
         Some(())
     }
 
+    fn flush_new_heads(&mut self) {
+        assert!(!self.new_head_ids.is_empty());
+        if self.blocked_ids.is_empty() || self.new_head_ids.len() <= 1 {
+            // Orphaned (or no choice), pick the first head
+            self.emittable_ids.push(self.new_head_ids.remove(0));
+        } else {
+            // Queue heads which are reachable from the emittable sub graph
+            let mut to_visit: Vec<&CommitId> = self.blocked_ids.iter().collect();
+            let mut reached: HashSet<&CommitId> = self.blocked_ids.iter().collect();
+            while let Some(id) = to_visit.pop() {
+                let node = self.nodes.get(id).unwrap();
+                to_visit.extend(node.child_ids.iter().filter(|id| reached.insert(id)));
+            }
+            // TODO: Use drain_filter() if gets stabilized
+            let (new_reachable, new_unreachable) = mem::take(&mut self.new_head_ids)
+                .into_iter()
+                .partition::<Vec<_>, _>(|id| reached.contains(id));
+            assert!(!new_reachable.is_empty(), "blocking head should exist");
+            self.emittable_ids.extend(new_reachable.into_iter().rev());
+            self.new_head_ids = new_unreachable;
+        }
+    }
+
     #[must_use]
     fn next_node(&mut self) -> Option<(CommitId, Vec<RevsetGraphEdge>)> {
         // Based on Kahn's algorithm
@@ -177,7 +208,8 @@ where
                 let current_node = self.nodes.get_mut(current_id).unwrap();
                 if !current_node.child_ids.is_empty() {
                     // New children populated after emitting the other
-                    self.emittable_ids.pop().unwrap();
+                    let current_id = self.emittable_ids.pop().unwrap();
+                    self.blocked_ids.insert(current_id);
                     continue;
                 }
                 let Some(edges) = current_node.edges.take() else {
@@ -192,12 +224,16 @@ where
                     let parent_node = self.nodes.get_mut(parent_id).unwrap();
                     parent_node.child_ids.remove(&current_id);
                     if parent_node.child_ids.is_empty() {
-                        self.emittable_ids.push(parent_id.clone());
+                        let reusable_id = self.blocked_ids.take(parent_id);
+                        let parent_id = reusable_id.unwrap_or_else(|| parent_id.clone());
+                        self.emittable_ids.push(parent_id);
+                    } else {
+                        self.blocked_ids.insert(parent_id.clone());
                     }
                 }
                 return Some((current_id, edges));
             } else if !self.new_head_ids.is_empty() {
-                self.emittable_ids.extend(self.new_head_ids.drain(..).rev());
+                self.flush_new_heads();
             } else {
                 // Populate the first or orphan head
                 self.populate_one()?;
@@ -369,19 +405,29 @@ mod tests {
         A
 
         "###);
-        // TODO
+        // D-A is found earlier than B-A, but B is emitted first because it belongs to
+        // the emitting branch.
         insta::assert_snapshot!(format_graph(topo_grouped(graph.iter().cloned())), @r###"
         E  direct(B)
         │
+        │ C  direct(B)
+        ├─╯
+        B  direct(A)
+        │
         │ D  direct(A)
-        │ │
-        │ │ C  direct(B)
-        ├───╯
-        B │  direct(A)
         ├─╯
         A
 
         "###);
+
+        // E can be lazy, then D and C will be queued.
+        let mut iter = topo_grouped(graph.iter().cloned().peekable());
+        assert_eq!(iter.next().unwrap().0, id('E'));
+        assert_eq!(iter.input_iter.peek().unwrap().0, id('D'));
+        assert_eq!(iter.next().unwrap().0, id('C'));
+        assert_eq!(iter.input_iter.peek().unwrap().0, id('B'));
+        assert_eq!(iter.next().unwrap().0, id('B'));
+        assert_eq!(iter.input_iter.peek().unwrap().0, id('A'));
     }
 
     #[test]
@@ -466,26 +512,98 @@ mod tests {
         A
 
         "###);
-        // TODO
         insta::assert_snapshot!(format_graph(topo_grouped(graph.iter().cloned())), @r###"
         I  direct(E)
         │
+        │ F  direct(E)
+        ├─╯
+        E  direct(C)
+        │
+        │ H  direct(C)
+        ├─╯
+        │ D  direct(C)
+        ├─╯
+        C  direct(A)
+        │
+        │ G  direct(A)
+        ├─╯
+        │ B  direct(A)
+        ├─╯
+        A
+
+        "###);
+
+        // I can be lazy, then H, G, and F will be queued.
+        let mut iter = topo_grouped(graph.iter().cloned().peekable());
+        assert_eq!(iter.next().unwrap().0, id('I'));
+        assert_eq!(iter.input_iter.peek().unwrap().0, id('H'));
+        assert_eq!(iter.next().unwrap().0, id('F'));
+        assert_eq!(iter.input_iter.peek().unwrap().0, id('E'));
+    }
+
+    #[test]
+    fn test_topo_grouped_fork_parallel() {
+        let graph = vec![
+            // Pull all sub graphs in reverse order:
+            (id('I'), vec![direct('A')]),
+            (id('H'), vec![direct('C')]),
+            (id('G'), vec![direct('E')]),
+            // Orphan sub graph G,F-E:
+            (id('F'), vec![direct('E')]),
+            (id('E'), vec![missing('Y')]),
+            // Orphan sub graph H,D-C:
+            (id('D'), vec![direct('C')]),
+            (id('C'), vec![missing('X')]),
+            // Orphan sub graph I,B-A:
+            (id('B'), vec![direct('A')]),
+            (id('A'), vec![]),
+        ];
+        insta::assert_snapshot!(format_graph(graph.iter().cloned()), @r###"
+        I  direct(A)
+        │
         │ H  direct(C)
         │ │
-        │ │ G  direct(A)
+        │ │ G  direct(E)
         │ │ │
         │ │ │ F  direct(E)
-        ├─────╯
-        E │ │  direct(C)
-        ├─╯ │
-        │ D │  direct(C)
-        ├─╯ │
-        C   │  direct(A)
-        ├───╯
+        │ │ ├─╯
+        │ │ E  missing(Y)
+        │ │ │
+        │ │ ~
+        │ │
+        │ │ D  direct(C)
+        │ ├─╯
+        │ C  missing(X)
+        │ │
+        │ ~
+        │
+        │ B  direct(A)
+        ├─╯
+        A
+
+        "###);
+        insta::assert_snapshot!(format_graph(topo_grouped(graph.iter().cloned())), @r###"
+        I  direct(A)
+        │
         │ B  direct(A)
         ├─╯
         A
 
+        H  direct(C)
+        │
+        │ D  direct(C)
+        ├─╯
+        C  missing(X)
+        │
+        ~
+
+        G  direct(E)
+        │
+        │ F  direct(E)
+        ├─╯
+        E  missing(Y)
+        │
+        ~
         "###);
     }
 

diff --git a/tests/test_abandon_command.rs b/tests/test_abandon_command.rs
@@ -84,11 +84,11 @@ fn test_rebase_branch_with_merge() {
     "###);
     insta::assert_snapshot!(get_log_output(&test_env, &repo_path), @r###"
     @
+    │ ◉  b
+    ├─╯
+    ◉  a e??
     │ ◉  d e??
     │ ◉  c
-    │ │ ◉  b
-    ├───╯
-    ◉ │  a e??
     ├─╯
     ◉
     "###);

diff --git a/tests/test_duplicate_command.rs b/tests/test_duplicate_command.rs
@@ -195,16 +195,16 @@ fn test_duplicate_many() {
     ◉    9bd4389f5d47   e
     ├─╮
     ◉ │  d94e4c55a68b   d
-    │ │ ◉  c6f7f8c4512e   a
-    │ │ │ @  921dde6e55c0   e
-    │ ╭───┤
-    │ ◉ │ │  1394f625cbbd   b
-    │ │ │ ◉  ebd06dba20ec   d
-    ├─────╯
-    ◉ │ │  c0cb3a0b73e7   c
-    ├─╯ │
-    ◉   │  2443ea76b0b1   a
+    │ │ @  921dde6e55c0   e
+    │ ╭─┤
+    │ ◉ │  1394f625cbbd   b
+    │ │ ◉  ebd06dba20ec   d
     ├───╯
+    ◉ │  c0cb3a0b73e7   c
+    ├─╯
+    ◉  2443ea76b0b1   a
+    │ ◉  c6f7f8c4512e   a
+    ├─╯
     ◉  000000000000
     "###);
 

diff --git a/tests/test_templater.rs b/tests/test_templater.rs
@@ -67,11 +67,11 @@ fn test_templater_branches() {
     let output = test_env.jj_cmd_success(&workspace_root, &["log", "-T", template]);
     insta::assert_snapshot!(output, @r###"
     ◉  b1bb3766d584 branch3??
+    │ ◉  21c33875443e branch1*
+    ├─╯
     │ @  a5b4d15489cc branch2* new-branch
     │ ◉  8476341eb395 branch2@origin
     ├─╯
-    │ ◉  21c33875443e branch1*
-    ├─╯
     ◉  000000000000
     "###);
 }