Skip to content

Commit

Permalink
revset_graph: place new heads as close to fork point as possible
Browse files Browse the repository at this point in the history
The idea is simple. New heads are ignored until the node dependency resolution
stuck. Then, only heads that will unblock the visit will be queued.

Closes jj-vcs#242
  • Loading branch information
yuja committed Jul 23, 2023
1 parent 0fc9f65 commit 564dc27
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 31 deletions.
152 changes: 135 additions & 17 deletions lib/src/revset_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#![allow(missing_docs)]

use std::collections::{HashMap, HashSet};
use std::mem;

use crate::backend::CommitId;

Expand Down Expand Up @@ -107,6 +108,10 @@ impl Iterator for ReverseRevsetGraphIterator {
/// branches will be visited. At merge point, the second (or the last) ancestor
/// branch will be visited first. This is practically [the same as Git][Git].
///
/// The branch containing the first commit in the input iterator will be emitted
/// first. It is often the working-copy ancestor branch. The other head branches
/// won't be enqueued eagerly, and will be emitted as late as possible.
///
/// [Git]: https://github.blog/2022-08-30-gits-database-internals-ii-commit-history-queries/#topological-sorting
#[derive(Clone, Debug)]
pub struct TopoGroupedRevsetGraphIterator<I> {
Expand All @@ -117,6 +122,8 @@ pub struct TopoGroupedRevsetGraphIterator<I> {
emittable_ids: Vec<CommitId>,
/// List of new head nodes found while processing unpopulated nodes.
new_head_ids: Vec<CommitId>,
/// Set of nodes which may be ancestors of `new_head_ids`.
blocked_ids: HashSet<CommitId>,
}

#[derive(Clone, Debug, Default)]
Expand All @@ -139,6 +146,7 @@ where
nodes: HashMap::new(),
emittable_ids: Vec::new(),
new_head_ids: Vec::new(),
blocked_ids: HashSet::new(),
}
}

Expand Down Expand Up @@ -169,6 +177,29 @@ where
Some(())
}

fn flush_new_heads(&mut self) {
assert!(!self.new_head_ids.is_empty());
if self.blocked_ids.is_empty() || self.new_head_ids.len() <= 1 {
// Orphaned (or no choice), pick the first head
self.emittable_ids.push(self.new_head_ids.remove(0));
} else {
// Queue heads which are reachable from the emittable sub graph
let mut to_visit: Vec<&CommitId> = self.blocked_ids.iter().collect();
let mut reached: HashSet<&CommitId> = self.blocked_ids.iter().collect();
while let Some(id) = to_visit.pop() {
let node = self.nodes.get(id).unwrap();
to_visit.extend(node.child_ids.iter().filter(|id| reached.insert(id)));
}
// TODO: Use drain_filter() if gets stabilized
let (new_reachable, new_unreachable) = mem::take(&mut self.new_head_ids)
.into_iter()
.partition::<Vec<_>, _>(|id| reached.contains(id));
assert!(!new_reachable.is_empty(), "blocking head should exist");
self.emittable_ids.extend(new_reachable.into_iter().rev());
self.new_head_ids = new_unreachable;
}
}

#[must_use]
fn next_node(&mut self) -> Option<(CommitId, Vec<RevsetGraphEdge>)> {
// Based on Kahn's algorithm
Expand All @@ -177,7 +208,8 @@ where
let current_node = self.nodes.get_mut(current_id).unwrap();
if !current_node.child_ids.is_empty() {
// New children populated after emitting the other
self.emittable_ids.pop().unwrap();
let current_id = self.emittable_ids.pop().unwrap();
self.blocked_ids.insert(current_id);
continue;
}
let Some(edges) = current_node.edges.take() else {
Expand All @@ -192,12 +224,16 @@ where
let parent_node = self.nodes.get_mut(parent_id).unwrap();
parent_node.child_ids.remove(&current_id);
if parent_node.child_ids.is_empty() {
self.emittable_ids.push(parent_id.clone());
let reusable_id = self.blocked_ids.take(parent_id);
let parent_id = reusable_id.unwrap_or_else(|| parent_id.clone());
self.emittable_ids.push(parent_id);
} else {
self.blocked_ids.insert(parent_id.clone());
}
}
return Some((current_id, edges));
} else if !self.new_head_ids.is_empty() {
self.emittable_ids.extend(self.new_head_ids.drain(..).rev());
self.flush_new_heads();
} else {
// Populate the first or orphan head
self.populate_one()?;
Expand Down Expand Up @@ -369,19 +405,29 @@ mod tests {
A
"###);
// TODO
// D-A is found earlier than B-A, but B is emitted first because it belongs to
// the emitting branch.
insta::assert_snapshot!(format_graph(topo_grouped(graph.iter().cloned())), @r###"
E direct(B)
│ C direct(B)
├─╯
B direct(A)
│ D direct(A)
│ │
│ │ C direct(B)
├───╯
B │ direct(A)
├─╯
A
"###);

// E can be lazy, then D and C will be queued.
let mut iter = topo_grouped(graph.iter().cloned().peekable());
assert_eq!(iter.next().unwrap().0, id('E'));
assert_eq!(iter.input_iter.peek().unwrap().0, id('D'));
assert_eq!(iter.next().unwrap().0, id('C'));
assert_eq!(iter.input_iter.peek().unwrap().0, id('B'));
assert_eq!(iter.next().unwrap().0, id('B'));
assert_eq!(iter.input_iter.peek().unwrap().0, id('A'));
}

#[test]
Expand Down Expand Up @@ -466,26 +512,98 @@ mod tests {
A
"###);
// TODO
insta::assert_snapshot!(format_graph(topo_grouped(graph.iter().cloned())), @r###"
I direct(E)
│ F direct(E)
├─╯
E direct(C)
│ H direct(C)
├─╯
│ D direct(C)
├─╯
C direct(A)
│ G direct(A)
├─╯
│ B direct(A)
├─╯
A
"###);

// I can be lazy, then H, G, and F will be queued.
let mut iter = topo_grouped(graph.iter().cloned().peekable());
assert_eq!(iter.next().unwrap().0, id('I'));
assert_eq!(iter.input_iter.peek().unwrap().0, id('H'));
assert_eq!(iter.next().unwrap().0, id('F'));
assert_eq!(iter.input_iter.peek().unwrap().0, id('E'));
}

#[test]
fn test_topo_grouped_fork_parallel() {
let graph = vec![
// Pull all sub graphs in reverse order:
(id('I'), vec![direct('A')]),
(id('H'), vec![direct('C')]),
(id('G'), vec![direct('E')]),
// Orphan sub graph G,F-E:
(id('F'), vec![direct('E')]),
(id('E'), vec![missing('Y')]),
// Orphan sub graph H,D-C:
(id('D'), vec![direct('C')]),
(id('C'), vec![missing('X')]),
// Orphan sub graph I,B-A:
(id('B'), vec![direct('A')]),
(id('A'), vec![]),
];
insta::assert_snapshot!(format_graph(graph.iter().cloned()), @r###"
I direct(A)
│ H direct(C)
│ │
│ │ G direct(A)
│ │ G direct(E)
│ │ │
│ │ │ F direct(E)
├─────╯
E │ │ direct(C)
├─╯ │
│ D │ direct(C)
├─╯ │
C │ direct(A)
├───╯
│ │ ├─╯
│ │ E missing(Y)
│ │ │
│ │ ~
│ │
│ │ D direct(C)
│ ├─╯
│ C missing(X)
│ │
│ ~
│ B direct(A)
├─╯
A
"###);
insta::assert_snapshot!(format_graph(topo_grouped(graph.iter().cloned())), @r###"
I direct(A)
│ B direct(A)
├─╯
A
H direct(C)
│ D direct(C)
├─╯
C missing(X)
~
G direct(E)
│ F direct(E)
├─╯
E missing(Y)
~
"###);
}

Expand Down
6 changes: 3 additions & 3 deletions tests/test_abandon_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,11 @@ fn test_rebase_branch_with_merge() {
"###);
insta::assert_snapshot!(get_log_output(&test_env, &repo_path), @r###"
@
│ ◉ b
├─╯
◉ a e??
│ ◉ d e??
│ ◉ c
│ │ ◉ b
├───╯
◉ │ a e??
├─╯
"###);
Expand Down
18 changes: 9 additions & 9 deletions tests/test_duplicate_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,16 +195,16 @@ fn test_duplicate_many() {
◉ 9bd4389f5d47 e
├─╮
◉ │ d94e4c55a68b d
│ │ ◉ c6f7f8c4512e a
│ │ │ @ 921dde6e55c0 e
│ ╭───┤
│ ◉ │ │ 1394f625cbbd b
│ │ │ ◉ ebd06dba20ec d
├─────╯
◉ │ │ c0cb3a0b73e7 c
├─╯ │
◉ │ 2443ea76b0b1 a
│ │ @ 921dde6e55c0 e
│ ╭─┤
│ ◉ │ 1394f625cbbd b
│ │ ◉ ebd06dba20ec d
├───╯
◉ │ c0cb3a0b73e7 c
├─╯
◉ 2443ea76b0b1 a
│ ◉ c6f7f8c4512e a
├─╯
◉ 000000000000
"###);

Expand Down
4 changes: 2 additions & 2 deletions tests/test_templater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ fn test_templater_branches() {
let output = test_env.jj_cmd_success(&workspace_root, &["log", "-T", template]);
insta::assert_snapshot!(output, @r###"
◉ b1bb3766d584 branch3??
│ ◉ 21c33875443e branch1*
├─╯
│ @ a5b4d15489cc branch2* new-branch
│ ◉ 8476341eb395 branch2@origin
├─╯
│ ◉ 21c33875443e branch1*
├─╯
◉ 000000000000
"###);
}
Expand Down

0 comments on commit 564dc27

Please sign in to comment.