From ea7f3f5f707c9955f3080f6a036094e53fb6f814 Mon Sep 17 00:00:00 2001 From: Ilya Grigoriev Date: Sat, 25 Nov 2023 19:04:39 -0800 Subject: [PATCH] rewrite.rs: refactor `new_parents` to depend only on `parent_mapping` Previously, the function relied on both the `self.parent_mapping` and `self.rebased`. If `(A,B)` was in `parent_mapping` and `(B,C)` was in `rebased`, `new_parents` would map `A` to `C`. Now, `self.rebased` is ignored by `new_parents`. In the same situation, DescendantRebaser is changed so that both `(A,B)` and `(B,C)` are in `parent_mapping` before. `new_parents` now applies `parent_mapping` repeatedly, and will map `A` to `C` in this situation. ## Cons - The semantics are changed; `new_parents` now panics if `self.parent_mapping` contain cycles. AFAICT, such cycles never happen in `jj` anyway, except for one test that I had to fix. I think it's a sensible restriction to live with; if you do want to swap children of two commits, you can call `rebase_descendants` twice. ## Pros - I find the new logic much easier to reason about. I plan to extract it into a function, to be used in refactors for `jj rebase -r` and `jj new --after`. It will make it much easier to have a correct implementation of `jj rebase -r --after`, even when rebasing onto a descendant. - The de-duplication is no longer O(n^2). I tried to keep the common case fast. ## Alternatives - We could make `jj rebase` and `jj new` use a separate function with the algorithm shown here, without changing DescendantRebaser. I believe that the new algorithm makes DescendatRebaser easier to understand, though, and it feels more elegant to reduce code duplication. - The de-duplication optimization here is independent of other changes, and could be used on its own. --- Cargo.lock | 1 + lib/Cargo.toml | 1 + lib/src/rewrite.rs | 80 ++++++++++++++++++++++++++------------- lib/tests/test_rewrite.rs | 20 +++------- 4 files changed, 60 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 30dab906fb..374135cae2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1668,6 +1668,7 @@ dependencies = [ "gix", "glob", "hex", + "indexmap", "insta", "itertools 0.11.0", "maplit", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 9f5d65a4e0..f83d80759f 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -33,6 +33,7 @@ git2 = { workspace = true } gix = { workspace = true } glob = { workspace = true } hex = { workspace = true } +indexmap = {workspace = true} itertools = { workspace = true } maplit = { workspace = true } once_cell = { workspace = true } diff --git a/lib/src/rewrite.rs b/lib/src/rewrite.rs index af9e05c019..bc6c0597c3 100644 --- a/lib/src/rewrite.rs +++ b/lib/src/rewrite.rs @@ -18,6 +18,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use futures::StreamExt; +use indexmap::IndexSet; use itertools::Itertools; use pollster::FutureExt; use tracing::instrument; @@ -379,34 +380,49 @@ impl<'settings, 'repo> DescendantRebaser<'settings, 'repo> { &self.rebased } + /// Panics if `parent_mapping` contains cycles fn new_parents(&self, old_ids: &[CommitId]) -> Vec { - // This should be a set, but performance of a vec is much better since we expect - // 99% of commits to have <= 2 parents. - let mut new_ids = vec![]; - let mut add_parent = |id: &CommitId| { - // This can trigger if we abandon an empty commit, as both the empty commit and - // its parent are succeeded by the same commit. - if !new_ids.contains(id) { - new_ids.push(id.clone()); - } - }; - for old_id in old_ids { - if let Some(new_parent_ids) = self.parent_mapping.get(old_id) { - for new_parent_id in new_parent_ids { - // The new parent may itself have been rebased earlier in the process - if let Some(newer_parent_id) = self.rebased.get(new_parent_id) { - add_parent(newer_parent_id); - } else { - add_parent(new_parent_id); + let mut new_ids: Vec = old_ids.into(); + let mut iterations = 0; + loop { + let mut made_replacements = false; + // TODO(ilyagr): (Maybe?) optimize common case of replacements all + // being singletons + let previousy_new_ids = new_ids.split_off(0); + for id in previousy_new_ids.into_iter() { + match self.parent_mapping.get(&id) { + None => new_ids.push(id), + Some(replacements) => { + assert!( + !replacements.is_empty(), + "Found empty value for key {:?} in the parent mapping", + id + ); + made_replacements = true; + new_ids.extend(replacements.iter().cloned()) } - } - } else if let Some(new_parent_id) = self.rebased.get(old_id) { - add_parent(new_parent_id); - } else { - add_parent(old_id); - }; + }; + } + if !made_replacements { + break; + } + iterations += 1; + assert!( + iterations <= self.parent_mapping.len(), + "cycle detected in the parent mapping" + ); + } + match new_ids.as_slice() { + // The first two cases are an optimization for the common case of commits with <=2 + // parents + [_singleton] => new_ids, + [a, b] if a != b => new_ids, + _ => { + // De-duplicate ids while preserving the order + let new_ids_set: IndexSet = new_ids.into_iter().collect(); + new_ids_set.into_iter().collect() + } } - new_ids } fn ref_target_update(old_id: CommitId, new_ids: Vec) -> (RefTarget, RefTarget) { @@ -540,8 +556,18 @@ impl<'settings, 'repo> DescendantRebaser<'settings, 'repo> { &new_parents, &self.options, )?; - self.rebased - .insert(old_commit_id.clone(), new_commit.id().clone()); + assert_eq!( + self.rebased + .insert(old_commit_id.clone(), new_commit.id().clone()), + None, + "Trying to rebase the same commit in two different ways", + ); + assert_eq!( + self.parent_mapping + .insert(old_commit_id.clone(), vec![new_commit.id().clone()]), + None, + "Trying to rebase the same commit in two different ways", + ); self.update_references(old_commit_id, vec![new_commit.id().clone()], true)?; return Ok(Some(RebasedDescendant { old_commit, diff --git a/lib/tests/test_rewrite.rs b/lib/tests/test_rewrite.rs index bbd6718714..d4c362babf 100644 --- a/lib/tests/test_rewrite.rs +++ b/lib/tests/test_rewrite.rs @@ -647,13 +647,14 @@ fn test_rebase_descendants_multiple_sideways() { } #[test] +#[should_panic(expected = "cycle detected")] fn test_rebase_descendants_multiple_swap() { let settings = testutils::user_settings(); let test_repo = TestRepo::init(); let repo = &test_repo.repo; // Commit B was replaced by commit D. Commit D was replaced by commit B. - // Commit C and commit E should swap places. + // This results in an infinite loop and a panic // // C E // B D @@ -663,9 +664,9 @@ fn test_rebase_descendants_multiple_swap() { let mut graph_builder = CommitGraphBuilder::new(&settings, tx.mut_repo()); let commit_a = graph_builder.initial_commit(); let commit_b = graph_builder.commit_with_parents(&[&commit_a]); - let commit_c = graph_builder.commit_with_parents(&[&commit_b]); + let _commit_c = graph_builder.commit_with_parents(&[&commit_b]); let commit_d = graph_builder.commit_with_parents(&[&commit_a]); - let commit_e = graph_builder.commit_with_parents(&[&commit_d]); + let _commit_e = graph_builder.commit_with_parents(&[&commit_d]); let mut rebaser = DescendantRebaser::new( &settings, @@ -676,18 +677,7 @@ fn test_rebase_descendants_multiple_swap() { }, hashset! {}, ); - let new_commit_c = assert_rebased(rebaser.rebase_next().unwrap(), &commit_c, &[&commit_d]); - let new_commit_e = assert_rebased(rebaser.rebase_next().unwrap(), &commit_e, &[&commit_b]); - assert!(rebaser.rebase_next().unwrap().is_none()); - assert_eq!(rebaser.rebased().len(), 2); - - assert_eq!( - *tx.mut_repo().view().heads(), - hashset! { - new_commit_c.id().clone(), - new_commit_e.id().clone() - } - ); + let _ = rebaser.rebase_next(); // Panics because of the cycle } #[test]