Skip to content

Commit

Permalink
prefixes: allow resolving shorter ids within a revset
Browse files Browse the repository at this point in the history
In large repos, the unique prefixes can get somewhat long (~6 hex
digits seems typical in the Linux repo), which makes them less useful
for manually entering on the CLI. The user typically cares most about
a small set of commits, so it would be nice to give shorter unique ids
to those. That's what Mercurial enables with its
`experimental.revisions.disambiguatewithin` config. This commit
provides an implementation of that feature in `IdPrefixContext`.

In very large repos, it can also be slow to calculate the unique
prefixes, especially if it involves a request to a server. This
feature becomes much more important in such repos.
  • Loading branch information
martinvonz committed May 12, 2023
1 parent f3ebcaa commit 21d2a12
Show file tree
Hide file tree
Showing 2 changed files with 307 additions and 3 deletions.
109 changes: 106 additions & 3 deletions lib/src/id_prefix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,138 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::rc::Rc;

use once_cell::unsync::OnceCell;

use crate::backend::{self, ChangeId, CommitId, ObjectId};
use crate::index::{HexPrefix, PrefixResolution};
use crate::op_store::WorkspaceId;
use crate::repo::Repo;
use crate::revset::{DefaultSymbolResolver, RevsetExpression, RevsetIteratorExt};

struct PrefixDisambiguationError;

struct DisambiguationData {
expression: Rc<RevsetExpression>,
workspace_id: Option<WorkspaceId>,
// TODO: We shouldn't have to duplicate the CommitId as value
indexes: OnceCell<Indexes>,
}

struct Indexes {
commit_index: IdIndex<CommitId, CommitId>,
change_index: IdIndex<ChangeId, CommitId>,
}

impl DisambiguationData {
fn indexes(&self, repo: &dyn Repo) -> Result<&Indexes, PrefixDisambiguationError> {
self.indexes.get_or_try_init(|| {
let symbol_resolver = DefaultSymbolResolver::new(repo, self.workspace_id.as_ref());
let resolved_expression = self
.expression
.clone()
.resolve_user_expression(repo, &symbol_resolver)
.map_err(|_| PrefixDisambiguationError)?;
let revset = resolved_expression
.evaluate(repo)
.map_err(|_| PrefixDisambiguationError)?;

// TODO: We should be able to get the change IDs from the revset, without having
// to read the whole commit objects
let mut commit_id_vec = vec![];
let mut change_id_vec = vec![];
for commit in revset.iter().commits(repo.store()) {
let commit = commit.map_err(|_| PrefixDisambiguationError)?;
commit_id_vec.push((commit.id().clone(), commit.id().clone()));
change_id_vec.push((commit.change_id().clone(), commit.id().clone()));
}
Ok(Indexes {
commit_index: IdIndex::from_vec(commit_id_vec),
change_index: IdIndex::from_vec(change_id_vec),
})
})
}
}

pub struct IdPrefixContext<'repo> {
repo: &'repo dyn Repo,
disambiguation: Option<DisambiguationData>,
}

impl IdPrefixContext<'_> {
pub fn new(repo: &dyn Repo) -> IdPrefixContext {
IdPrefixContext { repo }
IdPrefixContext {
repo,
disambiguation: None,
}
}

pub fn disambiguate_within(
mut self,
expression: Rc<RevsetExpression>,
workspace_id: Option<WorkspaceId>,
) -> Self {
self.disambiguation = Some(DisambiguationData {
workspace_id,
expression,
indexes: OnceCell::new(),
});
self
}

fn disambiguation_indexes(&self) -> Option<&Indexes> {
// TODO: propagate errors instead of treating them as if no revset was specified
self.disambiguation
.as_ref()
.and_then(|disambiguation| disambiguation.indexes(self.repo).ok())
}

/// Resolve an unambiguous commit ID prefix.
pub fn resolve_commit_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
if let Some(indexes) = self.disambiguation_indexes() {
let resolution = indexes.commit_index.resolve_prefix(prefix);
if let PrefixResolution::SingleMatch(mut ids) = resolution {
assert_eq!(ids.len(), 1);
return PrefixResolution::SingleMatch(ids.pop().unwrap());
}
}
self.repo.index().resolve_prefix(prefix)
}

/// Returns the shortest length of a prefix of `commit_id` that
/// can still be resolved by `resolve_commit_prefix()`.
pub fn shortest_commit_prefix_len(&self, commit_id: &CommitId) -> usize {
if let Some(indexes) = self.disambiguation_indexes() {
// TODO: Avoid the double lookup here (has_key() + shortest_unique_prefix_len())
if indexes.commit_index.has_key(commit_id) {
return indexes.commit_index.shortest_unique_prefix_len(commit_id);
}
}
self.repo
.index()
.shortest_unique_commit_id_prefix_len(commit_id)
}

/// Resolve an unambiguous change ID prefix to the commit IDs in the revset.
pub fn resolve_change_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<Vec<CommitId>> {
if let Some(indexes) = self.disambiguation_indexes() {
let resolution = indexes.change_index.resolve_prefix(prefix);
if let PrefixResolution::SingleMatch(ids) = resolution {
return PrefixResolution::SingleMatch(ids);
}
}
self.repo.resolve_change_id_prefix(prefix)
}

/// Returns the shortest length of a prefix of `change_id` that
/// can still be resolved by `resolve_change_prefix()`.
pub fn shortest_change_prefix_len(&self, change_id: &ChangeId) -> usize {
if let Some(indexes) = self.disambiguation_indexes() {
if indexes.change_index.has_key(change_id) {
return indexes.change_index.shortest_unique_prefix_len(change_id);
}
}
self.repo.shortest_unique_change_id_prefix_len(change_id)
}
}
Expand All @@ -71,6 +169,10 @@ where
prefix: &HexPrefix,
mut value_mapper: impl FnMut(&V) -> U,
) -> PrefixResolution<Vec<U>> {
if prefix.min_prefix_bytes().is_empty() {
// We consider an empty prefix ambiguous even if the index has a single entry.
return PrefixResolution::AmbiguousMatch;
}
let mut range = self.resolve_prefix_range(prefix).peekable();
if let Some((first_key, _)) = range.peek().copied() {
let maybe_entries: Option<Vec<_>> = range
Expand Down Expand Up @@ -135,7 +237,8 @@ where
backend::common_hex_len(key.as_bytes(), neighbor.as_bytes()) + 1
})
.max()
.unwrap_or(0)
// Even if the key is the only one in the index, we require at least one digit.
.unwrap_or(1)
}
}

Expand Down Expand Up @@ -214,7 +317,7 @@ mod tests {
let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")),
0
1
);

let id_index = IdIndex::from_vec(vec![
Expand Down
201 changes: 201 additions & 0 deletions lib/tests/test_id_prefix.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
// Copyright 2023 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use itertools::Itertools;
use jujutsu_lib::backend::{CommitId, MillisSinceEpoch, ObjectId, Signature, Timestamp};
use jujutsu_lib::id_prefix::IdPrefixContext;
use jujutsu_lib::index::HexPrefix;
use jujutsu_lib::index::PrefixResolution::{AmbiguousMatch, NoMatch, SingleMatch};
use jujutsu_lib::repo::Repo;
use jujutsu_lib::revset::RevsetExpression;
use testutils::TestRepo;

#[test]
fn test_id_prefix() {
let settings = testutils::user_settings();
let test_repo = TestRepo::init(true);
let repo = &test_repo.repo;
let root_commit_id = repo.store().root_commit_id();
let root_change_id = repo.store().root_change_id();

let mut tx = repo.start_transaction(&settings, "test");
let mut create_commit = |parent_id: &CommitId| {
let signature = Signature {
name: "Some One".to_string(),
email: "[email protected]".to_string(),
timestamp: Timestamp {
timestamp: MillisSinceEpoch(0),
tz_offset: 0,
},
};
tx.mut_repo()
.new_commit(
&settings,
vec![parent_id.clone()],
repo.store().empty_tree_id().clone(),
)
.set_author(signature.clone())
.set_committer(signature)
.write()
.unwrap()
};
let mut commits = vec![create_commit(root_commit_id)];
for _ in 0..25 {
commits.push(create_commit(commits.last().unwrap().id()));
}
let repo = tx.commit();

// Print the commit IDs and change IDs for reference
let commit_prefixes = commits
.iter()
.enumerate()
.map(|(i, commit)| format!("{} {}", &commit.id().hex()[..3], i))
.sorted()
.join("\n");
insta::assert_snapshot!(commit_prefixes, @r###"
11a 5
214 24
2a6 2
33e 14
3be 16
3ea 18
593 20
5d3 1
5f6 13
676 3
7b6 25
7da 9
81c 10
87e 12
997 21
9f7 22
a0e 4
a55 19
ac4 23
c18 17
ce9 0
d42 6
d9d 8
eec 15
efe 7
fa3 11
"###);
let change_prefixes = commits
.iter()
.enumerate()
.map(|(i, commit)| format!("{} {}", &commit.change_id().hex()[..3], i))
.sorted()
.join("\n");
insta::assert_snapshot!(change_prefixes, @r###"
026 9
030 13
1b5 6
26b 3
26c 8
271 10
439 2
44a 17
4e9 16
5b2 23
6c2 19
781 0
79f 14
7d5 24
86b 20
871 7
896 5
9e4 18
a2c 1
a63 22
b19 11
b93 4
bf5 21
c24 15
d64 12
fee 25
"###);

let prefix = |x| HexPrefix::new(x).unwrap();

// Without a disambiguation revset
// ---------------------------------------------------------------------------------------------
let c = IdPrefixContext::new(repo.as_ref());
assert_eq!(c.shortest_commit_prefix_len(commits[2].id()), 2);
assert_eq!(c.shortest_commit_prefix_len(commits[5].id()), 1);
assert_eq!(c.resolve_commit_prefix(&prefix("2")), AmbiguousMatch);
assert_eq!(
c.resolve_commit_prefix(&prefix("2a")),
SingleMatch(commits[2].id().clone())
);
assert_eq!(c.resolve_commit_prefix(&prefix("20")), NoMatch);
assert_eq!(c.resolve_commit_prefix(&prefix("2a0")), NoMatch);
assert_eq!(c.shortest_change_prefix_len(commits[0].change_id()), 2);
assert_eq!(c.shortest_change_prefix_len(commits[6].change_id()), 1);
assert_eq!(c.resolve_change_prefix(&prefix("7")), AmbiguousMatch);
assert_eq!(
c.resolve_change_prefix(&prefix("78")),
SingleMatch(vec![commits[0].id().clone()])
);
assert_eq!(c.resolve_change_prefix(&prefix("70")), NoMatch);
assert_eq!(c.resolve_change_prefix(&prefix("780")), NoMatch);

// Disambiguate within a revset
// ---------------------------------------------------------------------------------------------
let expression =
RevsetExpression::commits(vec![commits[0].id().clone(), commits[2].id().clone()]);
let c = c.disambiguate_within(expression, None);
// The prefix is now shorter
assert_eq!(c.shortest_commit_prefix_len(commits[2].id()), 1);
// Shorter prefix within the set can be used
assert_eq!(
c.resolve_commit_prefix(&prefix("2")),
SingleMatch(commits[2].id().clone())
);
// Can still resolve commits outside the set
assert_eq!(
c.resolve_commit_prefix(&prefix("21")),
SingleMatch(commits[24].id().clone())
);
assert_eq!(c.shortest_change_prefix_len(commits[0].change_id()), 1);
assert_eq!(
c.resolve_change_prefix(&prefix("7")),
SingleMatch(vec![commits[0].id().clone()])
);

// Single commit in revset. Length 0 is unambiguous, but we pretend 1 digit is
// needed.
// ---------------------------------------------------------------------------------------------
let expression = RevsetExpression::commit(root_commit_id.clone());
let c = c.disambiguate_within(expression, None);
assert_eq!(c.shortest_commit_prefix_len(root_commit_id), 1);
assert_eq!(c.resolve_commit_prefix(&prefix("")), AmbiguousMatch);
assert_eq!(
c.resolve_commit_prefix(&prefix("0")),
SingleMatch(root_commit_id.clone())
);
assert_eq!(c.shortest_change_prefix_len(root_change_id), 1);
assert_eq!(c.resolve_change_prefix(&prefix("")), AmbiguousMatch);
assert_eq!(
c.resolve_change_prefix(&prefix("0")),
SingleMatch(vec![root_commit_id.clone()])
);

// Disambiguate within revset that fails to evaluate
// ---------------------------------------------------------------------------------------------
// TODO: Should be an error
let expression = RevsetExpression::symbol("nonexistent".to_string());
let context = c.disambiguate_within(expression, None);
assert_eq!(context.shortest_commit_prefix_len(commits[2].id()), 2);
assert_eq!(context.resolve_commit_prefix(&prefix("2")), AmbiguousMatch);
}

0 comments on commit 21d2a12

Please sign in to comment.