Skip to content

Commit

Permalink
annotate: use sorted Vec<(usize, usize)> to propagate lines to ancestors
Browse files Browse the repository at this point in the history
This isn't so complicated compared to the HashMap version, and we can handle
multiple (cur, orig1), (cur, orig2) pairs. It's also cheaper to access.
  • Loading branch information
yuja committed Oct 29, 2024
1 parent 1fd628a commit bd10245
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 22 deletions.
52 changes: 31 additions & 21 deletions lib/src/annotate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use std::collections::hash_map;
use std::collections::HashMap;

use bstr::BString;
use itertools::Itertools as _;
use pollster::FutureExt;

use crate::backend::BackendError;
Expand Down Expand Up @@ -61,8 +62,8 @@ type CommitSourceMap = HashMap<CommitId, Source>;
#[derive(Clone, Debug)]
struct Source {
/// Mapping of line numbers in the file at the current commit to the
/// original file.
line_map: HashMap<usize, usize>,
/// original file, sorted by the line numbers at the current commit.
line_map: Vec<(usize, usize)>,
/// File content at the current commit.
text: BString,
}
Expand All @@ -72,7 +73,7 @@ impl Source {
let tree = commit.tree()?;
let text = get_file_contents(commit.store(), file_path, &tree)?;
Ok(Source {
line_map: HashMap::new(),
line_map: Vec::new(),
text: text.into(),
})
}
Expand Down Expand Up @@ -194,20 +195,30 @@ fn process_commit(
// commit B. Then, we update local line_map to say that "Commit B line 6
// goes to line 7 of the original file". We repeat this for all lines in
// common in the two commits.
let mut current_lines = current_source.line_map.iter().copied().peekable();
let mut new_current_line_map = Vec::new();
let mut new_parent_line_map = Vec::new();
copy_same_lines_with(
&current_source.text,
&parent_source.text,
|current_line_number, parent_line_number| {
let Some(original_line_number) =
current_source.line_map.remove(&current_line_number)
else {
return;
};
parent_source
.line_map
.insert(parent_line_number, original_line_number);
|current_start, parent_start, count| {
new_current_line_map
.extend(current_lines.peeking_take_while(|&(cur, _)| cur < current_start));
while let Some((current, original)) =
current_lines.next_if(|&(cur, _)| cur < current_start + count)
{
let parent = parent_start + (current - current_start);
new_parent_line_map.push((parent, original));
}
},
);
new_current_line_map.extend(current_lines);
current_source.line_map = new_current_line_map;
parent_source.line_map = if parent_source.line_map.is_empty() {
new_parent_line_map
} else {
itertools::merge(parent_source.line_map.iter().copied(), new_parent_line_map).collect()
};
if parent_source.line_map.is_empty() {
commit_source_map.remove(parent_commit_id);
}
Expand All @@ -216,31 +227,30 @@ fn process_commit(
// Once we've looked at all parents of a commit, any leftover lines must be
// original to the current commit, so we save this information in
// original_line_map.
for &original_line_number in current_source.line_map.values() {
for (_, original_line_number) in current_source.line_map {
original_line_map[original_line_number] = Some(current_commit_id.clone());
}

Ok(())
}

/// For two files, calls `copy(current, parent)` for each line in common
/// (e.g. line 8 maps to line 9)
/// For two files, calls `copy(current_start, parent_start, count)` for each
/// range of contiguous lines in common (e.g. line 8-10 maps to line 9-11.)
fn copy_same_lines_with(
current_contents: &[u8],
parent_contents: &[u8],
mut copy: impl FnMut(usize, usize),
mut copy: impl FnMut(usize, usize, usize),
) {
let diff = Diff::by_line([current_contents, parent_contents]);
let mut current_line_counter: usize = 0;
let mut parent_line_counter: usize = 0;
for hunk in diff.hunks() {
match hunk.kind {
DiffHunkKind::Matching => {
for _ in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
copy(current_line_counter, parent_line_counter);
current_line_counter += 1;
parent_line_counter += 1;
}
let count = hunk.contents[0].split_inclusive(|b| *b == b'\n').count();
copy(current_line_counter, parent_line_counter, count);
current_line_counter += count;
parent_line_counter += count;
}
DiffHunkKind::Different => {
let current_output = hunk.contents[0];
Expand Down
57 changes: 56 additions & 1 deletion lib/tests/test_annotate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,59 @@ fn test_annotate_merge_split() {
}

#[test]
#[should_panic] // FIXME: shouldn't panic because of duplicated "1"s
fn test_annotate_merge_split_interleaved() {
let settings = testutils::user_settings();
let test_repo = TestRepo::init();
let repo = &test_repo.repo;

let root_commit_id = repo.store().root_commit_id();
let file_path = RepoPath::from_internal_string("file");

// 6 "1a 4 1b 6 2a 5 2b"
// |\
// | 5 "1b 5 2b"
// | |
// 4 | "1a 4 2a"
// |/
// 3 "1a 1b 2a 2b"
// |\
// | 2 "2a 2b"
// |
// 1 "1a 1b"
let mut tx = repo.start_transaction(&settings);
let mut create_commit = create_commit_fn(tx.repo_mut(), &settings);
let content1 = "1a\n1b\n";
let content2 = "2a\n2b\n";
let content3 = "1a\n1b\n2a\n2b\n";
let content4 = "1a\n4\n2a\n";
let content5 = "1b\n5\n2b\n";
let content6 = "1a\n4\n1b\n6\n2a\n5\n2b\n";
let tree1 = create_tree(repo, &[(file_path, content1)]);
let tree2 = create_tree(repo, &[(file_path, content2)]);
let tree3 = create_tree(repo, &[(file_path, content3)]);
let tree4 = create_tree(repo, &[(file_path, content4)]);
let tree5 = create_tree(repo, &[(file_path, content5)]);
let tree6 = create_tree(repo, &[(file_path, content6)]);
let commit1 = create_commit("commit1", &[root_commit_id], tree1.id());
let commit2 = create_commit("commit2", &[root_commit_id], tree2.id());
let commit3 = create_commit("commit3", &[commit1.id(), commit2.id()], tree3.id());
let commit4 = create_commit("commit4", &[commit3.id()], tree4.id());
let commit5 = create_commit("commit5", &[commit3.id()], tree5.id());
let commit6 = create_commit("commit6", &[commit4.id(), commit5.id()], tree6.id());
drop(create_commit);

insta::assert_snapshot!(annotate(tx.repo(), &commit6, file_path), @r#"
commit1: 1a
commit4: 4
commit1: 1b
commit6: 6
commit2: 2a
commit5: 5
commit2: 2b
"#);
}

#[test]
fn test_annotate_merge_dup() {
let settings = testutils::user_settings();
let test_repo = TestRepo::init();
Expand Down Expand Up @@ -215,6 +267,9 @@ fn test_annotate_merge_dup() {
let commit4 = create_commit("commit4", &[commit2.id(), commit3.id()], tree4.id());
drop(create_commit);

// Both "1"s can be propagated to commit1 through commit2 and commit3.
// Alternatively, it's also good to interpret that one of the "1"s was
// produced at commit2, commit3, or commit4.
insta::assert_snapshot!(annotate(tx.repo(), &commit4, file_path), @r#"
commit2: 2
commit1: 1
Expand Down

0 comments on commit bd10245

Please sign in to comment.