Skip to content

Commit

Permalink
git: do not import unknown commits by read_commit(), use explicit met…
Browse files Browse the repository at this point in the history
…hod call

The main goal of this change is to enable tree-level conflict format, but it
also allows us to bulk-import commits on clone/init. I think a separate method
will help if we want to provide progress information, enable check for
.jjconflict entries under certain condition, etc.

Since git::import_refs() now depends on GitBackend type, it might be better to
remove git_repo from the function arguments.
  • Loading branch information
yuja committed Sep 8, 2023
1 parent dde1c1e commit b726657
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 56 deletions.
31 changes: 17 additions & 14 deletions lib/src/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use tempfile::NamedTempFile;
use thiserror::Error;

use crate::backend::{BackendError, CommitId, ObjectId};
use crate::git_backend::NO_GC_REF_NAMESPACE;
use crate::git_backend::{GitBackend, NO_GC_REF_NAMESPACE};
use crate::op_store::{BranchTarget, RefTarget, RefTargetOptionExt};
use crate::repo::{MutableRepo, Repo};
use crate::revset;
Expand Down Expand Up @@ -228,27 +228,30 @@ pub fn import_some_refs(

// Import new heads
let store = mut_repo.store();
// TODO: It might be better to obtain both git_repo and git_backend from
// mut_repo, and return error if the repo isn't backed by Git.
let git_backend = store.backend_impl().downcast_ref::<GitBackend>().unwrap();
let mut head_commits = Vec::new();
// TODO: Import commits in bulk (but we'll need to adjust error handling.)
let get_commit = |id| {
git_backend.import_head_commits([id])?;
store.get_commit(id)
};
if let Some(new_head_target) = &changed_git_head {
for id in new_head_target.added_ids() {
let commit = store
.get_commit(id)
.map_err(|err| GitImportError::MissingHeadTarget {
id: id.clone(),
err,
})?;
let commit = get_commit(id).map_err(|err| GitImportError::MissingHeadTarget {
id: id.clone(),
err,
})?;
head_commits.push(commit);
}
}
for (ref_name, (_, new_git_target)) in &changed_git_refs {
for id in new_git_target.added_ids() {
let commit =
store
.get_commit(id)
.map_err(|err| GitImportError::MissingRefAncestor {
ref_name: ref_name.to_string(),
err,
})?;
let commit = get_commit(id).map_err(|err| GitImportError::MissingRefAncestor {
ref_name: ref_name.to_string(),
err,
})?;
head_commits.push(commit);
}
}
Expand Down
113 changes: 75 additions & 38 deletions lib/src/git_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@

use std::any::Any;
use std::fmt::{Debug, Error, Formatter};
use std::fs;
use std::io::{Cursor, Read};
use std::ops::Deref;
use std::path::Path;
use std::sync::{Arc, Mutex, MutexGuard};
use std::{fs, slice};

use git2::Oid;
use itertools::Itertools;
Expand Down Expand Up @@ -200,6 +200,41 @@ impl GitBackend {
*self.cached_extra_metadata.lock().unwrap() = Some(table);
Ok(())
}

/// Imports the given commits and ancestors from the backing Git repo.
#[tracing::instrument(skip(self, head_ids))]
pub fn import_head_commits<'a>(
&self,
head_ids: impl IntoIterator<Item = &'a CommitId>,
) -> BackendResult<()> {
let table = self.cached_extra_metadata_table()?;
let mut missing_head_ids = head_ids
.into_iter()
.filter(|&id| *id != self.root_commit_id && table.get_value(id.as_bytes()).is_none())
.collect_vec();
if missing_head_ids.is_empty() {
return Ok(());
}

// These commits are imported from Git. Make our change ids persist (otherwise
// future write_commit() could reassign new change id.)
tracing::debug!(
heads_count = missing_head_ids.len(),
"import extra metadata entries"
);
let locked_repo = self.repo.lock().unwrap();
let (table, table_lock) = self.read_extra_metadata_table_locked()?;
let mut mut_table = table.start_mutation();
// Concurrent write_commit() might have updated the table before taking a lock.
missing_head_ids.retain(|&id| mut_table.get_value(id.as_bytes()).is_none());
import_extra_metadata_entries_from_heads(
&locked_repo,
&mut mut_table,
&table_lock,
&missing_head_ids,
)?;
self.save_extra_metadata_table(mut_table, &table_lock)
}
}

fn commit_from_git_without_root_parent(commit: &git2::Commit) -> Commit {
Expand Down Expand Up @@ -373,17 +408,16 @@ fn import_extra_metadata_entries_from_heads(
git_repo: &git2::Repository,
mut_table: &mut MutableTable,
_table_lock: &FileLock,
head_ids: &[CommitId],
missing_head_ids: &[&CommitId],
) -> BackendResult<()> {
let mut work_ids = head_ids
.iter()
.filter(|id| mut_table.get_value(id.as_bytes()).is_none())
.cloned()
.collect_vec();
let mut work_ids = missing_head_ids.iter().map(|&id| id.clone()).collect_vec();
while let Some(id) = work_ids.pop() {
let git_commit = git_repo
.find_commit(validate_git_object_id(&id)?)
.map_err(|err| map_not_found_err(err, &id))?;
// TODO(#1624): Should we read the root tree here and check if it has a
// `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
// change the description of a commit with tree-level conflicts.
let commit = commit_from_git_without_root_parent(&git_commit);
mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
work_ids.extend(
Expand Down Expand Up @@ -614,7 +648,6 @@ impl Backend for GitBackend {
Ok(ConflictId::from_bytes(oid.as_bytes()))
}

#[tracing::instrument(skip(self))]
fn read_commit(&self, id: &CommitId) -> BackendResult<Commit> {
if *id == self.root_commit_id {
return Ok(make_root_commit(
Expand All @@ -634,36 +667,15 @@ impl Backend for GitBackend {
};

let table = self.cached_extra_metadata_table()?;
if let Some(extras) = table.get_value(id.as_bytes()) {
deserialize_extras(&mut commit, extras);
} else {
let (table, table_lock) = self.read_extra_metadata_table_locked()?;
if let Some(extras) = table.get_value(id.as_bytes()) {
// Concurrent write_commit() would update extras before taking a lock.
deserialize_extras(&mut commit, extras);
*self.cached_extra_metadata.lock().unwrap() = Some(table);
} else {
// This commit is imported from Git. Make our change id persist (otherwise
// future write_commit() could reassign new change id.) It's likely that
// the commit is a branch head, so bulk-import metadata as much as possible.
tracing::debug!("import extra metadata entries");
let mut mut_table = table.start_mutation();
// TODO(#1624): Should we read the root tree here and check if it has a
// `.jjconflict-...` entries? That could happen if the user used `git` to e.g.
// change the description of a commit with tree-level conflicts.
mut_table.add_entry(id.to_bytes(), serialize_extras(&commit));
if commit.parents != slice::from_ref(&self.root_commit_id) {
import_extra_metadata_entries_from_heads(
&locked_repo,
&mut mut_table,
&table_lock,
&commit.parents,
)?;
}
self.save_extra_metadata_table(mut_table, &table_lock)?;
}
}

let extras =
table
.get_value(id.as_bytes())
.ok_or_else(|| BackendError::ObjectNotFound {
object_type: id.object_type(),
hash: id.hex(),
source: "Not imported from Git".into(),
})?;
deserialize_extras(&mut commit, extras);
Ok(commit)
}

Expand Down Expand Up @@ -914,7 +926,24 @@ mod tests {
// Check that the git commit above got the hash we expect
assert_eq!(git_commit_id.as_bytes(), commit_id.as_bytes());

// Add an empty commit on top
let git_commit_id2 = git_repo
.commit(
None,
&git_author,
&git_committer,
"git commit message 2",
&git_tree,
&[&git_repo.find_commit(git_commit_id).unwrap()],
)
.unwrap();
let commit_id2 = CommitId::from_bytes(git_commit_id2.as_bytes());

let store = GitBackend::init_external(store_path, &git_repo_path).unwrap();

// Import the head commit and its ancestors
store.import_head_commits([&commit_id2]).unwrap();

let commit = store.read_commit(&commit_id).unwrap();
assert_eq!(&commit.change_id, &change_id);
assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]);
Expand Down Expand Up @@ -977,6 +1006,14 @@ mod tests {
symlink.value(),
&TreeValue::Symlink(SymlinkId::from_bytes(blob2.as_bytes()))
);

let commit2 = store.read_commit(&commit_id2).unwrap();
assert_eq!(commit2.parents, vec![commit_id.clone()]);
assert_eq!(commit.predecessors, vec![]);
assert_eq!(
commit.root_tree,
MergedTreeId::Legacy(TreeId::from_bytes(root_tree_id.as_bytes()))
);
}

#[test]
Expand Down
16 changes: 12 additions & 4 deletions lib/tests/test_git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,15 @@ fn git_id(commit: &Commit) -> Oid {
Oid::from_bytes(commit.id().as_bytes()).unwrap()
}

fn get_git_repo(repo: &Arc<ReadonlyRepo>) -> git2::Repository {
fn get_git_backend(repo: &Arc<ReadonlyRepo>) -> &GitBackend {
repo.store()
.backend_impl()
.downcast_ref::<GitBackend>()
.unwrap()
.git_repo_clone()
}

fn get_git_repo(repo: &Arc<ReadonlyRepo>) -> git2::Repository {
get_git_backend(repo).git_repo_clone()
}

#[test]
Expand Down Expand Up @@ -1851,6 +1854,9 @@ fn set_up_push_repos(settings: &UserSettings, temp_dir: &TempDir) -> PushTestSet
ReadonlyRepo::default_submodule_store_factory(),
)
.unwrap();
get_git_backend(&jj_repo)
.import_head_commits(&[jj_id(&initial_git_commit)])
.unwrap();
let mut tx = jj_repo.start_transaction(settings, "test");
let new_commit = create_random_commit(tx.mut_repo(), settings)
.set_parents(vec![jj_id(&initial_git_commit)])
Expand Down Expand Up @@ -2278,13 +2284,15 @@ fn test_concurrent_read_write_commit() {
barrier.wait();
while !pending_commit_ids.is_empty() {
repo = repo.reload_at_head(settings).unwrap();
let git_backend = get_git_backend(&repo);
let mut tx = repo.start_transaction(settings, &format!("reader {i}"));
pending_commit_ids = pending_commit_ids
.into_iter()
.filter_map(|commit_id| {
match repo.store().get_commit(&commit_id) {
Ok(commit) => {
match git_backend.import_head_commits([&commit_id]) {
Ok(()) => {
// update index as git::import_refs() would do
let commit = repo.store().get_commit(&commit_id).unwrap();
tx.mut_repo().add_head(&commit);
None
}
Expand Down

0 comments on commit b726657

Please sign in to comment.