From 5f4e36aff41d0824453bcd3ce62ea401b95f9755 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Fri, 1 Dec 2023 14:00:22 -0800 Subject: [PATCH] gc: implement basic GC for Git backend This adds an initial `jj util gc` command, which simply calls `git gc` when using the Git backend. That should already be useful in non-colocated repos because it's not obvious how to GC (repack) such repos. In my own jj repo, it shrank `.jj/repo/store/` from 2.4 GiB to 780 MiB, and `jj log --ignore-working-copy` was sped up from 157 ms to 86 ms. I haven't added any tests because the functionality depends on having a `git` binary on the PATH, which we don't yet depend on anywhere else. I think we'll still be able to test many of the future parts of garbage collection without a `git` binary because the interesting parts are about manipulating the Git repo before calling `git gc` on it. --- CHANGELOG.md | 4 ++++ cli/examples/custom-backend/main.rs | 4 ++++ cli/src/commands/util.rs | 13 ++++++++++++- docs/git-compatibility.md | 4 ++-- lib/src/backend.rs | 4 ++++ lib/src/git_backend.rs | 16 ++++++++++++++++ lib/src/local_backend.rs | 4 ++++ lib/src/store.rs | 4 ++++ lib/testutils/src/test_backend.rs | 4 ++++ 9 files changed, 54 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e73d6966af..4fd8c0d327 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * `jj rebase` now takes the flag `--skip-empty`, which doesn't copy over commits that would become empty after a rebase. +* There is a new `jj util gc` command for cleaning up the repository storage. + For now, it simply runs `git gc` on the backing Git repo (when using the Git + backend). 
+ ### Fixed bugs diff --git a/cli/examples/custom-backend/main.rs b/cli/examples/custom-backend/main.rs index aee6b603b8..438d7d16e4 100644 --- a/cli/examples/custom-backend/main.rs +++ b/cli/examples/custom-backend/main.rs @@ -170,4 +170,8 @@ impl Backend for JitBackend { ) -> BackendResult<(CommitId, Commit)> { self.inner.write_commit(contents, sign_with) } + + fn gc(&self) -> Result<(), Box> { + self.inner.gc() + } } diff --git a/cli/src/commands/util.rs b/cli/src/commands/util.rs index 112606b4b4..29f31fd7d4 100644 --- a/cli/src/commands/util.rs +++ b/cli/src/commands/util.rs @@ -15,15 +15,17 @@ use std::io::Write; use clap::Subcommand; +use jj_lib::repo::Repo; use tracing::instrument; -use crate::cli_util::{CommandError, CommandHelper}; +use crate::cli_util::{user_error, CommandError, CommandHelper}; use crate::ui::Ui; /// Infrequently used commands such as for generating shell completions #[derive(Subcommand, Clone, Debug)] pub(crate) enum UtilCommand { Completion(UtilCompletionArgs), + Gc(UtilGcArgs), Mangen(UtilMangenArgs), ConfigSchema(UtilConfigSchemaArgs), } @@ -56,6 +58,10 @@ pub(crate) struct UtilCompletionArgs { zsh: bool, } +/// Run backend-dependent garbage collection. 
+#[derive(clap::Args, Clone, Debug)] +pub(crate) struct UtilGcArgs {} + /// Print a ROFF (manpage) #[derive(clap::Args, Clone, Debug)] pub(crate) struct UtilMangenArgs {} @@ -84,6 +90,11 @@ pub(crate) fn cmd_util( clap_complete::generate(shell, &mut app, "jj", &mut buf); ui.stdout_formatter().write_all(&buf)?; } + UtilCommand::Gc(_gc_args) => { + let workspace_command = command.workspace_helper(ui)?; + let backend = workspace_command.repo().store().backend(); + backend.gc().map_err(|err| user_error(err.to_string()))?; + } UtilCommand::Mangen(_mangen_args) => { let mut buf = vec![]; let man = clap_mangen::Man::new(command.app().clone()); diff --git a/docs/git-compatibility.md b/docs/git-compatibility.md index 4a33c2513e..51375bb6cd 100644 --- a/docs/git-compatibility.md +++ b/docs/git-compatibility.md @@ -130,8 +130,8 @@ repos may require you to deal with more involved Jujutsu and Git concepts. * In co-located repos with a very large number of branches or other refs, `jj` commands can get noticeably slower because of the automatic `jj git import` - executed on each command. This can be mitigated by occasionally running `git - pack-refs --all` to speed up the import. + executed on each command. This can be mitigated by occasionally running `jj util + gc` to speed up the import (that command includes packing the Git refs). * Git tools will have trouble with revisions that contain conflicted files. While `jj` renders these files with conflict markers in the working copy, they are diff --git a/lib/src/backend.rs b/lib/src/backend.rs index a81c183eaf..f0b2bf8cd4 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -533,4 +533,8 @@ pub trait Backend: Send + Sync + Debug { contents: Commit, sign_with: Option<&mut SigningFn>, ) -> BackendResult<(CommitId, Commit)>; + + /// Perform garbage collection. 
+ // TODO: pass in the set of commits to keep here + fn gc(&self) -> Result<(), Box>; } diff --git a/lib/src/git_backend.rs b/lib/src/git_backend.rs index e0126a93a8..24ba5ef800 100644 --- a/lib/src/git_backend.rs +++ b/lib/src/git_backend.rs @@ -94,6 +94,12 @@ impl From for BackendError { } } +#[derive(Debug, Error)] +pub enum GitGcError { + #[error("Failed to run git gc command: {0}")] + GcCommand(std::io::Error), +} + pub struct GitBackend { // While gix::Repository can be created from gix::ThreadSafeRepository, it's // cheaper to cache the thread-local instance behind a mutex than creating @@ -1007,6 +1013,16 @@ impl Backend for GitBackend { self.save_extra_metadata_table(mut_table, &table_lock)?; Ok((id, contents)) } + + fn gc(&self) -> Result<(), Box> { + let mut git = std::process::Command::new("git"); + git.env("GIT_DIR", self.git_repo_path()); + git.args(["gc"]); + // TODO: pass output to UI layer instead of printing directly here + let mut git_gc = git.spawn().map_err(GitGcError::GcCommand)?; + git_gc.wait()?; + Ok(()) + } } /// Write a tree conflict as a special tree with `.jjconflict-base-N` and diff --git a/lib/src/local_backend.rs b/lib/src/local_backend.rs index 09574b5924..c704d23e44 100644 --- a/lib/src/local_backend.rs +++ b/lib/src/local_backend.rs @@ -297,6 +297,10 @@ impl Backend for LocalBackend { .map_err(to_other_err)?; Ok((id, commit)) } + + fn gc(&self) -> Result<(), Box> { + Ok(()) + } } pub fn commit_to_proto(commit: &Commit) -> crate::protos::local_store::Commit { diff --git a/lib/src/store.rs b/lib/src/store.rs index f4810bd364..01840bb445 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -67,6 +67,10 @@ impl Store { }) } + pub fn backend(&self) -> &dyn Backend { + self.backend.as_ref() + } + pub fn backend_impl(&self) -> &dyn Any { self.backend.as_any() } diff --git a/lib/testutils/src/test_backend.rs b/lib/testutils/src/test_backend.rs index 59ab5037dc..c0657194e2 100644 --- a/lib/testutils/src/test_backend.rs +++ 
b/lib/testutils/src/test_backend.rs @@ -292,4 +292,8 @@ impl Backend for TestBackend { .insert(id.clone(), contents.clone()); Ok((id, contents)) } + + fn gc(&self) -> Result<(), Box> { + Ok(()) + } }