From 90ae2d0b0f693c70c1c84da535834a463b7f91c0 Mon Sep 17 00:00:00 2001 From: Emily Date: Fri, 20 Sep 2024 09:22:58 +0100 Subject: [PATCH] mailmap: add support for `.mailmap` files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These live in the repository and map email addresses and names to canonical ones, as described in [`gitmailmap(5)`]. [`gitmailmap(5)`]: https://git-scm.com/docs/gitmailmap We introduce the notion of raw signatures, representing the name and email baked in to an object’s metadata, and canonical signatures, representing the up‐to‐date identity information obtained by looking up a raw signature in a `.mailmap` file. When there are no matching entries, the raw and canonical signatures are the same. Canonical signatures should be used for the majority of purposes, such as display and querying in user interfaces and automated batch processing (e.g., to collate statistics by commit author, or email committers in batch). Generally speaking, whenever you care about *who made the commit* rather than *what data happens to be encoded in the commit itself*, they are the appropriate thing to work with. Raw signatures should usually not be surfaced to users by default unless explicitly asked for. Valid reasons to work with them include low‐level processing of commits where a `.mailmap` is not accessible or would be inappropriate to use (e.g., rewriting or duplication of commits without intent to alter metadata), automated testing, forensics (examining the raw object data stored in the backend that is used to compute a commit’s cryptographic hash), and analysis and debugging of `.mailmap` files themselves. In this model, you can think of raw signatures as being like database keys for a table mapping them to canonical signatures. In an ideal world, these keys would be opaque synthetic keys with no human meaning that are only surfaced when poking into internals; the idea is to treat them as such to the greatest extent possible given the realities of the current object model. The only signatures Jujutsu currently processes are commit authors and committers, which can be obtained in raw and canonical form with `Commit::{author,committer}_raw` and `Mailmap::{author,committer}` respectively. If Jujutsu starts to store or process immutable identity data in other contexts (e.g. support for additional metadata on commits like Git’s `Co-authored-by`/`Signed-off-by`/`Reviewed-by` trailers, or detached metadata that nonetheless must remain immutable), then the notion of raw and canonical signatures will carry over to those and the same guidelines about preferring to work with and display canonical signatures whenever reasonable will apply. This is not meant to be a comprehensive solution to identity management or obsolete the discussion in #2957. There are many possible designs of forward‐thinking author and committer identity systems that would be a lot better than `.mailmap` files, but I don’t really want to get lost in the weeds trying to solve an open research problem here. Instead, this is just an acknowledgement that any system that treats user names and emails as immutable (as Jujutsu currently does) is going to need a mapping layer to keep them updated, and both Git and Mercurial adopted `.mailmap` files, meaning they are already in wide use to address this problem. All sufficiently large open source repositories tend to grow a substantial `.mailmap` file, e.g. [Linux], [Rust], [curl], [Mesa], [Node.js], and [Git] itself. Currently, people working on these repositories with Jujutsu see and search outdated and inconsistent authorship information that contradicts what Git queries and outputs, which is at the very least somewhere between confusing and unhelpful. Even if we had a perfect orthogonal solution in the native backend, as long as we support working on Git repositories it’s a compatibility‐relevant feature. [Linux]: https://github.com/torvalds/linux/blob/f2661062f16b2de5d7b6a5c42a9a5c96326b8454/.mailmap [Rust]: https://github.com/rust-lang/rust/blob/2c243d957008f5909f7a4af19e486ea8a3814be7/.mailmap [curl]: https://github.com/curl/curl/blob/a7ec6a76abf5e29fb3f951a09d429ce5fbff250f/.mailmap [Mesa]: https://gitlab.freedesktop.org/mesa/mesa/-/blob/cdf3228f88361410175c338704908ea74dc7b8ae/.mailmap [Node.js]: https://github.com/nodejs/node/blob/4c730aed7f825af1691740663d599e9de5958f89/.mailmap [Git]: https://github.com/git/git/blob/9005149a4a77e2d3409c6127bf4fd1a0893c3495/.mailmap That said, this is not exclusive to the Git backend. The `.mailmap` name and format is perfectly generic, already shared between Git and Mercurial, and applies to all systems that bake names and emails into commits, including the current local backend. The code uses Gitoxide, but only as a convenient implementation of the file format; in a hypothetical world where the Git backend was removed without Jujutsu changing its notion of commit signatures, `gix-mailmap` could be used standalone, or replaced with a bespoke implementation. I discussed this on the Discord server and we seemed to arrive at a consensus that this would be a good feature to have for Git compatibility and as a pragmatic stop‐gap measure for the larger identity management problem, and that I should have a crack at implementing it to see how complex it would be. Happily, it turned out to be pretty simple! No major plumbing of state is required as the users of the template and revset engines already have the working copy commit close to hand to support displaying and matching `@`; I think this should be more lightweight (but admittedly less powerful) than the commit rewriting approach @arxanas floated on Discord. ## Notes on various design decisions * The `.mailmap` file is read from the working copy commit of the current workspace. This is roughly equivalent to Git reading from `$GIT_WORK_TREE/.mailmap`, or `HEAD:.mailmap` in bare repositories, and seems like the best fit for Jujutsu’s model. I briefly looked into reading it from the actual on‐disk working copy, but it seemed a lot more complicated and I’m not sure if there’s any point. I didn’t add support for Git’s `mailmap.file` and `mailmap.blob` configuration options; unlike ignores, I don’t think I’ve ever seen this feature used other than directly in a repository, and `mailmap.blob` seems to mostly be there to keep it working in bare repositories. I can imagine something like a managed corporate multi‐repo environment with a globally‐shared `mailmap.file` so if people feel like this is important to keep consistency with I can look into implementing it. But genuinely I’ve never personally seen anybody use this. * The `author`/`committer` DSL functions respect the `.mailmap`, with `*_raw` variants to ignore it. If there’s a `.mailmap` available, signatures should be mapped through it unless there’s a specific reason not to; this matches Git’s behaviour and is the main thing that makes this feature worthwhile. There is a corresponding breaking change of the external Rust API, but hopefully the new method name and documentation will nudge people towards doing the right thing. I was initially considering a keyword argument to the template and revset functions to specify whether to map or not (and even implemented keyword arguments for template functions), but I decided it was probably overkill and settled on the current separate functions. A suggestion from Discord was to add a method on signatures to the template language, e.g. `.canonical()` or `.mailmap()`. While this seems elegant to me, I still wanted the short, simple construction to be right by default, and I couldn’t think of any immediate uses outside of `.author()` and `.committer()`. If this is added later, we will still get the elegant property that `commit.{author,committer}()` is short for `commit.{author,committer}_raw().canonical()`. * The mapping to canonical signatures is one‐way, and queries only match on the canonical form. This is the same behaviour as Git. The alternative would be to consider the mapped signatures as an equivalence set and allow a query for any member to match all of them, but this would contradict what is actually displayed for the commits, violate the principles about surfacing raw signatures detailed above, and the `*_raw` functions may be more useful in such a case anyway. * There’s currently no real caching or optimization here. The `.mailmap` file is materialized and parsed whenever a template or revset context is initialized (although it’s still O(1), not parsing it for every processed commit), and `gix-mailmap` does a binary search to resolve signatures. I couldn’t measure any kind of substantial performance hit here, maybe 1‐3% percent on some `jj log` microbenchmarks, but it could just be noise; a couple times it was actually faster. --- CHANGELOG.md | 3 + Cargo.lock | 18 +++- Cargo.toml | 4 + cli/src/cli_util.rs | 9 ++ cli/src/commit_templater.rs | 21 +++- cli/tests/runner.rs | 1 + cli/tests/test_mailmap.rs | 168 ++++++++++++++++++++++++++++++++ cli/tests/test_revset_output.rs | 2 +- docs/revsets.md | 6 ++ docs/templates.md | 4 + lib/Cargo.toml | 2 + lib/src/commit.rs | 14 +++ lib/src/lib.rs | 1 + lib/src/mailmap.rs | 165 +++++++++++++++++++++++++++++++ lib/src/revset.rs | 158 ++++++++++++++++++++++++++---- lib/src/str_util.rs | 10 ++ lib/tests/test_revset.rs | 72 +++++++++++++- 17 files changed, 631 insertions(+), 27 deletions(-) create mode 100644 cli/tests/test_mailmap.rs create mode 100644 lib/src/mailmap.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index ace2489aa87..4c9bfedb238 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). address unconditionally. Only ASCII case folding is currently implemented, but this will likely change in the future. +* Support for [`.mailmap`](https://git-scm.com/docs/gitmailmap) files has + been added. + ### Fixed bugs ## [0.19.0] - 2024-07-03 diff --git a/Cargo.lock b/Cargo.lock index 4ce37f87588..5bf39487bd8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1138,9 +1138,9 @@ dependencies = [ [[package]] name = "gix-date" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "367ee9093b0c2b04fd04c5c7c8b6a1082713534eab537597ae343663a518fa99" +checksum = "9eed6931f21491ee0aeb922751bd7ec97b4b2fe8fbfedcb678e2a2dce5f3b8c0" dependencies = [ "bstr", "itoa", @@ -1334,6 +1334,18 @@ dependencies = [ "syn", ] +[[package]] +name = "gix-mailmap" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cb2da346958252cbc8656529f5479830a3bc6046f3d86405c9e77f71dfdf7b2" +dependencies = [ + "bstr", + "gix-actor", + "gix-date", + "thiserror", +] + [[package]] name = "gix-object" version = "0.42.2" @@ -1916,7 +1928,9 @@ dependencies = [ "futures 0.3.30", "git2", "gix", + "gix-actor", "gix-filter", + "gix-mailmap", "glob", "hex", "ignore", diff --git a/Cargo.toml b/Cargo.toml index 27ad79caa14..ee25360a235 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,10 @@ gix = { version = "0.63.0", default-features = false, features = [ "blob-diff", ] } gix-filter = "0.11.2" +# We list `gix-{actor,mailmap}` separately, as they are used by +# `jj_lib::mailmap` even when the Git backend is disabled. +gix-actor = { version = "0.31.3" } +gix-mailmap = { version = "0.23.4" } glob = "0.3.1" hex = "0.4.3" ignore = "0.4.20" diff --git a/cli/src/cli_util.rs b/cli/src/cli_util.rs index 08f54cd23e2..8c12ed1948d 100644 --- a/cli/src/cli_util.rs +++ b/cli/src/cli_util.rs @@ -41,6 +41,7 @@ use jj_lib::git_backend::GitBackend; use jj_lib::gitignore::{GitIgnoreError, GitIgnoreFile}; use jj_lib::hex_util::to_reverse_hex; use jj_lib::id_prefix::IdPrefixContext; +use jj_lib::mailmap::{read_current_mailmap, Mailmap}; use jj_lib::matchers::Matcher; use jj_lib::merge::MergedTreeValue; use jj_lib::merged_tree::MergedTree; @@ -73,6 +74,7 @@ use jj_lib::workspace::{ }; use jj_lib::{dag_walk, fileset, git, op_heads_store, op_walk, revset}; use once_cell::unsync::OnceCell; +use pollster::FutureExt; use tracing::instrument; use tracing_chrome::ChromeLayerBuilder; use tracing_subscriber::prelude::*; @@ -712,6 +714,11 @@ impl WorkspaceCommandHelper { self.repo().view().get_wc_commit_id(self.workspace_id()) } + pub fn current_mailmap(&self) -> Result { + // TODO: Consider figuring out a caching strategy for this. + Ok(read_current_mailmap(self.repo().as_ref(), self.workspace.workspace_id()).block_on()?) + } + pub fn working_copy_shared_with_git(&self) -> bool { self.working_copy_shared_with_git } @@ -996,6 +1003,8 @@ impl WorkspaceCommandHelper { self.settings.user_email(), &self.revset_extensions, Some(workspace_context), + // TODO: Consider handling errors here. + Rc::new(self.current_mailmap().unwrap_or_default()), ) } diff --git a/cli/src/commit_templater.rs b/cli/src/commit_templater.rs index 4989c7ac3dd..b9d9a9ef9df 100644 --- a/cli/src/commit_templater.rs +++ b/cli/src/commit_templater.rs @@ -468,8 +468,14 @@ fn builtin_commit_methods<'repo>() -> CommitTemplateBuildMethodFnMap<'repo, Comm Ok(L::wrap_commit_list(out_property)) }, ); + map.insert("author", |language, _build_ctx, self_property, function| { + function.expect_no_arguments()?; + let mailmap = language.revset_parse_context.mailmap().clone(); + let out_property = self_property.map(move |commit| mailmap.author(&commit)); + Ok(L::wrap_signature(out_property)) + }); map.insert( - "author", + "author_raw", |_language, _build_ctx, self_property, function| { function.expect_no_arguments()?; let out_property = self_property.map(|commit| commit.author_raw().clone()); @@ -478,6 +484,15 @@ fn builtin_commit_methods<'repo>() -> CommitTemplateBuildMethodFnMap<'repo, Comm ); map.insert( "committer", + |language, _build_ctx, self_property, function| { + function.expect_no_arguments()?; + let mailmap = language.revset_parse_context.mailmap().clone(); + let out_property = self_property.map(move |commit| mailmap.committer(&commit)); + Ok(L::wrap_signature(out_property)) + }, + ); + map.insert( + "committer_raw", |_language, _build_ctx, self_property, function| { function.expect_no_arguments()?; let out_property = self_property.map(|commit| commit.committer_raw().clone()); @@ -486,8 +501,10 @@ fn builtin_commit_methods<'repo>() -> CommitTemplateBuildMethodFnMap<'repo, Comm ); map.insert("mine", |language, _build_ctx, self_property, function| { function.expect_no_arguments()?; + let mailmap = language.revset_parse_context.mailmap().clone(); let user_email = language.revset_parse_context.user_email().to_owned(); - let out_property = self_property.map(move |commit| commit.author_raw().email == user_email); + let out_property = + self_property.map(move |commit| mailmap.author(&commit).email == user_email); Ok(L::wrap_boolean(out_property)) }); map.insert( diff --git a/cli/tests/runner.rs b/cli/tests/runner.rs index 0a05d943f37..2fdbc959c8a 100644 --- a/cli/tests/runner.rs +++ b/cli/tests/runner.rs @@ -45,6 +45,7 @@ mod test_immutable_commits; mod test_init_command; mod test_interdiff_command; mod test_log_command; +mod test_mailmap; mod test_move_command; mod test_new_command; mod test_next_prev_commands; diff --git a/cli/tests/test_mailmap.rs b/cli/tests/test_mailmap.rs new file mode 100644 index 00000000000..242da5799aa --- /dev/null +++ b/cli/tests/test_mailmap.rs @@ -0,0 +1,168 @@ +// Copyright 2024 The Jujutsu Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::common::{get_stdout_string, TestEnvironment}; + +#[test] +fn test_mailmap() { + let test_env = TestEnvironment::default(); + test_env.jj_cmd_ok(test_env.env_root(), &["git", "init", "repo"]); + let repo_path = test_env.env_root().join("repo"); + + let mut mailmap = String::new(); + let mailmap_path = repo_path.join(".mailmap"); + let mut append_mailmap = move |extra| { + mailmap.push_str(extra); + std::fs::write(&mailmap_path, &mailmap).unwrap() + }; + + let run_as = |name: &str, email: &str, args: &[&str]| { + test_env + .jj_cmd(&repo_path, args) + .env("JJ_USER", name) + .env("JJ_EMAIL", email) + .assert() + .success() + }; + + append_mailmap("# test comment\n"); + + let stdout = test_env.jj_cmd_success(&repo_path, &["log", "-T", "author"]); + insta::assert_snapshot!(stdout, @r###" + @ Test User + ◉ + "###); + + // Map an email address without any name change. + run_as("Test Üser", "TeSt.UsEr@ExAmPlE.cOm", &["new"]); + append_mailmap(" \n"); + + let stdout = test_env.jj_cmd_success(&repo_path, &["log", "-T", "author"]); + insta::assert_snapshot!(stdout, @r###" + @ Test Üser + ◉ Test User + ◉ + "###); + + // Map an email address to a new name. + run_as("West User", "xest.user@example.com", &["new"]); + append_mailmap("Fest User \n"); + + let stdout = test_env.jj_cmd_success(&repo_path, &["log", "-T", "author"]); + insta::assert_snapshot!(stdout, @r###" + @ Fest User + ◉ Test Üser + ◉ Test User + ◉ + "###); + + // Map an email address to a new name and email address. + run_as("Pest User", "pest.user@example.com", &["new"]); + append_mailmap("Best User \n"); + + let stdout = test_env.jj_cmd_success(&repo_path, &["log", "-T", "author"]); + insta::assert_snapshot!(stdout, @r###" + @ Best User + ◉ Fest User + ◉ Test Üser + ◉ Test User + ◉ + "###); + + // Map an ambiguous email address using names for disambiguation. + run_as("Rest User", "user@test", &["new"]); + run_as("Vest User", "user@test", &["new"]); + append_mailmap( + &[ + "Jest User ReSt UsEr \n", + "Zest User vEsT uSeR \n", + ] + .concat(), + ); + + let stdout = test_env.jj_cmd_success(&repo_path, &["log", "-T", "author"]); + insta::assert_snapshot!(stdout, @r###" + @ Zest User + ◉ Jest User + ◉ Best User + ◉ Fest User + ◉ Test Üser + ◉ Test User + ◉ + "###); + + // The `.mailmap` file in the current workspace’s @ commit should be used. + let stdout = test_env.jj_cmd_success(&repo_path, &["log", "-T", "author", "--at-operation=@-"]); + insta::assert_snapshot!(stdout, @r###" + @ Vest User + ◉ Rest User + ◉ Best User + ◉ Fest User + ◉ Test Üser + ◉ Test User + ◉ + "###); + + // The `author(pattern)` revset function should find mapped committers. + let stdout = test_env.jj_cmd_success( + &repo_path, + &["log", "-T", "author", "-r", "author(substring-i:bEsT)"], + ); + insta::assert_snapshot!(stdout, @r###" + ◉ Best User + │ + ~ + "###); + + // The `author(pattern)` revset function should only search the mapped form. + // This matches Git’s behaviour and the principle of not surfacing raw + // signatures by default. + let stdout = + test_env.jj_cmd_success(&repo_path, &["log", "-T", "author", "-r", "author(pest)"]); + insta::assert_snapshot!(stdout, @r###" + "###); + + // The `author_raw(pattern)` revset function should search the unmapped + // commit data. + let stdout = test_env.jj_cmd_success( + &repo_path, + &["log", "-T", "author", "-r", "author_raw(\"user@test\")"], + ); + insta::assert_snapshot!(stdout, @r###" + @ Zest User + ◉ Jest User + │ + ~ + "###); + + // `mine()` should find commits that map to the current `user.email`. + let assert = run_as( + "Tëst Üser", + "tEsT.uSeR@eXaMpLe.NeT", + &["log", "-T", "author", "-r", "mine()"], + ); + insta::assert_snapshot!(get_stdout_string(&assert), @r###" + ◉ Test Üser + ◉ Test User + │ + ~ + "###); + + // `mine()` should only search the mapped author; this may be confusing in this + // case, but matches the semantics of it expanding to `author(‹user.email›)`. + let stdout: String = + test_env.jj_cmd_success(&repo_path, &["log", "-T", "author", "-r", "mine()"]); + insta::assert_snapshot!(stdout, @r###" + "###); +} diff --git a/cli/tests/test_revset_output.rs b/cli/tests/test_revset_output.rs index 766f84f6ef0..e8aca9aa371 100644 --- a/cli/tests/test_revset_output.rs +++ b/cli/tests/test_revset_output.rs @@ -290,7 +290,7 @@ fn test_function_name_hint() { | ^-----^ | = Function "author_" doesn't exist - Hint: Did you mean "author", "my_author"? + Hint: Did you mean "author", "author_raw", "my_author"? "###); insta::assert_snapshot!(evaluate_err("my_branches"), @r###" diff --git a/docs/revsets.md b/docs/revsets.md index 8bebbac8c96..6f746aa2779 100644 --- a/docs/revsets.md +++ b/docs/revsets.md @@ -251,12 +251,18 @@ revsets (expressions) as arguments. * `author(pattern)`: Commits with the author's name or email matching the given [string pattern](#string-patterns). +* `author_raw(pattern)`: Like `author(pattern)`, but ignoring any mappings in + the [`.mailmap` file](https://git-scm.com/docs/gitmailmap). + * `mine()`: Commits where the author's email matches the email of the current user. * `committer(pattern)`: Commits with the committer's name or email matching the given [string pattern](#string-patterns). +* `committer_raw(pattern)`: Like `committer(pattern)`, but ignoring any + mappings in the [`.mailmap` file](https://git-scm.com/docs/gitmailmap). + * `empty()`: Commits modifying no files. This also includes `merges()` without user modifications and `root()`. diff --git a/docs/templates.md b/docs/templates.md index 6317925514d..5b4e4415385 100644 --- a/docs/templates.md +++ b/docs/templates.md @@ -73,7 +73,11 @@ This type cannot be printed. The following methods are defined. * `commit_id() -> CommitId` * `parents() -> List` * `author() -> Signature` +* `author_raw() -> Signature`: Like `author()`, but ignoring any mappings in + the [`.mailmap` file](https://git-scm.com/docs/gitmailmap). * `committer() -> Signature` +* `committer_raw() -> Signature`: Like `committer()`, but ignoring any mappings + in the [`.mailmap` file](https://git-scm.com/docs/gitmailmap). * `mine() -> Boolean`: Commits where the author's email matches the email of the current user. * `working_copies() -> String`: For multi-workspace repository, indicate diff --git a/lib/Cargo.toml b/lib/Cargo.toml index a577d1d17bf..e836f0ae3b4 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -45,6 +45,8 @@ futures = { workspace = true } git2 = { workspace = true, optional = true } gix = { workspace = true, optional = true } gix-filter = { workspace = true, optional = true } +gix-actor = { workspace = true } +gix-mailmap = { workspace = true } glob = { workspace = true } hex = { workspace = true } ignore = { workspace = true } diff --git a/lib/src/commit.rs b/lib/src/commit.rs index 30ce536524b..00e3226b15c 100644 --- a/lib/src/commit.rs +++ b/lib/src/commit.rs @@ -146,11 +146,25 @@ impl Commit { } /// Returns the raw author signature from the commit data. + /// + /// **Note:** You usually **should not** directly process or display this + /// information before canonicalizing it. Prefer + /// [`Mailmap::author`][`crate::mailmap::Mailmap::author`] unless you + /// care specficially about the potentially‐outdated immutable commit data, + /// or are performing low‐level operations in a context that can’t obtain a + /// [`Mailmap`][`crate::mailmap::Mailmap`]. pub fn author_raw(&self) -> &Signature { &self.data.author } /// Returns the raw committer signature from the commit data. + /// + /// **Note:** You usually **should not** directly process or display this + /// information before canonicalizing it. Prefer + /// [`Mailmap::committer`][`crate::mailmap::Mailmap::committer`] unless you + /// care specficially about the potentially‐outdated immutable commit + /// data, or are performing low‐level operations in a context that can’t + /// obtain a [`Mailmap`][`crate::mailmap::Mailmap`]. pub fn committer_raw(&self) -> &Signature { &self.data.committer } diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 149956ad2ba..160b9fb4dbf 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -57,6 +57,7 @@ pub mod index; pub mod local_backend; pub mod local_working_copy; pub mod lock; +pub mod mailmap; pub mod matchers; pub mod merge; pub mod merged_tree; diff --git a/lib/src/mailmap.rs b/lib/src/mailmap.rs new file mode 100644 index 00000000000..ea6a377aee9 --- /dev/null +++ b/lib/src/mailmap.rs @@ -0,0 +1,165 @@ +// Copyright 2024 The Jujutsu Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Support for `.mailmap` files. + +use std::borrow::Cow; +use std::fmt::Debug; +use std::io::{self, Read}; + +use crate::backend::{BackendError, Signature}; +use crate::commit::Commit; +use crate::conflicts::{materialize_tree_value, MaterializedTreeValue}; +use crate::op_store::WorkspaceId; +use crate::repo::Repo; +use crate::repo_path::RepoPath; + +/// Models a `.mailmap` file, mapping email addresses and names to +/// canonical ones. +/// +/// The syntax and semantics are as described in +/// [`gitmailmap(5)`](https://git-scm.com/docs/gitmailmap). +/// +/// You can obtain the currently‐applicable [`Mailmap`] using +/// [`read_current_mailmap`]. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct Mailmap(gix_mailmap::Snapshot); + +/// Models a single replacement in a `.mailmap` file, containing an old email +/// and optionally an old name, and a new name and/or email to map them to. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Entry<'a> { + old_email: Cow<'a, str>, + old_name: Option>, + new_email: Option>, + new_name: Option>, +} + +impl Mailmap { + /// Constructs a new, empty `Mailmap`. + /// + /// Equivalent to [`Default::default()`], but makes the intent more + /// explicit. + /// + /// An empty `Mailmap` does not use any allocations, and an absent + /// `.mailmap` file is semantically equivalent to an empty one, so there is + /// usually no need to use `Option`. + pub fn empty() -> Self { + Default::default() + } + + /// Parses a `.mailmap` file, ignoring parse errors. + pub fn from_bytes(bytes: &[u8]) -> Self { + Self(gix_mailmap::Snapshot::from_bytes(bytes)) + } + + /// Reads and parses a `.mailmap` file from a reader, ignoring parse errors. + pub fn from_reader(reader: &mut R) -> io::Result { + let mut bytes = Vec::new(); + reader.read_to_end(&mut bytes)?; + Ok(Self::from_bytes(&bytes)) + } + + /// Returns the canonical signature corresponding to `raw_signature`. + /// The timestamp is left untouched. Signatures with no corresponding entry + /// are returned as‐is. + pub fn resolve(&self, raw_signature: &Signature) -> Signature { + let result = self.0.try_resolve(gix_actor::SignatureRef { + name: raw_signature.name.as_bytes().into(), + email: raw_signature.email.as_bytes().into(), + time: Default::default(), + }); + match result { + Some(canonical_signature) => Signature { + name: String::from_utf8_lossy(&canonical_signature.name).into_owned(), + email: String::from_utf8_lossy(&canonical_signature.email).into_owned(), + timestamp: raw_signature.timestamp.clone(), + }, + None => raw_signature.clone(), + } + } + + /// Returns the canonical author signature of `commit`. + pub fn author(&self, commit: &Commit) -> Signature { + self.resolve(commit.author_raw()) + } + + /// Returns the canonical committer signature of `commit`. + pub fn committer(&self, commit: &Commit) -> Signature { + self.resolve(commit.committer_raw()) + } + + /// Returns an iterator over the entries. + pub fn iter(&self) -> impl Iterator { + self.0.iter().map(|entry| Entry { + old_email: String::from_utf8_lossy(entry.old_email()), + old_name: entry.old_name().map(|s| String::from_utf8_lossy(s)), + new_email: entry.new_email().map(|s| String::from_utf8_lossy(s)), + new_name: entry.new_name().map(|s| String::from_utf8_lossy(s)), + }) + } +} + +impl<'a> Entry<'a> { + /// Returns the old email address to match against. + pub fn old_email(&self) -> &str { + &self.old_email + } + + /// Returns the old name to match against, if present. + pub fn old_name(&self) -> Option<&str> { + self.old_name.as_deref() + } + + /// Returns the canonical replacement email, if present. + pub fn new_email(&self) -> Option<&str> { + self.new_email.as_deref() + } + + /// Returns the canonical replacement name, if present. + pub fn new_name(&self) -> Option<&str> { + self.new_name.as_deref() + } +} + +/// Reads and parses the `.mailmap` file from the working‐copy commit of the +/// specified workspace. An absent `.mailmap` is treated the same way as an +/// empty file. Parse errors when reading the file are ignored, but the rest of +/// the file will still be processed. +pub async fn read_current_mailmap( + repo: &dyn Repo, + workspace_id: &WorkspaceId, +) -> Result { + let Some(commit_id) = repo.view().get_wc_commit_id(workspace_id) else { + return Ok(Mailmap::empty()); + }; + let commit = repo.store().get_commit(commit_id)?; + let tree = commit.tree()?; + let path = RepoPath::from_internal_string(".mailmap"); + let value = tree.path_value(path)?; + // We ignore symbolic links, as per `gitmailmap(5)`. + // + // TODO: Figure out how conflicts should be handled here. + // TODO: Should `MaterializedTreeValue::AccessDenied` be handled somehow? + let MaterializedTreeValue::File { mut reader, id, .. } = + materialize_tree_value(repo.store(), path, value).await? + else { + return Ok(Mailmap::empty()); + }; + Mailmap::from_reader(&mut reader).map_err(|err| BackendError::ReadFile { + path: path.to_owned(), + id, + source: err.into(), + }) +} diff --git a/lib/src/revset.rs b/lib/src/revset.rs index 83432c21b79..c82658c0a53 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -33,6 +33,7 @@ use crate::fileset::{FilePattern, FilesetExpression}; use crate::graph::GraphEdge; use crate::hex_util::to_forward_hex; use crate::id_prefix::IdPrefixContext; +use crate::mailmap::Mailmap; use crate::object_id::{HexPrefix, PrefixResolution}; use crate::op_store::WorkspaceId; use crate::repo::Repo; @@ -437,64 +438,162 @@ impl RevsetExpression { Rc::new(RevsetExpression::Difference(self.clone(), other.clone())) } + /// Commits that are in `self` but not in any of the `others`. + pub fn minus_all( + self: &Rc, + others: &[Rc], + ) -> Rc { + if others.is_empty() { + self.clone() + } else { + RevsetExpression::minus(self, &RevsetExpression::union_all(others)) + } + } + /// Internal helper for matching on signature fields. fn signature_field( - predicate: fn(SignatureField, StringPattern) -> RevsetFilterPredicate, + raw_predicate: fn(SignatureField, StringPattern) -> RevsetFilterPredicate, field: SignatureField, pattern: StringPattern, + mailmap: &Mailmap, ) -> Rc { - RevsetExpression::filter(predicate(field, pattern)) + // Matching against a canonical signature field is the same as matching against + // the corresponding raw signature field except: + // + // * field values that may match but are mapped to a value that does not should + // be excluded; and + // + // * field values that may not match but are mapped to a value that does should + // be included. + // + // We build the expression `(raw_field(pattern) - excludes) | includes`, where: + // + // excludes = union_all { + // entry : entry ∈ mailmap, + // (old field absent | old field matches) & + // (new field present & new field doesn’t match) + // } + // + // includes = union_all { + // entry : entry ∈ mailmap, + // (old field absent | old field doesn’t match) & + // (new field present & new field matches) + // } + // + // When matching raw signatures, the `Mailmap` is empty and this reduces to + // `raw_field(pattern)`. + + // The first term is a placeholder for later replacement. + let mut includes = vec![RevsetExpression::none()]; + let mut excludes = vec![]; + + let pattern_case_insensitive = pattern.to_case_insensitive(); + + for entry in mailmap.iter() { + let make_old_signature_expr = || { + // `.mailmap` entries are always matched case‐insensitively, per + // `gitmailmap(5)`. + let email_expr: Rc = RevsetExpression::filter(raw_predicate( + SignatureField::Email, + StringPattern::exact_i(entry.old_email()), + )); + if let Some(name) = entry.old_name() { + email_expr.filtered(raw_predicate( + SignatureField::Name, + StringPattern::exact_i(name), + )) + } else { + email_expr + } + }; + + let maybe_new_field_value = match field { + SignatureField::Name => entry.new_name(), + SignatureField::Email => entry.new_email(), + }; + let Some(new_field_value) = maybe_new_field_value else { + continue; + }; + let old_field_value = match field { + SignatureField::Name => entry.old_name(), + SignatureField::Email => Some(entry.old_email()), + }; + + // We use `pattern_case_insensitive` for the old field, to respect the + // case‐insensitivity of `.mailmap` entries, but `pattern` for the new field, to + // determine whether the replacement would actually match. + let old_field_matches = + old_field_value.map(|field_value| pattern_case_insensitive.matches(field_value)); + let new_field_matches = pattern.matches(new_field_value); + + if !new_field_matches && old_field_matches.unwrap_or(true) { + excludes.push(make_old_signature_expr()); + } else if new_field_matches && !old_field_matches.unwrap_or(false) { + includes.push(make_old_signature_expr()); + } + } + + includes[0] = RevsetExpression::minus_all( + &RevsetExpression::filter(raw_predicate(field, pattern)), + &excludes, + ); + + RevsetExpression::union_all(&includes) } /// Commits with author name matching the pattern. - pub fn author_name(pattern: StringPattern) -> Rc { + pub fn author_name(pattern: StringPattern, mailmap: &Mailmap) -> Rc { RevsetExpression::signature_field( RevsetFilterPredicate::AuthorRaw, SignatureField::Name, pattern, + mailmap, ) } /// Commits with author email matching the pattern. - pub fn author_email(pattern: StringPattern) -> Rc { + pub fn author_email(pattern: StringPattern, mailmap: &Mailmap) -> Rc { RevsetExpression::signature_field( RevsetFilterPredicate::AuthorRaw, SignatureField::Email, pattern, + mailmap, ) } /// Commits with author name or email matching the pattern. - pub fn author(pattern: StringPattern) -> Rc { + pub fn author(pattern: StringPattern, mailmap: &Mailmap) -> Rc { RevsetExpression::union( - &RevsetExpression::author_name(pattern.clone()), - &RevsetExpression::author_email(pattern), + &RevsetExpression::author_name(pattern.clone(), mailmap), + &RevsetExpression::author_email(pattern, mailmap), ) } /// Commits with committer name matching the pattern. - pub fn committer_name(pattern: StringPattern) -> Rc { + pub fn committer_name(pattern: StringPattern, mailmap: &Mailmap) -> Rc { RevsetExpression::signature_field( RevsetFilterPredicate::CommitterRaw, SignatureField::Name, pattern, + mailmap, ) } /// Commits with committer email matching the pattern. - pub fn committer_email(pattern: StringPattern) -> Rc { + pub fn committer_email(pattern: StringPattern, mailmap: &Mailmap) -> Rc { RevsetExpression::signature_field( RevsetFilterPredicate::CommitterRaw, SignatureField::Email, pattern, + mailmap, ) } /// Commits with committer name or email matching the pattern. - pub fn committer(pattern: StringPattern) -> Rc { + pub fn committer(pattern: StringPattern, mailmap: &Mailmap) -> Rc { RevsetExpression::union( - &RevsetExpression::committer_name(pattern.clone()), - &RevsetExpression::committer_email(pattern), + &RevsetExpression::committer_name(pattern.clone(), mailmap), + &RevsetExpression::committer_email(pattern, mailmap), ) } @@ -745,24 +844,35 @@ static BUILTIN_FUNCTION_MAP: Lazy> = Lazy: RevsetFilterPredicate::Description(pattern), )) }); - map.insert("author", |function, _context| { + map.insert("author", |function, context| { + let [arg] = function.expect_exact_arguments()?; + let pattern = expect_string_pattern(arg)?; + Ok(RevsetExpression::author(pattern, context.mailmap())) + }); + map.insert("author_raw", |function, _context| { let [arg] = function.expect_exact_arguments()?; let pattern = expect_string_pattern(arg)?; - Ok(RevsetExpression::author(pattern)) + Ok(RevsetExpression::author(pattern, &Mailmap::empty())) }); map.insert("mine", |function, context| { function.expect_no_arguments()?; // Email address domains are inherently case‐insensitive, and the local‐parts // are generally (although not universally) treated as case‐insensitive too, so // we use a case‐insensitive match here. - Ok(RevsetExpression::author_email(StringPattern::exact_i( - &context.user_email, - ))) + Ok(RevsetExpression::author_email( + StringPattern::exact_i(&context.user_email), + context.mailmap(), + )) }); - map.insert("committer", |function, _context| { + map.insert("committer", |function, context| { let [arg] = function.expect_exact_arguments()?; let pattern = expect_string_pattern(arg)?; - Ok(RevsetExpression::committer(pattern)) + Ok(RevsetExpression::committer(pattern, context.mailmap())) + }); + map.insert("committer_raw", |function, _context| { + let [arg] = function.expect_exact_arguments()?; + let pattern = expect_string_pattern(arg)?; + Ok(RevsetExpression::committer(pattern, &Mailmap::empty())) }); map.insert("empty", |function, _context| { function.expect_no_arguments()?; @@ -2045,6 +2155,7 @@ pub struct RevsetParseContext<'a> { user_email: String, extensions: &'a RevsetExtensions, workspace: Option>, + mailmap: Rc, } impl<'a> RevsetParseContext<'a> { @@ -2053,12 +2164,14 @@ impl<'a> RevsetParseContext<'a> { user_email: String, extensions: &'a RevsetExtensions, workspace: Option>, + mailmap: Rc, ) -> Self { Self { aliases_map, user_email, extensions, workspace, + mailmap, } } @@ -2073,6 +2186,10 @@ impl<'a> RevsetParseContext<'a> { pub fn symbol_resolvers(&self) -> &[impl AsRef] { self.extensions.symbol_resolvers() } + + pub fn mailmap(&self) -> &Mailmap { + &self.mailmap + } } /// Workspace information needed to parse revset expression. @@ -2115,6 +2232,7 @@ mod tests { "test.user@example.com".to_string(), &extensions, None, + Rc::new(Mailmap::empty()), ); super::parse(revset_str, &context) } @@ -2143,6 +2261,7 @@ mod tests { "test.user@example.com".to_string(), &extensions, Some(workspace_ctx), + Rc::new(Mailmap::empty()), ); super::parse(revset_str, &context) } @@ -2167,6 +2286,7 @@ mod tests { "test.user@example.com".to_string(), &extensions, None, + Rc::new(Mailmap::empty()), ); super::parse_with_modifier(revset_str, &context) } diff --git a/lib/src/str_util.rs b/lib/src/str_util.rs index 90f70d117c5..7a57376a2b3 100644 --- a/lib/src/str_util.rs +++ b/lib/src/str_util.rs @@ -174,6 +174,16 @@ impl StringPattern { } } + /// Returns a version of this pattern that matches case‐insensitively. + pub fn to_case_insensitive(&self) -> Cow<'_, Self> { + match self { + StringPattern::Exact(literal) => Cow::Owned(StringPattern::exact_i(literal)), + StringPattern::Substring(needle) => Cow::Owned(StringPattern::substring_i(needle)), + StringPattern::Glob(pattern) => Cow::Owned(StringPattern::GlobI(pattern.clone())), + _ => Cow::Borrowed(self), + } + } + /// Returns true if this pattern matches the `haystack`. /// /// When matching against a case‐insensitive pattern, only ASCII case diff --git a/lib/tests/test_revset.rs b/lib/tests/test_revset.rs index 86ca9260d02..8e99b2e1472 100644 --- a/lib/tests/test_revset.rs +++ b/lib/tests/test_revset.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::path::Path; +use std::rc::Rc; use assert_matches::assert_matches; use itertools::Itertools; @@ -22,6 +23,7 @@ use jj_lib::fileset::FilesetExpression; use jj_lib::git; use jj_lib::git_backend::GitBackend; use jj_lib::graph::{GraphEdge, ReverseGraphIterator}; +use jj_lib::mailmap::{read_current_mailmap, Mailmap}; use jj_lib::object_id::ObjectId; use jj_lib::op_store::{RefTarget, RemoteRef, RemoteRefState, WorkspaceId}; use jj_lib::repo::Repo; @@ -33,6 +35,7 @@ use jj_lib::revset::{ }; use jj_lib::settings::GitSettings; use jj_lib::workspace::Workspace; +use pollster::FutureExt; use test_case::test_case; use testutils::{ create_random_commit, create_tree, write_random_commit, CommitGraphBuilder, TestRepo, @@ -45,7 +48,13 @@ fn resolve_symbol_with_extensions( symbol: &str, ) -> Result, RevsetResolutionError> { let aliases_map = RevsetAliasesMap::default(); - let context = RevsetParseContext::new(&aliases_map, String::new(), extensions, None); + let context = RevsetParseContext::new( + &aliases_map, + String::new(), + extensions, + None, + Rc::new(Mailmap::empty()), + ); let expression = parse(symbol, &context).unwrap(); assert_matches!(*expression, RevsetExpression::CommitRef(_)); let symbol_resolver = DefaultSymbolResolver::new(repo, extensions.symbol_resolvers()); @@ -179,7 +188,13 @@ fn test_resolve_symbol_commit_id() { ); let aliases_map = RevsetAliasesMap::default(); let extensions = RevsetExtensions::default(); - let context = RevsetParseContext::new(&aliases_map, settings.user_email(), &extensions, None); + let context = RevsetParseContext::new( + &aliases_map, + settings.user_email(), + &extensions, + None, + Rc::new(Mailmap::empty()), + ); assert_matches!( optimize(parse("present(04)", &context).unwrap()).resolve_user_expression(repo.as_ref(), &symbol_resolver), Err(RevsetResolutionError::AmbiguousCommitIdPrefix(s)) if s == "04" @@ -830,7 +845,11 @@ fn test_resolve_symbol_git_refs() { ); } -fn resolve_commit_ids(repo: &dyn Repo, revset_str: &str) -> Vec { +fn resolve_commit_ids_with_mailmap( + repo: &dyn Repo, + mailmap_source: &str, + revset_str: &str, +) -> Vec { let settings = testutils::user_settings(); let aliases_map = RevsetAliasesMap::default(); let revset_extensions = RevsetExtensions::default(); @@ -839,6 +858,7 @@ fn resolve_commit_ids(repo: &dyn Repo, revset_str: &str) -> Vec { settings.user_email(), &revset_extensions, None, + Rc::new(Mailmap::from_bytes(mailmap_source.as_bytes())), ); let expression = optimize(parse(revset_str, &context).unwrap()); let symbol_resolver = DefaultSymbolResolver::new(repo, revset_extensions.symbol_resolvers()); @@ -848,6 +868,10 @@ fn resolve_commit_ids(repo: &dyn Repo, revset_str: &str) -> Vec { expression.evaluate(repo).unwrap().iter().collect() } +fn resolve_commit_ids(repo: &dyn Repo, revset_str: &str) -> Vec { + resolve_commit_ids_with_mailmap(repo, "", revset_str) +} + fn resolve_commit_ids_in_workspace( repo: &dyn Repo, revset_str: &str, @@ -865,11 +889,15 @@ fn resolve_commit_ids_in_workspace( }; let aliases_map = RevsetAliasesMap::default(); let extensions = RevsetExtensions::default(); + let mailmap = read_current_mailmap(repo, workspace_ctx.workspace_id) + .block_on() + .unwrap(); let context = RevsetParseContext::new( &aliases_map, settings.user_email(), &extensions, Some(workspace_ctx), + Rc::new(mailmap), ); let expression = optimize(parse(revset_str, &context).unwrap()); let symbol_resolver = @@ -2424,6 +2452,20 @@ fn test_evaluate_expression_author() { ), vec![commit3.id().clone(), commit1.id().clone()] ); + // Signatures are treated as their mailmapped forms + let mailmap = "nameone "; + assert_eq!( + resolve_commit_ids_with_mailmap(mut_repo, mailmap, "author(\"nameone\")"), + vec![commit1.id().clone()] + ); + assert_eq!( + resolve_commit_ids_with_mailmap(mut_repo, mailmap, "author(\"name1\")"), + vec![] + ); + assert_eq!( + resolve_commit_ids_with_mailmap(mut_repo, mailmap, "author_raw(\"name1\")"), + vec![commit1.id().clone()] + ); } #[test] @@ -2493,6 +2535,16 @@ fn test_evaluate_expression_mine() { commit1.id().clone() ] ); + // Signatures are treated as their mailmapped forms + let user_email = settings.user_email(); + assert_eq!( + resolve_commit_ids_with_mailmap( + mut_repo, + format!("<{user_email}> \n name2 <{user_email}>").as_ref(), + "mine()" + ), + vec![commit3.id().clone(), commit1.id().clone()] + ); } #[test] @@ -2567,6 +2619,20 @@ fn test_evaluate_expression_committer() { resolve_commit_ids(mut_repo, "visible_heads() & committer(\"name2\")"), vec![] ); + // Signatures are treated as their mailmapped forms + let mailmap = "nameone "; + assert_eq!( + resolve_commit_ids_with_mailmap(mut_repo, mailmap, "committer(\"nameone\")"), + vec![commit1.id().clone()] + ); + assert_eq!( + resolve_commit_ids_with_mailmap(mut_repo, mailmap, "committer(\"name1\")"), + vec![] + ); + assert_eq!( + resolve_commit_ids_with_mailmap(mut_repo, mailmap, "committer_raw(\"name1\")"), + vec![commit1.id().clone()] + ); } #[test]