From e41fca19b1f8f5e9afebbd6b4417c9f00341d798 Mon Sep 17 00:00:00 2001 From: Austin Seipp Date: Thu, 18 Jan 2024 00:35:09 -0600 Subject: [PATCH] cli: basic `jj gerrit mail` implementation This implements the most basic workflow for submitting changes to Gerrit, through a verb called 'mail'. The idea is to 'mail in' changes to be read by another human. This name is taken from the dictionary used by Piper/CitC at Google, and intended to be distinct from the word 'submit', which for Gerrit means 'merge a change into the repository.' Given a list of revsets (specified by multiple `-r` options), this will parse the footers of every commit, collect them, insert a `Change-Id` (if one doesn't already exist), and then push them into the given remote. Because the argument is a revset, you may submit entire trees of changes at once, including multiple trees of independent changes, e.g. jj gerrit mail -r foo:: -r baz:: There are many other improvements that can be applied on top of this, including a ton of consistency and "does this make sense?" checks. However, it is flexible and a good starting point, and you can in fact both submit and cycle reviews with this interface. Signed-off-by: Austin Seipp --- cli/src/commands/gerrit.rs | 394 +++++++++++++++++++++++++++++++++++++ cli/src/commands/mod.rs | 4 + lib/src/footer.rs | 5 +- 3 files changed, 401 insertions(+), 2 deletions(-) create mode 100644 cli/src/commands/gerrit.rs diff --git a/cli/src/commands/gerrit.rs b/cli/src/commands/gerrit.rs new file mode 100644 index 00000000000..35facd643e3 --- /dev/null +++ b/cli/src/commands/gerrit.rs @@ -0,0 +1,394 @@ +// Copyright 2024 The Jujutsu Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Debug; +use std::io::Write; +use std::str::FromStr; + +use clap::Subcommand; +use hex::ToHex; +use indexmap::{IndexMap, IndexSet}; +use jj_lib::commit::Commit; +use jj_lib::content_hash::blake2b_hash; +use jj_lib::footer::get_footer_lines; +use jj_lib::git::{self, GitRefUpdate}; +use jj_lib::hex_util::to_reverse_hex; +use jj_lib::object_id::ObjectId; +use jj_lib::repo::Repo; + +use crate::cli_util::{ + resolve_multiple_nonempty_revsets, short_commit_hash, user_error, CommandError, CommandHelper, + RemoteBranchNamePattern, RevisionArg, +}; +use crate::git_util::{get_git_repo, with_remote_git_callbacks}; +use crate::ui::Ui; + +/// Interact with Gerrit Code Review. +#[derive(Subcommand, Clone, Debug)] +pub enum GerritCommand { + /// "Mail in" a revision to Gerrit; this is the primary way to create + /// changes or update changes for code review. + /// + /// This command modifies each commit in the revset to include a `Change-Id` + /// footer in its commit message if one does not already exist. This ID is + /// NOT compatible with jj IDs, and is a Gerrit-specific ID. + /// + /// Note: this command takes a revset as an argument, and will mail in all + /// revisions in the revset to Gerrit appropriately, so you may post trees + /// or ranges of commits to Gerrit for review. + /// + /// Note: this command does not actually send an email or use a Mail + /// Transfer Agent, but rather uploads the revision to Gerrit for review + /// through underlying Git logic. The term "mail" is used to disambiguate + /// from the word "submit", which is what Gerrit calls "merging an approved + /// change into a target branch", and is used to refer to the concept of + /// "mailing in" something to be read by someone else. + Mail(MailArgs), +} + +pub fn cmd_gerrit( + ui: &mut Ui, + command: &CommandHelper, + subcommand: &GerritCommand, +) -> Result<(), CommandError> { + match subcommand { + GerritCommand::Mail(review) => cmd_mail(ui, command, review), + } +} + +#[derive(clap::Args, Clone, Debug)] +pub struct MailArgs { + /// The revset, selecting which commits are mailed in to Gerrit. This can be + /// any arbitrary set of commits; they will be modified to include a + /// `Change-Id` footer if one does not already exist, and then mailed off to + /// Gerrit for review. + #[arg(long, short = 'r')] + revision: Vec, + + /// The location where your changes are intended to land. This MUST be a + /// reference of the form `branch@remote`, where `branch` is the name of + /// the branch that the change will be submitted to, and `remote` is the + /// remote to push the changes to. + /// + /// The specified `remote` MUST be a valid Git push URL for your Gerrit + /// instance — typically a `ssh://` URL. + #[arg(long = "for", short = 'f', default_value = "main@origin")] + for_: String, + + /// If true, do not actually add `Change-Id`s to commits, and do not push + /// the changes to Gerrit. + #[arg(long = "dry-run", short = 'n')] + dry_run: bool, +} + +pub fn cmd_mail(ui: &mut Ui, command: &CommandHelper, mail: &MailArgs) -> Result<(), CommandError> { + let mut workspace_command = command.workspace_helper(ui)?; + + let to_mail: IndexSet = + resolve_multiple_nonempty_revsets(&mail.revision, &workspace_command, ui)?; + if to_mail + .iter() + .any(|commit| commit.id() == workspace_command.repo().store().root_commit_id()) + { + return Err(user_error("Cannot mail the root commit")); + } + + workspace_command.check_rewritable(to_mail.iter())?; + + let mut tx = workspace_command.start_transaction(); + let base_repo = tx.base_repo().clone(); + let mut_repo = tx.mut_repo(); + let store = base_repo.store(); + let git_repo = get_git_repo(store)?; // will fail if not a git repo + + let remote_pattern = RemoteBranchNamePattern::from_str(&mail.for_).map_err(|err| { + return user_error(err.to_string()); + })?; + + if !remote_pattern.is_exact() { + return Err(user_error(format!( + "The --for target '{}' is not exactly of the form 'branch@remote'", + remote_pattern + ))); + } + + let (for_branch, for_remote) = ( + remote_pattern.branch.as_exact().unwrap(), + remote_pattern.remote.as_exact().unwrap(), + ); + + if !git_repo + .remotes()? + .iter() + .any(|remote| remote == Some(&for_remote)) + { + return Err(user_error(format!( + "The (Gerrit) remote '{}' does not exist", + for_remote + ))); + } + + // immediately error and reject any discardable commits, i.e. the + // the empty wcc + for commit in to_mail.iter() { + if commit.is_discardable() { + return Err(user_error(format!( + "Refusing to mail in commit {} because it is an empty commit with no \ + description\n(use 'jj amend' to add a description, or 'jj abandon' to discard it)", + short_commit_hash(commit.id()) + ))); + } + } + + // the mapping is from old -> [new, is_dry_run]; the dry_run flag is used to + // disambiguate a later case when printing errors, so we know that if a + // commit was mapped to itself, it was because --dry-run was set, and not + // because e.g. it had an existing change id already + let mut old_to_new: IndexMap = IndexMap::new(); + for commit_id in base_repo + .index() + .topo_order(&mut to_mail.iter().map(|c| c.id())) + .into_iter() + { + let original_commit = store.get_commit(&commit_id).unwrap(); + let description = original_commit.description().to_owned(); + let footer = get_footer_lines(&description); + + if let Some(footer) = footer.clone() { + // look up the existing change id footer + let change_id = footer.iter().find(|(key, _)| key == &"Change-Id"); + if let Some((_, values)) = change_id { + // map the old commit to itself + old_to_new.insert(original_commit.clone(), (original_commit.clone(), false)); + + // multiple change-ids are not allowed + if values.len() > 1 { + writeln!( + ui.warning(), + "warning: multiple Change-Id footers in commit {}", + short_commit_hash(original_commit.clone().id()), + )?; + continue; + } + + // check the change-id format is correct in any case + if values[0].len() != 41 || !values[0].starts_with('I') { + writeln!( + ui.warning(), + "warning: invalid Change-Id footer in commit {}", + short_commit_hash(original_commit.id()), + )?; + continue; + } + + // XXX (aseipp): should we rewrite these invalid Change-Ids? i + // don't think so, but I don't know what gerrit will do with + // them, and I realized my old signoff.sh script created invalid + // ones, so this is a helpful barrier. + + continue; // fallthrough + } + } + + if mail.dry_run { + // mark the old commit as rewritten to itself, but only because it + // was a --dry-run, so we can give better error messages later + old_to_new.insert(original_commit.clone(), (original_commit.clone(), true)); + continue; + } + + // NOTE: Gerrit's change ID is not compatible with the alphabet used by + // jj, and the needed length of the change-id is different as well. + // + // for us, we convert to gerrit's format is the character 'I', followed + // by 40 characters of the truncated blake2 hash of the change id + let change_id = to_reverse_hex(&original_commit.change_id().hex()).unwrap(); + let hashed_id: String = blake2b_hash(&change_id).encode_hex(); + let gerrit_change_id = format!("I{}", hashed_id.chars().take(40).collect::()); + + // XXX (aseipp): move this description junk for rewriting the description to + // footer.rs; improves reusability and makes things a little cleaner + let spacing = if let Some(footer) = footer { + if footer.is_empty() { + "\n\n" + } else { + "\n" + } + } else { + "\n\n" + }; + + let new_description = format!( + "{}{}Change-Id: {}\n", + description.trim(), + spacing, + gerrit_change_id + ); + + // rewrite the set of parents to point to the commits that were + // previously rewritten in toposort order + // + // TODO FIXME (aseipp): this whole dance with toposorting, calculating + // new_parents, and then doing rewrite_commit is roughly equivalent to + // what we do in duplicate.rs as well. we should probably refactor this? + let new_parents = original_commit + .parents() + .iter() + .map(|parent| { + if let Some((rewritten_parent, _)) = old_to_new.get(parent) { + rewritten_parent + } else { + parent + } + .id() + .clone() + }) + .collect(); + + let new_commit = mut_repo + .rewrite_commit(command.settings(), &original_commit) + .set_description(new_description) + .set_parents(new_parents) + .write()?; + old_to_new.insert(original_commit.clone(), (new_commit.clone(), false)); + } + + tx.finish( + ui, + format!( + "describing {} commit(s) for mail-in to gerrit", + old_to_new.len() + ), + )?; + + // XXX (aseipp): is this transaction safe to leave open? should it record a + // push instead in the op log, even if it can't be meaningfully undone? + let tx = workspace_command.start_transaction(); + let base_repo = tx.base_repo().clone(); + + // NOTE(aseipp): write the status report *after* finishing the first + // transaction. until we call 'tx.finish', the outstanding tx write set + // contains a commit with a duplicated jj change-id, i.e. while the + // transaction is open, it is ambiguous whether the change-id refers to the + // newly written commit or the old one that already existed. + // + // this causes an awkward UX interaction, where write_commit_summary will + // output a line with a red change-id indicating it's duplicated/conflicted, + // AKA "??" status. but then the user will immediately run 'jj log' and not + // see any conflicting change-ids, because the transaction was committed by + // then and the new commits replaced the old ones! just printing this after + // the transaction finishes avoids this weird case. + // + // XXX (aseipp): ask martin for feedback + for (old, (new, is_dry)) in old_to_new.iter() { + if old != new { + write!(ui.stderr(), "Added Change-Id footer to ")?; + } else if *is_dry { + write!(ui.stderr(), "Dry-run: would have added Change-Id to ")?; + } else { + write!(ui.stderr(), "Skipped Change-Id (it already exists) for ")?; + } + tx.write_commit_summary(ui.stderr_formatter().as_mut(), new)?; + writeln!(ui.stderr())?; + } + writeln!(ui.stderr())?; + + let new_commits = old_to_new.values().map(|x| &x.0).collect::>(); + let new_heads = base_repo + .index() + .heads(&mut new_commits.iter().map(|c| c.id())); + let remote_ref = format!("refs/for/{}", for_branch); + + writeln!( + ui.stderr(), + "Found {} heads to push to Gerrit (remote '{}'), target branch '{}'", + new_heads.len(), + for_remote, + for_branch, + )?; + + // split these two loops to keep the output a little nicer; display first, + // then push + for head in &new_heads { + let head_commit = store.get_commit(head).unwrap(); + + write!(ui.stderr(), " ")?; + tx.write_commit_summary(ui.stderr_formatter().as_mut(), &head_commit)?; + writeln!(ui.stderr())?; + } + writeln!(ui.stderr())?; + + if mail.dry_run { + writeln!( + ui.stderr(), + "Dry-run: Not performing push, as `--dry-run` was requested" + )?; + return Ok(()); + } + + // NOTE (aseipp): because we are pushing everything to the same remote ref, + // we have to loop and push each commit one at a time, even though + // push_updates in theory supports multiple GitRefUpdates at once, because + // we obviously can't push multiple heads to the same ref. + for head in &new_heads { + let head_commit = store.get_commit(head).unwrap(); + let head_id = head_commit.id().clone(); + + write!(ui.stderr(), "Pushing ")?; + tx.write_commit_summary(ui.stderr_formatter().as_mut(), &head_commit)?; + writeln!(ui.stderr())?; + + // XXX (aseipp): for yuya -- is this roughly correct? i don't think we + // want to record or update jj's state on the remote refs when we push, + // since it's special and synthetic ref in the first place. + // + // how do we get better errors from the remote? 'git push' tells us + // about rejected refs AND ALSO '(nothing changed)' when there are no + // changes to push, but we don't get that here. RefUpdateRejected might + // need more context, idk. is this a libgit2 problem? + with_remote_git_callbacks(ui, |cb| { + git::push_updates( + &git_repo, + &for_remote, + &[GitRefUpdate { + qualified_name: remote_ref.clone(), + force: false, + new_target: Some(head_id), + }], + cb, + ) + }) + .map_or_else( + |err| match err { + git::GitPushError::RefUpdateRejected(_) => { + // gerrit rejects ref updates when there are no changes, i.e. + // you submit a change that is already up to date. just give + // the user a light warning and carry on + writeln!( + ui.warning(), + "warning: ref update rejected by gerrit; no changes to push (did you \ + forget to update, amend, or add new changes?)" + )?; + + Ok(()) + } + // XXX (aseipp): more cases to handle here? + _ => Err(user_error(err.to_string())), + }, + Ok, + )?; + } + + Ok(()) +} diff --git a/cli/src/commands/mod.rs b/cli/src/commands/mod.rs index 31a443a68b9..1730e1a41e1 100644 --- a/cli/src/commands/mod.rs +++ b/cli/src/commands/mod.rs @@ -29,6 +29,7 @@ mod diffedit; mod duplicate; mod edit; mod files; +mod gerrit; mod git; mod init; mod interdiff; @@ -88,6 +89,8 @@ enum Command { Edit(edit::EditArgs), Files(files::FilesArgs), #[command(subcommand)] + Gerrit(gerrit::GerritCommand), + #[command(subcommand)] Git(git::GitCommand), Init(init::InitArgs), Interdiff(interdiff::InterdiffArgs), @@ -193,6 +196,7 @@ pub fn run_command(ui: &mut Ui, command_helper: &CommandHelper) -> Result<(), Co Command::Sparse(sub_args) => sparse::cmd_sparse(ui, command_helper, sub_args), Command::Chmod(sub_args) => chmod::cmd_chmod(ui, command_helper, sub_args), Command::Git(sub_args) => git::cmd_git(ui, command_helper, sub_args), + Command::Gerrit(sub_args) => gerrit::cmd_gerrit(ui, command_helper, sub_args), Command::Util(sub_args) => util::cmd_util(ui, command_helper, sub_args), #[cfg(feature = "bench")] Command::Bench(sub_args) => bench::cmd_bench(ui, command_helper, sub_args), diff --git a/lib/src/footer.rs b/lib/src/footer.rs index 34a4c2c7029..8c2be735c34 100644 --- a/lib/src/footer.rs +++ b/lib/src/footer.rs @@ -39,8 +39,8 @@ pub fn get_footer_lines(body: &str) -> Option>> { // by newline, but we need to immediately reverse the order of the lines // to ensure we parse the footer in an unambiguous manner; this avoids cases // where a colon in the body of the message is mistaken for a footer line - let mut footer = IndexMap::new(); - let lines: Vec<&str> = body.trim().lines().rev().collect(); + + let lines = body.trim().lines().rev().collect::>(); // short-circuit if there is only 1 line; this avoids a case where a commit // with a single-line description like 'cli: fix bug' does not have a @@ -49,6 +49,7 @@ pub fn get_footer_lines(body: &str) -> Option>> { return None; } + let mut footer = IndexMap::new(); for line in lines { if line.is_empty() { break;