From a06f0305a0f1a7b11f5409320e523fe56ba60739 Mon Sep 17 00:00:00 2001 From: Waleed Khan Date: Sun, 20 Aug 2023 20:49:39 -0700 Subject: [PATCH] merge_tools: create internal difftool --- Cargo.lock | 61 +++- Cargo.toml | 1 + cli/Cargo.toml | 1 + cli/src/merge_tools/internal.rs | 610 ++++++++++++++++++++++++++++++++ cli/src/merge_tools/mod.rs | 11 +- 5 files changed, 682 insertions(+), 2 deletions(-) create mode 100644 cli/src/merge_tools/internal.rs diff --git a/Cargo.lock b/Cargo.lock index 12c404d193..48ebfd1dc3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -270,6 +270,12 @@ dependencies = [ "thiserror", ] +[[package]] +name = "cassowary" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" + [[package]] name = "cast" version = "0.3.0" @@ -517,6 +523,22 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossterm" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67" +dependencies = [ + "bitflags 1.3.2", + "crossterm_winapi", + "libc", + "mio", + "parking_lot", + "signal-hook", + "signal-hook-mio", + "winapi", +] + [[package]] name = "crossterm" version = "0.26.1" @@ -987,7 +1009,7 @@ dependencies = [ "clap_mangen", "config", "criterion", - "crossterm", + "crossterm 0.26.1", "dirs", "esl01-renderdag", "git2", @@ -1004,6 +1026,7 @@ dependencies = [ "pest_derive", "regex", "rpassword", + "scm-record", "serde", "slab", "tempfile", @@ -1786,6 +1809,23 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scm-record" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de5add99102ba58391e9c1e0c2ee5410654d8ffe7605a393db863a403cb5a2d" +dependencies = [ + "cassowary", + "crossterm 0.26.1", + "num-traits", + "serde", + "serde_json", + "thiserror", + "tracing", + "tui", + "unicode-width", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2263,6 +2303,19 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tui" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccdd26cbd674007e649a272da4475fb666d3aa0ad0531da7136db6fab0e5bad1" +dependencies = [ + "bitflags 1.3.2", + "cassowary", + "crossterm 0.25.0", + "unicode-segmentation", + "unicode-width", +] + [[package]] name = "typenum" version = "1.16.0" @@ -2302,6 +2355,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + [[package]] name = "unicode-width" version = "0.1.10" diff --git a/Cargo.toml b/Cargo.toml index 56eea610e8..5fc6349393 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,6 +64,7 @@ smallvec = { version = "1.11.0", features = [ "const_new", "union", ] } +scm-record = "0.1.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.105" slab = "0.4.8" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 84a87819b5..8b6f0e803d 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -49,6 +49,7 @@ pest.workspace = true pest_derive.workspace = true regex.workspace = true rpassword.workspace = true +scm-record.workspace = true serde.workspace = true slab.workspace = true tempfile.workspace = true diff --git a/cli/src/merge_tools/internal.rs b/cli/src/merge_tools/internal.rs new file mode 100644 index 0000000000..bb5258f206 --- /dev/null +++ b/cli/src/merge_tools/internal.rs @@ -0,0 +1,610 @@ +use std::borrow::Cow; +use std::path::Path; +use std::sync::Arc; + +use itertools::Itertools; +use jj_lib::backend::{BackendError, FileId, ObjectId, SymlinkId, TreeId, TreeValue}; +use jj_lib::diff::{find_line_ranges, Diff, DiffHunk}; +use jj_lib::matchers::EverythingMatcher; +use jj_lib::repo_path::RepoPath; +use jj_lib::store::Store; +use jj_lib::tree::Tree; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum InternalToolError { + #[error("Failed to record changes: {0}")] + Record(#[from] scm_record::RecordError), + #[error("Failed to read file {path:?} with ID {id:?}: {source:?}")] + ReadFileBackend { + path: RepoPath, + id: FileId, + source: BackendError, + }, + #[error("Failed to read file {path:?} with ID {id:?}: {source}")] + ReadFileIo { + path: RepoPath, + id: FileId, + source: std::io::Error, + }, + #[error("Failed to read symlink {path:?} with ID {id:?} from backend: {source:?}")] + ReadSymlink { + path: RepoPath, + id: SymlinkId, + source: BackendError, + }, + #[error("Failed to decode UTF-8 text for item {item} (this should not happen): {source}")] + DecodeUtf8 { + source: std::str::Utf8Error, + item: &'static str, + }, + #[error("Rendering {item} {id} is unimplemented for the builtin difftool/mergetool")] + Unimplemented { item: &'static str, id: String }, + #[error("Backend error: {0:?}")] + BackendError(#[from] jj_lib::backend::BackendError), +} + +#[derive(Clone, Debug)] +enum FileContents { + Absent, + Text { + contents: String, + hash: Option, + num_bytes: u64, + }, + Binary { + hash: Option, + num_bytes: u64, + }, +} + +/// Information about a file that was read from disk. Note that the file may not +/// have existed, in which case its contents will be marked as absent. +#[derive(Clone, Debug)] +pub struct FileInfo { + file_mode: scm_record::FileMode, + contents: FileContents, +} + +/// File modes according to the Git file mode conventions. used for display +/// purposes and equality comparison. +/// +/// TODO: let `scm-record` accept strings instead of numbers for file modes? Or +/// figure out some other way to represent file mode changes in a jj-compatible +/// manner? +mod mode { + pub const NORMAL: usize = 0o100644; + pub const EXECUTABLE: usize = 0o100755; + pub const SYMLINK: usize = 0o120000; +} + +fn describe_binary(hash: Option<&str>, num_bytes: u64) -> String { + match hash { + Some(hash) => { + format!("{hash} ({num_bytes}B)") + } + None => format!("({num_bytes}B)"), + } +} + +fn buf_to_file_contents(hash: Option, buf: Vec) -> FileContents { + let num_bytes: u64 = buf.len().try_into().unwrap(); + let text = if buf.contains(&0) { + None + } else { + match String::from_utf8(buf) { + Ok(text) => Some(text), + Err(_) => None, + } + }; + match text { + Some(text) => FileContents::Text { + contents: text, + hash, + num_bytes, + }, + None => FileContents::Binary { hash, num_bytes }, + } +} + +fn read_file_contents( + store: &Store, + tree: &Tree, + path: &RepoPath, +) -> Result { + match tree.path_value(path) { + None => Ok(FileInfo { + file_mode: scm_record::FileMode::absent(), + contents: FileContents::Absent, + }), + + Some(TreeValue::File { id, executable }) => { + let mut reader = + store + .read_file(path, &id) + .map_err(|err| InternalToolError::ReadFileBackend { + path: path.clone(), + id: id.clone(), + source: err, + })?; + let mut buf = Vec::new(); + reader + .read_to_end(&mut buf) + .map_err(|err| InternalToolError::ReadFileIo { + path: path.clone(), + id: id.clone(), + source: err, + })?; + + let file_mode = if executable { + scm_record::FileMode(mode::EXECUTABLE) + } else { + scm_record::FileMode(mode::NORMAL) + }; + let contents = buf_to_file_contents(Some(id.hex()), buf); + Ok(FileInfo { + file_mode, + contents, + }) + } + + Some(TreeValue::Symlink(id)) => { + let value = + store + .read_symlink(path, &id) + .map_err(|err| InternalToolError::ReadSymlink { + path: path.clone(), + id: id.clone(), + source: err, + })?; + let file_mode = scm_record::FileMode(mode::SYMLINK); + let num_bytes = value.len().try_into().unwrap(); + Ok(FileInfo { + file_mode, + contents: FileContents::Text { + contents: value, + hash: Some(id.hex()), + num_bytes, + }, + }) + } + + Some(TreeValue::Tree(tree_id)) => { + unreachable!("list of changed files included a tree: {tree_id:?}"); + } + Some(TreeValue::GitSubmodule(id)) => Err(InternalToolError::Unimplemented { + item: "git submodule", + id: id.hex(), + }), + Some(TreeValue::Conflict(id)) => Err(InternalToolError::Unimplemented { + item: "conflict", + id: id.hex(), + }), + } +} + +fn make_section_changed_lines( + contents: &str, + change_type: scm_record::ChangeType, +) -> Vec> { + contents + .split_inclusive('\n') + .map(|line| scm_record::SectionChangedLine { + is_checked: false, + change_type, + line: Cow::Owned(line.to_owned()), + }) + .collect() +} + +fn make_diff_sections( + left_contents: &str, + right_contents: &str, +) -> Result>, InternalToolError> { + let diff = Diff::for_tokenizer( + &[left_contents.as_bytes(), right_contents.as_bytes()], + &find_line_ranges, + ); + let mut sections = Vec::new(); + for hunk in diff.hunks() { + match hunk { + DiffHunk::Matching(text) => { + let text = + std::str::from_utf8(text).map_err(|err| InternalToolError::DecodeUtf8 { + source: err, + item: "matching text in diff hunk", + })?; + sections.push(scm_record::Section::Unchanged { + lines: text + .split_inclusive('\n') + .map(|line| Cow::Owned(line.to_owned())) + .collect(), + }) + } + DiffHunk::Different(sides) => { + debug_assert_eq!(sides.len(), 2); + let left_side = + std::str::from_utf8(sides[0]).map_err(|err| InternalToolError::DecodeUtf8 { + source: err, + item: "left side of diff hunk", + })?; + let right_side = + std::str::from_utf8(sides[1]).map_err(|err| InternalToolError::DecodeUtf8 { + source: err, + item: "right side of diff hunk", + })?; + sections.push(scm_record::Section::Changed { + lines: [ + make_section_changed_lines(left_side, scm_record::ChangeType::Removed), + make_section_changed_lines(right_side, scm_record::ChangeType::Added), + ] + .concat(), + }) + } + } + } + Ok(sections) +} + +pub fn make_diff_files( + store: &Arc, + left_tree: &Tree, + right_tree: &Tree, + changed_files: &[RepoPath], +) -> Result>, InternalToolError> { + let mut files = Vec::new(); + for changed_path in changed_files { + let FileInfo { + file_mode: left_file_mode, + contents: left_contents, + } = read_file_contents(store, left_tree, changed_path)?; + let FileInfo { + file_mode: right_file_mode, + contents: right_contents, + } = read_file_contents(store, right_tree, changed_path)?; + + let mut sections = Vec::new(); + if left_file_mode != right_file_mode + && left_file_mode != scm_record::FileMode::absent() + && right_file_mode != scm_record::FileMode::absent() + { + sections.push(scm_record::Section::FileMode { + is_checked: false, + before: left_file_mode, + after: right_file_mode, + }); + } + + match (left_contents, right_contents) { + (FileContents::Absent, FileContents::Absent) => {} + ( + FileContents::Absent, + FileContents::Text { + contents, + hash: _, + num_bytes: _, + }, + ) => sections.push(scm_record::Section::Changed { + lines: make_section_changed_lines(&contents, scm_record::ChangeType::Added), + }), + + (FileContents::Absent, FileContents::Binary { hash, num_bytes }) => { + sections.push(scm_record::Section::Binary { + is_checked: false, + old_description: None, + new_description: Some(Cow::Owned(describe_binary(hash.as_deref(), num_bytes))), + }) + } + + ( + FileContents::Text { + contents, + hash: _, + num_bytes: _, + }, + FileContents::Absent, + ) => sections.push(scm_record::Section::Changed { + lines: make_section_changed_lines(&contents, scm_record::ChangeType::Removed), + }), + + ( + FileContents::Text { + contents: old_contents, + hash: _, + num_bytes: _, + }, + FileContents::Text { + contents: new_contents, + hash: _, + num_bytes: _, + }, + ) => { + sections.extend(make_diff_sections(&old_contents, &new_contents)?); + } + + ( + FileContents::Text { + contents: _, + hash: old_hash, + num_bytes: old_num_bytes, + } + | FileContents::Binary { + hash: old_hash, + num_bytes: old_num_bytes, + }, + FileContents::Text { + contents: _, + hash: new_hash, + num_bytes: new_num_bytes, + } + | FileContents::Binary { + hash: new_hash, + num_bytes: new_num_bytes, + }, + ) => sections.push(scm_record::Section::Binary { + is_checked: false, + old_description: Some(Cow::Owned(describe_binary( + old_hash.as_deref(), + old_num_bytes, + ))), + new_description: Some(Cow::Owned(describe_binary( + new_hash.as_deref(), + new_num_bytes, + ))), + }), + + (FileContents::Binary { hash, num_bytes }, FileContents::Absent) => { + sections.push(scm_record::Section::Binary { + is_checked: false, + old_description: Some(Cow::Owned(describe_binary(hash.as_deref(), num_bytes))), + new_description: None, + }) + } + } + + files.push(scm_record::File { + old_path: None, + path: Cow::Owned(changed_path.to_fs_path(Path::new(""))), + file_mode: None, + sections, + }); + } + Ok(files) +} + +pub fn apply_diff_internal( + store: Arc, + left_tree: &Tree, + right_tree: &Tree, + changed_files: Vec, + files: &[scm_record::File], +) -> Result { + // FIXME: do we need to start from the left or right tree? + let mut tree_builder = left_tree.store().tree_builder(left_tree.id().clone()); + assert_eq!( + changed_files.len(), + files.len(), + "result had a different number of files" + ); + for (path, file) in changed_files.into_iter().zip(files) { + let (selected, _unselected) = file.get_selected_contents(); + match selected { + scm_record::SelectedContents::Absent => { + tree_builder.remove(path); + } + scm_record::SelectedContents::Unchanged => { + // Do nothing. + } + scm_record::SelectedContents::Binary { + old_description: _, + new_description: _, + } => { + // FIXME: use an in-band mechanism to transmit what the binary file was + // associated with + let value = right_tree.path_value(&path); + match value { + None => { + tree_builder.remove(path); + } + Some(value) => { + tree_builder.set(path, value); + } + } + } + scm_record::SelectedContents::Present { contents } => { + let file_id = store.write_file(&path, &mut contents.as_bytes())?; + tree_builder.set( + path, + TreeValue::File { + id: file_id, + executable: file.get_file_mode() + == Some(scm_record::FileMode(mode::EXECUTABLE)), + }, + ) + } + } + } + + let tree_id = tree_builder.write_tree(); + Ok(tree_id) +} + +pub fn edit_diff_internal( + left_tree: &Tree, + right_tree: &Tree, +) -> Result { + let store = left_tree.store().clone(); + let changed_files = left_tree + .diff(right_tree, &EverythingMatcher) + .map(|(path, _value)| path) + .collect_vec(); + let files = make_diff_files(&store, left_tree, right_tree, &changed_files)?; + let recorder = scm_record::Recorder::new( + scm_record::RecordState { + is_read_only: false, + files, + }, + scm_record::EventSource::Crossterm, + ); + let result = recorder.run().map_err(InternalToolError::Record)?; + let tree_id = apply_diff_internal(store, left_tree, right_tree, changed_files, &result.files) + .map_err(InternalToolError::BackendError)?; + Ok(tree_id) +} + +#[cfg(test)] +mod tests { + use jj_lib::matchers::EverythingMatcher; + use jj_lib::repo::Repo; + use testutils::TestRepo; + + use super::*; + + #[test] + fn test_edit_diff_internal() { + let test_repo = TestRepo::init(false); + let store = test_repo.repo.store(); + + let unused_path = RepoPath::from_internal_string("unused"); + let unchanged = RepoPath::from_internal_string("unchanged"); + let changed_path = RepoPath::from_internal_string("changed"); + let added_path = RepoPath::from_internal_string("added"); + let left_tree = testutils::create_tree( + &test_repo.repo, + &[ + (&unused_path, "unused\n"), + (&unchanged, "unchanged\n"), + (&changed_path, "line1\nline2\nline3\n"), + ], + ); + let right_tree = testutils::create_tree( + &test_repo.repo, + &[ + (&unused_path, "unused\n"), + (&unchanged, "unchanged\n"), + (&changed_path, "line1\nchanged1\nchanged2\nline3\nadded1\n"), + (&added_path, "added\n"), + ], + ); + + let changed_files = vec![unchanged.clone(), changed_path.clone(), added_path.clone()]; + let files = make_diff_files(store, &left_tree, &right_tree, &changed_files).unwrap(); + insta::assert_debug_snapshot!(files, @r###" + [ + File { + old_path: None, + path: "unchanged", + file_mode: None, + sections: [ + Unchanged { + lines: [ + "unchanged\n", + ], + }, + ], + }, + File { + old_path: None, + path: "changed", + file_mode: None, + sections: [ + Unchanged { + lines: [ + "line1\n", + ], + }, + Changed { + lines: [ + SectionChangedLine { + is_checked: false, + change_type: Removed, + line: "line2\n", + }, + SectionChangedLine { + is_checked: false, + change_type: Added, + line: "changed1\n", + }, + SectionChangedLine { + is_checked: false, + change_type: Added, + line: "changed2\n", + }, + ], + }, + Unchanged { + lines: [ + "line3\n", + ], + }, + Changed { + lines: [ + SectionChangedLine { + is_checked: false, + change_type: Added, + line: "added1\n", + }, + ], + }, + ], + }, + File { + old_path: None, + path: "added", + file_mode: None, + sections: [ + Changed { + lines: [ + SectionChangedLine { + is_checked: false, + change_type: Added, + line: "added\n", + }, + ], + }, + ], + }, + ] + "###); + + let no_changes_tree_id = apply_diff_internal( + store.clone(), + &left_tree, + &right_tree, + changed_files.clone(), + &files, + ) + .unwrap(); + let no_changes_tree = store + .get_tree(&RepoPath::root(), &no_changes_tree_id) + .unwrap(); + assert_eq!( + no_changes_tree.id(), + left_tree.id(), + "no-changes tree was different: {:?}", + no_changes_tree.diff_summary(&left_tree, &EverythingMatcher) + ); + + let mut files = files; + for file in files.iter_mut() { + file.toggle_all(); + } + let all_changes_tree_id = apply_diff_internal( + store.clone(), + &left_tree, + &right_tree, + changed_files, + &files, + ) + .unwrap(); + let all_changes_tree = store + .get_tree(&RepoPath::root(), &all_changes_tree_id) + .unwrap(); + assert_eq!( + all_changes_tree.id(), + right_tree.id(), + "all-changes tree was different: {:?}", + all_changes_tree.diff_summary(&right_tree, &EverythingMatcher) + ); + } +} diff --git a/cli/src/merge_tools/mod.rs b/cli/src/merge_tools/mod.rs index 2e49fef76d..eaef0f7a4e 100644 --- a/cli/src/merge_tools/mod.rs +++ b/cli/src/merge_tools/mod.rs @@ -13,6 +13,7 @@ // limitations under the License. mod external; +mod internal; use std::sync::Arc; @@ -28,11 +29,14 @@ use thiserror::Error; use self::external::{edit_diff_external, DiffCheckoutError, ExternalToolError}; pub use self::external::{generate_diff, ExternalMergeTool}; +use self::internal::{edit_diff_internal, InternalToolError}; use crate::config::CommandNameAndArgs; use crate::ui::Ui; #[derive(Debug, Error)] pub enum DiffEditError { + #[error(transparent)] + InternalTool(#[from] Box), #[error(transparent)] ExternalTool(#[from] ExternalToolError), #[error(transparent)] @@ -53,6 +57,8 @@ pub enum DiffGenerateError { #[derive(Debug, Error)] pub enum ConflictResolveError { + #[error(transparent)] + InternalTool(#[from] Box), #[error(transparent)] ExternalTool(#[from] ExternalToolError), #[error("Couldn't find the path {0:?} in this revision")] @@ -126,7 +132,10 @@ pub fn edit_diff( // Start a diff editor on the two directories. let editor = get_diff_editor_from_settings(ui, settings)?; match editor { - MergeTool::Internal => unimplemented!("run_mergetool with internal mergetool"), + MergeTool::Internal => { + let tree_id = edit_diff_internal(left_tree, right_tree).map_err(Box::new)?; + Ok(tree_id) + } MergeTool::External(editor) => edit_diff_external( editor, left_tree,