From 9077003b6995dc0f8e7c5fccdac7674da87264ff Mon Sep 17 00:00:00 2001 From: Ilya Grigoriev Date: Thu, 11 Jul 2024 16:53:19 -0700 Subject: [PATCH] conflicts: encode unmaterializeable lines Fixes #3968 Fixes #3975 --- lib/src/conflicts.rs | 63 +++++++++++++++++++- lib/tests/test_conflicts.rs | 113 ++++++++++++++++++++++++++++++++++-- 2 files changed, 170 insertions(+), 6 deletions(-) diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 1ffd73ea1d..d36e20cdf9 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -14,9 +14,11 @@ #![allow(missing_docs)] +use std::borrow::Cow; use std::io::{Read, Write}; use std::iter::zip; +use bstr::ByteVec; use futures::{StreamExt, TryStreamExt}; use itertools::Itertools; use regex::bytes::{Regex, RegexBuilder}; @@ -51,6 +53,12 @@ static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy = once_cell::sync::La .build() .unwrap() }); +static DIRECTIVE_LINE_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { + RegexBuilder::new(r"^\\JJ") + .multi_line(true) + .build() + .unwrap() +}); fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> std::io::Result<()> { for hunk in hunks { @@ -217,6 +225,12 @@ pub fn materialize_merge_result( single_hunk: Merge, output: &mut dyn Write, ) -> std::io::Result<()> { + let single_hunk: Merge = + single_hunk.into_map(|ContentHunk(side)| ContentHunk(encode_unmaterializeable_lines(side))); + + // From now on, we assume some invariants guaranteed by the encoding function + // above. See its docstring for details. + let slices = single_hunk.map(|content| content.0.as_slice()); let merge_result = files::merge(&slices); match merge_result { @@ -347,7 +361,11 @@ pub fn parse_merge_result(input: &[u8], num_sides: usize) -> Option> = + once_cell::sync::Lazy::new(|| format!("{JJ_VERBATIM_LINE}$0").into_bytes()); +static VERBATIM_LINE_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { + RegexBuilder::new(&format!("^{}", regex::escape(JJ_VERBATIM_LINE))) + .multi_line(true) + .build() + .unwrap() +}); + +/// Encode a side of a conflict to satisfy some invariants +/// +/// - The side should not contain any conflict markers +/// - It should contain 0 or more newline-terminated lines. In other words, it +/// is either empty or ends in a newline. +fn encode_unmaterializeable_lines(mut side: Vec) -> Vec { + // TODO(ilyagr): It's likely the compiler won't be able to avoid a clone in + // the no-replacements case. If we care about that, we could try one of the + // approaches discussed in https://github.com/rust-lang/regex/issues/676. + // This also applies to the decoding function. + side = DIRECTIVE_LINE_REGEX + .replace_all(&Cow::from(side), JJ_VERBATIM_LINE_REPLACEMENT.as_slice()) + .to_vec(); + side = CONFLICT_MARKER_REGEX + .replace_all(&Cow::from(side), JJ_VERBATIM_LINE_REPLACEMENT.as_slice()) + .to_vec(); + if !side.is_empty() && !side.ends_with(b"\n") { + side.push_str(JJ_NO_NEWLINE_AT_EOF); + } + side +} + +fn decode_materialized_side(mut side: Vec) -> Vec { + side = VERBATIM_LINE_REGEX + .replace_all(&Cow::from(side), b"") + .to_vec(); + if side.ends_with(JJ_NO_NEWLINE_AT_EOF) { + side.truncate(side.len() - JJ_NO_NEWLINE_AT_EOF.len()); + } + side +} diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index 79325b1e10..bd8ff21789 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -326,6 +326,96 @@ fn test_materialize_parse_roundtrip() { "###); } +#[test] +fn test_materialize_parse_roundtrip_tricky() { + let test_repo = TestRepo::init(); + let store = test_repo.repo.store(); + + let path = RepoPath::from_internal_string("file"); + let base_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 + line 2 <<<<<<< + <<<<<<< line 3 + line 4 + line 5"}, + ); + let left_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 left + line 2 left + <<<<<<<< line 3 + line 4 + line 5 left"}, + ); + let right_id = testutils::write_file( + store, + path, + indoc! {" + \\JJ Verbatim Line: fake verbatim line + line 1 right + line 2 + line 3 + line 4 right + line 5 right + "}, + ); + + let conflict = Merge::from_removes_adds( + vec![Some(base_id.clone())], + vec![Some(left_id.clone()), Some(right_id.clone())], + ); + let materialized = materialize_conflict_string(store, path, &conflict); + insta::assert_snapshot!( + materialized, + @r###" + \JJ Verbatim Line:\JJ Verbatim Line: fake verbatim line + <<<<<<< Conflict 1 of 1 + %%%%%%% Changes from base to side #1 + -line 1 + -line 2 <<<<<<< + -\JJ Verbatim Line:<<<<<<< line 3 + +line 1 left + +line 2 left + +<<<<<<<< line 3 + line 4 + -line 5 + +line 5 left + \JJ: No newline at the end of file + +++++++ Contents of side #2 + line 1 right + line 2 + line 3 + line 4 right + line 5 right + >>>>>>> Conflict 1 of 1 ends + "### + ); + + // The first add should always be from the left side + insta::assert_debug_snapshot!( + parse_merge_result(materialized.as_bytes(), conflict.num_sides()), + @r###" + Some( + Conflicted( + [ + "\\JJ Verbatim Line: fake verbatim line\nline 1 left\nline 2 left\n<<<<<<<< line 3\nline 4\nline 5 left", + "\\JJ Verbatim Line: fake verbatim line\nline 1\nline 2 <<<<<<<\n<<<<<<< line 3\nline 4\nline 5", + "\\JJ Verbatim Line: fake verbatim line\nline 1 right\nline 2\nline 3\nline 4 right\nline 5 right\n", + ], + ), + ) + "###); + + // TODO: update_from_conflict? +} + #[test] fn test_materialize_conflict_no_newlines_at_eof() { let test_repo = TestRepo::init(); @@ -344,16 +434,29 @@ fn test_materialize_conflict_no_newlines_at_eof() { insta::assert_snapshot!(materialized, @r###" <<<<<<< Conflict 1 of 1 - %%%%%%% Changes from base to side #1 - -base+++++++ Contents of side #2 - right>>>>>>> Conflict 1 of 1 ends + +++++++ Contents of side #1 + %%%%%%% Changes from base to side #2 + -base + +right + \JJ: No newline at the end of file + >>>>>>> Conflict 1 of 1 ends "### ); - // BUG(#3968): These conflict markers cannot be parsed + // These conflict markers can be parsed (issue #3968) insta::assert_debug_snapshot!(parse_merge_result( materialized.as_bytes(), conflict.num_sides() - ),@"None"); + ),@r###" + Some( + Conflicted( + [ + "", + "base", + "right", + ], + ), + ) + "###); } #[test]