Skip to content

Commit

Permalink
conflicts: encode unmaterializeable lines
Browse files Browse the repository at this point in the history
  • Loading branch information
ilyagr committed Jul 15, 2024
1 parent 455804d commit 41101b3
Show file tree
Hide file tree
Showing 2 changed files with 228 additions and 6 deletions.
121 changes: 120 additions & 1 deletion lib/src/conflicts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@

#![allow(missing_docs)]

use std::borrow::Cow;
use std::io::{Read, Write};
use std::iter::zip;

use bstr::ByteVec;
use futures::{StreamExt, TryStreamExt};
use itertools::Itertools;
use regex::bytes::{Regex, RegexBuilder};
Expand Down Expand Up @@ -51,6 +53,12 @@ static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::La
.build()
.unwrap()
});
/// Matches the `\JJ` prefix at the beginning of any line.
///
/// The regex is compiled in multi-line mode, so `^` anchors at the start of
/// every line of the haystack and not only at its very beginning.
static DIRECTIVE_LINE_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
    let mut builder = RegexBuilder::new(r"^\\JJ");
    builder.multi_line(true);
    builder.build().unwrap()
});

fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> std::io::Result<()> {
for hunk in hunks {
Expand Down Expand Up @@ -217,6 +225,12 @@ pub fn materialize_merge_result(
single_hunk: Merge<ContentHunk>,
output: &mut dyn Write,
) -> std::io::Result<()> {
let single_hunk: Merge<ContentHunk> =
single_hunk.into_map(|ContentHunk(side)| ContentHunk(encode_unmaterializeable_lines(side)));

// From now on, we assume some invariants guaranteed by the encoding function
// above. See its docstring for details.

let slices = single_hunk.map(|content| content.0.as_slice());
let merge_result = files::merge(&slices);
match merge_result {
Expand Down Expand Up @@ -347,7 +361,11 @@ pub fn parse_merge_result(input: &[u8], num_sides: usize) -> Option<Merge<Conten
}
}

Some(result.into_map(ContentHunk))
Some(
result
.into_map(decode_materialized_side)
.into_map(ContentHunk),
)
}

fn parse_conflict_into_list_of_hunks(
Expand Down Expand Up @@ -540,3 +558,104 @@ pub async fn update_from_content(
};
Ok(new_file_ids)
}

/// Marker appended by `encode_unmaterializeable_lines` to a non-empty side
/// that does not end in a newline, and removed again by
/// `decode_materialized_side`. The leading `\n` terminates the last (partial)
/// line of the side, so the marker always sits on a line of its own.
const JJ_NO_NEWLINE_AT_EOF: &[u8] = b"\n\\JJ: No newline at the end of file\n";
/// Prefix put in front of a line that has to be escaped when a side is
/// encoded.
const JJ_VERBATIM_LINE: &str = "\\JJ Verbatim Line:";
/// Regex replacement string that prepends `JJ_VERBATIM_LINE` to a match. In
/// the `regex` replacement syntax, `$0` expands to the entire matched text.
static JJ_VERBATIM_LINE_REPLACEMENT: once_cell::sync::Lazy<Vec<u8>> =
once_cell::sync::Lazy::new(|| format!("{JJ_VERBATIM_LINE}$0").into_bytes());
/// Matches the literal `JJ_VERBATIM_LINE` prefix at the start of any line
/// (multi-line mode). The prefix is passed through `regex::escape` so that
/// its backslash is matched literally.
static VERBATIM_LINE_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
RegexBuilder::new(&format!("^{}", regex::escape(JJ_VERBATIM_LINE)))
.multi_line(true)
.build()
.unwrap()
});

/// Encode a side of a conflict so that it satisfies some invariants.
///
/// - Every line that starts with `\JJ`, and every line matched by
///   `CONFLICT_MARKER_REGEX`, is prefixed with `JJ_VERBATIM_LINE`, so the
///   result will not contain any conflict markers.
/// - The result will contain 0 or more newline-terminated lines. In other
///   words, it is either empty or ends in a newline. A non-empty input that
///   lacks a trailing newline gets `JJ_NO_NEWLINE_AT_EOF` appended.
///
/// This transformation is reversible (see `decode_materialized_side`) and it
/// is hoped that the result is human-readable. See the tests below for
/// examples.
fn encode_unmaterializeable_lines(mut side: Vec<u8>) -> Vec<u8> {
    let escape_prefix: &[u8] = JJ_VERBATIM_LINE_REPLACEMENT.as_slice();

    // `replace_all` returns `Cow::Borrowed` when nothing matched, so the
    // buffer is only swapped when a replacement actually happened. This
    // avoids copying `side` in the (common) no-match case and reuses the
    // regex's own allocation otherwise.
    //
    // Order matters: `\JJ` lines must be escaped first. The prefix added for
    // conflict markers itself starts with `\JJ`, so running the directive
    // regex afterwards would escape those lines a second time.
    let directives_escaped = DIRECTIVE_LINE_REGEX.replace_all(&side, escape_prefix);
    if let Cow::Owned(escaped) = directives_escaped {
        side = escaped;
    }
    let markers_escaped = CONFLICT_MARKER_REGEX.replace_all(&side, escape_prefix);
    if let Cow::Owned(escaped) = markers_escaped {
        side = escaped;
    }

    // Record a missing final newline explicitly, so that every encoded side
    // is made of complete lines.
    if !side.is_empty() && !side.ends_with(b"\n") {
        side.push_str(JJ_NO_NEWLINE_AT_EOF);
    }
    side
}

/// Undo the transformation done by `encode_unmaterializeable_lines`.
///
/// Removes a trailing `JJ_NO_NEWLINE_AT_EOF` marker, if present, and then
/// strips one `JJ_VERBATIM_LINE` prefix from every line that starts with it.
/// Only one level of escaping is removed per call, so text that was encoded
/// twice decodes back to the singly-encoded text.
fn decode_materialized_side(mut side: Vec<u8>) -> Vec<u8> {
    // The marker has to be removed *before* unescaping: an escaped final line
    // such as `\JJ Verbatim Line:\JJ: No newline at the end of file` would
    // otherwise turn into something indistinguishable from the marker.
    if side.ends_with(JJ_NO_NEWLINE_AT_EOF) {
        side.truncate(side.len() - JJ_NO_NEWLINE_AT_EOF.len());
    }

    // `replace_all` returns `Cow::Borrowed` when nothing matched; only swap
    // the buffer when a prefix was actually stripped, which avoids copying
    // the whole side in the no-match case.
    let unescaped = VERBATIM_LINE_REGEX.replace_all(&side, b"");
    if let Cow::Owned(unescaped) = unescaped {
        side = unescaped;
    }
    side
}

// Unit tests for the reversible per-side encoding used when materializing
// conflicts.
#[cfg(test)]
mod test {
use bstr::BString;
use indoc::indoc;

use super::{decode_materialized_side, encode_unmaterializeable_lines};

// Encodes a side that starts with a conflict-marker line and has no trailing
// newline, and checks that decoding restores it. Also checks that encoding
// twice nests the escaping and that one decode peels off exactly one layer.
#[test]
fn test_conflict_side_encoding_and_decoding() {
let initial_text: BString = indoc! {br"
<<<<<<<
blahblah"}
.into();

// The marker line is escaped and the missing final newline is recorded.
let encoded_text: BString = encode_unmaterializeable_lines(initial_text.to_vec()).into();
insta::assert_snapshot!(encoded_text, @r###"
\JJ Verbatim Line:<<<<<<<
blahblah
\JJ: No newline at the end of file
"###);
assert_eq!(
BString::from(decode_materialized_side(encoded_text.clone().into())),
initial_text
);

// Encoding already-encoded text escapes every `\JJ` line once more,
// including the no-newline directive line.
let doubly_encoded_text: BString =
encode_unmaterializeable_lines(encoded_text.clone().into()).into();
insta::assert_snapshot!(doubly_encoded_text, @r###"
\JJ Verbatim Line:\JJ Verbatim Line:<<<<<<<
blahblah
\JJ Verbatim Line:\JJ: No newline at the end of file
"###);
// A single decode yields the singly-encoded text, not the original.
assert_eq!(
BString::from(decode_materialized_side(doubly_encoded_text.into())),
encoded_text
);
}

// The input is exactly the text of the no-newline directive line, itself
// without a trailing newline: it must be escaped *and* get the real directive
// appended, and still decode back to the input.
#[test]
fn test_conflict_side_encoding_and_decoding_pathological() {
let initial_text = br"\JJ: No newline at the end of file";

let encoded_text: BString = encode_unmaterializeable_lines(initial_text.to_vec()).into();
insta::assert_snapshot!(encoded_text, @r###"
\JJ Verbatim Line:\JJ: No newline at the end of file
\JJ: No newline at the end of file
"###);
assert_eq!(
BString::from(decode_materialized_side(encoded_text.clone().into())),
BString::from(initial_text)
);
}
}
113 changes: 108 additions & 5 deletions lib/tests/test_conflicts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,96 @@ fn test_materialize_parse_roundtrip() {
"###);
}

// Materializes and re-parses a conflict whose sides contain text that collides
// with the materialization syntax: a line that already looks like an escaped
// verbatim line, lines containing or starting with `<<<<<<<`, and (for the
// base and left sides) a missing trailing newline.
#[test]
fn test_materialize_parse_roundtrip_tricky() {
let test_repo = TestRepo::init();
let store = test_repo.repo.store();

let path = RepoPath::from_internal_string("file");
// Base: no trailing newline; line 3 starts with a 7-character `<` marker.
let base_id = testutils::write_file(
store,
path,
indoc! {"
\\JJ Verbatim Line: fake verbatim line
line 1
line 2 <<<<<<<
<<<<<<< line 3
line 4
line 5"},
);
// Left: no trailing newline; line 3 starts with eight `<` characters.
let left_id = testutils::write_file(
store,
path,
indoc! {"
\\JJ Verbatim Line: fake verbatim line
line 1 left
line 2 left
<<<<<<<< line 3
line 4
line 5 left"},
);
// Right: ends with a newline.
let right_id = testutils::write_file(
store,
path,
indoc! {"
\\JJ Verbatim Line: fake verbatim line
line 1 right
line 2
line 3
line 4 right
line 5 right
"},
);

let conflict = Merge::from_removes_adds(
vec![Some(base_id.clone())],
vec![Some(left_id.clone()), Some(right_id.clone())],
);
// In the materialized text, the pre-existing `\JJ` line and the base's
// `<<<<<<< line 3` line carry the verbatim prefix, while `line 2 <<<<<<<`
// and the left side's `<<<<<<<< line 3` are left as they are.
let materialized = materialize_conflict_string(store, path, &conflict);
insta::assert_snapshot!(
materialized,
@r###"
\JJ Verbatim Line:\JJ Verbatim Line: fake verbatim line
<<<<<<< Conflict 1 of 1
%%%%%%% Changes from base to side #1
-line 1
-line 2 <<<<<<<
-\JJ Verbatim Line:<<<<<<< line 3
+line 1 left
+line 2 left
+<<<<<<<< line 3
line 4
-line 5
+line 5 left
\JJ: No newline at the end of file
+++++++ Contents of side #2
line 1 right
line 2
line 3
line 4 right
line 5 right
>>>>>>> Conflict 1 of 1 ends
"###
);

// Parsing must give back the original, unescaped contents of each side.
// The first add should always be from the left side
insta::assert_debug_snapshot!(
parse_merge_result(materialized.as_bytes(), conflict.num_sides()),
@r###"
Some(
Conflicted(
[
"\\JJ Verbatim Line: fake verbatim line\nline 1 left\nline 2 left\n<<<<<<<< line 3\nline 4\nline 5 left",
"\\JJ Verbatim Line: fake verbatim line\nline 1\nline 2 <<<<<<<\n<<<<<<< line 3\nline 4\nline 5",
"\\JJ Verbatim Line: fake verbatim line\nline 1 right\nline 2\nline 3\nline 4 right\nline 5 right\n",
],
),
)
"###);

// TODO: update_from_conflict?
}

#[test]
fn test_materialize_conflict_no_newlines_at_eof() {
let test_repo = TestRepo::init();
Expand All @@ -344,16 +434,29 @@ fn test_materialize_conflict_no_newlines_at_eof() {
insta::assert_snapshot!(materialized,
@r###"
<<<<<<< Conflict 1 of 1
%%%%%%% Changes from base to side #1
-base+++++++ Contents of side #2
right>>>>>>> Conflict 1 of 1 ends
+++++++ Contents of side #1
%%%%%%% Changes from base to side #2
-base
+right
\JJ: No newline at the end of file
>>>>>>> Conflict 1 of 1 ends
"###
);
// BUG(#3968): These conflict markers cannot be parsed
// These conflict markers can be parsed (issue #3968)
insta::assert_debug_snapshot!(parse_merge_result(
materialized.as_bytes(),
conflict.num_sides()
),@"None");
),@r###"
Some(
Conflicted(
[
"",
"base",
"right",
],
),
)
"###);
}

#[test]
Expand Down

0 comments on commit 41101b3

Please sign in to comment.