diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 0eb5a5ebd5..6848fa6cbd 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -50,8 +50,14 @@ use crate::merge::MergedTreeValue; use crate::repo_path::RepoPath; use crate::store::Store; -/// Length of conflict markers. -pub const CONFLICT_MARKER_LEN: usize = 7; +/// Minimum length of conflict markers. +pub const MIN_CONFLICT_MARKER_LEN: usize = 7; + +/// If a file already contains lines which look like conflict markers of length +/// N, then the conflict markers we add will be of length (N + increment). This +/// number is chosen to make the conflict markers noticeably longer than the +/// existing markers. +pub const CONFLICT_MARKER_LEN_INCREMENT: usize = 4; fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> { for hunk in hunks { @@ -259,16 +265,26 @@ impl ConflictMarkerKind { } } +/// Represents a conflict marker parsed from the file. Conflict markers consist +/// of a single ASCII character repeated for a certain length. +struct ConflictMarker { + kind: ConflictMarkerKind, + len: usize, +} + /// Write a conflict marker to an output file. fn write_conflict_marker( output: &mut dyn Write, kind: ConflictMarkerKind, + len: usize, suffix_text: &str, ) -> io::Result<()> { + debug_assert!(len >= MIN_CONFLICT_MARKER_LEN); + // It should be faster to build a string in memory and write it to the output // all at once than to write each byte to the output one at a time let mut line: BString = BString::default(); - for _ in 0..CONFLICT_MARKER_LEN { + for _ in 0..len { line.push_byte(kind.to_byte()); } @@ -281,17 +297,13 @@ fn write_conflict_marker( output.write_all(&line) } -/// Parse a conflict marker from a line of a file. The conflict marker must have -/// the correct length (CONFLICT_MARKER_LEN). -fn parse_conflict_marker(line: &[u8]) -> Option { +/// Parse a conflict marker from a line of a file. The conflict marker may have +/// any length (even less than MIN_CONFLICT_MARKER_LEN). +fn parse_conflict_marker(line: &[u8]) -> Option { let first_byte = *line.first()?; let kind = ConflictMarkerKind::parse_byte(first_byte)?; let len = line.iter().take_while(|&&b| b == first_byte).count(); - if len != CONFLICT_MARKER_LEN { - return None; - } - if let Some(next_byte) = line.get(len) { // If there is a character after the marker, it must be ASCII whitespace if !next_byte.is_ascii_whitespace() { @@ -299,19 +311,74 @@ fn parse_conflict_marker(line: &[u8]) -> Option { } } - Some(kind) + Some(ConflictMarker { kind, len }) +} + +/// Parse a conflict marker, expecting it to have a certain length. Any conflict +/// markers which have a shorter length are ignored. +/// +/// We have to accept longer conflict markers in case conflict resolution +/// results in one of the "fake" conflict marker lines being deleted, since this +/// will cause the minimum length to go down, meaning the existing conflict +/// markers will now appear to be longer than necessary. +/// +/// There is another possible issue, which is that the user could make one of +/// the "fake" conflict markers longer while resolving the conflict (but not +/// long enough that they are recognized by Jujutsu as real conflict markers). +/// This would result in the existing materialized conflict markers no longer +/// being recognized as conflict markers the next time the file is modified, +/// so the file would incorrectly be considered resolved. We may want to handle +/// this case better in the future (e.g. by re-materializing the file when the +/// length of conflict markers changes). +fn parse_conflict_marker_with_len(line: &[u8], expected_len: usize) -> Option { + debug_assert!(expected_len >= MIN_CONFLICT_MARKER_LEN); + + parse_conflict_marker(line) + .filter(|marker| marker.len >= expected_len) + .map(|marker| marker.kind) } +/// Choose an appropriate conflict marker length to use for a file. The conflict +/// marker length will be at least MIN_CONFLICT_MARKER_LENGTH, and is guaranteed +/// to be longer than any existing markers in the file. +pub fn choose_conflict_marker_len>(slices: &Merge) -> usize { + let max_existing_marker_len = slices + .iter() + .map(|slice| slice.as_ref()) + .flat_map(|slice| slice.lines_with_terminator()) + .filter_map(parse_conflict_marker) + .map(|marker| marker.len.saturating_add(1)) + .max() + .unwrap_or_default(); + + if max_existing_marker_len < MIN_CONFLICT_MARKER_LEN { + // Use the normal conflict marker length if it's unambiguous. + MIN_CONFLICT_MARKER_LEN + } else { + // If there are existing markers in the file, we want to make our conflict + // markers longer than them. We add CONFLICT_MARKER_LEN_INCREMENT to ensure + // they're longer by a visually noticeable amount. + max_existing_marker_len.saturating_add(CONFLICT_MARKER_LEN_INCREMENT) + } +} + +// Returns conflict marker length used for materializing conflicts, if there +// were any conflicts to materialize. pub fn materialize_merge_result>( single_hunk: &Merge, conflict_marker_style: ConflictMarkerStyle, output: &mut dyn Write, -) -> io::Result<()> { +) -> io::Result> { let merge_result = files::merge(single_hunk); match &merge_result { - MergeResult::Resolved(content) => output.write_all(content), + MergeResult::Resolved(content) => { + output.write_all(content)?; + Ok(None) + } MergeResult::Conflict(hunks) => { - materialize_conflict_hunks(hunks, conflict_marker_style, output) + let conflict_marker_len = choose_conflict_marker_len(single_hunk); + materialize_conflict_hunks(hunks, conflict_marker_style, conflict_marker_len, output)?; + Ok(Some(conflict_marker_len)) } } } @@ -324,9 +391,15 @@ pub fn materialize_merge_result_to_bytes>( match merge_result { MergeResult::Resolved(content) => content, MergeResult::Conflict(hunks) => { + let conflict_marker_len = choose_conflict_marker_len(single_hunk); let mut output = Vec::new(); - materialize_conflict_hunks(&hunks, conflict_marker_style, &mut output) - .expect("writing to an in-memory buffer should never fail"); + materialize_conflict_hunks( + &hunks, + conflict_marker_style, + conflict_marker_len, + &mut output, + ) + .expect("writing to an in-memory buffer should never fail"); output.into() } } @@ -335,6 +408,7 @@ pub fn materialize_merge_result_to_bytes>( fn materialize_conflict_hunks( hunks: &[Merge], conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, output: &mut dyn Write, ) -> io::Result<()> { let num_conflicts = hunks @@ -352,13 +426,21 @@ fn materialize_conflict_hunks( match (conflict_marker_style, hunk.as_slice()) { // 2-sided conflicts can use Git-style conflict markers (ConflictMarkerStyle::Git, [left, base, right]) => { - materialize_git_style_conflict(left, base, right, &conflict_info, output)?; + materialize_git_style_conflict( + left, + base, + right, + &conflict_info, + conflict_marker_len, + output, + )?; } _ => { materialize_jj_style_conflict( hunk, &conflict_info, conflict_marker_style, + conflict_marker_len, output, )?; } @@ -373,22 +455,35 @@ fn materialize_git_style_conflict( base: &[u8], right: &[u8], conflict_info: &str, + conflict_marker_len: usize, output: &mut dyn Write, ) -> io::Result<()> { write_conflict_marker( output, ConflictMarkerKind::ConflictStart, + conflict_marker_len, &format!("Side #1 ({conflict_info})"), )?; output.write_all(left)?; - write_conflict_marker(output, ConflictMarkerKind::GitAncestor, "Base")?; + write_conflict_marker( + output, + ConflictMarkerKind::GitAncestor, + conflict_marker_len, + "Base", + )?; output.write_all(base)?; // VS Code doesn't seem to support any trailing text on the separator line - write_conflict_marker(output, ConflictMarkerKind::GitSeparator, "")?; + write_conflict_marker( + output, + ConflictMarkerKind::GitSeparator, + conflict_marker_len, + "", + )?; output.write_all(right)?; write_conflict_marker( output, ConflictMarkerKind::ConflictEnd, + conflict_marker_len, &format!("Side #2 ({conflict_info} ends)"), )?; @@ -399,44 +494,49 @@ fn materialize_jj_style_conflict( hunk: &Merge, conflict_info: &str, conflict_marker_style: ConflictMarkerStyle, + conflict_marker_len: usize, output: &mut dyn Write, ) -> io::Result<()> { // Write a positive snapshot (side) of a conflict - fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> { + let write_side = |add_index: usize, data: &[u8], output: &mut dyn Write| { write_conflict_marker( output, ConflictMarkerKind::Add, + conflict_marker_len, &format!("Contents of side #{}", add_index + 1), )?; output.write_all(data) - } + }; // Write a negative snapshot (base) of a conflict - fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> { + let write_base = |base_str: &str, data: &[u8], output: &mut dyn Write| { write_conflict_marker( output, ConflictMarkerKind::Remove, + conflict_marker_len, &format!("Contents of {base_str}"), )?; output.write_all(data) - } + }; // Write a diff from a negative term to a positive term - fn write_diff( - base_str: &str, - add_index: usize, - diff: &[DiffHunk], - output: &mut dyn Write, - ) -> io::Result<()> { - write_conflict_marker( - output, - ConflictMarkerKind::Diff, - &format!("Changes from {base_str} to side #{}", add_index + 1), - )?; - write_diff_hunks(diff, output) - } + let write_diff = + |base_str: &str, add_index: usize, diff: &[DiffHunk], output: &mut dyn Write| { + write_conflict_marker( + output, + ConflictMarkerKind::Diff, + conflict_marker_len, + &format!("Changes from {base_str} to side #{}", add_index + 1), + )?; + write_diff_hunks(diff, output) + }; - write_conflict_marker(output, ConflictMarkerKind::ConflictStart, conflict_info)?; + write_conflict_marker( + output, + ConflictMarkerKind::ConflictStart, + conflict_marker_len, + conflict_info, + )?; let mut add_index = 0; for (base_index, left) in hunk.removes().enumerate() { // The vast majority of conflicts one actually tries to resolve manually have 1 @@ -489,6 +589,7 @@ fn materialize_jj_style_conflict( write_conflict_marker( output, ConflictMarkerKind::ConflictEnd, + conflict_marker_len, &format!("{conflict_info} ends"), )?; Ok(()) @@ -539,7 +640,11 @@ pub fn materialized_diff_stream<'a>( /// they don't have the expected arity. // TODO: "parse" is not usually the opposite of "materialize", so maybe we // should rename them to "serialize" and "deserialize"? -pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option>> { +pub fn parse_conflict( + input: &[u8], + num_sides: usize, + conflict_marker_len: usize, +) -> Option>> { if input.is_empty() { return None; } @@ -549,7 +654,7 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option { conflict_start = Some(pos); conflict_start_len = line.len(); @@ -557,7 +662,7 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option { if let Some(conflict_start_index) = conflict_start { let conflict_body = &input[conflict_start_index + conflict_start_len..pos]; - let hunk = parse_conflict_hunk(conflict_body); + let hunk = parse_conflict_hunk(conflict_body, conflict_marker_len); if hunk.num_sides() == num_sides { let resolved_slice = &input[resolved_start..conflict_start_index]; if !resolved_slice.is_empty() { @@ -589,27 +694,29 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option Merge { +fn parse_conflict_hunk(input: &[u8], conflict_marker_len: usize) -> Merge { // If the hunk starts with a conflict marker, find its first character let initial_conflict_marker = input .lines_with_terminator() .next() - .and_then(parse_conflict_marker); + .and_then(|line| parse_conflict_marker_with_len(line, conflict_marker_len)); match initial_conflict_marker { // JJ-style conflicts must start with one of these 3 conflict marker lines Some(ConflictMarkerKind::Add | ConflictMarkerKind::Remove | ConflictMarkerKind::Diff) => { - parse_jj_style_conflict_hunk(input) + parse_jj_style_conflict_hunk(input, conflict_marker_len) } // Git-style conflicts either must not start with a conflict marker line, or must start with // the "|||||||" conflict marker line (if the first side was empty) - None | Some(ConflictMarkerKind::GitAncestor) => parse_git_style_conflict_hunk(input), + None | Some(ConflictMarkerKind::GitAncestor) => { + parse_git_style_conflict_hunk(input, conflict_marker_len) + } // No other conflict markers are allowed at the start of a hunk Some(_) => Merge::resolved(BString::new(vec![])), } } -fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { +fn parse_jj_style_conflict_hunk(input: &[u8], conflict_marker_len: usize) -> Merge { enum State { Diff, Minus, @@ -620,7 +727,7 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { let mut removes = vec![]; let mut adds = vec![]; for line in input.lines_with_terminator() { - match parse_conflict_marker(line) { + match parse_conflict_marker_with_len(line, conflict_marker_len) { Some(ConflictMarkerKind::Diff) => { state = State::Diff; removes.push(BString::new(vec![])); @@ -680,7 +787,7 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { } } -fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { +fn parse_git_style_conflict_hunk(input: &[u8], conflict_marker_len: usize) -> Merge { #[derive(PartialEq, Eq)] enum State { Left, @@ -692,7 +799,7 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { let mut base = BString::new(vec![]); let mut right = BString::new(vec![]); for line in input.lines_with_terminator() { - match parse_conflict_marker(line) { + match parse_conflict_marker_with_len(line, conflict_marker_len) { Some(ConflictMarkerKind::GitAncestor) => { if state == State::Left { state = State::Base; @@ -748,7 +855,8 @@ pub async fn update_from_content( // copy. let mut old_content = Vec::with_capacity(content.len()); let merge_hunk = extract_as_single_hunk(simplified_file_ids, store, path).await?; - materialize_merge_result(&merge_hunk, conflict_marker_style, &mut old_content).unwrap(); + let conflict_marker_len = + materialize_merge_result(&merge_hunk, conflict_marker_style, &mut old_content).unwrap(); if content == old_content { return Ok(file_ids.clone()); } @@ -758,11 +866,28 @@ pub async fn update_from_content( // the arity of the unsimplified conflicts since such a conflict may be // present in the working copy if written by an earlier version of jj. let (used_file_ids, hunks) = 'hunks: { - if let Some(hunks) = parse_conflict(content, simplified_file_ids.num_sides()) { - break 'hunks (simplified_file_ids, hunks); - }; + // If the simplified conflict was already resolved, we won't know the conflict + // marker length to use, since it was never calculated. However in that case, + // the file should already not contain any conflicts since num_sides() is 1. + if let Some(conflict_marker_len) = conflict_marker_len { + if let Some(hunks) = parse_conflict( + content, + simplified_file_ids.num_sides(), + conflict_marker_len, + ) { + break 'hunks (simplified_file_ids, hunks); + }; + } + // This case is only possible if the working copy was materialized in an earlier + // version of jj. All conflict markers were of length 7 in that version, so it's + // safe to use MIN_CONFLICT_MARKER_LEN for parsing if the actual conflict marker + // length is unknown. if simplified_file_ids.num_sides() != file_ids.num_sides() { - if let Some(hunks) = parse_conflict(content, file_ids.num_sides()) { + if let Some(hunks) = parse_conflict( + content, + file_ids.num_sides(), + conflict_marker_len.unwrap_or(MIN_CONFLICT_MARKER_LEN), + ) { break 'hunks (file_ids, hunks); }; }; diff --git a/lib/tests/test_conflicts.rs b/lib/tests/test_conflicts.rs index e3c4a00f6f..4cd436e19b 100644 --- a/lib/tests/test_conflicts.rs +++ b/lib/tests/test_conflicts.rs @@ -19,6 +19,7 @@ use jj_lib::conflicts::materialize_merge_result_to_bytes; use jj_lib::conflicts::parse_conflict; use jj_lib::conflicts::update_from_content; use jj_lib::conflicts::ConflictMarkerStyle; +use jj_lib::conflicts::MIN_CONFLICT_MARKER_LEN; use jj_lib::merge::Merge; use jj_lib::repo::Repo; use jj_lib::repo_path::RepoPath; @@ -505,7 +506,7 @@ fn test_materialize_parse_roundtrip() { // The first add should always be from the left side insta::assert_debug_snapshot!( - parse_conflict(materialized.as_bytes(), conflict.num_sides()), + parse_conflict(materialized.as_bytes(), conflict.num_sides(), MIN_CONFLICT_MARKER_LEN), @r###" Some( [ @@ -628,7 +629,8 @@ fn test_materialize_conflict_no_newlines_at_eof() { // BUG(#3968): These conflict markers cannot be parsed insta::assert_debug_snapshot!(parse_conflict( materialized.as_bytes(), - conflict.num_sides() + conflict.num_sides(), + MIN_CONFLICT_MARKER_LEN ),@"None"); } @@ -795,7 +797,8 @@ line 3 line 4 line 5 "}, - 2 + 2, + 7 ), None ); @@ -817,7 +820,8 @@ fn test_parse_conflict_simple() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), @r###" Some( @@ -853,7 +857,8 @@ fn test_parse_conflict_simple() { >>>>>>> More and more text line 5 "}, - 2 + 2, + 7 ), @r###" Some( @@ -892,7 +897,8 @@ fn test_parse_conflict_simple() { >>>>>>> Random text line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -931,7 +937,8 @@ fn test_parse_conflict_simple() { >>>>>>> Random text line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -969,7 +976,8 @@ fn test_parse_conflict_simple() { >>>>>>> End line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1005,7 +1013,8 @@ fn test_parse_conflict_simple() { >>>>>>> End line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1027,8 +1036,7 @@ fn test_parse_conflict_simple() { ) "# ); - // The conflict markers are too long and shouldn't parse (though we may - // decide to change this in the future) + // Conflict markers are allowed to be longer than necessary insta::assert_debug_snapshot!( parse_conflict(indoc! {b" line 1 @@ -1043,9 +1051,28 @@ fn test_parse_conflict_simple() { >>>>>>>>>>> line 5 "}, - 2 + 2, + 7 ), - @"None" + @r#" + Some( + [ + Resolved( + "line 1\n", + ), + Conflicted( + [ + "line 2\nleft\nline 4\n", + "line 2\nline 3\nline 4\n", + "right\n", + ], + ), + Resolved( + "line 5\n", + ), + ], + ) + "# ); } @@ -1071,7 +1098,8 @@ fn test_parse_conflict_multi_way() { >>>>>>> line 5 "}, - 3 + 3, + 7 ), @r###" Some( @@ -1114,7 +1142,8 @@ fn test_parse_conflict_multi_way() { >>>>>>> Random text line 5 "}, - 3 + 3, + 7 ), @r###" Some( @@ -1160,7 +1189,8 @@ fn test_parse_conflict_multi_way() { >>>>>>> Random text line 5 "}, - 3 + 3, + 7 ), @r#" Some( @@ -1203,7 +1233,8 @@ fn test_parse_conflict_crlf_markers() { >>>>>>>\r line 5\r "}, - 2 + 2, + 7 ), @r#" Some( @@ -1248,7 +1279,8 @@ fn test_parse_conflict_diff_stripped_whitespace() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1290,7 +1322,8 @@ fn test_parse_conflict_wrong_arity() { >>>>>>> line 5 "}, - 3 + 3, + 7 ), None ); @@ -1311,7 +1344,8 @@ fn test_parse_conflict_malformed_missing_removes() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1334,7 +1368,8 @@ fn test_parse_conflict_malformed_marker() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1358,7 +1393,8 @@ fn test_parse_conflict_malformed_diff() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1380,7 +1416,8 @@ fn test_parse_conflict_snapshot_missing_header() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1400,7 +1437,8 @@ fn test_parse_conflict_wrong_git_style() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1422,7 +1460,8 @@ fn test_parse_conflict_git_reordered_headers() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1448,7 +1487,8 @@ fn test_parse_conflict_git_too_many_sides() { >>>>>>> line 5 "}, - 3 + 3, + 7 ), None ); @@ -1471,7 +1511,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1489,7 +1530,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> line 5 "}, - 2 + 2, + 7 ), None ); @@ -1506,7 +1548,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> Conflict 1 of 1 ends line 5 "}, - 2 + 2, + 7 ), @r#" Some( @@ -1541,7 +1584,8 @@ fn test_parse_conflict_mixed_header_styles() { >>>>>>> Side #2 (Conflict 1 of 1 ends) line 5 "}, - 2 + 2, + 7 ), @r#" Some(