From 0166aa95ec65557cf05c2e7adf86a18ecf762005 Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Sun, 24 Nov 2024 15:20:18 -0600 Subject: [PATCH] conflicts: refactor conflict marker writing and parsing These changes make the code a bit more readable, and they will make it easier to have conflict markers of different lengths in the next commit. --- lib/src/conflicts.rs | 279 ++++++++++++++++++++++++++----------------- 1 file changed, 167 insertions(+), 112 deletions(-) diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 32997363c1..75c38dc75c 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -17,6 +17,7 @@ use std::io; use std::io::Read; use std::io::Write; +use std::iter; use std::iter::zip; use bstr::BString; @@ -28,8 +29,6 @@ use futures::StreamExt; use futures::TryStreamExt; use itertools::Itertools; use pollster::FutureExt; -use regex::bytes::Regex; -use regex::bytes::RegexBuilder; use crate::backend::BackendError; use crate::backend::BackendResult; @@ -51,49 +50,25 @@ use crate::merge::MergedTreeValue; use crate::repo_path::RepoPath; use crate::store::Store; -const CONFLICT_START_LINE: &str = "<<<<<<<"; -const CONFLICT_END_LINE: &str = ">>>>>>>"; -const CONFLICT_DIFF_LINE: &str = "%%%%%%%"; -const CONFLICT_MINUS_LINE: &str = "-------"; -const CONFLICT_PLUS_LINE: &str = "+++++++"; -const CONFLICT_GIT_ANCESTOR_LINE: &str = "|||||||"; -const CONFLICT_GIT_SEPARATOR_LINE: &str = "======="; -const CONFLICT_START_LINE_CHAR: u8 = CONFLICT_START_LINE.as_bytes()[0]; -const CONFLICT_END_LINE_CHAR: u8 = CONFLICT_END_LINE.as_bytes()[0]; -const CONFLICT_DIFF_LINE_CHAR: u8 = CONFLICT_DIFF_LINE.as_bytes()[0]; -const CONFLICT_MINUS_LINE_CHAR: u8 = CONFLICT_MINUS_LINE.as_bytes()[0]; -const CONFLICT_PLUS_LINE_CHAR: u8 = CONFLICT_PLUS_LINE.as_bytes()[0]; -const CONFLICT_GIT_ANCESTOR_LINE_CHAR: u8 = CONFLICT_GIT_ANCESTOR_LINE.as_bytes()[0]; -const CONFLICT_GIT_SEPARATOR_LINE_CHAR: u8 = CONFLICT_GIT_SEPARATOR_LINE.as_bytes()[0]; - -/// A conflict marker is one of the separators, optionally followed by a space -/// and some text. -// TODO: All the `{7}` could be replaced with `{7,}` to allow longer -// separators. This could be useful to make it possible to allow conflict -// markers inside the text of the conflicts. -static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { - RegexBuilder::new(r"^(<{7}|>{7}|%{7}|\-{7}|\+{7}|\|{7}|={7})( .*)?$") - .multi_line(true) - .build() - .unwrap() -}); +/// Length of conflict markers. +pub const CONFLICT_MARKER_LEN: usize = 7; fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> { for hunk in hunks { match hunk.kind { DiffHunkKind::Matching => { debug_assert!(hunk.contents.iter().all_equal()); - for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') { + for line in hunk.contents[0].lines_with_terminator() { file.write_all(b" ")?; file.write_all(line)?; } } DiffHunkKind::Different => { - for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') { + for line in hunk.contents[0].lines_with_terminator() { file.write_all(b"-")?; file.write_all(line)?; } - for line in hunk.contents[1].split_inclusive(|b| *b == b'\n') { + for line in hunk.contents[1].lines_with_terminator() { file.write_all(b"+")?; file.write_all(line)?; } @@ -250,6 +225,78 @@ pub enum ConflictMarkerStyle { Git, } +/// Represents a kind of conflict marker which can be materialized and parsed. +#[derive(Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +enum ConflictMarkerKind { + ConflictStart = b'<', + ConflictEnd = b'>', + Add = b'+', + Remove = b'-', + Diff = b'%', + GitAncestor = b'|', + GitSeparator = b'=', +} + +impl ConflictMarkerKind { + /// Get the ASCII byte used for this conflict marker. + fn to_byte(self) -> u8 { + self as u8 + } + + /// Parse a byte to see if it corresponds with any kind of conflict marker. + fn parse_byte(byte: u8) -> Option { + match byte { + b'<' => Some(Self::ConflictStart), + b'>' => Some(Self::ConflictEnd), + b'+' => Some(Self::Add), + b'-' => Some(Self::Remove), + b'%' => Some(Self::Diff), + b'|' => Some(Self::GitAncestor), + b'=' => Some(Self::GitSeparator), + _ => None, + } + } +} + +/// Write a conflict marker to an output file. +fn write_conflict_marker( + output: &mut dyn Write, + kind: ConflictMarkerKind, + suffix_text: &str, +) -> io::Result<()> { + let conflict_marker: BString = iter::repeat(kind.to_byte()) + .take(CONFLICT_MARKER_LEN) + .collect(); + + if suffix_text.is_empty() { + writeln!(output, "{conflict_marker}") + } else { + writeln!(output, "{conflict_marker} {suffix_text}") + } +} + +/// Parse a conflict marker from a line of a file. The conflict marker must have +/// the correct length (CONFLICT_MARKER_LEN). +fn parse_conflict_marker(line: &[u8]) -> Option { + let first_byte = *line.first()?; + let kind = ConflictMarkerKind::parse_byte(first_byte)?; + let len = line.iter().take_while(|&&b| b == first_byte).count(); + + if len != CONFLICT_MARKER_LEN { + return None; + } + + if let Some(next_byte) = line.get(len) { + // If there is a character after the marker, it must be ASCII whitespace + if !next_byte.is_ascii_whitespace() { + return None; + } + } + + Some(kind) +} + pub fn materialize_merge_result>( single_hunk: &Merge, conflict_marker_style: ConflictMarkerStyle, @@ -323,14 +370,22 @@ fn materialize_git_style_conflict( conflict_info: &str, output: &mut dyn Write, ) -> io::Result<()> { - writeln!(output, "{CONFLICT_START_LINE} Side #1 ({conflict_info})")?; + write_conflict_marker( + output, + ConflictMarkerKind::ConflictStart, + &format!("Side #1 ({conflict_info})"), + )?; output.write_all(left)?; - writeln!(output, "{CONFLICT_GIT_ANCESTOR_LINE} Base")?; + write_conflict_marker(output, ConflictMarkerKind::GitAncestor, "Base")?; output.write_all(base)?; // VS Code doesn't seem to support any trailing text on the separator line - writeln!(output, "{CONFLICT_GIT_SEPARATOR_LINE}")?; + write_conflict_marker(output, ConflictMarkerKind::GitSeparator, "")?; output.write_all(right)?; - writeln!(output, "{CONFLICT_END_LINE} Side #2 ({conflict_info} ends)")?; + write_conflict_marker( + output, + ConflictMarkerKind::ConflictEnd, + &format!("Side #2 ({conflict_info} ends)"), + )?; Ok(()) } @@ -343,17 +398,21 @@ fn materialize_jj_style_conflict( ) -> io::Result<()> { // Write a positive snapshot (side) of a conflict fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> { - writeln!( + write_conflict_marker( output, - "{CONFLICT_PLUS_LINE} Contents of side #{}", - add_index + 1 + ConflictMarkerKind::Add, + &format!("Contents of side #{}", add_index + 1), )?; output.write_all(data) } // Write a negative snapshot (base) of a conflict fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> { - writeln!(output, "{CONFLICT_MINUS_LINE} Contents of {base_str}")?; + write_conflict_marker( + output, + ConflictMarkerKind::Remove, + &format!("Contents of {base_str}"), + )?; output.write_all(data) } @@ -364,15 +423,15 @@ fn materialize_jj_style_conflict( diff: &[DiffHunk], output: &mut dyn Write, ) -> io::Result<()> { - writeln!( + write_conflict_marker( output, - "{CONFLICT_DIFF_LINE} Changes from {base_str} to side #{}", - add_index + 1 + ConflictMarkerKind::Diff, + &format!("Changes from {base_str} to side #{}", add_index + 1), )?; write_diff_hunks(diff, output) } - writeln!(output, "{CONFLICT_START_LINE} {conflict_info}")?; + write_conflict_marker(output, ConflictMarkerKind::ConflictStart, conflict_info)?; let mut add_index = 0; for (base_index, left) in hunk.removes().enumerate() { // The vast majority of conflicts one actually tries to resolve manually have 1 @@ -422,7 +481,11 @@ fn materialize_jj_style_conflict( for (add_index, slice) in hunk.adds().enumerate().skip(add_index) { write_side(add_index, slice, output)?; } - writeln!(output, "{CONFLICT_END_LINE} {conflict_info} ends")?; + write_conflict_marker( + output, + ConflictMarkerKind::ConflictEnd, + &format!("{conflict_info} ends"), + )?; Ok(()) } @@ -480,24 +543,28 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option { conflict_start = Some(pos); conflict_start_len = line.len(); - } else if conflict_start.is_some() && line[0] == CONFLICT_END_LINE_CHAR { - let conflict_body = &input[conflict_start.unwrap() + conflict_start_len..pos]; - let hunk = parse_conflict_hunk(conflict_body); - if hunk.num_sides() == num_sides { - let resolved_slice = &input[resolved_start..conflict_start.unwrap()]; - if !resolved_slice.is_empty() { - hunks.push(Merge::resolved(BString::from(resolved_slice))); + } + Some(ConflictMarkerKind::ConflictEnd) => { + if let Some(conflict_start_index) = conflict_start { + let conflict_body = &input[conflict_start_index + conflict_start_len..pos]; + let hunk = parse_conflict_hunk(conflict_body); + if hunk.num_sides() == num_sides { + let resolved_slice = &input[resolved_start..conflict_start_index]; + if !resolved_slice.is_empty() { + hunks.push(Merge::resolved(BString::from(resolved_slice))); + } + hunks.push(hunk); + resolved_start = pos + line.len(); } - hunks.push(hunk); - resolved_start = pos + line.len(); + conflict_start = None; } - conflict_start = None; } + _ => {} } pos += line.len(); } @@ -519,20 +586,19 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option Merge { // If the hunk starts with a conflict marker, find its first character - let initial_conflict_marker_char = input + let initial_conflict_marker = input .lines_with_terminator() .next() - .filter(|line| is_conflict_marker_line(line)) - .map(|line| line[0]); + .and_then(parse_conflict_marker); - match initial_conflict_marker_char { + match initial_conflict_marker { // JJ-style conflicts must start with one of these 3 conflict marker lines - Some(CONFLICT_DIFF_LINE_CHAR | CONFLICT_MINUS_LINE_CHAR | CONFLICT_PLUS_LINE_CHAR) => { + Some(ConflictMarkerKind::Diff | ConflictMarkerKind::Remove | ConflictMarkerKind::Add) => { parse_jj_style_conflict_hunk(input) } // Git-style conflicts either must not start with a conflict marker line, or must start with // the "|||||||" conflict marker line (if the first side was empty) - None | Some(CONFLICT_GIT_ANCESTOR_LINE_CHAR) => parse_git_style_conflict_hunk(input), + None | Some(ConflictMarkerKind::GitAncestor) => parse_git_style_conflict_hunk(input), // No other conflict markers are allowed at the start of a hunk Some(_) => Merge::resolved(BString::new(vec![])), } @@ -541,34 +607,32 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge { fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { enum State { Diff, - Minus, - Plus, + Remove, + Add, Unknown, } let mut state = State::Unknown; let mut removes = vec![]; let mut adds = vec![]; for line in input.lines_with_terminator() { - if is_conflict_marker_line(line) { - match line[0] { - CONFLICT_DIFF_LINE_CHAR => { - state = State::Diff; - removes.push(BString::new(vec![])); - adds.push(BString::new(vec![])); - continue; - } - CONFLICT_MINUS_LINE_CHAR => { - state = State::Minus; - removes.push(BString::new(vec![])); - continue; - } - CONFLICT_PLUS_LINE_CHAR => { - state = State::Plus; - adds.push(BString::new(vec![])); - continue; - } - _ => {} + match parse_conflict_marker(line) { + Some(ConflictMarkerKind::Diff) => { + state = State::Diff; + removes.push(BString::new(vec![])); + adds.push(BString::new(vec![])); + continue; } + Some(ConflictMarkerKind::Remove) => { + state = State::Remove; + removes.push(BString::new(vec![])); + continue; + } + Some(ConflictMarkerKind::Add) => { + state = State::Add; + adds.push(BString::new(vec![])); + continue; + } + _ => {} } match state { State::Diff => { @@ -590,10 +654,10 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge { return Merge::resolved(BString::new(vec![])); } } - State::Minus => { + State::Remove => { removes.last_mut().unwrap().extend_from_slice(line); } - State::Plus => { + State::Add => { adds.last_mut().unwrap().extend_from_slice(line); } State::Unknown => { @@ -623,28 +687,26 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { let mut base = BString::new(vec![]); let mut right = BString::new(vec![]); for line in input.lines_with_terminator() { - if is_conflict_marker_line(line) { - match line[0] { - CONFLICT_GIT_ANCESTOR_LINE_CHAR => { - if state == State::Left { - state = State::Base; - continue; - } else { - // Base must come after left - return Merge::resolved(BString::new(vec![])); - } + match parse_conflict_marker(line) { + Some(ConflictMarkerKind::GitAncestor) => { + if state == State::Left { + state = State::Base; + continue; + } else { + // Base must come after left + return Merge::resolved(BString::new(vec![])); } - CONFLICT_GIT_SEPARATOR_LINE_CHAR => { - if state == State::Base { - state = State::Right; - continue; - } else { - // Right must come after base - return Merge::resolved(BString::new(vec![])); - } + } + Some(ConflictMarkerKind::GitSeparator) => { + if state == State::Base { + state = State::Right; + continue; + } else { + // Right must come after base + return Merge::resolved(BString::new(vec![])); } - _ => {} } + _ => {} } match state { State::Left => left.extend_from_slice(line), @@ -661,13 +723,6 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge { } } -/// Check whether a line is a conflict marker. Removes trailing whitespace -/// before checking against regex to ensure it parses CRLF endings correctly. -fn is_conflict_marker_line(line: &[u8]) -> bool { - let line = line.trim_end_with(|ch| ch.is_ascii_whitespace()); - CONFLICT_MARKER_REGEX.is_match_at(line, 0) -} - /// Parses conflict markers in `content` and returns an updated version of /// `file_ids` with the new contents. If no (valid) conflict markers remain, a /// single resolves `FileId` will be returned.