Skip to content

Commit

Permalink
Parse filename unambiguously using color escape sequences (#1634)
Browse files Browse the repository at this point in the history
* Simplify handle_grep_line via early return

Rather than indenting the entire body of the function twice, return
early if the conditions aren't met.

* Parse filename unambiguously using color escape sequences

`git grep`, by default, emits ANSI color escape sequences around the
filename, separator, and line number. Parse these if available.

This currently assumes the default colors, and will fall back to the
previous parsing if any of the colors have been changed.

Add tests for filenames that previously failed to parse correctly.
  • Loading branch information
joshtriplett authored Mar 2, 2024
1 parent 037665b commit f7ae06a
Showing 1 changed file with 170 additions and 49 deletions.
219 changes: 170 additions & 49 deletions src/handlers/grep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,58 +91,64 @@ impl<'a> StateMachine<'a> {
State::Unknown => (None, None, &None, true),
_ => (None, None, &None, false),
};
if !try_parse {
return Ok(false);
}

let mut handled_line = false;
if try_parse {
let line = self.line.clone(); // TODO: avoid clone
// Try parse_raw_grep_line on raw_line, and fall back to parse_grep_line
let raw_line = self.raw_line.clone(); // TODO: avoid clone
let line;
let grep_line = if let Some(grep_line) = parse_raw_grep_line(&raw_line) {
grep_line
} else {
line = self.line.clone(); // TODO: avoid clone
if let Some(grep_line) = parse_grep_line(&line) {
if matches!(grep_line.line_type, LineType::Ignore) {
handled_line = true;
return Ok(handled_line);
}
let first_path = previous_path.is_none();
let new_path = first_path || previous_path.as_deref() != Some(&grep_line.path);
// Emit a '--' section separator when output contains context lines (i.e. *grep option -A, -B, -C is in effect).
let new_section = !new_path
&& (previous_line_type == Some(&LineType::Context)
|| grep_line.line_type == LineType::Context)
&& previous_line < &grep_line.line_number.as_ref().map(|n| n - 1);
self.state = State::Grep(
self.config
.grep_output_type
.clone()
.unwrap_or_else(|| grep_line.grep_type.clone()),
grep_line.line_type,
grep_line.path.to_string(),
grep_line.line_number,
);
if new_section {
self.painter.set_highlighter()
}
if new_path {
if let Some(lang) = handlers::diff_header::get_extension(&grep_line.path)
.or(self.config.default_language.as_deref())
{
self.painter.set_syntax(Some(lang));
self.painter.set_highlighter();
}
}
match &self.state {
State::Grep(GrepType::Ripgrep, _, _, _) => self.emit_ripgrep_format_grep_line(
grep_line,
new_path,
first_path,
new_section,
),
State::Grep(GrepType::Classic, _, _, _) => {
self.emit_classic_format_grep_line(grep_line)
}
_ => delta_unreachable("Impossible state while handling grep line."),
}?;
handled_line = true
grep_line
} else {
return Ok(false);
}
};

if matches!(grep_line.line_type, LineType::Ignore) {
return Ok(true);
}
Ok(handled_line)
let first_path = previous_path.is_none();
let new_path = first_path || previous_path.as_deref() != Some(&grep_line.path);
// Emit a '--' section separator when output contains context lines (i.e. *grep option -A, -B, -C is in effect).
let new_section = !new_path
&& (previous_line_type == Some(&LineType::Context)
|| grep_line.line_type == LineType::Context)
&& previous_line < &grep_line.line_number.as_ref().map(|n| n - 1);
self.state = State::Grep(
self.config
.grep_output_type
.clone()
.unwrap_or_else(|| grep_line.grep_type.clone()),
grep_line.line_type,
grep_line.path.to_string(),
grep_line.line_number,
);
if new_section {
self.painter.set_highlighter()
}
if new_path {
if let Some(lang) = handlers::diff_header::get_extension(&grep_line.path)
.or(self.config.default_language.as_deref())
{
self.painter.set_syntax(Some(lang));
self.painter.set_highlighter();
}
}
match &self.state {
State::Grep(GrepType::Ripgrep, _, _, _) => {
self.emit_ripgrep_format_grep_line(grep_line, new_path, first_path, new_section)
}
State::Grep(GrepType::Classic, _, _, _) => {
self.emit_classic_format_grep_line(grep_line)
}
_ => delta_unreachable("Impossible state while handling grep line."),
}?;
Ok(true)
}

// Emulate ripgrep output: each section of hits from the same path has a header line,
Expand Down Expand Up @@ -467,12 +473,18 @@ fn make_output_config() -> GrepOutputConfig {
}

enum GrepLineRegex {
WithColor,
WithFileExtensionAndLineNumber,
WithFileExtension,
WithFileExtensionNoSpaces,
WithoutSeparatorCharacters,
}

lazy_static! {
static ref GREP_LINE_REGEX_ASSUMING_COLOR: Regex =
make_grep_line_regex(GrepLineRegex::WithColor);
}

lazy_static! {
static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_AND_LINE_NUMBER: Regex =
make_grep_line_regex(GrepLineRegex::WithFileExtensionAndLineNumber);
Expand Down Expand Up @@ -519,6 +531,15 @@ fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex {
// Make-7-file-7-xxx

let file_path = match regex_variant {
GrepLineRegex::WithColor => {
r"
\x1b\[35m # starts with ANSI color sequence
( # 1. file name
[^\x1b]* # anything
)
\x1b\[m # ends with ANSI color reset
"
}
GrepLineRegex::WithFileExtensionAndLineNumber | GrepLineRegex::WithFileExtension => {
r"
( # 1. file name (colons not allowed)
Expand Down Expand Up @@ -548,6 +569,51 @@ fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex {
};

let separator = match regex_variant {
GrepLineRegex::WithColor => {
r#"
\x1b\[36m # starts with ANSI color sequence for separator
(?:
(
: # 2. match marker
\x1b\[m # ANSI color reset
(?: # optional: line number followed by second match marker
\x1b\[32m # ANSI color sequence for line number
([0-9]+) # 3. line number
\x1b\[m # ANSI color reset
\x1b\[36m # ANSI color sequence for separator
: # second match marker
\x1b\[m # ANSI color reset
)?
)
|
(
- # 4. nomatch marker
\x1b\[m # ANSI color reset
(?: # optional: line number followed by second nomatch marker
\x1b\[32m # ANSI color sequence for line number
([0-9]+) # 5. line number
\x1b\[m # ANSI color reset
\x1b\[36m # ANSI color sequence for separator
- # second nomatch marker
\x1b\[m # ANSI color reset
)?
)
|
(
= # 6. context header marker
\x1b\[m # ANSI color reset
(?: # optional: line number followed by second context header marker
\x1b\[32m # ANSI color sequence for line number
([0-9]+) # 7. line number
\x1b\[m # ANSI color reset
\x1b\[36m # ANSI color sequence for separator
= # second context header marker
\x1b\[m # ANSI color reset
)?
)
)
"#
}
GrepLineRegex::WithFileExtensionAndLineNumber => {
r#"
(?:
Expand Down Expand Up @@ -620,6 +686,23 @@ pub fn parse_grep_line(line: &str) -> Option<GrepLine> {
}
}

pub fn parse_raw_grep_line(raw_line: &str) -> Option<GrepLine> {
// Early exit if we don't have an escape sequence
if !raw_line.starts_with("\x1b") {
return None;
}
if !matches!(
&*process::calling_process(),
process::CallingProcess::GitGrep(_) | process::CallingProcess::OtherGrep
) {
return None;
}
_parse_grep_line(&GREP_LINE_REGEX_ASSUMING_COLOR, raw_line).map(|mut grep_line| {
grep_line.code = ansi::strip_ansi_codes(&grep_line.code).into();
grep_line
})
}

pub fn _parse_grep_line<'b>(regex: &Regex, line: &'b str) -> Option<GrepLine<'b>> {
let caps = regex.captures(line)?;
let file = caps.get(1).unwrap().as_str().into();
Expand Down Expand Up @@ -652,7 +735,9 @@ pub fn _parse_grep_line<'b>(regex: &Regex, line: &'b str) -> Option<GrepLine<'b>

#[cfg(test)]
mod tests {
use crate::handlers::grep::{parse_grep_line, GrepLine, GrepType, LineType};
use crate::handlers::grep::{
parse_grep_line, parse_raw_grep_line, GrepLine, GrepType, LineType,
};
use crate::utils::process::tests::FakeParentArgs;

#[test]
Expand Down Expand Up @@ -865,6 +950,42 @@ mod tests {
);
}

#[test]
fn test_parse_grep_color() {
let fake_parent_grep_command =
"/usr/local/bin/git --doesnt-matter grep --nor-this nor_this -- nor_this";
let _args = FakeParentArgs::for_scope(fake_parent_grep_command);

let e = "\x1B";
assert_eq!(
parse_raw_grep_line(&format!(
r#"{e}[35msrc/zlib-ng/configure{e}[m{e}[36m:{e}[m -a*=* | -{e}[1;32m-arch{e}[ms=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;"#
)),
Some(GrepLine {
grep_type: GrepType::Classic,
path: "src/zlib-ng/configure".into(),
line_number: None,
line_type: LineType::Match,
code: r#" -a*=* | --archs=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;"#.into(),
submatches: None,
})
);

assert_eq!(
parse_raw_grep_line(&format!(
r#"{e}[35msrc/zlib-ng/configure{e}[m{e}[36m:{e}[m{e}[32m214{e}[m{e}[36m:{e}[m -a*=* | -{e}[1;32m-arch{e}[ms=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;"#
)),
Some(GrepLine {
grep_type: GrepType::Classic,
path: "src/zlib-ng/configure".into(),
line_number: Some(214),
line_type: LineType::Match,
code: r#" -a*=* | --archs=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;"#.into(),
submatches: None,
})
);
}

#[test]
fn test_parse_grep_no_match() {
let fake_parent_grep_command =
Expand Down

0 comments on commit f7ae06a

Please sign in to comment.