From 60d13801a1ec5e950eb2de86185d824bdf82ed0e Mon Sep 17 00:00:00 2001 From: Dan Davison Date: Sun, 14 Nov 2021 14:15:51 -0500 Subject: [PATCH] Handle grep output - Handle standard filepath:code and filepath:line_number:code output as produced by `git grep`, `rg -H`, `grep -H`, etc (with -n for line numbers). - Retain the match highlighting as produced by the grep tool, and expose it in delta's color output styled with grep-match-style. (Note that --color=always is needed to retain the color if piping into delta, but not for `git grep` when delta is configured as git's pager) - Special handling of -p, and -W options of `git grep`: these display the function context in which the matches occur. - `navigate` keybindings jump between match function contexts under `git grep -p` and between matching lines under `git grep -W`. Thanks @zachriggle for the proposal. Fixes #769 --- Cargo.lock | 9 +- Cargo.toml | 2 + README.md | 12 + src/cli.rs | 34 ++ src/config.rs | 38 ++ src/delta.rs | 10 +- src/handlers/file_meta.rs | 2 +- src/handlers/grep.rs | 709 +++++++++++++++++++++++++++++++++++ src/handlers/hunk_header.rs | 2 +- src/handlers/mod.rs | 2 + src/handlers/ripgrep_json.rs | 205 ++++++++++ src/options/set.rs | 6 + src/paint.rs | 8 + 13 files changed, 1034 insertions(+), 5 deletions(-) create mode 100644 src/handlers/grep.rs create mode 100644 src/handlers/ripgrep_json.rs diff --git a/Cargo.lock b/Cargo.lock index 0c081ac2b..af34912cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -356,6 +356,8 @@ dependencies = [ "lazy_static", "pathdiff", "regex", + "serde", + "serde_json", "shell-words", "smol_str", "structopt", @@ -806,6 +808,9 @@ name = "serde" version = "1.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06c64263859d87aa2eb554587e2d23183398d617427327cf2b3d0ed8c69e4800" +dependencies = [ + "serde_derive", +] [[package]] name = "serde_derive" @@ -820,9 +825,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.61" +version 
= "1.0.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fceb2595057b6891a4ee808f70054bd2d12f0e97f1cbb78689b59f676df325a" +checksum = "e277c495ac6cd1a01a58d0a0c574568b4d1ddf14f59965c6a58b8d96400b54f3" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index e7181ddcb..5aeeb8eaa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,8 @@ itertools = "0.10.1" lazy_static = "1.4" pathdiff = "0.2.1" regex = "1.4.6" +serde = { version = "1.0.118", features = ["derive"] } +serde_json = "1.0.70" shell-words = "1.0.0" smol_str = "0.1.18" structopt = "0.3.25" diff --git a/README.md b/README.md index 8cc6b0a09..8a2aa1f41 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ show = delta log = delta blame = delta + grep = delta reflog = delta [interactive] @@ -64,6 +65,7 @@ Code evolves, and we all spend time studying diffs. Delta aims to make this both - [Choosing colors (styles)](#choosing-colors-styles) - [Line numbers](#line-numbers) - [Side-by-side view](#side-by-side-view) + - [Grep](#grep) - ["Features": named groups of settings](#features-named-groups-of-settings) - [Custom themes](#custom-themes) - [diff-highlight and diff-so-fancy emulation](#diff-highlight-and-diff-so-fancy-emulation) @@ -151,6 +153,7 @@ Here's what `git show` can look like with git configured to use delta: - Git style strings (foreground color, background color, font attributes) are supported for >20 stylable elements - Side-by-side view with line-wrapping - Line numbering +- Handles grep output with file paths from `rg`, `git grep`, `grep`, etc - `diff-highlight` and `diff-so-fancy` emulation modes - Stylable box/line decorations to draw attention to commit, file and hunk header sections. - Support for Git's `--color-moved` feature. 
@@ -410,6 +413,15 @@ In contrast, the long replacement line in the right panel overflows by almost an For control over the details of line wrapping, see `--wrap-max-lines`, `--wrap-left-symbol`, `--wrap-right-symbol`, `--wrap-right-percent`, `--wrap-right-prefix-symbol`, `--inline-hint-style`. Line wrapping was implemented by @th1000s. +### Grep + +Delta applies syntax-highlighting and other enhancements to standard grep output such as from `git grep`, [ripgrep](https://github.com/BurntSushi/ripgrep/) (aka `rg`), grep, etc. +To use with `git grep`, set delta as the pager for `grep` in the `[pager]` section of your gitconfig. See the example at the [top of the page](#get-started). +Output from other grep tools can be piped to delta: e.g. `rg -Hn --color=always`, `grep -Hn --color=always`, etc. +To customize the colors and syntax highlighting, see `grep-match-line-style`, `grep-match-word-style`, `grep-context-line-style`, `grep-file-style`, `grep-line-number-style`. +Ripgrep's `rg --json` output format is supported; this avoids certain file name parsing ambiguities that are inevitable with the standard grep output formats. +Note that `git grep` can display the "function context" for matches and that delta handles this output specially: see the `-p` and `-W` options of `git grep`. + ### "Features": named groups of settings All delta options can go under the `[delta]` section in your git config file. However, you can also use named "features" to keep things organized: these are sections in git config like `[delta "my-feature"]`. Here's an example using two custom features: diff --git a/src/cli.rs b/src/cli.rs index aa40ce1ed..1d0c7640c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -441,6 +441,40 @@ pub struct Opt { )] pub blame_timestamp_format: String, + #[structopt(long = "grep-match-line-style")] + /// Style (foreground, background, attributes) for matching lines of code in + /// grep output. See STYLES section. Defaults to plus-style. 
+ pub grep_match_line_style: Option, + + #[structopt(long = "grep-match-word-style")] + /// Style (foreground, background, attributes) for the specific matching + /// substrings within a matching line of code in grep output. See STYLES + /// section. Defaults to plus-style. + pub grep_match_word_style: Option, + + #[structopt(long = "grep-context-line-style")] + /// Style (foreground, background, attributes) for non-matching lines of + /// code in grep output. See STYLES section. Defaults to zero-style. + pub grep_context_line_style: Option, + + #[structopt(long = "grep-file-style")] + /// Style (foreground, background, attributes) for file paths in grep + /// output. See STYLES section. Defaults to hunk-header-file-path-style. + pub grep_file_style: Option, + + #[structopt(long = "grep-line-number-style")] + /// Style (foreground, background, attributes) for line numbers in grep + /// output. See STYLES section. Defaults to hunk-header-line-number-style. + pub grep_line_number_style: Option, + + #[structopt(long = "grep-separator-symbol", default_value = ":")] + /// Symbol used in grep output to separate file path (and line number) from + /// the line of file contents. Defaults to ":" for both match and context + /// lines, since many terminal emulators recognize constructs like + /// "/path/to/file:7:". However, standard grep output uses "-" for context + /// lines: set this option to "keep" to keep the original separator symbols. + pub grep_separator_symbol: String, + /// Default language used for syntax highlighting when this cannot be /// inferred from a filename. 
It will typically make sense to set this in /// per-repository git config (.git/config) diff --git a/src/config.rs b/src/config.rs index 4ed7f27ad..2b476f9ee 100644 --- a/src/config.rs +++ b/src/config.rs @@ -79,6 +79,12 @@ pub struct Config { pub git_config: Option, pub git_minus_style: Style, pub git_plus_style: Style, + pub grep_context_line_style: Style, + pub grep_file_style: Style, + pub grep_line_number_style: Style, + pub grep_match_line_style: Style, + pub grep_match_word_style: Style, + pub grep_separator_symbol: String, pub hunk_header_file_style: Style, pub hunk_header_line_number_style: Style, pub hunk_header_style_include_file_path: bool, @@ -217,6 +223,32 @@ impl From for Config { _ => *style::GIT_DEFAULT_PLUS_STYLE, }; + let grep_match_line_style = if let Some(s) = opt.grep_match_line_style { + Style::from_str(&s, None, None, opt.computed.true_color, false) + } else { + zero_style + }; + let grep_match_word_style = if let Some(s) = opt.grep_match_word_style { + Style::from_str(&s, None, None, opt.computed.true_color, false) + } else { + plus_emph_style + }; + let grep_context_line_style = if let Some(s) = opt.grep_context_line_style { + Style::from_str(&s, None, None, opt.computed.true_color, false) + } else { + zero_style + }; + let grep_file_style = if let Some(s) = opt.grep_file_style { + Style::from_str(&s, None, None, opt.computed.true_color, false) + } else { + hunk_header_file_style + }; + let grep_line_number_style = if let Some(s) = opt.grep_line_number_style { + Style::from_str(&s, None, None, opt.computed.true_color, false) + } else { + hunk_header_line_number_style + }; + let blame_palette = make_blame_palette(opt.blame_palette, opt.computed.is_light_mode); let file_added_label = opt.file_added_label; @@ -285,6 +317,12 @@ impl From for Config { file_style, git_config: opt.git_config, git_config_entries: opt.git_config_entries, + grep_context_line_style, + grep_file_style, + grep_line_number_style, + grep_match_line_style, + 
grep_match_word_style, + grep_separator_symbol: opt.grep_separator_symbol, hunk_header_file_style, hunk_header_line_number_style, hunk_header_style, diff --git a/src/delta.rs b/src/delta.rs index fbf45c53d..5f4391391 100644 --- a/src/delta.rs +++ b/src/delta.rs @@ -23,6 +23,7 @@ pub enum State { SubmoduleLog, // In a submodule section, with gitconfig diff.submodule = log SubmoduleShort(String), // In a submodule section, with gitconfig diff.submodule = short Blame(String, Option), // In a line of `git blame` output (commit, repeat_blame_line). + Grep(String, Option), // In a line of `git grep` output (file, repeat_grep_line). Unknown, // The following elements are created when a line is wrapped to display it: HunkZeroWrapped, // Wrapped unchanged line @@ -121,6 +122,7 @@ impl<'a> StateMachine<'a> { || self.handle_submodule_short_line()? || self.handle_hunk_line()? || self.handle_blame_line()? + || self.handle_grep_line()? || self.should_skip_line() || self.emit_line_unchanged()?; } @@ -133,7 +135,13 @@ impl<'a> StateMachine<'a> { fn ingest_line(&mut self, raw_line_bytes: &[u8]) { // TODO: retain raw_line as Cow self.raw_line = String::from_utf8_lossy(raw_line_bytes).to_string(); - if self.config.max_line_length > 0 && self.raw_line.len() > self.config.max_line_length { + if self.config.max_line_length > 0 + && self.raw_line.len() > self.config.max_line_length + // We must not truncate ripgrep --json output + // TODO: An alternative might be to truncate `line` but retain + // `raw_line` untruncated? + && !self.raw_line.starts_with('{') + { self.raw_line = ansi::truncate_str( &self.raw_line, self.config.max_line_length, diff --git a/src/handlers/file_meta.rs b/src/handlers/file_meta.rs index 8a1ea0d3e..bea05625b 100644 --- a/src/handlers/file_meta.rs +++ b/src/handlers/file_meta.rs @@ -210,7 +210,7 @@ fn get_file_extension_from_file_meta_line_file_path(path: &str) -> Option<&str> } /// Attempt to parse input as a file path and return extension as a &str. 
-fn get_extension(s: &str) -> Option<&str> { +pub fn get_extension(s: &str) -> Option<&str> { let path = Path::new(s); path.extension() .and_then(|e| e.to_str()) diff --git a/src/handlers/grep.rs b/src/handlers/grep.rs new file mode 100644 index 000000000..953ab8941 --- /dev/null +++ b/src/handlers/grep.rs @@ -0,0 +1,709 @@ +// TODO +// Bad parsing: "etc/examples/119-within-line-edits:4:repo=$(mktemp -d)" +// Parsing "Makefile" +// Inspect process tree once +use std::borrow::Cow; + +use lazy_static::lazy_static; +use regex::Regex; +use serde::Deserialize; +use unicode_segmentation::UnicodeSegmentation; + +use crate::ansi; +use crate::delta::{State, StateMachine}; +use crate::handlers::{self, ripgrep_json}; +use crate::paint::{self, BgShouldFill, StyleSectionSpecifier}; +use crate::style::Style; +use crate::utils; + +#[derive(Debug, PartialEq)] +pub struct GrepLine<'b> { + pub path: Cow<'b, str>, + pub line_number: Option, + pub line_type: LineType, + pub code: Cow<'b, str>, + pub submatches: Option>, +} + +#[derive(Clone, Copy, Debug, PartialEq, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum LineType { + ContextHeader, + Context, + Match, + Ignore, +} + +struct GrepOutputConfig { + add_navigate_marker_to_matches: bool, + render_context_header_as_hunk_header: bool, + pad_line_number: bool, +} + +impl<'a> StateMachine<'a> { + /// If this is a line of git grep output then render it accordingly. If this + /// is the first grep line, then set the syntax-highlighter language. + pub fn handle_grep_line(&mut self) -> std::io::Result { + self.painter.emit()?; + let mut handled_line = false; + + // TODO: It should be possible to eliminate some of the .clone()s and + // .to_owned()s. 
+ let (_previous_file, repeat_grep_line, try_parse) = match &self.state { + State::Grep(file, repeat_grep_line) => { + (Some(file.as_str()), repeat_grep_line.clone(), true) + } + State::Unknown => (None, None, true), + _ => (None, None, false), + }; + if try_parse { + if let Some(mut grep_line) = parse_grep_line(&self.line) { + if matches!(grep_line.line_type, LineType::Ignore) { + handled_line = true; + return Ok(handled_line); + } + + let output_config = make_output_config(); + + // Emit syntax-highlighted code + // TODO: Determine the language less frequently, e.g. only when the file changes. + if let Some(lang) = handlers::file_meta::get_extension(&grep_line.path) + .or_else(|| self.config.default_language.as_deref()) + { + self.painter.set_syntax(Some(lang)); + self.painter.set_highlighter(); + } + self.state = State::Grep(grep_line.path.to_string(), repeat_grep_line); + + match ( + &grep_line.line_type, + output_config.render_context_header_as_hunk_header, + ) { + // Emit context header line + (LineType::ContextHeader, true) => handlers::hunk_header::write_hunk_header( + &grep_line.code, + &[(grep_line.line_number.unwrap_or(0), 0)], + &mut self.painter, + &self.line, + &grep_line.path, + self.config, + )?, + _ => { + if self.config.navigate { + write!( + self.painter.writer, + "{}", + match ( + &grep_line.line_type, + output_config.add_navigate_marker_to_matches + ) { + (LineType::Match, true) => "• ", + (_, true) => " ", + _ => "", + } + )? + } + // Emit file & line-number + let separator = if self.config.grep_separator_symbol == "keep" { + // grep, rg, and git grep use ":" for matching lines + // and "-" for non-matching lines (and `git grep -W` + // uses "=" for a context header line). 
+ match grep_line.line_type { + LineType::Match => ":", + LineType::Context => "-", + LineType::ContextHeader => "=", + LineType::Ignore => "", + } + } else { + // But ":" results in a "file/path:number:" + // construct that terminal emulators are more likely + // to recognize and render as a clickable link. If + // navigate is enabled then there is already a good + // visual indicator of match lines (in addition to + // the grep-match-style highlighting) and so we use + // ":" for matches and non-matches alike. + &self.config.grep_separator_symbol + }; + write!( + self.painter.writer, + "{}", + paint::paint_file_path_with_line_number( + grep_line.line_number, + &grep_line.path, + output_config.pad_line_number, + separator, + true, + Some(self.config.grep_file_style), + Some(self.config.grep_line_number_style), + self.config + ) + )?; + + // Emit code line + let code_style_sections = + match (&grep_line.line_type, &grep_line.submatches) { + (LineType::Match, Some(submatches)) => { + // We expand tabs at this late stage because + // the tabs are escaped in the JSON, so + // expansion must come after JSON parsing. + // (At the time of writing, we are in this + // arm iff we are handling `ripgrep --json` + // output.) + grep_line.code = self + .painter + .expand_tabs(grep_line.code.graphemes(true)) + .into(); + make_style_sections( + &grep_line.code, + submatches, + self.config.grep_match_word_style, + self.config.grep_match_line_style, + ) + } + (LineType::Match, None) => { + // HACK: We need tabs expanded, and we need + // the &str passed to + // `get_code_style_sections` to live long + // enough. But at the point it is guaranteed + // that this handler is going to handle this + // line, so mutating it is acceptable. 
+ self.raw_line = + self.painter.expand_tabs(self.raw_line.graphemes(true)); + get_code_style_sections( + &self.raw_line, + self.config.grep_match_word_style, + self.config.grep_match_line_style, + &grep_line, + ) + .unwrap_or( + StyleSectionSpecifier::Style( + self.config.grep_match_line_style, + ), + ) + } + _ => StyleSectionSpecifier::Style( + self.config.grep_context_line_style, + ), + }; + self.painter.syntax_highlight_and_paint_line( + &format!("{}\n", grep_line.code), + code_style_sections, + self.state.clone(), + BgShouldFill::default(), + ) + } + } + handled_line = true + } + } + Ok(handled_line) + } +} + +fn make_style_sections<'a>( + line: &'a str, + submatches: &[(usize, usize)], + match_style: Style, + non_match_style: Style, +) -> StyleSectionSpecifier<'a> { + let mut sections = Vec::new(); + let mut curr = 0; + for (start_, end_) in submatches { + let (start, end) = (*start_, *end_); + if start > curr { + sections.push((non_match_style, &line[curr..start])) + }; + sections.push((match_style, &line[start..end])); + curr = end; + } + if curr < line.len() { + sections.push((non_match_style, &line[curr..])) + } + StyleSectionSpecifier::StyleSections(sections) +} + +// Return style sections describing colors received from git. 
+fn get_code_style_sections<'b>( + raw_line: &'b str, + match_style: Style, + non_match_style: Style, + grep: &GrepLine, +) -> Option> { + if let Some(raw_code_start) = ansi::ansi_preserving_index( + raw_line, + match grep.line_number { + Some(n) => format!("{}:{}:", grep.path, n).len(), + None => grep.path.len() + 1, + }, + ) { + let match_style_sections = ansi::parse_style_sections(&raw_line[raw_code_start..]) + .iter() + .map(|(ansi_term_style, s)| { + if ansi_term_style.foreground.is_some() { + (match_style, *s) + } else { + (non_match_style, *s) + } + }) + .collect(); + Some(StyleSectionSpecifier::StyleSections(match_style_sections)) + } else { + None + } +} + +fn make_output_config() -> GrepOutputConfig { + match utils::process::git_grep_command_options() { + Some((longs, shorts)) if shorts.contains("-W") || longs.contains("--function-context") => { + // --function-context is in effect: i.e. the entire function is + // being displayed. In that case we don't render the first line as a + // header, since the second line is the true next line, and it will + // be more readable to have these displayed normally. We do add the + // navigate marker, since match lines will be surrounded by (many) + // non-match lines. And, since we are printing (many) successive lines + // of code, we pad line numbers <100 in order to maintain code + // alignment up to line 9999. + GrepOutputConfig { + render_context_header_as_hunk_header: false, + add_navigate_marker_to_matches: true, + pad_line_number: true, + } + } + Some((longs, shorts)) if shorts.contains("-p") || longs.contains("--show-function") => { + // --show-function is in effect, i.e. the function header is being + // displayed, along with matches within the function. Therefore we + // render the first line as a header, but we do not add the navigate + // marker, since all non-header lines are matches. 
+ GrepOutputConfig { + render_context_header_as_hunk_header: true, + add_navigate_marker_to_matches: false, + pad_line_number: false, + } + } + _ => GrepOutputConfig { + render_context_header_as_hunk_header: true, + add_navigate_marker_to_matches: false, + pad_line_number: false, + }, + } +} + +enum GrepLineRegex { + FilePathWithFileExtension, + FilePathWithoutSeparatorCharacters, +} + +lazy_static! { + static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION: Regex = + make_grep_line_regex(GrepLineRegex::FilePathWithFileExtension); +} + +lazy_static! { + static ref GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS: Regex = + make_grep_line_regex(GrepLineRegex::FilePathWithoutSeparatorCharacters); +} + +// See tests for example grep lines +fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex { + // Grep tools such as `git grep` and `rg` emit lines like the following, + // where "xxx" represents arbitrary code. Note that there are 3 possible + // "separator characters": ':', '-', '='. + + // The format is ambiguous, but we attempt to parse it. + + // src/co-7-fig.rs:xxx + // src/co-7-fig.rs:7:xxx + // src/co-7-fig.rs-xxx + // src/co-7-fig.rs-7-xxx + // src/co-7-fig.rs=xxx + // src/co-7-fig.rs=7=xxx + + // Makefile:xxx + // Makefile:7:xxx + // Makefile-xxx + // Makefile-7-xxx + + // Make-7-file:xxx + // Make-7-file:7:xxx + // Make-7-file-xxx + // Make-7-file-7-xxx + + let file_path = match regex_variant { + GrepLineRegex::FilePathWithFileExtension => { + r" + ( # 1. file name (colons not allowed) + [^:\ ] # a file name cannot start with whitespace + [^:]* # anything + \.[^.\ :=-]{1,6} # extension + ) + " + } + GrepLineRegex::FilePathWithoutSeparatorCharacters => { + r" + ( # 1. file name (colons not allowed) + [^:\ =-] # a file name cannot start with whitespace + [^:=-]* # anything except separators + [^:\ ] # a file name cannot end with whitespace + ) + " + } + }; + + Regex::new(&format!( + "(?x) +^ +{file_path} +(?: + ( + : # 2. match marker + (?:([0-9]+):)? 
# 3. optional: line number followed by second match marker + ) + | + ( + - # 4. nomatch marker + (?:([0-9]+)-)? # 5. optional: line number followed by second nomatch marker + ) + | + ( + = # 6. match marker + (?:([0-9]+)=)? # 7. optional: line number followed by second header marker + ) +) +(.*) # 8. code (i.e. line contents) +$ +", + file_path = file_path + )) + .unwrap() +} + +pub fn parse_grep_line(line: &str) -> Option { + if line.starts_with('{') { + return ripgrep_json::parse_line(line); + } + [ + &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION, + &*GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS, + ] + .iter() + .find_map(|regex| _parse_grep_line(*regex, line)) +} + +pub fn _parse_grep_line<'b>(regex: &Regex, line: &'b str) -> Option> { + let caps = regex.captures(line)?; + let file = caps.get(1).unwrap().as_str().into(); + let (line_type, line_number) = &[ + (2, LineType::Match), + (4, LineType::Context), + (6, LineType::ContextHeader), + ] + .iter() + .find_map(|(i, line_type)| { + if caps.get(*i).is_some() { + let line_number: Option = + caps.get(i + 1).map(|m| m.as_str().parse().ok()).flatten(); + Some((*line_type, line_number)) + } else { + None + } + }) + .unwrap(); // The regex matches so one of the three alternatives must have matched + let code = caps.get(8).unwrap().as_str().into(); + + Some(GrepLine { + path: file, + line_number: *line_number, + line_type: *line_type, + code, + submatches: None, + }) +} + +#[cfg(test)] +mod tests { + use crate::handlers::grep::{parse_grep_line, GrepLine, LineType}; + + #[test] + fn test_parse_grep_match() { + assert_eq!( + parse_grep_line("src/co-7-fig.rs:xxx"), + Some(GrepLine { + path: "src/co-7-fig.rs".into(), + line_number: None, + line_type: LineType::Match, + code: "xxx".into(), + submatches: None, + }) + ); + assert_eq!( + parse_grep_line("src/config.rs:use crate::minusplus::MinusPlus;"), + Some(GrepLine { + path: "src/config.rs".into(), + line_number: None, + line_type: LineType::Match, + code: "use 
crate::minusplus::MinusPlus;".into(), + submatches: None, + }) + ); + assert_eq!( + parse_grep_line( + "src/config.rs: pub line_numbers_style_minusplus: MinusPlus