From 0b94ad061322ea8c75dbd77ebedff52f73097071 Mon Sep 17 00:00:00 2001 From: Edoardo Pirovano Date: Wed, 7 Jul 2021 17:50:23 +0100 Subject: [PATCH] cli: add --stop-on-nonmatch flag This causes ripgrep to stop searching an individual file after it has found a non-matching line. But this only occurs after it has found a matching line. Fixes #1790, Closes #1930 --- CHANGELOG.md | 2 ++ complete/_rg | 1 + crates/core/app.rs | 25 +++++++++++++- crates/core/args.rs | 3 +- crates/searcher/src/searcher/core.rs | 49 ++++++++++++++++++++++------ crates/searcher/src/searcher/mod.rs | 24 ++++++++++++++ tests/feature.rs | 7 ++++ 7 files changed, 99 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index baa58b5c9..4dda5eab1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ Unreleased changes. Release notes have not yet been written. Feature enhancements: * Added or improved file type filtering for Ada, DITA, Elixir, Fuchsia, Gentoo, GraphQL, Markdown, Raku, TypeScript, USD, V +* [FEATURE #1790](https://github.com/BurntSushi/ripgrep/issues/1790): + Add new `--stop-on-nonmatch` flag. * [FEATURE #2195](https://github.com/BurntSushi/ripgrep/issues/2195): When `extra-verbose` mode is enabled in zsh, show extra file type info. 
* [FEATURE #2409](https://github.com/BurntSushi/ripgrep/pull/2409): diff --git a/complete/_rg b/complete/_rg index 0af8d7afc..657648ca7 100644 --- a/complete/_rg +++ b/complete/_rg @@ -319,6 +319,7 @@ _rg() { '(-q --quiet)'{-q,--quiet}'[suppress normal output]' '--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)' '*'{-u,--unrestricted}'[reduce level of "smart" searching]' + '--stop-on-nonmatch[stop on first non-matching line after a matching one]' + operand # Operands '(--files --type-list file regexp)1: :_guard "^-*" pattern' diff --git a/crates/core/app.rs b/crates/core/app.rs index d38bf986a..875c880b2 100644 --- a/crates/core/app.rs +++ b/crates/core/app.rs @@ -632,6 +632,7 @@ pub fn all_args_and_flags() -> Vec { flag_sort(&mut args); flag_sortr(&mut args); flag_stats(&mut args); + flag_stop_on_nonmatch(&mut args); flag_text(&mut args); flag_threads(&mut args); flag_trim(&mut args); @@ -1926,13 +1927,16 @@ Nevertheless, if you only care about matches spanning at most one line, then it is always better to disable multiline mode. This flag can be disabled with --no-multiline. + +This overrides the --stop-on-nonmatch flag. " ); let arg = RGArg::switch("multiline") .short("U") .help(SHORT) .long_help(LONG) - .overrides("no-multiline"); + .overrides("no-multiline") + .overrides("stop-on-nonmatch"); args.push(arg); let arg = RGArg::switch("no-multiline").hidden().overrides("multiline"); @@ -2854,6 +2858,25 @@ This flag can be disabled with --no-stats. args.push(arg); } +fn flag_stop_on_nonmatch(args: &mut Vec) { + const SHORT: &str = "Stop searching after a non-match."; + const LONG: &str = long!( + "\ +Enabling this option will cause ripgrep to stop reading a file once it +encounters a non-matching line after it has encountered a matching line. +This is useful if it is expected that all matches in a given file will be on +sequential lines, for example due to the lines being sorted. + +This overrides the -U/--multiline flag. 
+" + ); + let arg = RGArg::switch("stop-on-nonmatch") + .help(SHORT) + .long_help(LONG) + .overrides("multiline"); + args.push(arg); +} + fn flag_text(args: &mut Vec) { const SHORT: &str = "Search binary files as if they were text."; const LONG: &str = long!( diff --git a/crates/core/args.rs b/crates/core/args.rs index ad2ee2e87..973477553 100644 --- a/crates/core/args.rs +++ b/crates/core/args.rs @@ -821,7 +821,8 @@ impl ArgMatches { .before_context(ctx_before) .after_context(ctx_after) .passthru(self.is_present("passthru")) - .memory_map(self.mmap_choice(paths)); + .memory_map(self.mmap_choice(paths)) + .stop_on_nonmatch(self.is_present("stop-on-nonmatch")); match self.encoding()? { EncodingMode::Some(enc) => { builder.encoding(Some(enc)); diff --git a/crates/searcher/src/searcher/core.rs b/crates/searcher/src/searcher/core.rs index 7d6ccd660..95b4ba6a6 100644 --- a/crates/searcher/src/searcher/core.rs +++ b/crates/searcher/src/searcher/core.rs @@ -10,6 +10,12 @@ use crate::sink::{ }; use grep_matcher::{LineMatchKind, Matcher}; +enum FastMatchResult { + Continue, + Stop, + SwitchToSlow, +} + #[derive(Debug)] pub struct Core<'s, M: 's, S> { config: &'s Config, @@ -25,6 +31,7 @@ pub struct Core<'s, M: 's, S> { last_line_visited: usize, after_context_left: usize, has_sunk: bool, + has_matched: bool, } impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { @@ -50,6 +57,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { last_line_visited: 0, after_context_left: 0, has_sunk: false, + has_matched: false, }; if !core.searcher.multi_line_with_matcher(&core.matcher) { if core.is_line_by_line_fast() { @@ -109,7 +117,11 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { pub fn match_by_line(&mut self, buf: &[u8]) -> Result { if self.is_line_by_line_fast() { - self.match_by_line_fast(buf) + match self.match_by_line_fast(buf)? 
{ + FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf), + FastMatchResult::Continue => Ok(true), + FastMatchResult::Stop => Ok(false), + } } else { self.match_by_line_slow(buf) } @@ -270,7 +282,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { } }; self.set_pos(line.end()); - if matched != self.config.invert_match { + let success = matched != self.config.invert_match; + if success { + self.has_matched = true; if !self.before_context_by_line(buf, line.start())? { return Ok(false); } @@ -286,40 +300,51 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { return Ok(false); } } + if self.config.stop_on_nonmatch && !success && self.has_matched { + return Ok(false); + } } Ok(true) } - fn match_by_line_fast(&mut self, buf: &[u8]) -> Result { - debug_assert!(!self.config.passthru); + fn match_by_line_fast( + &mut self, + buf: &[u8], + ) -> Result { + use FastMatchResult::*; + debug_assert!(!self.config.passthru); while !buf[self.pos()..].is_empty() { + if self.config.stop_on_nonmatch && self.has_matched { + return Ok(SwitchToSlow); + } if self.config.invert_match { if !self.match_by_line_fast_invert(buf)? { - return Ok(false); + return Ok(Stop); } } else if let Some(line) = self.find_by_line_fast(buf)? { + self.has_matched = true; if self.config.max_context() > 0 { if !self.after_context_by_line(buf, line.start())? { - return Ok(false); + return Ok(Stop); } if !self.before_context_by_line(buf, line.start())? { - return Ok(false); + return Ok(Stop); } } self.set_pos(line.end()); if !self.sink_matched(buf, &line)? { - return Ok(false); + return Ok(Stop); } } else { break; } } if !self.after_context_by_line(buf, buf.len())? { - return Ok(false); + return Ok(Stop); } self.set_pos(buf.len()); - Ok(true) + Ok(Continue) } #[inline(always)] @@ -344,6 +369,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { if invert_match.is_empty() { return Ok(true); } + self.has_matched = true; if !self.after_context_by_line(buf, invert_match.start())? 
{ return Ok(false); } @@ -577,6 +603,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { if self.config.passthru { return false; } + if self.config.stop_on_nonmatch && self.has_matched { + return false; + } if let Some(line_term) = self.matcher.line_terminator() { if line_term == self.config.line_term { return true; diff --git a/crates/searcher/src/searcher/mod.rs b/crates/searcher/src/searcher/mod.rs index 3bd939bbe..9b6c9bd43 100644 --- a/crates/searcher/src/searcher/mod.rs +++ b/crates/searcher/src/searcher/mod.rs @@ -173,6 +173,9 @@ pub struct Config { encoding: Option, /// Whether to do automatic transcoding based on a BOM or not. bom_sniffing: bool, + /// Whether to stop searching when a non-matching line is found after a + /// matching line. + stop_on_nonmatch: bool, } impl Default for Config { @@ -190,6 +193,7 @@ impl Default for Config { multi_line: false, encoding: None, bom_sniffing: true, + stop_on_nonmatch: false, } } } @@ -555,6 +559,19 @@ impl SearcherBuilder { self.config.bom_sniffing = yes; self } + + /// Stop searching a file when a non-matching line is found after a + /// matching line. + /// + /// This is useful for searching sorted files where it is expected that all + /// the matches will be on adjacent lines. + pub fn stop_on_nonmatch( + &mut self, + stop_on_nonmatch: bool, + ) -> &mut SearcherBuilder { + self.config.stop_on_nonmatch = stop_on_nonmatch; + self + } } /// A searcher executes searches over a haystack and writes results to a caller @@ -838,6 +855,13 @@ impl Searcher { self.config.multi_line } + /// Returns true if and only if this searcher is configured to stop when it + /// finds a non-matching line after a matching one. + #[inline] + pub fn stop_on_nonmatch(&self) -> bool { + self.config.stop_on_nonmatch + } + /// Returns true if and only if this searcher will choose a multi-line /// strategy given the provided matcher. 
/// diff --git a/tests/feature.rs b/tests/feature.rs index 8283a1bba..6d4d19477 100644 --- a/tests/feature.rs +++ b/tests/feature.rs @@ -992,3 +992,10 @@ rgtest!(no_unicode, |dir: Dir, mut cmd: TestCommand| { dir.create("test", "δ"); cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err(); }); + +// See: https://github.com/BurntSushi/ripgrep/issues/1790 +rgtest!(stop_on_nonmatch, |dir: Dir, mut cmd: TestCommand| { + dir.create("test", "line1\nline2\nline3\nline4\nline5"); + cmd.args(&["--stop-on-nonmatch", "[235]"]); + eqnice!("test:line2\ntest:line3\n", cmd.stdout()); +});