From 17fd03c2dad2b7af0d7b462d859d980e7db5deef Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 16 Oct 2023 18:05:39 -0400 Subject: [PATCH] progress --- Cargo.lock | 17 +- Cargo.toml | 5 +- crates/core/flags/args.rs | 589 ++++++ crates/core/flags/complete/bash.rs | 114 ++ crates/core/flags/complete/fish.rs | 54 + crates/core/flags/complete/mod.rs | 18 + crates/core/flags/complete/powershell.rs | 93 + crates/core/flags/defs.rs | 2251 +++++++++++++++++++++ crates/core/flags/doc/help.rs | 166 ++ crates/core/flags/doc/man.rs | 141 ++ crates/core/flags/doc/mod.rs | 33 + crates/core/flags/doc/template.long.help | 58 + crates/core/flags/doc/template.rg.1 | 402 ++++ crates/core/flags/doc/template.short.help | 27 + crates/core/flags/mod.rs | 77 + crates/core/flags/parse.rs | 195 ++ crates/core/main.rs | 3 + crates/searcher/src/searcher/mod.rs | 2 +- 18 files changed, 4242 insertions(+), 3 deletions(-) create mode 100644 crates/core/flags/args.rs create mode 100644 crates/core/flags/complete/bash.rs create mode 100644 crates/core/flags/complete/fish.rs create mode 100644 crates/core/flags/complete/mod.rs create mode 100644 crates/core/flags/complete/powershell.rs create mode 100644 crates/core/flags/defs.rs create mode 100644 crates/core/flags/doc/help.rs create mode 100644 crates/core/flags/doc/man.rs create mode 100644 crates/core/flags/doc/mod.rs create mode 100644 crates/core/flags/doc/template.long.help create mode 100644 crates/core/flags/doc/template.rg.1 create mode 100644 crates/core/flags/doc/template.short.help create mode 100644 crates/core/flags/mod.rs create mode 100644 crates/core/flags/parse.rs diff --git a/Cargo.lock b/Cargo.lock index 3becb925b3..6f31fee595 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -70,7 +70,7 @@ checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "bitflags", "strsim", - "textwrap", + "textwrap 0.11.0", "unicode-width", ] @@ -291,6 +291,12 @@ dependencies = [ "libc", ] +[[package]] +name = "lexopt" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baff4b617f7df3d896f97fe922b64817f6cd9a756bb81d40f8883f2f66dcb401" + [[package]] name = "libc" version = "0.2.149" @@ -432,17 +438,20 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" name = "ripgrep" version = "13.0.0" dependencies = [ + "aho-corasick", "anyhow", "bstr", "clap", "grep", "ignore", "jemallocator", + "lexopt", "log", "serde", "serde_derive", "serde_json", "termcolor", + "textwrap 0.16.0", "walkdir", ] @@ -533,6 +542,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "textwrap" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" + [[package]] name = "unicode-ident" version = "1.0.12" diff --git a/Cargo.toml b/Cargo.toml index 14bb7c8de9..943a5e7fea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,13 +49,16 @@ members = [ ] [dependencies] +aho-corasick = "1.1.2" anyhow = "1.0.75" -bstr = "1.6.0" +bstr = "1.7.0" grep = { version = "0.2.12", path = "crates/grep" } ignore = { version = "0.4.19", path = "crates/ignore" } +lexopt = "0.3.0" log = "0.4.5" serde_json = "1.0.23" termcolor = "1.1.0" +textwrap = { version = "0.16.0", default-features = false } [dependencies.clap] version = "2.33.0" diff --git a/crates/core/flags/args.rs b/crates/core/flags/args.rs new file mode 100644 index 0000000000..d66388ef8c --- /dev/null +++ b/crates/core/flags/args.rs @@ -0,0 +1,589 @@ +use std::{ + ffi::{OsStr, OsString}, + path::PathBuf, +}; + +use { + bstr::{BString, ByteSlice, ByteVec}, + grep::{ + printer::{HyperlinkFormat, UserColorSpec}, + searcher::Encoding, + }, +}; + +#[derive(Debug, Default)] +pub(crate) struct Args { + pub(crate) positional: Vec, + pub(crate) binary: BinaryMode, + pub(crate) buffer: BufferMode, + pub(crate) byte_offset: bool, + pub(crate) case: CaseMode, + pub(crate) color: ColorChoice, + pub(crate) colors: Vec, + pub(crate) column: bool, + pub(crate) context: ContextMode, + pub(crate) context_separator: ContextSeparator, + pub(crate) count: Option, + pub(crate) crlf: bool, + pub(crate) dfa_size_limit: Option, + pub(crate) encoding: EncodingMode, + pub(crate) engine: EngineChoice, + pub(crate) field_context_separator: FieldContextSeparator, + pub(crate) field_match_separator: FieldMatchSeparator, + pub(crate) file_matches: Option, + pub(crate) files: bool, + pub(crate) fixed_strings: bool, + pub(crate) follow: bool, + pub(crate) glob_case_insensitive: bool, + pub(crate) globs: Vec, + pub(crate) heading: bool, + pub(crate) hidden: bool, + pub(crate) hostname_bin: Option, + pub(crate) hyperlink_format: Option, + pub(crate) ignore_file: Vec, + pub(crate) ignore_file_case_insensitive: bool, + pub(crate) include_zero: bool, + pub(crate) invert_match: bool, + pub(crate) json: bool, + pub(crate) line_number: bool, + pub(crate) line_regexp: bool, + pub(crate) logging: Option, + pub(crate) max_columns: Option, + pub(crate) max_columns_preview: bool, + pub(crate) max_count: Option, + pub(crate) max_depth: Option, + pub(crate) max_filesize: Option, + pub(crate) mmap: MmapMode, + pub(crate) multiline: bool, + pub(crate) multiline_dotall: bool, + pub(crate) no_config: bool, + pub(crate) no_ignore: bool, + pub(crate) no_ignore_dot: bool, + pub(crate) no_ignore_exclude: bool, + pub(crate) no_ignore_files: bool, + pub(crate) no_ignore_global: bool, + pub(crate) no_ignore_messages: bool, + pub(crate) no_ignore_parent: bool, + pub(crate) no_ignore_vcs: bool, + pub(crate) no_messages: bool, + pub(crate) no_pcre2_unicode: bool, + pub(crate) no_require_git: bool, + pub(crate) no_unicode: bool, + pub(crate) null: bool, + pub(crate) null_data: bool, + pub(crate) one_file_system: bool, + pub(crate) only_matching: bool, + pub(crate) path_separator: Option, + pub(crate) patterns: Vec, + pub(crate) pcre2: bool, + pub(crate) pcre2_version: bool, + pub(crate) pre: Option, + pub(crate) pre_glob: Vec, + pub(crate) pretty: bool, + pub(crate) quiet: bool, + pub(crate) regex_size_limit: Option, + pub(crate) replace: Option>, + pub(crate) search_zip: bool, + pub(crate) sort: Option, + pub(crate) stats: bool, + pub(crate) stop_on_nonmatch: bool, + pub(crate) text: bool, + pub(crate) threads: Option, + pub(crate) trim: bool, + pub(crate) types: Vec, + pub(crate) type_changes: Vec, + pub(crate) type_list: bool, + pub(crate) unrestricted: usize, + pub(crate) vimgrep: bool, + pub(crate) with_filename: bool, + pub(crate) word_regexp: bool, +} + +/// Indicates how ripgrep should treat binary data. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum BinaryMode { + /// Automatically determine the binary mode to use. Essentially, when + /// a file is searched explicitly, then it will be searched using the + /// `SearchAndSuppress` strategy. Otherwise, it will be searched in a way + /// that attempts to skip binary files as much as possible. That is, once + /// a file is classified as binary, searching will immediately stop. + Auto, + /// Search files even when they have binary data, but if a match is found, + /// suppress it and emit a warning. + /// + /// In this mode, `NUL` bytes are replaced with line terminators. This is + /// a heuristic meant to reduce heap memory usage, since true binary data + /// isn't line oriented. If one attempts to treat such data as line + /// oriented, then one may wind up with impractically large lines. For + /// example, many binary files contain very long runs of NUL bytes. + SearchAndSuppress, + /// Treat all files as if they were plain text. There's no skipping and no + /// replacement of `NUL` bytes with line terminators. + AsText, +} + +impl Default for BinaryMode { + fn default() -> BinaryMode { + BinaryMode::Auto + } +} + +/// Indicates the buffer mode that ripgrep should use when printing output. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum BufferMode { + /// Select the buffer mode, 'line' or 'block', automatically based on + /// whether stdout is connected to a tty. + Auto, + /// Flush the output buffer whenever a line terminator is seen. + /// + /// This is useful when wants to see search results more immediately, + /// for example, with `tail -f`. + Line, + /// Flush the output buffer whenever it reaches some fixed size. The size + /// is usually big enough to hold many lines. + /// + /// This is useful for maximum performance, particularly when printing + /// lots of results. + Block, +} + +impl Default for BufferMode { + fn default() -> BufferMode { + BufferMode::Auto + } +} + +/// Indicates the case mode for how to interpret all patterns given to ripgrep. +/// +/// The default is `Sensitive`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum CaseMode { + /// Patterns are matched case sensitively. i.e., `a` does not match `A`. + Sensitive, + /// Patterns are matched case insensitively. i.e., `a` does match `A`. + Insensitive, + /// Patterns are automatically matched case insensitively only when they + /// consist of all lowercase literal characters. For example, the pattern + /// `a` will match `A` but `A` will not match `a`. + Smart, +} + +impl Default for CaseMode { + fn default() -> CaseMode { + CaseMode::Sensitive + } +} + +/// Indicates whether ripgrep should include color/hyperlinks in its output. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum ColorChoice { + /// Color and hyperlinks will never be used. + Never, + /// Color and hyperlinks will be used only when stdout is connected to a + /// tty. + Auto, + /// Color will always be used. + Always, + /// Color will always be used and only ANSI escapes will be used. + /// + /// This only makes sense in the context of legacy Windows console APIs. + /// At time of writing, ripgrep will try to use the legacy console APIs + /// if ANSI coloring isn't believed to be possible. This option will force + /// ripgrep to use ANSI coloring. + Ansi, +} + +impl Default for ColorChoice { + fn default() -> ColorChoice { + ColorChoice::Auto + } +} + +/// Indicates the line context options ripgrep should use for output. +/// +/// The default is no context at all. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum ContextMode { + /// All lines will be printed. That is, the context is unbounded. + Passthru, + /// Only show a certain number of lines before and after each match. + Limited(ContextModeLimited), +} + +impl Default for ContextMode { + fn default() -> ContextMode { + ContextMode::Limited(ContextModeLimited::default()) + } +} + +impl ContextMode { + /// Set the "before" context. + /// + /// If this was set to "passthru" context, then it is overridden in favor + /// of limited context with the given value for "before" and `0` for + /// "after." + pub(crate) fn set_before(&mut self, lines: usize) { + match *self { + ContextMode::Passthru => { + *self = ContextMode::Limited(ContextModeLimited { + before: Some(lines), + after: None, + both: None, + }) + } + ContextMode::Limited(ContextModeLimited { + ref mut before, + .. + }) => *before = Some(lines), + } + } + + /// Set the "after" context. + /// + /// If this was set to "passthru" context, then it is overridden in favor + /// of limited context with the given value for "after" and `0` for + /// "before." + pub(crate) fn set_after(&mut self, lines: usize) { + match *self { + ContextMode::Passthru => { + *self = ContextMode::Limited(ContextModeLimited { + before: None, + after: Some(lines), + both: None, + }) + } + ContextMode::Limited(ContextModeLimited { + ref mut after, .. + }) => *after = Some(lines), + } + } + + /// Set the "both" context. + /// + /// If this was set to "passthru" context, then it is overridden in favor + /// of limited context with the given value for "both" and `None` for + /// "before" and "after". + pub(crate) fn set_both(&mut self, lines: usize) { + match *self { + ContextMode::Passthru => { + *self = ContextMode::Limited(ContextModeLimited { + before: None, + after: None, + both: Some(lines), + }) + } + ContextMode::Limited(ContextModeLimited { + ref mut both, .. + }) => *both = Some(lines), + } + } + + /// A convenience function for use in tests that returns the limited + /// context. If this mode isn't limited, then it panics. + #[cfg(test)] + pub(crate) fn get_limited(&self) -> (usize, usize) { + match *self { + ContextMode::Passthru => unreachable!("context mode is passthru"), + ContextMode::Limited(ref limited) => limited.get(), + } + } +} + +/// A context mode indicating that a specific number of lines (possibly zero) +/// should be shown before and/or after each matching line. +/// +/// Note that there is a subtle difference between `Some(0)` and `None`. In the +/// former case, it happens when `0` is given explicitly, where as `None` is +/// the default value and occurs when no value is specified. +/// +/// `both` is only set by the -C/--context flag. The reason why we don't just +/// set before = after = --context is because the before and after context +/// settings always take precedent over the -C/--context setting, regardless of +/// order. Thus, we need to keep track of them separately. +#[derive(Debug, Default, Eq, PartialEq)] +pub(crate) struct ContextModeLimited { + before: Option, + after: Option, + both: Option, +} + +impl ContextModeLimited { + /// Returns the specific number of contextual lines that should be shown + /// around each match. This takes proper precedent into account, i.e., + /// that `before` and `after` both partially override `both` in all cases. + /// + /// By default, this returns `(0, 0)`. + pub(crate) fn get(&self) -> (usize, usize) { + let (mut before, mut after) = + self.both.map(|lines| (lines, lines)).unwrap_or((0, 0)); + // --before and --after always override --context, regardless + // of where they appear relative to each other. + if let Some(lines) = self.before { + before = lines; + } + if let Some(lines) = self.after { + after = lines; + } + (before, after) + } +} + +/// Represents the separator to use between non-contiguous sections of +/// contextual lines. +/// +/// The default is `--`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) struct ContextSeparator(Option); + +impl Default for ContextSeparator { + fn default() -> ContextSeparator { + ContextSeparator(Some(BString::from("--"))) + } +} + +impl ContextSeparator { + /// Create a new context separator from the user provided argument. This + /// handles unescaping. + pub(crate) fn new(os: &OsStr) -> anyhow::Result { + let Some(string) = os.to_str() else { + anyhow::bail!( + "separator must be valid UTF-8 (use escape sequences \ + to provide a separator that is not valid UTF-8)" + ) + }; + Ok(ContextSeparator(Some(Vec::unescape_bytes(string).into()))) + } + + /// Creates a new separator that intructs the printer to disable contextual + /// separators entirely. + pub(crate) fn disabled() -> ContextSeparator { + ContextSeparator(None) + } + + /// Return the raw bytes of this separator. + /// + /// If context separators were disabled, then this returns `None`. + /// + /// Note that this may return a `Some` variant with zero bytes. + pub(crate) fn into_bytes(self) -> Option> { + self.0.map(|sep| sep.into()) + } +} + +/// The method of counting to use. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum CountMode { + /// Count each matching line once, even if the line contains multiple + /// matches of the pattern. + Lines, + /// Count each individual match, even when multiple matches appear on the + /// same line. + All, +} + +/// The encoding mode the searcher will use. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum EncodingMode { + /// Use only BOM sniffing to auto-detect an encoding. + Auto, + /// Use an explicit encoding forcefully, but let BOM sniffing override it. + Some(Encoding), + /// Use no explicit encoding and disable all BOM sniffing. This will + /// always result in searching the raw bytes, regardless of their + /// true encoding. + Disabled, +} + +impl Default for EncodingMode { + fn default() -> EncodingMode { + EncodingMode::Auto + } +} + +/// The regex engine to use. +/// +/// The default is `Default`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum EngineChoice { + /// Uses the default regex engine: Rust's `regex` crate. + /// + /// (Well, technically it uses `regex-automata`, but `regex-automata` is + /// the implementation of the `regex` crate.) + Default, + /// Dynamically select the right engine to use. + /// + /// This works by trying to use the default engine, and if the pattern does + /// not compile, it switches over to the PCRE2 engine if it's available. + Auto, + /// Uses the PCRE2 regex engine if it's available. + PCRE2, +} + +impl Default for EngineChoice { + fn default() -> EngineChoice { + EngineChoice::Default + } +} + +/// The field context separator to use to between metadata for each contextual +/// line. +/// +/// The default is `-`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) struct FieldContextSeparator(BString); + +impl Default for FieldContextSeparator { + fn default() -> FieldContextSeparator { + FieldContextSeparator(BString::from("-")) + } +} + +impl FieldContextSeparator { + /// Create a new separator from the given argument value provided by the + /// user. Unescaping it automatically handled. + pub(crate) fn new(os: &OsStr) -> anyhow::Result { + let Some(string) = os.to_str() else { + anyhow::bail!( + "separator must be valid UTF-8 (use escape sequences \ + to provide a separator that is not valid UTF-8)" + ) + }; + Ok(FieldContextSeparator(Vec::unescape_bytes(string).into())) + } + + /// Return the raw bytes of this separator. + /// + /// Note that this may return an empty `Vec`. + pub(crate) fn into_bytes(self) -> Vec { + self.0.into() + } +} + +/// The field match separator to use to between metadata for each matching +/// line. +/// +/// The default is `:`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) struct FieldMatchSeparator(BString); + +impl Default for FieldMatchSeparator { + fn default() -> FieldMatchSeparator { + FieldMatchSeparator(BString::from(":")) + } +} + +impl FieldMatchSeparator { + /// Create a new separator from the given argument value provided by the + /// user. Unescaping it automatically handled. + pub(crate) fn new(os: &OsStr) -> anyhow::Result { + let Some(string) = os.to_str() else { + anyhow::bail!( + "separator must be valid UTF-8 (use escape sequences \ + to provide a separator that is not valid UTF-8)" + ) + }; + Ok(FieldMatchSeparator(Vec::unescape_bytes(string).into())) + } + + /// Return the raw bytes of this separator. + /// + /// Note that this may return an empty `Vec`. + pub(crate) fn into_bytes(self) -> Vec { + self.0.into() + } +} + +/// The type of summary "file match" mode to use. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum FileMatchMode { + /// Emit all file paths with at least one match. + WithMatches, + /// Emit all file paths that contain zero matches. + WithoutMatch, +} + +/// The type of logging to do. `Debug` emits some details while `Trace` emits +/// much more. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum LoggingMode { + Debug, + Trace, +} + +/// Indicates when to use memory maps. +/// +/// The default is `Auto`. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum MmapMode { + /// This instructs ripgrep to use heuristics for selecting when to and not + /// to use memory maps for searching. + Auto, + /// This instructs ripgrep to always try memory maps when possible. (Memory + /// maps are not possible to use in all circumstances, for example, for + /// virtual files.) + AlwaysTryMmap, + /// Never use memory maps under any circumstances. This includes even + /// when multi-line search is enabled where ripgrep will read the entire + /// contents of a file on to the heap before searching it. + Never, +} + +impl Default for MmapMode { + fn default() -> MmapMode { + MmapMode::Auto + } +} + +/// Represents a source of patterns that ripgrep should search for. +/// +/// The reason to unify these is so that we can retain the order of `-f/--flag` +/// and `-e/--regexp` flags relative to one another. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum PatternSource { + /// Comes from the `-f/--file` flag. + File(PathBuf), + /// Comes from the `-e/--regexp` flag. + Regexp(String), +} + +/// The sort criteria, if present. +#[derive(Debug, Eq, PartialEq)] +pub(crate) struct SortMode { + /// Whether to reverse the sort criteria (i.e., descending order). + pub(crate) reverse: bool, + /// The actual sorting criteria. + pub(crate) kind: SortModeKind, +} + +/// The criteria to use for sorting. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum SortModeKind { + /// Sort by path. + Path, + /// Sort by last modified time. + LastModified, + /// Sort by last accessed time. + LastAccessed, + /// Sort by creation time. + Created, +} + +/// A single instance of either a change or a selection of one ripgrep's +/// file types. +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum TypeChoice { + /// Clear the given type from ripgrep. + Clear { name: String }, + /// Add the given type definition (name and glob) to ripgrep. + Add { def: String }, + /// Select the given type for filtering. + Select { name: String }, + /// Select the given type for filtering but negate it. + Negate { name: String }, +} diff --git a/crates/core/flags/complete/bash.rs b/crates/core/flags/complete/bash.rs new file mode 100644 index 0000000000..dec5034d61 --- /dev/null +++ b/crates/core/flags/complete/bash.rs @@ -0,0 +1,114 @@ +use crate::flags::defs::FLAGS; + +const TEMPLATE_FULL: &'static str = " +_rg() { + local i cur prev opts cmds + COMPREPLY=() + cur=\"${COMP_WORDS[COMP_CWORD]}\" + prev=\"${COMP_WORDS[COMP_CWORD-1]}\" + cmd=\"\" + opts=\"\" + + for i in ${COMP_WORDS[@]}; do + case \"${i}\" in + rg) + cmd=\"rg\" + ;; + *) + ;; + esac + done + + case \"${cmd}\" in + rg) + opts=\"!OPTS!\" + if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then + COMPREPLY=($(compgen -W \"${opts}\" -- \"${cur}\")) + return 0 + fi + case \"${prev}\" in +!CASES! + esac + COMPREPLY=($(compgen -W \"${opts}\" -- \"${cur}\")) + return 0 + ;; + esac +} + +complete -F _rg -o bashdefault -o default rg +"; + +const TEMPLATE_CASE: &'static str = " + !FLAG!) + COMPREPLY=($(compgen -f \"${cur}\")) + return 0 + ;; +"; + +const TEMPLATE_CASE_CHOICES: &'static str = " + !FLAG!) + COMPREPLY=($(compgen -W \"!CHOICES!\" -- \"${cur}\")) + return 0 + ;; +"; + +/// Generate completions for Bash. +/// +/// Note that these completions are based on what was produced for ripgrep <=13 +/// using Clap 2.x. Improvements on this are welcome. +pub(crate) fn generate() -> String { + let mut opts = String::new(); + for (i, flag) in FLAGS.iter().enumerate() { + opts.push_str("--"); + opts.push_str(flag.name_long()); + opts.push(' '); + if let Some(short) = flag.name_short() { + opts.push('-'); + opts.push(char::from(short)); + opts.push(' '); + } + if let Some(name) = flag.name_negated() { + opts.push_str("--"); + opts.push_str(name); + opts.push(' '); + } + } + opts.push_str(" ..."); + + let mut cases = String::new(); + for flag in FLAGS.iter() { + let template = if !flag.doc_choices().is_empty() { + let choices = flag.doc_choices().join(" "); + TEMPLATE_CASE_CHOICES.trim_end().replace("!CHOICES!", &choices) + } else { + TEMPLATE_CASE.trim_end().to_string() + }; + let name = format!("--{}", flag.name_long()); + cases.push_str(&template.replace("!FLAG!", &name)); + if let Some(short) = flag.name_short() { + let name = format!("-{}", char::from(short)); + cases.push_str(&template.replace("!FLAG!", &name)); + } + if let Some(negated) = flag.name_negated() { + let name = format!("--{negated}"); + cases.push_str(&template.replace("!FLAG!", &name)); + } + } + + TEMPLATE_FULL + .replace("!OPTS!", &opts) + .replace("!CASES!", &cases) + .trim_start() + .to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn todo_remove() { + std::fs::create_dir_all("/tmp/rg-test").unwrap(); + std::fs::write("/tmp/rg-test/rg.bash", generate().as_bytes()).unwrap(); + } +} diff --git a/crates/core/flags/complete/fish.rs b/crates/core/flags/complete/fish.rs new file mode 100644 index 0000000000..348ba88f53 --- /dev/null +++ b/crates/core/flags/complete/fish.rs @@ -0,0 +1,54 @@ +use crate::flags::defs::FLAGS; + +const TEMPLATE: &'static str = + "complete -c rg -n '__fish_use_subcommand' !SHORT! !LONG! !DOC!"; +const TEMPLATE_CHOICES: &'static str = + "complete -c rg -n '__fish_use_subcommand' !SHORT! !LONG! !DOC! -r -f -a '!CHOICES!'"; + +/// Generate completions for Fish. +/// +/// Note that these completions are based on what was produced for ripgrep <=13 +/// using Clap 2.x. Improvements on this are welcome. +pub(crate) fn generate() -> String { + let mut out = String::new(); + for flag in FLAGS.iter() { + let short = match flag.name_short() { + None => "".to_string(), + Some(byte) => format!("-s {}", char::from(byte)), + }; + let long = format!("-l '{}'", flag.name_long().replace("'", "\\'")); + let doc = format!("-d '{}'", flag.doc_short().replace("'", "\\'")); + let template = if flag.doc_choices().is_empty() { + TEMPLATE.to_string() + } else { + TEMPLATE_CHOICES + .replace("!CHOICES!", &flag.doc_choices().join(" ")) + }; + out.push_str( + &template + .replace("!SHORT!", &short) + .replace("!LONG!", &long) + .replace("!DOC!", &doc), + ); + if let Some(negated) = flag.name_negated() { + out.push_str( + &template + .replace("!SHORT!", "") + .replace("!LONG!", &long) + .replace("!DOC!", &doc), + ); + } + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn todo_remove() { + std::fs::create_dir_all("/tmp/rg-test").unwrap(); + std::fs::write("/tmp/rg-test/rg.fish", generate().as_bytes()).unwrap(); + } +} diff --git a/crates/core/flags/complete/mod.rs b/crates/core/flags/complete/mod.rs new file mode 100644 index 0000000000..e0bf2f19cb --- /dev/null +++ b/crates/core/flags/complete/mod.rs @@ -0,0 +1,18 @@ +// If you're looking for zsh, those are maintained by hand in `complete/_rg`. +// The main reason for this is that the zsh completions were: +// +// 1. Lovingly written by an expert in such things. +// 2. Are much higher in quality than the ones below that are auto-generated. +// Namely, the zsh completions take application level context about flag +// compatibility into account. +// 3. There is a CI script that fails if a new flag is added to ripgrep that +// isn't included in the zsh completions. +// 4. There is a wealth of documentation in the zsh script explaining how it +// works and how it can be extended. +// +// In principle, I'd be open to maintaining any completion script by hand so +// long as it meets criteria 3 and 4 above. + +pub(super) mod bash; +pub(super) mod fish; +pub(super) mod powershell; diff --git a/crates/core/flags/complete/powershell.rs b/crates/core/flags/complete/powershell.rs new file mode 100644 index 0000000000..166d3f4ebb --- /dev/null +++ b/crates/core/flags/complete/powershell.rs @@ -0,0 +1,93 @@ +use crate::flags::defs::FLAGS; + +const TEMPLATE: &'static str = " +using namespace System.Management.Automation +using namespace System.Management.Automation.Language + +Register-ArgumentCompleter -Native -CommandName 'rg' -ScriptBlock { + param($wordToComplete, $commandAst, $cursorPosition) + $commandElements = $commandAst.CommandElements + $command = @( + 'rg' + for ($i = 1; $i -lt $commandElements.Count; $i++) { + $element = $commandElements[$i] + if ($element -isnot [StringConstantExpressionAst] -or + $element.StringConstantType -ne [StringConstantType]::BareWord -or + $element.Value.StartsWith('-')) { + break + } + $element.Value + }) -join ';' + + $completions = @(switch ($command) { + 'rg' { +!FLAGS! + } + }) + + $completions.Where{ $_.CompletionText -like \"$wordToComplete*\" } | + Sort-Object -Property ListItemText +} +"; + +const TEMPLATE_FLAG: &'static str = + "[CompletionResult]::new('!DASH_NAME!', '!NAME!', [CompletionResultType]::ParameterName, '!DOC!')"; + +/// Generate completions for PowerShell. +/// +/// Note that these completions are based on what was produced for ripgrep <=13 +/// using Clap 2.x. Improvements on this are welcome. +pub(crate) fn generate() -> String { + let mut flags = String::new(); + for (i, flag) in FLAGS.iter().enumerate() { + let doc = flag.doc_short().replace("'", "''"); + + let dash_name = format!("--{}", flag.name_long()); + let name = flag.name_long(); + if i > 0 { + flags.push('\n'); + } + flags.push_str(" "); + flags.push_str( + &TEMPLATE_FLAG + .replace("!DASH_NAME!", &dash_name) + .replace("!NAME!", &name) + .replace("!DOC!", &doc), + ); + + if let Some(byte) = flag.name_short() { + let dash_name = format!("-{}", char::from(byte)); + let name = char::from(byte).to_string(); + flags.push_str("\n "); + flags.push_str( + &TEMPLATE_FLAG + .replace("!DASH_NAME!", &dash_name) + .replace("!NAME!", &name) + .replace("!DOC!", &doc), + ); + } + + if let Some(negated) = flag.name_negated() { + let dash_name = format!("--{}", negated); + flags.push_str("\n "); + flags.push_str( + &TEMPLATE_FLAG + .replace("!DASH_NAME!", &dash_name) + .replace("!NAME!", &negated) + .replace("!DOC!", &doc), + ); + } + } + TEMPLATE.trim_start().replace("!FLAGS!", &flags) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn todo_remove() { + std::fs::create_dir_all("/tmp/rg-test").unwrap(); + std::fs::write("/tmp/rg-test/_rg.ps1", generate().as_bytes()).unwrap(); + } +} diff --git a/crates/core/flags/defs.rs b/crates/core/flags/defs.rs new file mode 100644 index 0000000000..afaec8118d --- /dev/null +++ b/crates/core/flags/defs.rs @@ -0,0 +1,2251 @@ +use std::{ffi::OsString, path::PathBuf}; + +use anyhow::Context as AnyhowContext; + +use crate::flags::{ + Args, BinaryMode, BufferMode, CaseMode, ColorChoice, ContextMode, + CountMode, EncodingMode, EngineChoice, Flag, FlagValue, LoggingMode, + MmapMode, PatternSource, SortMode, SortModeKind, TypeChoice, +}; + +#[cfg(test)] +use crate::flags::parse; + +pub(super) const FLAGS: &'static [&'static dyn Flag] = &[ + &AfterContext, + &AutoHybridRegex, + &BeforeContext, + &Binary, + &BlockBuffered, + &ByteOffset, + &CaseSensitive, + &Color, + &Colors, + &Column, + &Context, + &ContextSeparator, + &Count, + &CountMatches, + &Crlf, + &Debug, + &DfaSizeLimit, + &Encoding, + &Engine, + &FieldContextSeparator, + &FieldMatchSeparator, + &File, + &Files, + &Regexp, + &Trace, +]; + +/// -A/--after-context +#[derive(Debug)] +struct AfterContext; + +impl Flag for AfterContext { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'A') + } + fn name_long(&self) -> &'static str { + "after-context" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + "Show NUM lines after each match." + } + fn doc_long(&self) -> &'static str { + r" +Show \fINUM\fP lines after each match. +.sp +This overrides the \flag{passthru} flag and partially overrides the +\flag{context} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + args.context.set_after(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_after_context() { + // TODO: Add override tests with --passthru. + + let mkctx = |lines| { + let mut mode = ContextMode::default(); + mode.set_after(lines); + mode + }; + + let args = parse(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse(["--after-context", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["--after-context=5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["-A", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["-A5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["-A5", "-A10"]).unwrap(); + assert_eq!(mkctx(10), args.context); + + let args = parse(["-A5", "-A0"]).unwrap(); + assert_eq!(mkctx(0), args.context); + + let n = usize::MAX.to_string(); + let args = parse(["--after-context", n.as_str()]).unwrap(); + assert_eq!(mkctx(usize::MAX), args.context); + + #[cfg(target_pointer_width = "64")] + { + let n = (u128::from(u64::MAX) + 1).to_string(); + let result = parse(["--after-context", n.as_str()]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// --auto-hybrid-regex +#[derive(Debug)] +struct AutoHybridRegex; + +impl Flag for AutoHybridRegex { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "auto-hybrid-regex" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-auto-hybrid-regex") + } + fn doc_category(&self) -> &'static str { + "search" + } + fn doc_short(&self) -> &'static str { + "(DEPRECATED) Dynamically use PCRE2 if appropriate." + } + fn doc_long(&self) -> &'static str { + r" +DEPRECATED. Use \flag{engine} instead. +.sp +When this flag is used, ripgrep will dynamically choose between supported regex +engines depending on the features used in a pattern. When ripgrep chooses a +regex engine, it applies that choice for every regex provided to ripgrep (e.g., +via multiple \flag{regexp} or \flag{file} flags). +.sp +As an example of how this flag might behave, ripgrep will attempt to use +its default finite automata based regex engine whenever the pattern can be +successfully compiled with that regex engine. If PCRE2 is enabled and if the +pattern given could not be compiled with the default regex engine, then PCRE2 +will be automatically used for searching. If PCRE2 isn't available, then this +flag has no effect because there is only one regex engine to choose from. +.sp +In the future, ripgrep may adjust its heuristics for how it decides which +regex engine to use. In general, the heuristics will be limited to a static +analysis of the patterns, and not to any specific runtime behavior observed +while searching files. +.sp +The primary downside of using this flag is that it may not always be obvious +which regex engine ripgrep uses, and thus, the match semantics or performance +profile of ripgrep may subtly and unexpectedly change. However, in many cases, +all regex engines will agree on what constitutes a match and it can be nice +to transparently support more advanced regex features like look-around and +backreferences without explicitly needing to enable them. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + let mode = if v.unwrap_switch() { + EngineChoice::Auto + } else { + EngineChoice::Default + }; + args.engine = mode; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_auto_hybrid_regex() { + // TODO: Add more tests here with interaction with --engine and -P/--pcre2. + + let args = parse(None::<&str>).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse(["--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = + parse(["--auto-hybrid-regex", "--no-auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = + parse(["--no-auto-hybrid-regex", "--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); +} + +/// -B/--before-context +#[derive(Debug)] +struct BeforeContext; + +impl Flag for BeforeContext { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'B') + } + fn name_long(&self) -> &'static str { + "before-context" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + "Show NUM lines before each match." + } + fn doc_long(&self) -> &'static str { + r" +Show \fINUM\fP lines before each match. +.sp +This overrides the \flag{passthru} flag and partially overrides the +\flag{context} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + args.context.set_before(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_before_context() { + // TODO: Add override tests with --passthru. + + let mkctx = |lines| { + let mut mode = ContextMode::default(); + mode.set_before(lines); + mode + }; + + let args = parse(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse(["--before-context", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["--before-context=5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["-B", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["-B5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["-B5", "-B10"]).unwrap(); + assert_eq!(mkctx(10), args.context); + + let args = parse(["-B5", "-B0"]).unwrap(); + assert_eq!(mkctx(0), args.context); + + let n = usize::MAX.to_string(); + let args = parse(["--before-context", n.as_str()]).unwrap(); + assert_eq!(mkctx(usize::MAX), args.context); + + #[cfg(target_pointer_width = "64")] + { + let n = (u128::from(u64::MAX) + 1).to_string(); + let result = parse(["--before-context", n.as_str()]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// --binary +#[derive(Debug)] +struct Binary; + +impl Flag for Binary { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "binary" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-binary") + } + fn doc_category(&self) -> &'static str { + "filter" + } + fn doc_short(&self) -> &'static str { + "Search binary files." + } + fn doc_long(&self) -> &'static str { + r" +Enabling this flag will cause ripgrep to search binary files. By default, +ripgrep attempts to automatically skip binary files in order to improve the +relevance of results and make the search faster. +.sp +Binary files are heuristically detected based on whether they contain a +\fBNUL\fP byte or not. By default (without this flag set), once a \fBNUL\fP +byte is seen, ripgrep will stop searching the file. Usually, \fBNUL\fP bytes +occur in the beginning of most binary files. If a \fBNUL\fP byte occurs after +a match, then ripgrep will not print the match, stop searching that file, and +emit a warning that some matches are being suppressed. +.sp +In contrast, when this flag is provided, ripgrep will continue searching a +file even if a \fBNUL\fP byte is found. In particular, if a \fBNUL\fP byte is +found then ripgrep will continue searching until either a match is found or +the end of the file is reached, whichever comes sooner. If a match is found, +then ripgrep will stop and print a warning saying that the search stopped +prematurely. +.sp +If you want ripgrep to search a file without any special \fBNUL\fP byte +handling at all (and potentially print binary data to stdout), then you should +use the \flag{text} flag. +.sp +The \flag{binary} flag is a flag for controlling ripgrep's automatic filtering +mechanism. As such, it does not need to be used when searching a file +explicitly or when searching stdin. That is, it is only applicable when +recursively searching a directory. +.sp +When the \flag{unrestricted} flag is provided for a third time, then this flag +is automatically enabled. +.sp +This flag overrides the \flag{text} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + args.binary = if v.unwrap_switch() { + BinaryMode::SearchAndSuppress + } else { + BinaryMode::Auto + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_binary() { + // TODO: Add more tests here with interaction with -a/--text. + + let args = parse(None::<&str>).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse(["--binary"]).unwrap(); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); + + let args = parse(["--binary", "--no-binary"]).unwrap(); + assert_eq!(BinaryMode::Auto, args.binary); + + let args = parse(["--no-binary", "--binary"]).unwrap(); + assert_eq!(BinaryMode::SearchAndSuppress, args.binary); +} + +/// --block-buffered +#[derive(Debug)] +struct BlockBuffered; + +impl Flag for BlockBuffered { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "block-buffered" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-block-buffered") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + "Force block buffering." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will use block buffering. That is, whenever a matching +line is found, it will be written to an in-memory buffer and will not be +written to stdout until the buffer reaches a certain size. This is the default +when ripgrep's stdout is redirected to a pipeline or a file. When ripgrep's +stdout is connected to a terminal, line buffering will be used by default. +Forcing block buffering can be useful when dumping a large amount of contents +to a terminal. +.sp +This overrides the \flag{line-buffered} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + args.buffer = if v.unwrap_switch() { + BufferMode::Block + } else { + BufferMode::Auto + }; + Ok(()) + } +} + +/// --byte-offset +#[derive(Debug)] +struct ByteOffset; + +impl Flag for ByteOffset { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'b') + } + fn name_long(&self) -> &'static str { + "byte-offset" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-byte-offset") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + "Print the byte offset for each matching line." + } + fn doc_long(&self) -> &'static str { + r" +Print the 0-based byte offset within the input file before each line of output. +If \flag{only-matching} is specified, print the offset of the matched text +itself. +.sp +If ripgrep does transcoding, then the byte offset is in terms of the result +of transcoding and not the original data. This applies similarly to other +transformations on the data, such as decompression or a \flag{pre} filter. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + args.byte_offset = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_byte_offset() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(false, args.byte_offset); + + let args = parse(["--byte-offset"]).unwrap(); + assert_eq!(true, args.byte_offset); + + let args = parse(["-b"]).unwrap(); + assert_eq!(true, args.byte_offset); + + let args = parse(["--byte-offset", "--no-byte-offset"]).unwrap(); + assert_eq!(false, args.byte_offset); + + let args = parse(["--no-byte-offset", "-b"]).unwrap(); + assert_eq!(true, args.byte_offset); +} + +/// -s/--case-sensitive +#[derive(Debug)] +struct CaseSensitive; + +impl Flag for CaseSensitive { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b's') + } + fn name_long(&self) -> &'static str { + "case-sensitive" + } + fn doc_category(&self) -> &'static str { + "search" + } + fn doc_short(&self) -> &'static str { + r"Search case sensitively (default)." + } + fn doc_long(&self) -> &'static str { + r" +Execute the search case sensitively. This is the default mode. +.sp +This is a global option that applies to all patterns given to ripgrep. +Individual patterns can still be matched case insensitively by using inline +regex flags. For example, \fB(?i)abc\fP will match \fBabc\fP case insensitively +even when this flag is used. +.sp +This flag overrides the \flag{ignore-case} and \flag{smart-case} flags. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "flag has no negation"); + args.case = CaseMode::Sensitive; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_case_sensitive() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse(["--case-sensitive"]).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); + + let args = parse(["-s"]).unwrap(); + assert_eq!(CaseMode::Sensitive, args.case); +} + +/// --color +#[derive(Debug)] +struct Color; + +impl Flag for Color { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "color" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("WHEN") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + "When to use color." + } + fn doc_long(&self) -> &'static str { + r" +This flag controls when to use colors. The default setting is \fBauto\fP, which +means ripgrep will try to guess when to use colors. For example, if ripgrep is +printing to a terminal, then it will use colors, but if it is redirected to a +file or a pipe, then it will suppress color output. +.sp +ripgrep will suppress color output by default in some other circumstances as +well. These include, but are not limited to: +.sp +.IP \(bu 3n +When the \fBTERM\fP environment variable is not set or set to \fBdumb\fP. +.sp +.IP \(bu 3n +When the \fBNO_COLOR\fP environment variable is set (regardless of value). +.sp +.IP \(bu 3n +When flags that imply no use for colors are given. For example, +\flag{vimgrep} and \flag{json}. +. +.PP +The possible values for this flag are: +.sp +.IP \fBnever\fP 10n +Colors will never be used. +.sp +.IP \fBauto\fP 10n +The default. ripgrep tries to be smart. +.sp +.IP \fBalways\fP 10n +Colors will always be used regardless of where output is sent. +.sp +.IP \fBansi\fP 10n +Like 'always', but emits ANSI escapes (even in a Windows console). +. +.PP +This flag also controls whether hyperlinks are emitted. For example, when +a hyperlink format is specified, hyperlinks won't be used when color is +suppressed. If one wants to emit hyperlinks but no colors, then one must use +the \flag{colors} flag to manually set all color styles to \fBnone\fP: +.sp +.EX + \-\-colors 'path:none' \\ + \-\-colors 'line:none' \\ + \-\-colors 'column:none' \\ + \-\-colors 'match:none' +.EE +.sp +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &["never", "auto", "always", "ansi"] + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + args.color = match convert::str(&v.unwrap_value())? { + "never" => ColorChoice::Never, + "auto" => ColorChoice::Auto, + "always" => ColorChoice::Always, + "ansi" => ColorChoice::Ansi, + unk => anyhow::bail!("choice '{unk}' is unrecognized"), + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_color() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(ColorChoice::Auto, args.color); + + let args = parse(["--color", "never"]).unwrap(); + assert_eq!(ColorChoice::Never, args.color); + + let args = parse(["--color", "auto"]).unwrap(); + assert_eq!(ColorChoice::Auto, args.color); + + let args = parse(["--color", "always"]).unwrap(); + assert_eq!(ColorChoice::Always, args.color); + + let args = parse(["--color", "ansi"]).unwrap(); + assert_eq!(ColorChoice::Ansi, args.color); + + let args = parse(["--color=never"]).unwrap(); + assert_eq!(ColorChoice::Never, args.color); + + let args = parse(["--color", "always", "--color", "never"]).unwrap(); + assert_eq!(ColorChoice::Never, args.color); + + let args = parse(["--color", "never", "--color", "always"]).unwrap(); + assert_eq!(ColorChoice::Always, args.color); + + let result = parse(["--color", "foofoo"]); + assert!(result.is_err(), "{result:?}"); + + let result = parse(["--color", "Always"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --colors +#[derive(Debug)] +struct Colors; + +impl Flag for Colors { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "colors" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("COLOR_SPEC") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + "Configure color settings and styles." + } + fn doc_long(&self) -> &'static str { + r" +This flag specifies color settings for use in the output. This flag may be +provided multiple times. Settings are applied iteratively. Pre-existing color +labels are limited to one of eight choices: \fBred\fP, \fBblue\fP, \fBgreen\fP, +\fBcyan\fP, \fBmagenta\fP, \fByellow\fP, \fBwhite\fP and \fBblack\fP. Styles +are limited to \fBnobold\fP, \fBbold\fP, \fBnointense\fP, \fBintense\fP, +\fBnounderline\fP or \fBunderline\fP. +.sp +The format of the flag is +\fB{\fP\fItype\fP\fB}:{\fP\fIattribute\fP\fB}:{\fP\fIvalue\fP\fB}\fP. +\fItype\fP should be one of \fBpath\fP, \fBline\fP, \fBcolumn\fP or +\fBmatch\fP. \fIattribute\fP can be \fBfg\fP, \fBbg\fP or \fBstyle\fP. +\fIvalue\fP is either a color (for \fBfg\fP and \fBbg\fP) or a text style. A +special format, \fB{\fP\fItype\fP\fB}:none\fP, will clear all color settings +for \fItype\fP. +.sp +For example, the following command will change the match color to magenta and +the background color for line numbers to yellow: +.sp +.EX + rg \-\-colors 'match:fg:magenta' \-\-colors 'line:bg:yellow' +.EE +.sp +Extended colors can be used for \fIvalue\fP when the terminal supports +ANSI color sequences. These are specified as either \fIx\fP (256-color) or +.IB x , x , x +(24-bit truecolor) where \fIx\fP is a number between \fB0\fP and \fB255\fP +inclusive. \fIx\fP may be given as a normal decimal number or a hexadecimal +number, which is prefixed by \fB0x\fP. +.sp +For example, the following command will change the match background color to +that represented by the rgb value (0,128,255): +.sp +.EX + rg \-\-colors 'match:bg:0,128,255' +.EE +.sp +or, equivalently, +.sp +.EX + rg \-\-colors 'match:bg:0x0,0x80,0xFF' +.EE +.sp +Note that the \fBintense\fP and \fBnointense\fP styles will have no effect when +used alongside these extended color codes. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + let v = v.unwrap_value(); + let v = convert::str(&v)?; + args.colors.push(v.parse()?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_colors() { + let args = parse(None::<&str>).unwrap(); + assert!(args.colors.is_empty()); + + let args = parse(["--colors", "match:fg:magenta"]).unwrap(); + assert_eq!(args.colors, vec!["match:fg:magenta".parse().unwrap()]); + + let args = + parse(["--colors", "match:fg:magenta", "--colors", "line:bg:yellow"]) + .unwrap(); + assert_eq!( + args.colors, + vec![ + "match:fg:magenta".parse().unwrap(), + "line:bg:yellow".parse().unwrap() + ] + ); +} + +/// --column +#[derive(Debug)] +struct Column; + +impl Flag for Column { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "column" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-column") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + "Show column numbers." + } + fn doc_long(&self) -> &'static str { + r" +Show column numbers (1-based). This only shows the column numbers for the first +match on each line. This does not try to account for Unicode. One byte is equal +to one column. This implies \flag{line-number}. +.sp +When \flag{only-matching} is used, then the column numbers written correspond +to the start of each match. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + args.column = v.unwrap_switch(); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_column() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(false, args.column); + + let args = parse(["--column"]).unwrap(); + assert_eq!(true, args.column); + + let args = parse(["--column", "--no-column"]).unwrap(); + assert_eq!(false, args.column); + + let args = parse(["--no-column", "--column"]).unwrap(); + assert_eq!(true, args.column); +} + +/// -C/--context +#[derive(Debug)] +struct Context; + +impl Flag for Context { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'C') + } + fn name_long(&self) -> &'static str { + "context" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + r"Show NUM lines before and after each match." + } + fn doc_long(&self) -> &'static str { + r" +Show \fINUM\fP lines before and after each match. This is equivalent to +providing both the \flag{before\-context} and \flag{after-context} flags with +the same value. +.sp +This overrides the \flag{passthru} flag. The \flag{after-context} and +\flag{before-context} flags both partially override this flag, regardless of +the order. For example, \fB\-A2 \-C1\fP is equivalent to \fB\-A2 \-B1\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + args.context.set_both(convert::usize(&v.unwrap_value())?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_context() { + // TODO: Add override tests with --passthru. + + let mkctx = |lines| { + let mut mode = ContextMode::default(); + mode.set_both(lines); + mode + }; + + let args = parse(None::<&str>).unwrap(); + assert_eq!(ContextMode::default(), args.context); + + let args = parse(["--context", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["--context=5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["-C", "5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["-C5"]).unwrap(); + assert_eq!(mkctx(5), args.context); + + let args = parse(["-C5", "-C10"]).unwrap(); + assert_eq!(mkctx(10), args.context); + + let args = parse(["-C5", "-C0"]).unwrap(); + assert_eq!(mkctx(0), args.context); + + let n = usize::MAX.to_string(); + let args = parse(["--context", n.as_str()]).unwrap(); + assert_eq!(mkctx(usize::MAX), args.context); + + #[cfg(target_pointer_width = "64")] + { + let n = (u128::from(u64::MAX) + 1).to_string(); + let result = parse(["--context", n.as_str()]); + assert!(result.is_err(), "{result:?}"); + } + + // Test the interaction between -A/-B and -C. Basically, -A/-B always + // partially overrides -C, regardless of where they appear relative to + // each other. This behavior is also how GNU grep works, and it also makes + // logical sense to me: -A/-B are the more specific flags. + let args = parse(["-A1", "-C5"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((5, 1), args.context.get_limited()); + + let args = parse(["-B1", "-C5"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((1, 5), args.context.get_limited()); + + let args = parse(["-A1", "-B2", "-C5"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(2); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((2, 1), args.context.get_limited()); + + // These next three are like the ones above, but with -C before -A/-B. This + // tests that -A and -B only partially override -C. That is, -C1 -A2 is + // equivalent to -B1 -A2. + let args = parse(["-C5", "-A1"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((5, 1), args.context.get_limited()); + + let args = parse(["-C5", "-B1"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((1, 5), args.context.get_limited()); + + let args = parse(["-C5", "-A1", "-B2"]).unwrap(); + let mut mode = ContextMode::default(); + mode.set_before(2); + mode.set_after(1); + mode.set_both(5); + assert_eq!(mode, args.context); + assert_eq!((2, 1), args.context.get_limited()); +} + +/// --context-separator +#[derive(Debug)] +struct ContextSeparator; + +impl Flag for ContextSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "context-separator" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-context-separator") + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + r"Set the separator used between contextual chunks." + } + fn doc_long(&self) -> &'static str { + r" +The string used to separate non-contiguous context lines in the output. This is +only used when one of the context flags is used (that is, \flag{after-context}, +\flag{before-context} or \flag{context}). Escape sequences like \fB\\x7F\fP or +\fB\\t\fP may be used. The default value is \fB\-\-\fP. +.sp +When the context separator is set to an empty string, then a line break +is still inserted. To completely disable context separators, use the +\flag-negate{context-separator} flag. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + use crate::flags::ContextSeparator as Separator; + + args.context_separator = match v { + FlagValue::Switch(true) => { + unreachable!("flag can only be disabled") + } + FlagValue::Switch(false) => Separator::disabled(), + FlagValue::Value(v) => Separator::new(&v)?, + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_context_separator() { + use bstr::BString; + + use crate::flags::ContextSeparator as Separator; + + let getbytes = |ctxsep: Separator| ctxsep.into_bytes().map(BString::from); + + let args = parse(None::<&str>).unwrap(); + assert_eq!(Some(BString::from("--")), getbytes(args.context_separator)); + + let args = parse(["--context-separator", "XYZ"]).unwrap(); + assert_eq!(Some(BString::from("XYZ")), getbytes(args.context_separator)); + + let args = parse(["--no-context-separator"]).unwrap(); + assert_eq!(None, getbytes(args.context_separator)); + + let args = parse(["--context-separator", "XYZ", "--no-context-separator"]) + .unwrap(); + assert_eq!(None, getbytes(args.context_separator)); + + let args = parse(["--no-context-separator", "--context-separator", "XYZ"]) + .unwrap(); + assert_eq!(Some(BString::from("XYZ")), getbytes(args.context_separator)); + + // This checks that invalid UTF-8 can be used. This case isn't too tricky + // to handle, because it passes the invalid UTF-8 as an escape sequence + // that is itself valid UTF-8. It doesn't become invalid UTF-8 until after + // the argument is parsed and then unescaped. + let args = parse(["--context-separator", r"\xFF"]).unwrap(); + assert_eq!(Some(BString::from(b"\xFF")), getbytes(args.context_separator)); + + // In this case, we specifically try to pass an invalid UTF-8 argument to + // the flag. In theory we might be able to support this, but because we do + // unescaping and because unescaping wants valid UTF-8, we do a UTF-8 check + // on the value. Since we pass invalid UTF-8, it fails. This demonstrates + // that the only way to use an invalid UTF-8 separator is by specifying an + // escape sequence that is itself valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let result = parse([ + OsStr::from_bytes(b"--context-separator"), + OsStr::from_bytes(&[0xFF]), + ]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// -c/--count +#[derive(Debug)] +struct Count; + +impl Flag for Count { + fn is_switch(&self) -> bool { + true + } + fn name_short(&self) -> Option { + Some(b'c') + } + fn name_long(&self) -> &'static str { + "count" + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + r"Only show the count of matching lines for each file." + } + fn doc_long(&self) -> &'static str { + r" +This flag suppresses normal output and shows the number of lines that match the +given patterns for each file searched. Each file containing a match has its +path and count printed on each line. Note that unless \flag{multiline} +is enabled, this reports the number of lines that match and not the total +number of matches. In multiline mode, \flag{count} is equivalent to +\flag{count-matches}. +.sp +If only one file is given to ripgrep, then only the count is printed if there +is a match. The \flag{with-filename} flag can be used to force printing the +file path in this case. If you need a count to be printed regardless of whether +there is a match, then use \flag{include-zero}. +.sp +This overrides the \flag{count-matches} flag. Note that when \flag{count} +is combined with \flag{only-matching}, then ripgrep behaves as if +\flag{count-matches} was given. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + let yes = v.unwrap_switch(); + assert!(yes, "--count can only be enabled"); + args.count = Some(CountMode::Lines); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_count() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(None, args.count); + + let args = parse(["--count"]).unwrap(); + assert_eq!(Some(CountMode::Lines), args.count); + + let args = parse(["-c"]).unwrap(); + assert_eq!(Some(CountMode::Lines), args.count); + + let args = parse(["--count-matches", "--count"]).unwrap(); + assert_eq!(Some(CountMode::Lines), args.count); + + let args = parse(["--count-matches", "-c"]).unwrap(); + assert_eq!(Some(CountMode::Lines), args.count); +} + +/// --count-matches +#[derive(Debug)] +struct CountMatches; + +impl Flag for CountMatches { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "count-matches" + } + fn doc_variable(&self) -> Option<&'static str> { + None + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + r"Only show the count of individual matches for each file." + } + fn doc_long(&self) -> &'static str { + r" +This flag suppresses normal output and shows the number of individual matches +of the given patterns for each file searched. Each file containing matches has +its path and match count printed on each line. Note that this reports the total +number of individual matches and not the number of lines that match. +.sp +If only one file is given to ripgrep, then only the count is printed if there +is a match. The \flag{with-filename} flag can be used to force printing the +file path in this case. +.sp +This overrides the \flag{count} flag. Note that when \flag{count} is combined +with \flag{only-matching}, then ripgrep behaves as if \flag{count-matches} was +given. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + let yes = v.unwrap_switch(); + assert!(yes, "--count-matches can only be enabled"); + args.count = Some(CountMode::All); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_count_matches() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(None, args.count); + + let args = parse(["--count-matches"]).unwrap(); + assert_eq!(Some(CountMode::All), args.count); + + let args = parse(["--count", "--count-matches"]).unwrap(); + assert_eq!(Some(CountMode::All), args.count); + + let args = parse(["-c", "--count-matches"]).unwrap(); + assert_eq!(Some(CountMode::All), args.count); +} + +/// --crlf +#[derive(Debug)] +struct Crlf; + +impl Flag for Crlf { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "crlf" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-crlf") + } + fn doc_category(&self) -> &'static str { + "search" + } + fn doc_short(&self) -> &'static str { + r"Support CRLF line terminators (useful on Windows)." + } + fn doc_long(&self) -> &'static str { + r" +When enabled, ripgrep will treat CRLF (\fB\\r\\n\fP) as a line terminator +instead of just \fB\\n\fP. +.sp +Principally, this permits the line anchor assertions \fB^\fP and \fB$\fP in +regex patterns to treat CRLF, CR or LF as line terminators instead of just LF. +Note that they will never match between a CR and a LF. CRLF is treated as one +single line terminator. +.sp +When using the default regex engine, CRLF support can also be enabled inside +the pattern with the \fBR\fP flag. For example, \fB(?R:$)\fP will match just +before either CR or LF, but never between CR and LF. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + args.crlf = v.unwrap_switch(); + if args.crlf { + args.null_data = false; + } + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_crlf() { + // TODO: Add more tests with --null-data. + + let args = parse(None::<&str>).unwrap(); + assert_eq!(false, args.crlf); + + let args = parse(["--crlf"]).unwrap(); + assert_eq!(true, args.crlf); +} + +/// --debug +#[derive(Debug)] +struct Debug; + +impl Flag for Debug { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "debug" + } + fn doc_category(&self) -> &'static str { + "logging" + } + fn doc_short(&self) -> &'static str { + r"Show debug messages." + } + fn doc_long(&self) -> &'static str { + r" +Show debug messages. Please use this when filing a bug report. +.sp +The \flag{debug} flag is generally useful for figuring out why ripgrep skipped +searching a particular file. The debug messages should mention all files +skipped and why they were skipped. +.sp +To get even more debug output, use the \flag{trace} flag, which implies +\flag{debug} along with additional trace data. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--debug can only be enabled"); + args.logging = Some(LoggingMode::Debug); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_debug() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(None, args.logging); + + let args = parse(["--debug"]).unwrap(); + assert_eq!(Some(LoggingMode::Debug), args.logging); + + let args = parse(["--trace", "--debug"]).unwrap(); + assert_eq!(Some(LoggingMode::Debug), args.logging); +} + +/// --dfa-size-limit +#[derive(Debug)] +struct DfaSizeLimit; + +impl Flag for DfaSizeLimit { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "dfa-size-limit" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("NUM+SUFFIX?") + } + fn doc_category(&self) -> &'static str { + "search" + } + fn doc_short(&self) -> &'static str { + r"The upper size limit of the regex DFA." + } + fn doc_long(&self) -> &'static str { + r" +The upper size limit of the regex DFA. The default limit is something generous +for any single pattern or for many smallish patterns. This should only be +changed on very large regex inputs where the (slower) fallback regex engine may +otherwise be used if the limit is reached. +.sp +The input format accepts suffixes of \fBK\fP, \fBM\fP or \fBG\fP which +correspond to kilobytes, megabytes and gigabytes, respectively. If no suffix is +provided the input is treated as bytes. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + let v = v.unwrap_value(); + args.dfa_size_limit = Some(convert::human_readable_usize(&v)?); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_dfa_size_limit() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(None, args.dfa_size_limit); + + #[cfg(target_pointer_width = "64")] + { + let args = parse(["--dfa-size-limit", "9G"]).unwrap(); + assert_eq!(Some(9 * (1 << 30)), args.dfa_size_limit); + + let args = parse(["--dfa-size-limit=9G"]).unwrap(); + assert_eq!(Some(9 * (1 << 30)), args.dfa_size_limit); + + let args = + parse(["--dfa-size-limit=9G", "--dfa-size-limit=0"]).unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + } + + let args = parse(["--dfa-size-limit=0K"]).unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + + let args = parse(["--dfa-size-limit=0M"]).unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + + let args = parse(["--dfa-size-limit=0G"]).unwrap(); + assert_eq!(Some(0), args.dfa_size_limit); + + let result = parse(["--dfa-size-limit", "9999999999999999999999"]); + assert!(result.is_err(), "{result:?}"); + + let result = parse(["--dfa-size-limit", "9999999999999999G"]); + assert!(result.is_err(), "{result:?}"); +} + +/// -E/--encoding +#[derive(Debug)] +struct Encoding; + +impl Flag for Encoding { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'E') + } + fn name_long(&self) -> &'static str { + "encoding" + } + fn name_negated(&self) -> Option<&'static str> { + Some("no-encoding") + } + fn doc_variable(&self) -> Option<&'static str> { + Some("ENCODING") + } + fn doc_category(&self) -> &'static str { + "search" + } + fn doc_short(&self) -> &'static str { + r"Specify the text encoding of files to search." + } + fn doc_long(&self) -> &'static str { + r" +Specify the text encoding that ripgrep will use on all files searched. The +default value is \fBauto\fP, which will cause ripgrep to do a best effort +automatic detection of encoding on a per-file basis. Automatic detection in +this case only applies to files that begin with a UTF-8 or UTF-16 byte-order +mark (BOM). No other automatic detection is performed. One can also specify +\fBnone\fP which will then completely disable BOM sniffing and always result +in searching the raw bytes, including a BOM if it's present, regardless of its +encoding. +.sp +Other supported values can be found in the list of labels here: +\fIhttps://encoding.spec.whatwg.org/#concept-encoding-get\fP. +.sp +For more details on encoding and how ripgrep deals with it, see \fBGUIDE.md\fP. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + let value = match v { + FlagValue::Value(v) => v, + FlagValue::Switch(true) => { + unreachable!("--encoding must accept a value") + } + FlagValue::Switch(false) => { + args.encoding = EncodingMode::Auto; + return Ok(()); + } + }; + let label = convert::str(&value)?; + args.encoding = match label { + "auto" => EncodingMode::Auto, + "none" => EncodingMode::Disabled, + _ => EncodingMode::Some(grep::searcher::Encoding::new(label)?), + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_encoding() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let args = parse(["--encoding", "auto"]).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let args = parse(["--encoding", "none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse(["--encoding=none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse(["-E", "none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse(["-Enone"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse(["-E", "none", "--no-encoding"]).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let args = parse(["--no-encoding", "-E", "none"]).unwrap(); + assert_eq!(EncodingMode::Disabled, args.encoding); + + let args = parse(["-E", "utf-16"]).unwrap(); + let enc = grep::searcher::Encoding::new("utf-16").unwrap(); + assert_eq!(EncodingMode::Some(enc), args.encoding); + + let args = parse(["-E", "utf-16", "--no-encoding"]).unwrap(); + assert_eq!(EncodingMode::Auto, args.encoding); + + let result = parse(["-E", "foo"]); + assert!(result.is_err(), "{result:?}"); +} + +/// --engine +#[derive(Debug)] +struct Engine; + +impl Flag for Engine { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "engine" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("ENGINE") + } + fn doc_category(&self) -> &'static str { + "search" + } + fn doc_short(&self) -> &'static str { + r"Specify which regex engine to use." + } + fn doc_long(&self) -> &'static str { + r" +Specify which regular expression engine to use. When you choose a regex engine, +it applies that choice for every regex provided to ripgrep (e.g., via multiple +\flag{regexp} or \flag{file} flags). +.sp +Accepted values are \fBdefault\fP, \fBpcre2\fP, or \fBauto\fP. +.sp +The default value is \fBdefault\fP, which is usually the fastest and should be +good for most use cases. The \fBpcre2\fP engine is generally useful when you +want to use features such as look-around or backreferences. \fBauto\fP will +dynamically choose between supported regex engines depending on the features +used in a pattern on a best effort basis. +.sp +Note that the \fBpcre2\fP engine is an optional ripgrep feature. If PCRE2 +wasn't included in your build of ripgrep, then using this flag will result in +ripgrep printing an error message and exiting. +.sp +This overrides previous uses of the \flag{pcre2} and \flag{auto-hybrid-regex} +flags. +" + } + fn doc_choices(&self) -> &'static [&'static str] { + &["default", "pcre2", "auto"] + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + let v = v.unwrap_value(); + let string = convert::str(&v)?; + args.engine = match string { + "default" => EngineChoice::Default, + "pcre2" => EngineChoice::PCRE2, + "auto" => EngineChoice::Auto, + _ => anyhow::bail!("unrecognized regex engine '{string}'"), + }; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_engine() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse(["--engine", "pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse(["--engine=pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse(["--auto-hybrid-regex", "--engine=pcre2"]).unwrap(); + assert_eq!(EngineChoice::PCRE2, args.engine); + + let args = parse(["--engine=pcre2", "--auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = parse(["--auto-hybrid-regex", "--engine=auto"]).unwrap(); + assert_eq!(EngineChoice::Auto, args.engine); + + let args = parse(["--auto-hybrid-regex", "--engine=default"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); + + let args = parse(["--engine=pcre2", "--no-auto-hybrid-regex"]).unwrap(); + assert_eq!(EngineChoice::Default, args.engine); +} + +/// --field-context-separator +#[derive(Debug)] +struct FieldContextSeparator; + +impl Flag for FieldContextSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "field-context-separator" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + r"Set the field context separator." + } + fn doc_long(&self) -> &'static str { + r" +Set the field context separator. This separator is only used when printing +contextual lines. It is used to delimit file paths, line numbers, columns and +the contextual line itself. The separator may be any number of bytes, including +zero. Escape sequences like \fB\\x7F\fP or \fB\\t\fP may be used. +.sp +The \fB-\fP character is the default value. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + use crate::flags::FieldContextSeparator as Separator; + + args.field_context_separator = Separator::new(&v.unwrap_value())?; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_field_context_separator() { + use bstr::BString; + + let args = parse(None::<&str>).unwrap(); + assert_eq!(BString::from("-"), args.field_context_separator.into_bytes()); + + let args = parse(["--field-context-separator", "XYZ"]).unwrap(); + assert_eq!( + BString::from("XYZ"), + args.field_context_separator.into_bytes() + ); + + let args = parse(["--field-context-separator=XYZ"]).unwrap(); + assert_eq!( + BString::from("XYZ"), + args.field_context_separator.into_bytes() + ); + + let args = parse([ + "--field-context-separator", + "XYZ", + "--field-context-separator", + "ABC", + ]) + .unwrap(); + assert_eq!( + BString::from("ABC"), + args.field_context_separator.into_bytes() + ); + + let args = parse(["--field-context-separator", r"\t"]).unwrap(); + assert_eq!(BString::from("\t"), args.field_context_separator.into_bytes()); + + let args = parse(["--field-context-separator", r"\x00"]).unwrap(); + assert_eq!( + BString::from("\x00"), + args.field_context_separator.into_bytes() + ); + + // This checks that invalid UTF-8 can be used. This case isn't too tricky + // to handle, because it passes the invalid UTF-8 as an escape sequence + // that is itself valid UTF-8. It doesn't become invalid UTF-8 until after + // the argument is parsed and then unescaped. + let args = parse(["--field-context-separator", r"\xFF"]).unwrap(); + assert_eq!( + BString::from(b"\xFF"), + args.field_context_separator.into_bytes() + ); + + // In this case, we specifically try to pass an invalid UTF-8 argument to + // the flag. In theory we might be able to support this, but because we do + // unescaping and because unescaping wants valid UTF-8, we do a UTF-8 check + // on the value. Since we pass invalid UTF-8, it fails. This demonstrates + // that the only way to use an invalid UTF-8 separator is by specifying an + // escape sequence that is itself valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let result = parse([ + OsStr::from_bytes(b"--field-context-separator"), + OsStr::from_bytes(&[0xFF]), + ]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// --field-match-separator +#[derive(Debug)] +struct FieldMatchSeparator; + +impl Flag for FieldMatchSeparator { + fn is_switch(&self) -> bool { + false + } + fn name_long(&self) -> &'static str { + "field-match-separator" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("SEPARATOR") + } + fn doc_category(&self) -> &'static str { + "output" + } + fn doc_short(&self) -> &'static str { + r"Set the field match separator." + } + fn doc_long(&self) -> &'static str { + r" +Set the field match separator. This separator is only used when printing +matching lines. It is used to delimit file paths, line numbers, columns and the +matching line itself. The separator may be any number of bytes, including zero. +Escape sequences like \fB\\x7F\fP or \fB\\t\fP may be used. +.sp +The \fB:\fP character is the default value. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + use crate::flags::FieldMatchSeparator as Separator; + + args.field_match_separator = Separator::new(&v.unwrap_value())?; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_field_match_separator() { + use bstr::BString; + + let args = parse(None::<&str>).unwrap(); + assert_eq!(BString::from(":"), args.field_match_separator.into_bytes()); + + let args = parse(["--field-match-separator", "XYZ"]).unwrap(); + assert_eq!(BString::from("XYZ"), args.field_match_separator.into_bytes()); + + let args = parse(["--field-match-separator=XYZ"]).unwrap(); + assert_eq!(BString::from("XYZ"), args.field_match_separator.into_bytes()); + + let args = parse([ + "--field-match-separator", + "XYZ", + "--field-match-separator", + "ABC", + ]) + .unwrap(); + assert_eq!(BString::from("ABC"), args.field_match_separator.into_bytes()); + + let args = parse(["--field-match-separator", r"\t"]).unwrap(); + assert_eq!(BString::from("\t"), args.field_match_separator.into_bytes()); + + let args = parse(["--field-match-separator", r"\x00"]).unwrap(); + assert_eq!(BString::from("\x00"), args.field_match_separator.into_bytes()); + + // This checks that invalid UTF-8 can be used. This case isn't too tricky + // to handle, because it passes the invalid UTF-8 as an escape sequence + // that is itself valid UTF-8. It doesn't become invalid UTF-8 until after + // the argument is parsed and then unescaped. + let args = parse(["--field-match-separator", r"\xFF"]).unwrap(); + assert_eq!( + BString::from(b"\xFF"), + args.field_match_separator.into_bytes() + ); + + // In this case, we specifically try to pass an invalid UTF-8 argument to + // the flag. In theory we might be able to support this, but because we do + // unescaping and because unescaping wants valid UTF-8, we do a UTF-8 check + // on the value. Since we pass invalid UTF-8, it fails. This demonstrates + // that the only way to use an invalid UTF-8 separator is by specifying an + // escape sequence that is itself valid UTF-8. + #[cfg(unix)] + { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + + let result = parse([ + OsStr::from_bytes(b"--field-match-separator"), + OsStr::from_bytes(&[0xFF]), + ]); + assert!(result.is_err(), "{result:?}"); + } +} + +/// -f/--file +#[derive(Debug)] +struct File; + +impl Flag for File { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'f') + } + fn name_long(&self) -> &'static str { + "file" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("PATTERNFILE") + } + fn doc_category(&self) -> &'static str { + "input" + } + fn doc_short(&self) -> &'static str { + r"Search for patterns from the given file." + } + fn doc_long(&self) -> &'static str { + r" +Search for patterns from the given file, with one pattern per line. When this +flag is used multiple times or in combination with the \flag{regexp} flag, then +all patterns provided are searched. Empty pattern lines will match all input +lines, and the newline is not counted as part of the pattern. +.sp +A line is printed if and only if it matches at least one of the patterns. +.sp +When \fIPATTERNFILE\fP is \fB-\fP, then \fBstdin\fP will be read for the +patterns. +.sp +When \flag{file} or \flag{regexp} is used, then ripgrep treats all positional +arguments as files or directories to search. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + let path = PathBuf::from(v.unwrap_value()); + args.patterns.push(PatternSource::File(path)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_file() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.patterns); + + let args = parse(["--file", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse(["--file=foo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse(["-f", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse(["-ffoo"]).unwrap(); + assert_eq!(vec![PatternSource::File(PathBuf::from("foo"))], args.patterns); + + let args = parse(["--file", "-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse(["--file=-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse(["-f", "-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse(["-f-foo"]).unwrap(); + assert_eq!( + vec![PatternSource::File(PathBuf::from("-foo"))], + args.patterns + ); + + let args = parse(["--file=foo", "--file", "bar"]).unwrap(); + assert_eq!( + vec![ + PatternSource::File(PathBuf::from("foo")), + PatternSource::File(PathBuf::from("bar")) + ], + args.patterns + ); + + // We permit path arguments to be invalid UTF-8. So test that. Some of + // these cases are tricky and depend on lexopt doing the right thing. + // + // We probably should add tests for this handling on Windows too, but paths + // that are invalid UTF-16 appear incredibly rare in the Windows world. + #[cfg(unix)] + { + use std::{ + ffi::{OsStr, OsString}, + os::unix::ffi::{OsStrExt, OsStringExt}, + }; + + let bytes = &[b'A', 0xFF, b'Z'][..]; + let path = PathBuf::from(OsString::from_vec(bytes.to_vec())); + + let args = + parse([OsStr::from_bytes(b"--file"), OsStr::from_bytes(bytes)]) + .unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + + let args = parse([OsStr::from_bytes(b"-f"), OsStr::from_bytes(bytes)]) + .unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + + let mut bytes = b"--file=A".to_vec(); + bytes.push(0xFF); + bytes.push(b'Z'); + let args = parse([OsStr::from_bytes(&bytes)]).unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + + let mut bytes = b"-fA".to_vec(); + bytes.push(0xFF); + bytes.push(b'Z'); + let args = parse([OsStr::from_bytes(&bytes)]).unwrap(); + assert_eq!(vec![PatternSource::File(path.clone())], args.patterns); + } +} + +/// -e/--regexp +#[derive(Debug)] +struct Regexp; + +impl Flag for Regexp { + fn is_switch(&self) -> bool { + false + } + fn name_short(&self) -> Option { + Some(b'e') + } + fn name_long(&self) -> &'static str { + "regexp" + } + fn doc_variable(&self) -> Option<&'static str> { + Some("PATTERN") + } + fn doc_category(&self) -> &'static str { + "input" + } + fn doc_short(&self) -> &'static str { + r"A pattern to search for." + } + fn doc_long(&self) -> &'static str { + r" +A pattern to search for. This option can be provided multiple times, where +all patterns given are searched, in addition to any patterns provided by +\flag{file}. Lines matching at least one of the provided patterns are printed. +This flag can also be used when searching for patterns that start with a dash. +.sp +For example, to search for the literal \fB\-foo\fP: +.sp +.EX + rg \-e \-foo +.EE +.sp +You can also use the special \fB\-\-\fP delimiter to indicate that no more +flags will be provided. Namely, the following is equivalent to the above: +.sp +.EX + rg \-\- \-foo +.EE +.sp +When \flag{file} or \flag{regexp} is used, then ripgrep treats all positional +arguments as files or directories to search. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + let regexp = convert::string(v.unwrap_value())?; + args.patterns.push(PatternSource::Regexp(regexp)); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_regexp() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(Vec::::new(), args.patterns); + + let args = parse(["--regexp", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse(["--regexp=foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse(["-e", "foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse(["-efoo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("foo".to_string())], args.patterns); + + let args = parse(["--regexp", "-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse(["--regexp=-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse(["-e", "-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse(["-e-foo"]).unwrap(); + assert_eq!(vec![PatternSource::Regexp("-foo".to_string())], args.patterns); + + let args = parse(["--regexp=foo", "--regexp", "bar"]).unwrap(); + assert_eq!( + vec![ + PatternSource::Regexp("foo".to_string()), + PatternSource::Regexp("bar".to_string()) + ], + args.patterns + ); + + // While we support invalid UTF-8 arguments in general, patterns must be + // valid UTF-8. + #[cfg(unix)] + { + use std::{ + ffi::{OsStr, OsString}, + os::unix::ffi::{OsStrExt, OsStringExt}, + }; + + let bytes = &[b'A', 0xFF, b'Z'][..]; + let result = + parse([OsStr::from_bytes(b"-e"), OsStr::from_bytes(bytes)]); + assert!(result.is_err(), "{result:?}"); + } + + // Check that combining -e/--regexp and -f/--file works as expected. + let args = parse(["-efoo", "-fbar"]).unwrap(); + assert_eq!( + vec![ + PatternSource::Regexp("foo".to_string()), + PatternSource::File(PathBuf::from("bar")) + ], + args.patterns + ); + + let args = parse(["-efoo", "-fbar", "-equux"]).unwrap(); + assert_eq!( + vec![ + PatternSource::Regexp("foo".to_string()), + PatternSource::File(PathBuf::from("bar")), + PatternSource::Regexp("quux".to_string()), + ], + args.patterns + ); +} + +/// --files +#[derive(Debug)] +struct Files; + +impl Flag for Files { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "files" + } + fn doc_category(&self) -> &'static str { + "other-behaviors" + } + fn doc_short(&self) -> &'static str { + r"Print each file that would be searched." + } + fn doc_long(&self) -> &'static str { + r" +Print each file that would be searched without actually performing the search. +This is useful to determine whether a particular file is being searched or not. +.sp +This overrides \flag{type-list}. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + assert!(v.unwrap_switch()); + args.files = true; + args.type_list = false; + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_files() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(false, args.files); + + let args = parse(["--files"]).unwrap(); + assert_eq!(true, args.files); +} + +/// --trace +#[derive(Debug)] +struct Trace; + +impl Flag for Trace { + fn is_switch(&self) -> bool { + true + } + fn name_long(&self) -> &'static str { + "trace" + } + fn doc_category(&self) -> &'static str { + "logging" + } + fn doc_short(&self) -> &'static str { + r"Show trace messages." + } + fn doc_long(&self) -> &'static str { + r" +Show trace messages. This shows even more detail than the \flag{debug} +flag. Generally, one should only use this if \flag{debug} doesn't emit the +information you're looking for. +" + } + + fn update(&self, v: FlagValue, args: &mut Args) -> anyhow::Result<()> { + assert!(v.unwrap_switch(), "--trace can only be enabled"); + args.logging = Some(LoggingMode::Trace); + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_trace() { + let args = parse(None::<&str>).unwrap(); + assert_eq!(None, args.logging); + + let args = parse(["--trace"]).unwrap(); + assert_eq!(Some(LoggingMode::Trace), args.logging); + + let args = parse(["--debug", "--trace"]).unwrap(); + assert_eq!(Some(LoggingMode::Trace), args.logging); +} + +mod convert { + use std::ffi::{OsStr, OsString}; + + use {anyhow::Context, bstr::ByteSlice}; + + pub(super) fn str(v: &OsStr) -> anyhow::Result<&str> { + let Some(s) = v.to_str() else { + anyhow::bail!("value is not valid UTF-8") + }; + Ok(s) + } + + pub(super) fn string(v: OsString) -> anyhow::Result { + let Ok(s) = v.into_string() else { + anyhow::bail!("value is not valid UTF-8") + }; + Ok(s) + } + + pub(super) fn usize(v: &OsStr) -> anyhow::Result { + str(v)?.parse().context("value is not a valid number") + } + + pub(super) fn human_readable_size(v: &OsStr) -> anyhow::Result { + grep::cli::parse_human_readable_size(str(v)?).context("invalid size") + } + + pub(super) fn human_readable_usize(v: &OsStr) -> anyhow::Result { + let size = human_readable_size(v)?; + let Ok(size) = usize::try_from(size) else { + anyhow::bail!("size is too big") + }; + Ok(size) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn available_shorts() { + let mut total = vec![false; 128]; + for byte in 0..=0x7F { + match byte { + b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => { + total[usize::from(byte)] = true + } + _ => continue, + } + } + + let mut taken = vec![false; 128]; + for flag in FLAGS.iter() { + let Some(short) = flag.name_short() else { continue }; + taken[usize::from(short)] = true; + } + + for byte in 0..=0x7F { + if total[usize::from(byte)] && !taken[usize::from(byte)] { + eprintln!("{:?}", char::from(byte)); + } + } + } + + #[test] + fn shorts_all_ascii_alphanumeric() { + for flag in FLAGS.iter() { + let Some(byte) = flag.name_short() else { continue }; + let long = flag.name_long(); + assert!( + byte.is_ascii_alphanumeric() || byte == b'.', + "\\x{byte:0X} is not a valid short flag for {long}", + ) + } + } + + #[test] + fn longs_all_ascii_alphanumeric() { + for flag in FLAGS.iter() { + let long = flag.name_long(); + let count = long.chars().count(); + assert!(count >= 2, "flag '{long}' is less than 2 characters"); + assert!( + long.chars().all(|c| c.is_ascii_alphanumeric() || c == '-'), + "flag '{long}' does not match ^[-0-9A-Za-z]+$", + ); + let Some(negated) = flag.name_negated() else { continue }; + assert!( + negated.chars().all(|c| c.is_ascii_alphanumeric() || c == '-'), + "flag '{negated}' does not match ^[-0-9A-Za-z]+$", + ); + } + } + + #[test] + fn shorts_no_duplicates() { + let mut taken = vec![false; 128]; + for flag in FLAGS.iter() { + let Some(short) = flag.name_short() else { continue }; + let long = flag.name_long(); + assert!( + !taken[usize::from(short)], + "flag {long} has duplicate short flag {}", + char::from(short) + ); + } + } + + #[test] + fn longs_no_duplicates() { + use std::collections::BTreeSet; + + let mut taken = BTreeSet::new(); + for flag in FLAGS.iter() { + let long = flag.name_long(); + assert!(taken.insert(long), "flag {long} has a duplicate name"); + let Some(negated) = flag.name_negated() else { continue }; + assert!( + taken.insert(negated), + "negated flag {negated} has a duplicate name" + ); + } + } + + #[test] + fn switches_have_no_choices() { + for flag in FLAGS.iter() { + if !flag.is_switch() { + continue; + } + let long = flag.name_long(); + let choices = flag.doc_choices(); + assert!( + choices.is_empty(), + "switch flag '{long}' \ + should not have any choices but has some: {choices:?}", + ); + } + } + + #[test] + fn choices_ascii_alphanumeric() { + for flag in FLAGS.iter() { + let long = flag.name_long(); + for choice in flag.doc_choices() { + assert!( + choice + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '-'), + "choice '{choice}' for flag '{long}' does not match \ + ^[-0-9A-Za-z]+$", + ) + } + } + } +} diff --git a/crates/core/flags/doc/help.rs b/crates/core/flags/doc/help.rs new file mode 100644 index 0000000000..46ffd26b22 --- /dev/null +++ b/crates/core/flags/doc/help.rs @@ -0,0 +1,166 @@ +use std::{collections::BTreeMap, fmt::Write}; + +use crate::flags::{defs::FLAGS, Flag}; + +const TEMPLATE_SHORT: &'static str = include_str!("template.short.help"); +const TEMPLATE_LONG: &'static str = include_str!("template.long.help"); + +pub(crate) fn generate_short() -> String { + let version = option_env!("CARGO_PKG_VERSION").unwrap_or("N/A"); + let out = TEMPLATE_SHORT.replace("!!VERSION!!", version); + out +} + +pub(crate) fn generate_long() -> String { + let mut cats = BTreeMap::new(); + for flag in FLAGS.iter().copied() { + let mut cat = cats.entry(flag.doc_category()).or_insert(String::new()); + if !cat.is_empty() { + write!(cat, "\n\n"); + } + generate_long_flag(flag, &mut cat); + } + + let version = option_env!("CARGO_PKG_VERSION").unwrap_or("N/A"); + let mut out = TEMPLATE_LONG.replace("!!VERSION!!", version); + for (name, value) in cats.iter() { + let var = format!("!!{name}!!"); + out = out.replace(&var, value); + } + out +} + +fn generate_long_flag(flag: &dyn Flag, out: &mut String) { + if let Some(byte) = flag.name_short() { + let name = char::from(byte); + write!(out, r" -{name}"); + if let Some(var) = flag.doc_variable() { + write!(out, r" {var}"); + } + write!(out, r", "); + } else { + write!(out, r" "); + } + + let name = flag.name_long(); + write!(out, r"--{name}"); + if let Some(var) = flag.doc_variable() { + write!(out, r"={var}"); + } + write!(out, "\n"); + + let doc = flag.doc_long().trim(); + let doc = super::render_custom_markup(doc, "flag", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + // TODO: Change this to a panic once everything is wired up. + // unreachable!(r"found unrecognized \flag{{{name}}} in --help docs") + write!(out, r"--{name}"); + return; + }; + if let Some(name) = flag.name_short() { + write!(out, r"-{}/", char::from(name)); + } + write!(out, r"--{}", flag.name_long()); + }); + let doc = super::render_custom_markup(&doc, "flag-negate", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + // TODO: Change this to a panic once everything is wired up. + // unreachable!(r"found unrecognized \flag-negate{{{name}}} in --help docs") + write!(out, r"--{name}"); + return; + }; + let Some(name) = flag.name_negated() else { + let long = flag.name_long(); + unreachable!( + "found \\flag-negate{{{long}}} in --help docs but \ + {long} does not have a negation" + ); + }; + write!(out, r"--{name}"); + }); + + let mut cleaned = remove_roff(&doc); + if let Some(negated) = flag.name_negated() { + write!(cleaned, "\n\nThis flag can be disabled with --{negated}."); + } + let indent = " ".repeat(8); + let wrapopts = textwrap::Options::new(71) + // Normally I'd be fine with breaking at hyphens, but ripgrep's docs + // includes a lot of flag names, and they in turn contain hyphens. + // Breaking flag names across lines is not great. + .word_splitter(textwrap::WordSplitter::NoHyphenation); + for (i, paragraph) in cleaned.split("\n\n").enumerate() { + if i > 0 { + write!(out, "\n\n"); + } + let mut new = paragraph.to_string(); + if paragraph.lines().all(|line| line.starts_with(" ")) { + new = textwrap::indent(&new, &indent).replace(r"\\", r"\"); + } else { + new = new.replace("\n", " "); + new = textwrap::refill(&new, &wrapopts); + new = textwrap::indent(&new, &indent); + } + write!(out, "{}", new.trim_end()); + } +} + +/// Removes roff syntax from 'v' such that the result is approximately plain +/// text readable. +/// +/// This is basically a mish mash of heuristics based on the specific roff used +/// in the docs for the flags in this tool. If new kinds of roff are used in +/// the docs, then this may need to be updated to handle them. +fn remove_roff(v: &str) -> String { + let mut lines = vec![]; + for line in v.trim().lines() { + assert!(!line.is_empty(), "roff should have no empty lines"); + if line.starts_with(".") { + if line.starts_with(".IP ") { + let item_label = line + .split(" ") + .nth(1) + .expect("first argument to .IP") + .replace(r"\(bu", r"•") + .replace(r"\fB", "") + .replace(r"\fP", ":"); + lines.push(format!("{item_label}")); + } else if line.starts_with(".IB ") || line.starts_with(".BI ") { + let pieces = line + .split_whitespace() + .skip(1) + .collect::>() + .concat(); + lines.push(format!("{pieces}")); + } else if line.starts_with(".sp") || line.starts_with(".PP") { + lines.push("".to_string()); + } + } else { + lines.push(line.to_string()); + } + } + lines + .join("\n") + .replace(r"\fB", "") + .replace(r"\fI", "") + .replace(r"\fP", "") + .replace(r"\-", "-") + .replace(r"\\", r"\") +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use super::*; + + #[test] + fn todo_remove() { + let dir = PathBuf::from("/tmp/rg-test"); + std::fs::create_dir_all(&dir).unwrap(); + std::fs::write(dir.join("rg.short.help"), generate_short().as_bytes()) + .unwrap(); + std::fs::write(dir.join("rg.long.help"), generate_long().as_bytes()) + .unwrap(); + } +} diff --git a/crates/core/flags/doc/man.rs b/crates/core/flags/doc/man.rs new file mode 100644 index 0000000000..ffec34c81c --- /dev/null +++ b/crates/core/flags/doc/man.rs @@ -0,0 +1,141 @@ +use std::{collections::BTreeMap, fmt::Write}; + +use crate::flags::{defs::FLAGS, Flag}; + +const TEMPLATE: &'static str = include_str!("template.rg.1"); + +pub(crate) fn generate() -> String { + let mut cats = BTreeMap::new(); + for flag in FLAGS.iter().copied() { + let mut cat = cats.entry(flag.doc_category()).or_insert(String::new()); + if !cat.is_empty() { + writeln!(cat, ".sp"); + } + generate_flag(flag, &mut cat); + } + + let version = option_env!("CARGO_PKG_VERSION").unwrap_or("N/A"); + let mut out = TEMPLATE.replace("!!VERSION!!", version); + for (name, value) in cats.iter() { + let var = format!("!!{name}!!"); + out = out.replace(&var, value); + } + out +} + +fn generate_flag(flag: &'static dyn Flag, out: &mut String) { + if let Some(byte) = flag.name_short() { + let name = char::from(byte); + write!(out, r"\fB\-{name}\fP"); + if let Some(var) = flag.doc_variable() { + write!(out, r" \fI{var}\fP"); + } + write!(out, r", "); + } + + let name = flag.name_long(); + write!(out, r"\fB\-\-{name}\fP"); + if let Some(var) = flag.doc_variable() { + write!(out, r"=\fI{var}\fP"); + } + write!(out, "\n"); + + writeln!(out, ".RS 4"); + let doc = flag.doc_long().trim(); + // Convert \flag{foo} into something nicer. + let doc = super::render_custom_markup(doc, "flag", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + // TODO: Change this to a panic once everything is wired up. + // unreachable!(r"found unrecognized \flag{{{name}}} in roff docs") + write!(out, r"\fB\-\-{name}\fP"); + return; + }; + out.push_str(r"\fB"); + if let Some(name) = flag.name_short() { + write!(out, r"\-{}/", char::from(name)); + } + write!(out, r"\-\-{}", flag.name_long()); + out.push_str(r"\fP"); + }); + // Convert \flag-negate{foo} into something nicer. + let doc = super::render_custom_markup(&doc, "flag-negate", |name, out| { + let Some(flag) = crate::flags::parse::lookup(name) else { + // TODO: Change this to a panic once everything is wired up. + // unreachable!(r"found unrecognized \flag-negate{{{name}}} in roff docs") + write!(out, r"\fB\-\-{name}\fP"); + return; + }; + let Some(name) = flag.name_negated() else { + let long = flag.name_long(); + unreachable!( + "found \\flag-negate{{{long}}} in roff docs but \ + {long} does not have a negation" + ); + }; + out.push_str(r"\fB"); + write!(out, r"\-\-{name}"); + out.push_str(r"\fP"); + }); + writeln!(out, "{doc}"); + if let Some(negated) = flag.name_negated() { + // Flags that can be negated that aren't switches, like + // --context-separator, are somewhat weird. Because of that, the docs + // for those flags should discuss the semantics of negation explicitly. + // But for switches, the behavior is always the same. + if !flag.is_switch() { + writeln!(out, ".sp"); + writeln!( + out, + r"This flag can be disabled with \fB\-\-{negated}\fP." + ); + } + } + writeln!(out, ".RE"); +} + +fn flag_long_doc(flag: &'static dyn Flag) -> String { + let flag_prefix = r"\flag{"; + let original = flag.doc_long().trim(); + let mut out = String::new(); + let mut last_match = 0; + for (offset, _) in original.match_indices(flag_prefix) { + out.push_str(&original[last_match..offset]); + + let start = offset + flag_prefix.len(); + let end = start + + original[start..] + .find('}') + .expect(r"found \flag{ without closing }"); + let name = &original[start..end]; + last_match = end + 1; + + let Some(flag) = crate::flags::parse::lookup(name) else { + // TODO: Change this to a panic once everything is wired up. + write!(out, r"\fB--{name}\fP"); + continue; + // unreachable!(r"found unrecognized \flag{{{name}}} in roff docs") + }; + out.push_str(r"\fB"); + if let Some(name) = flag.name_short() { + write!(out, r"\-{}/", char::from(name)); + } + write!(out, r"\-\-{}", flag.name_long()); + out.push_str(r"\fP"); + } + out.push_str(&original[last_match..]); + out +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use super::*; + + #[test] + fn todo_remove() { + let dir = PathBuf::from("/tmp/rg-test"); + std::fs::create_dir_all(&dir).unwrap(); + std::fs::write(dir.join("rg.1"), generate().as_bytes()).unwrap(); + } +} diff --git a/crates/core/flags/doc/mod.rs b/crates/core/flags/doc/mod.rs new file mode 100644 index 0000000000..ea14622bf5 --- /dev/null +++ b/crates/core/flags/doc/mod.rs @@ -0,0 +1,33 @@ +pub(crate) mod help; +pub(crate) mod man; + +/// Searches for `\tag{...}` occurrences in `doc` and calls `replacement` for +/// each such tag found. +/// +/// The first argument given to `replacement` is the tag value, `...`. The +/// second argument is the buffer that accumulates the full replacement text. +/// +/// Since this function is only intended to be used on doc strings written into +/// the program source code, callers should panic in `replacement` if there are +/// any errors or unexpected circumstances. +fn render_custom_markup( + mut doc: &str, + tag: &str, + mut replacement: impl FnMut(&str, &mut String), +) -> String { + let mut out = String::with_capacity(doc.len()); + let tag_prefix = format!(r"\{tag}{{"); + while let Some(offset) = doc.find(&tag_prefix) { + out.push_str(&doc[..offset]); + + let start = offset + tag_prefix.len(); + let Some(end) = doc[start..].find('}').map(|i| start + i) else { + unreachable!(r"found {tag_prefix} without closing }}"); + }; + let name = &doc[start..end]; + replacement(name, &mut out); + doc = &doc[end + 1..]; + } + out.push_str(doc); + out +} diff --git a/crates/core/flags/doc/template.long.help b/crates/core/flags/doc/template.long.help new file mode 100644 index 0000000000..1ba8e99850 --- /dev/null +++ b/crates/core/flags/doc/template.long.help @@ -0,0 +1,58 @@ +ripgrep !!VERSION!! +Andrew Gallant + +ripgrep (rg) recursively searches the current directory for a regex pattern. +By default, ripgrep will respect gitignore rules and automatically skip hidden +files/directories and binary files. + +Use -h for short descriptions and --help for more details. + +Project home page: https://github.com/BurntSushi/ripgrep + +USAGE: + rg [OPTIONS] PATTERN [PATH ...] + rg [OPTIONS] -e PATTERN ... [PATH ...] + rg [OPTIONS] -f PATTERNFILE ... [PATH ...] + rg [OPTIONS] --files [PATH ...] + rg [OPTIONS] --type-list + command | rg [OPTIONS] PATTERN + rg [OPTIONS] --help + rg [OPTIONS] --version + +POSITIONAL ARGUMENTS: + + A regular expression used for searching. To match a pattern beginning + with a dash, use the -e/--regexp flag. + + For example, to search for the literal '-foo', you can use this flag: + + rg -e -foo + + You can also use the special '--' delimiter to indicate that no more + flags will be provided. Namely, the following is equivalent to the + above: + + rg -- -foo + + ... + A file or directory to search. Directories are searched recursively. + File paths specified on the command line override glob and ignore + rules. + +INPUT OPTIONS: +!!input!! + +SEARCH OPTIONS: +!!search!! + +FILTER OPTIONS: +!!filter!! + +OUTPUT OPTIONS: +!!output!! + +LOGGING OPTIONS: +!!logging!! + +OTHER BEHAVIORS: +!!other-behaviors!! diff --git a/crates/core/flags/doc/template.rg.1 b/crates/core/flags/doc/template.rg.1 new file mode 100644 index 0000000000..f04754a6d5 --- /dev/null +++ b/crates/core/flags/doc/template.rg.1 @@ -0,0 +1,402 @@ +.TH RG 1 2023-10-16 !!VERSION!! "User Commands" +. +. +.SH NAME +rg \- recursively search the current directory for lines matching a pattern +. +. +.SH SYNOPSIS +.\" I considered using GNU troff's .SY and .YS "synopsis" macros here, but it +.\" looks like they aren't portable. Specifically, they don't appear to be in +.\" BSD's mdoc used on macOS. +.sp +\fBrg\fP [\fIOPTIONS\fP] \fIPATTERN\fP [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-e\fP \fIPATTERN\fP... [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-f\fP \fIPATTERNFILE\fP... [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-files\fP [\fIPATH\fP...] +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-type\-list\fP +.sp +\fIcommand\fP | \fBrg\fP [\fIOPTIONS\fP] \fIPATTERN\fP +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-help\fP +.sp +\fBrg\fP [\fIOPTIONS\fP] \fB\-\-version\fP +. +. +.SH DESCRIPTION +ripgrep (rg) recursively searches the current directory for a regex pattern. +By default, ripgrep will respect your \fB.gitignore\fP and automatically skip +hidden files/directories and binary files. +.sp +ripgrep's default regex engine uses finite automata and guarantees linear +time searching. Because of this, features like backreferences and arbitrary +look-around are not supported. However, if ripgrep is built with PCRE2, then +the \fB-P/--pcre2\fP flag can be used to enable backreferences and look-around. +.sp +ripgrep supports configuration files. Set \fBRIPGREP_CONFIG_PATH\fP to a +configuration file. The file can specify one shell argument per line. Lines +starting with \fB#\fP are ignored. For more details, see \fBCONFIGURATION +FILES\fP below. +.sp +ripgrep will automatically detect if stdin exists and search stdin for a regex +pattern, e.g. \fBls | rg foo\fP. In some environments, stdin may exist when +it shouldn't. To turn off stdin detection, one can explicitly specify the +directory to search, e.g. \fBrg foo ./\fP. +.sp +Tip: to disable all smart filtering and make ripgrep behave a bit more like +classical grep, use \fBrg -uuu\fP. +. +. +.SH REGEX SYNTAX +ripgrep uses Rust's regex engine by default, which documents its syntax: +\fIhttps://docs.rs/regex/1.*/regex/#syntax\fP +.sp +ripgrep uses byte-oriented regexes, which has some additional documentation: +\fIhttps://docs.rs/regex/1.*/regex/bytes/index.html#syntax\fP +.sp +To a first approximation, ripgrep uses Perl-like regexes without look-around or +backreferences. This makes them very similar to the "extended" (ERE) regular +expressions supported by *egrep*, but with a few additional features like +Unicode character classes. +.sp +If you're using ripgrep with the *-P/--pcre2* flag, then please consult +\fIhttps://www.pcre.org\fP or the PCRE2 man pages for documentation on the +supported syntax. +. +. +.SH POSITIONAL ARGUMENTS +.TP 12 +\fIPATTERN\fP +A regular expression used for searching. To match a pattern beginning with a +dash, use the \fB-e/--regexp\fP option. +.TP 12 +\fIPATH\fP +A file or directory to search. Directories are searched recursively. File paths +specified explicitly on the command line override glob and ignore rules. +. +. +.SH OPTIONS +This section documents all flags that ripgrep accepts. Flags are grouped into +categories below according to their function. +.sp +Note that many options can be turned on and off. In some cases, those flags +are not listed explicitly below. For example, the \fB--column\fP flag (listed +below) enables column numbers in ripgrep's output, but the \fB--no-column\fP +flag (not listed below) disables them. The reverse can also exist. For example, +the \fB--no-ignore\fP flag (listed below) disables ripgrep's \fBgitignore\fP +logic, but the \fB--ignore\fP flag (not listed below) enables it. These flags +are useful for overriding a ripgrep configuration file (or alias) on the +command line. Each flag's documentation notes whether an inverted flag exists. +In all cases, the flag specified last takes precedence. +. +.SS INPUT OPTIONS +!!input!! +. +.SS SEARCH OPTIONS +!!search!! +. +.SS FILTER OPTIONS +!!filter!! +. +.SS OUTPUT OPTIONS +!!output!! +. +.SS LOGGING OPTIONS +!!logging!! +. +.SS OTHER BEHAVIORS +!!other-behaviors!! +. +. +.SH EXIT STATUS +If ripgrep finds a match, then the exit status of the program is \fB0\fP. +If no match could be found, then the exit status is \fB1\fP. If an error +occurred, then the exit status is always \fB2\fP unless ripgrep was run with +the \fB-q/--quiet\fP flag and a match was found. In summary: +.sp +.IP \(bu 3n +\fB0\fP exit status occurs only when at least one match was found, and if +no error occurred, unless \fB-q/--quiet\fP was given. +. +.IP \(bu 3n +\fB1\fP exit status occurs only when no match was found and no error occurred. +. +.IP \(bu 3n +\fB2\fP exit status occurs when an error occurred. This is true for both +catastrophic errors (e.g., a regex syntax error) and for soft errors (e.g., +unable to read a file). +. +. +.SH AUTOMATIC FILTERING +ripgrep does a fair bit of automatic filtering by default. This section +describes that filtering and how to control it. +.sp +\fBTIP\fP: To disable automatic filtering, use \fBrg -uuu\fP. +.sp +ripgrep's automatic "smart" filtering is one of the most apparent +differentiating features between ripgrep and other tools like \fBgrep\fP. As +such, its behavior may be surprising to users that aren't expecting it. +.sp +ripgrep does four types of filtering automatically: +.sp +. +.IP 1. 3n +Files and directories that match ignore rules are not searched. +.IP 2. 3n +Hidden files and directories are not searched. +.IP 3. 3n +Binary files (files with a \fBNUL\fP byte) are not searched. +.IP 4. 3n +Symbolic links are not followed. +.PP +The first type of filtering is the most sophisticated. ripgrep will attempt to +respect your \fBgitignore\fP rules as faithfully as possible. In particular, +this includes the following: +. +.IP \(bu 3n +Any global rules, e.g., in \fB$HOME/.config/git/ignore\fP. +. +.IP \(bu 3n +Any rules in relevant \fB.gitignore\fP files. +. +.IP \(bu 3n +Any local rules, e.g., in \fB.git/info/exclude\fP. +.PP +In some cases, ripgrep and \fBgit\fP will not always be in sync in terms +of which files are ignored. For example, a file that is ignored via +\fB.gitignore\fP but is tracked by \fBgit\fP would not be searched by ripgrep +even though \fBgit\fP tracks it. This is unlikely to ever be fixed. Instead, +you should either make sure your exclude rules match the files you track +precisely, or otherwise use \fBgit grep\fP for search. +.sp +Additional ignore rules can be provided outside of a \fBgit\fP context: +. +.IP \(bu 3n +Any rules in \fB.ignore\fP. +. +.IP \(bu 3n +Any rules in \fB.rgignore\fP. +. +.IP \(bu 3n +Any rules in files specified with the \fB--ignore-file\fP flag. +.PP +The precedence of ignore rules is as follows, with later items overriding +earlier items: +. +.IP \(bu 3n +Files given by \fB--ignore-file\fP. +. +.IP \(bu 3n +Global gitignore rules, e.g., from \fB$HOME/.config/git/ignore\fP. +. +.IP \(bu 3n +Local rules from \fB.git/info/exclude\fP. +. +.IP \(bu 3n +Rules from \fB.gitignore\fP. +. +.IP \(bu 3n +Rules from \fB.ignore\fP. +. +.IP \(bu 3n +Rules from \fB.rgignore\fP. +.PP +So for example, if \fIfoo\fP were in a \fB.gitignore\fP and \fB!\fP\fIfoo\fP +were in an \fB.rgignore\fP, then \fIfoo\fP would not be ignored since +\fB.rgignore\fP takes precedence over \fB.gitignore\fP. +.sp +Each of the types of filtering can be configured via command line flags: +. +.IP \(bu 3n +There are several flags starting with \fB--no-ignore\fP that toggle which, if +any, ignore rules are respected. \fB--no-ignore\fP by itself will disable all +of them. +. +.IP \(bu 3n +\fB-./--hidden\fP will force ripgrep to search hidden files and directories. +. +.IP \(bu 3n +\fB--binary\fP will force ripgrep to search binary files. +. +.IP \(bu 3n +\fB-L/--follow\fP will force ripgrep to follow symlinks. +.PP +As a special short hand, the \fB-u\fP flag can be specified up to three times. +Each additional time incrementally decreases filtering: +. +.IP \(bu 3n +\fB-u\fP is equivalent to \fB--no-ignore\fP. +. +.IP \(bu 3n +\fB-uu\fP is equivalent to \fB--no-ignore --hidden\fP. +. +.IP \(bu 3n +\fB-uuu\fP is equivalent to \fB--no-ignore --hidden --binary\fP. +.PP +In particular, \fBrg -uuu\fP should search the same exact content as \fBgrep +-r\fP. +. +. +.SH CONFIGURATION FILES +ripgrep supports reading configuration files that change ripgrep's default +behavior. The format of the configuration file is an "rc" style and is very +simple. It is defined by two rules: +. +.IP 1. 3n +Every line is a shell argument, after trimming whitespace. +. +.IP 2. 3n +Lines starting with \fB#\fP (optionally preceded by any amount of whitespace) +are ignored. +.PP +ripgrep will look for a single configuration file if and only if the +\fBRIPGREP_CONFIG_PATH\fP environment variable is set and is non-empty. +ripgrep will parse arguments from this file on startup and will behave as if +the arguments in this file were prepended to any explicit arguments given to +ripgrep on the command line. Note though that the \fBrg\fP command you run must +still be valid. That is, it must always contain at least one pattern at the +command line, even if the configuration file uses the \fB-e/--regexp\fP flag. +.sp +For example, if your ripgreprc file contained a single line: +.sp +.EX + \-\-smart\-case +.EE +.sp +then the following command +.sp +.EX + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo +.EE +.sp +would behave identically to the following command: +.sp +.EX + rg \-\-smart-case foo +.EE +.sp +Another example is adding types, like so: +.sp +.EX + \-\-type-add + web:*.{html,css,js}* +.EE +.sp +The above would behave identically to the following command: +.sp +.EX + rg \-\-type\-add 'web:*.{html,css,js}*' foo +.EE +.sp +The same applies to using globs. This: +.sp +.EX + \-\-glob=!.git +.EE +.sp +or this: +.sp +.EX + \-\-glob + !.git +.EE +.sp +would behave identically to the following command: +.sp +.EX + rg \-\-glob '!.git' foo +.EE +.sp +The bottom line is that every shell argument needs to be on its own line. So +for example, a config file containing +.sp +.EX + \-j 4 +.EE +is probably not doing what you intend. Instead, you want +.sp +.EX + \-j + 4 +.EE +.sp +ripgrep also provides a flag, \fB--no-config\fP, that when present will +suppress any and all support for configuration. This includes any future +support for auto-loading configuration files from pre-determined paths. +.sp +Conflicts between configuration files and explicit arguments are handled +exactly like conflicts in the same command line invocation. That is, +this command: +.sp +.EX + RIPGREP_CONFIG_PATH=wherever/.ripgreprc rg foo \-\-case\-sensitive +.EE +.sp +is exactly equivalent to +.sp +.EX + rg \-\-smart\-case foo \-\-case\-sensitive +.EE +.sp +in which case, the \fB--case-sensitive\fP flag would override the +\fB--smart-case\fP flag. +. +. +.SH SHELL COMPLETION +Shell completion files are included in the release tarball for Bash, Fish, Zsh +and PowerShell. +.sp +For \fBbash\fP, move \fBrg.bash\fP to \fB$XDG_CONFIG_HOME/bash_completion\fP or +\fB/etc/bash_completion.d/\fP. +.sp +For \fBfish\fP, move \fBrg.fish\fP to \fB$HOME/.config/fish/completions\fP. +.sp +For \fBzsh\fP, move \fB_rg\fP to one of your \fB$fpath\fP directories. +. +. +.SH CAVEATS +ripgrep may abort unexpectedly when using default settings if it searches a +file that is simultaneously truncated. This behavior can be avoided by passing +the \fB--no-mmap\fP flag which will forcefully disable the use of memory maps +in all cases. +.sp +ripgrep may use a large amount of memory depending on a few factors. Firstly, +if ripgrep uses parallelism for search (the default), then the entire +output for each individual file is buffered into memory in order to prevent +interleaving matches in the output. To avoid this, you can disable parallelism +with the \fB-j1\fP flag. Secondly, ripgrep always needs to have at least a +single line in memory in order to execute a search. A file with a very long +line can thus cause ripgrep to use a lot of memory. Generally, this only occurs +when searching binary data with the \fB-a/--text\fP flag enabled. (When the +\fB-a/--test\fP flag isn't enabled, ripgrep will replace all NUL bytes with +line terminators, which typically prevents exorbitant memory usage.) Thirdly, +when ripgrep searches a large file using a memory map, the process will likely +report its resident memory usage as the size of the file. However, this does +not mean ripgrep actually needed to use that much heap memory; the operating +system will generally handle this for you. +. +. +.SH VERSION +!!VERSION!! +. +. +.SH HOMEPAGE +\fIhttps://github.com/BurntSushi/ripgrep\fP +.sp +Please report bugs and feature requests to the issue tracker. Please do your +best to provide a reproducible test case for bugs. This should include the +corpus being searched, the \fBrg\fP command, the actual output and the expected +output. Please also include the output of running the same \fBrg\fP command but +with the \fB--debug\fP flag. +.sp +If you have questions that don't obviously fall into the "bug" or "feature +request" category, then they are welcome in the Discussions section of the +issue tracker: \fIhttps://github.com/BurntSushi/ripgrep/discussions\fP. +. +. +.SH AUTHORS +Andrew Gallant <\fIjamslam@gmail.com\fP> diff --git a/crates/core/flags/doc/template.short.help b/crates/core/flags/doc/template.short.help new file mode 100644 index 0000000000..871590b786 --- /dev/null +++ b/crates/core/flags/doc/template.short.help @@ -0,0 +1,27 @@ +ripgrep !!VERSION!! +Andrew Gallant + +ripgrep (rg) recursively searches the current directory for a regex pattern. +By default, ripgrep will respect gitignore rules and automatically skip hidden +files/directories and binary files. + +Use -h for short descriptions and --help for more details. + +Project home page: https://github.com/BurntSushi/ripgrep + +USAGE: + rg [OPTIONS] PATTERN [PATH ...] + rg [OPTIONS] -e PATTERN ... [PATH ...] + rg [OPTIONS] -f PATTERNFILE ... [PATH ...] + rg [OPTIONS] --files [PATH ...] + rg [OPTIONS] --type-list + command | rg [OPTIONS] PATTERN + rg [OPTIONS] --help + rg [OPTIONS] --version + +POSITIONAL ARGUMENTS: + A regular expression used for searching. + ... A file or directory to search. + +OPTIONS: +!!options!! diff --git a/crates/core/flags/mod.rs b/crates/core/flags/mod.rs new file mode 100644 index 0000000000..369d013c1c --- /dev/null +++ b/crates/core/flags/mod.rs @@ -0,0 +1,77 @@ +use std::ffi::OsString; + +pub(crate) use crate::flags::{ + args::{ + Args, BinaryMode, BufferMode, CaseMode, ColorChoice, ContextMode, + ContextSeparator, CountMode, EncodingMode, EngineChoice, + FieldContextSeparator, FieldMatchSeparator, FileMatchMode, + LoggingMode, MmapMode, PatternSource, SortMode, SortModeKind, + TypeChoice, + }, + complete::{ + bash::generate as generate_complete_bash, + fish::generate as generate_complete_fish, + powershell::generate as generate_complete_powershell, + }, + doc::{ + help::{ + generate_long as generate_help_long, + generate_short as generate_help_short, + }, + man::generate as generate_man_page, + }, + parse::parse, +}; + +mod args; +mod complete; +mod defs; +mod doc; +mod parse; + +trait Flag: std::fmt::Debug + 'static { + fn is_switch(&self) -> bool; + fn name_short(&self) -> Option { + None + } + fn name_long(&self) -> &'static str; + fn name_negated(&self) -> Option<&'static str> { + None + } + fn doc_variable(&self) -> Option<&'static str> { + None + } + fn doc_category(&self) -> &'static str; + fn doc_short(&self) -> &'static str; + fn doc_long(&self) -> &'static str; + fn doc_choices(&self) -> &'static [&'static str] { + &[] + } + + fn update(&self, value: FlagValue, args: &mut Args) -> anyhow::Result<()>; +} + +enum FlagValue { + Switch(bool), + Value(OsString), +} + +impl FlagValue { + fn unwrap_switch(self) -> bool { + match self { + FlagValue::Switch(yes) => yes, + FlagValue::Value(_) => { + unreachable!("got flag value but expected switch") + } + } + } + + fn unwrap_value(self) -> OsString { + match self { + FlagValue::Switch(yes) => { + unreachable!("got switch but expected flag value") + } + FlagValue::Value(v) => v, + } + } +} diff --git a/crates/core/flags/parse.rs b/crates/core/flags/parse.rs new file mode 100644 index 0000000000..675eeca12b --- /dev/null +++ b/crates/core/flags/parse.rs @@ -0,0 +1,195 @@ +use std::ffi::OsString; + +use { + aho_corasick::{AhoCorasick, Anchored, Input, Match, PatternID}, + anyhow::Context, +}; + +use crate::flags::{defs::FLAGS, Args, Flag, FlagValue}; + +pub(crate) fn parse( + rawargs: impl IntoIterator>, +) -> anyhow::Result { + let mut args = Args::default(); + Parser::new().parse(rawargs, &mut args)?; + Ok(args) +} + +/// Return the metadata for the flag of the given name. +pub(super) fn lookup(name: &str) -> Option<&'static dyn Flag> { + // N.B. Creating a new parser might look expensive, but it only builds + // the lookup trie exactly once. That is, we get a `&'static Parser` from + // `Parser::new()`. + match Parser::new().find_long(name) { + FlagLookup::Match(FlagMatch { flag, .. }) => Some(flag), + _ => None, + } +} + +#[derive(Debug)] +struct Parser { + short: Vec>, + long: AhoCorasick, + negated: AhoCorasick, +} + +impl Parser { + fn new() -> &'static Parser { + use std::sync::OnceLock; + + // Since a parser's state is immutable and completely determined by + // FLAGS, and since FLAGS is a constant, we can initialize it exactly + // once. + static P: OnceLock = OnceLock::new(); + P.get_or_init(|| { + // let start = std::time::Instant::now(); + let mut short = vec![None; 128]; + for (i, flag) in FLAGS.iter().enumerate() { + let Some(name) = flag.name_short() else { continue }; + let pid = PatternID::new(i).expect("small number of flags"); + short[usize::from(name)] = Some(pid); + } + let long = AhoCorasick::builder() + .kind(Some(aho_corasick::AhoCorasickKind::DFA)) + .match_kind(aho_corasick::MatchKind::LeftmostLongest) + .start_kind(aho_corasick::StartKind::Anchored) + .prefilter(false) + .build(FLAGS.iter().map(|f| f.name_long())) + .expect("automaton of flag names should never fail"); + let negated = AhoCorasick::builder() + .kind(Some(aho_corasick::AhoCorasickKind::DFA)) + .match_kind(aho_corasick::MatchKind::LeftmostLongest) + .start_kind(aho_corasick::StartKind::Anchored) + .prefilter(false) + // Not all flags have negations. If one isn't present, we just + // use the empty string. The empty string will match in the + // case of an unrecognized flag, but since no valid flags are + // empty, we can treat such a case as a non-match. + .build(FLAGS.iter().map(|f| f.name_negated().unwrap_or(""))) + .expect("automaton of flag names should never fail"); + // let dur = std::time::Instant::now().duration_since(start); + // eprintln!("PARSER INIT TIME: {:?}", dur); + Parser { short, long, negated } + }) + } + + fn parse(&self, rawargs: I, args: &mut Args) -> anyhow::Result<()> + where + I: IntoIterator, + O: Into, + { + let mut p = lexopt::Parser::from_args(rawargs); + while let Some(arg) = p.next().context("invalid CLI arguments")? { + let lookup = match arg { + lexopt::Arg::Value(value) => { + args.positional.push(value); + continue; + } + lexopt::Arg::Short(ch) => self.find_short(ch), + lexopt::Arg::Long(name) => self.find_long(name), + }; + let mat = match lookup { + FlagLookup::Match(mat) => mat, + FlagLookup::UnrecognizedShort(name) => { + anyhow::bail!("unrecognized flag -{name}") + } + FlagLookup::UnrecognizedLong(name) => { + anyhow::bail!("unrecognized flag --{name}") + } + }; + let value = if mat.negated { + // Negated flags are always switches, even if the non-negated + // flag is not. For example, --context-separator accepts a + // value, but --no-context-separator does not. + FlagValue::Switch(false) + } else if mat.flag.is_switch() { + FlagValue::Switch(true) + } else { + FlagValue::Value(p.value().with_context(|| { + format!("missing value for flag {mat}") + })?) + }; + mat.flag + .update(value, args) + .with_context(|| format!("error parsing flag {mat}"))?; + } + Ok(()) + } + + fn find_short(&self, ch: char) -> FlagLookup { + if !ch.is_ascii() { + return FlagLookup::UnrecognizedShort(ch); + } + let byte = u8::try_from(ch).unwrap(); + let Some(index) = self.short[usize::from(byte)] else { + return FlagLookup::UnrecognizedShort(ch); + }; + let flag = FLAGS[index]; + FlagLookup::Match(FlagMatch { flag, name: Err(ch), negated: false }) + } + + fn find_long(&self, name: &str) -> FlagLookup { + if let Some(mat) = find_full_non_empty(&self.long, name) { + let flag = FLAGS[mat.pattern()]; + FlagLookup::Match(FlagMatch { + flag, + name: Ok(flag.name_long()), + negated: false, + }) + } else if let Some(mat) = find_full_non_empty(&self.negated, name) { + let flag = FLAGS[mat.pattern()]; + FlagLookup::Match(FlagMatch { + flag, + name: Ok(flag.name_negated().unwrap()), + negated: true, + }) + } else { + FlagLookup::UnrecognizedLong(name.to_string()) + } + } +} + +#[derive(Debug)] +enum FlagLookup { + Match(FlagMatch), + UnrecognizedShort(char), + UnrecognizedLong(String), +} + +#[derive(Debug)] +struct FlagMatch { + flag: &'static dyn Flag, + name: Result<&'static str, char>, + negated: bool, +} + +impl std::fmt::Display for FlagMatch { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self.name { + Ok(long) => write!(f, "--{long}"), + Err(short) => write!(f, "-{short}"), + } + } +} + +/// Look for a match of `name` in the given Aho-Corasick automaton. +/// +/// This only returns a match if the one found has a length equivalent to the +/// length of the name given. +fn find_full_non_empty(ac: &AhoCorasick, name: &str) -> Option { + let input = Input::new(name).anchored(Anchored::Yes); + let m = ac.find(input)?; + // The match must match the name completely. Consider, for example, if the + // name given is `colors`. If the `colors` flag didn't appear in the FLAGS + // list for some reason, then `colors` would match the `color` flag. + // + // Additionally, if the given name represents a flag that truly does not + // exist, then it will still match one of the empty string patterns in the + // Aho-Corasick automaton. (If a flag doesn't have a negation, we add an + // empty string to the automaton so that the pattern IDs all line up.) + // Thus, matching the empty string is equivalent to a non-match. + if m.range().len() != name.len() { + return None; + } + Some(m) +} diff --git a/crates/core/main.rs b/crates/core/main.rs index 7cc59dd977..92f999a519 100644 --- a/crates/core/main.rs +++ b/crates/core/main.rs @@ -1,3 +1,5 @@ +#![allow(warnings)] + use std::{ io::{self, Write}, time::Instant, @@ -13,6 +15,7 @@ mod messages; mod app; mod args; mod config; +mod flags; mod logger; mod search; mod subject; diff --git a/crates/searcher/src/searcher/mod.rs b/crates/searcher/src/searcher/mod.rs index abbc0209ea..ff1bea5931 100644 --- a/crates/searcher/src/searcher/mod.rs +++ b/crates/searcher/src/searcher/mod.rs @@ -124,7 +124,7 @@ impl BinaryDetection { /// source data from an encoding to UTF-8 before searching. /// /// An `Encoding` will always be cheap to clone. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct Encoding(&'static encoding_rs::Encoding); impl Encoding {