From 8dac8968915957d68e46f3307765816574780fc3 Mon Sep 17 00:00:00 2001 From: Christopher Harrison Date: Mon, 14 Aug 2023 09:44:33 +0100 Subject: [PATCH] CLI Changes Sub-PR: New structure and parsing logic (#591) * Add env feature to clap and cargo update * Dead Code: WIP CLI changes * Dead Code: CLI feature parity Although see clap-rs/clap#5020 * Note about not (yet) using infer_subcommands See clap-rs/clap#5021 * Use clap/wrap_help Although see clap-rs/clap#5022 * WIP: Strip out previous CLI argument parser setup [skip ci] * WIP: Normalise arguments for caller [skip ci] * Don't check for file-ness in the CLI argument parser * File and directory canonicalisation for the argument parser * Expose CLI types so they can be used downstream --- Cargo.lock | 83 +++++++------ topiary-cli/Cargo.toml | 3 +- topiary-cli/src/cli.rs | 198 +++++++++++++++++++++++++++++++ topiary-cli/src/configuration.rs | 14 +++ topiary-cli/src/main.rs | 85 +------------ topiary-cli/src/visualisation.rs | 23 ++++ topiary-cli/src/visualise.rs | 24 ---- 7 files changed, 288 insertions(+), 142 deletions(-) create mode 100644 topiary-cli/src/cli.rs create mode 100644 topiary-cli/src/visualisation.rs delete mode 100644 topiary-cli/src/visualise.rs diff --git a/Cargo.lock b/Cargo.lock index 52408e58..fc53f59d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -92,9 +92,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d6b683edf8d1119fe420a94f8a7e389239666aa72e65495d91c00462510151" +checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6" dependencies = [ "anstyle", "bstr", @@ -124,7 +124,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] @@ -230,9 +230,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.3.11" +version = "4.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1640e5cc7fb47dbb8338fd471b105e7ed6c3cb2aeb00c2e067127ffd3764a05d" +checksum = "74bb1b4028935821b2d6b439bba2e970bdcf740832732437ead910c632e30d7d" dependencies = [ "clap_builder", "clap_derive", @@ -241,26 +241,27 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.3.11" +version = "4.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c59138d527eeaf9b53f35a77fcc1fad9d883116070c63d5de1c7dc7b00c72b" +checksum = "5ae467cbb0111869b765e13882a1dbbd6cb52f58203d8b80c44f667d4dd19843" dependencies = [ "anstream", "anstyle", "clap_lex", "strsim", + "terminal_size", ] [[package]] name = "clap_derive" -version = "4.3.2" +version = "4.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" +checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] @@ -505,7 +506,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] @@ -646,9 +647,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "js-sys" @@ -852,18 +853,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.64" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78803b62cbf1f46fde80d7c0e803111524b9877184cfe7c3033659490ac7a7da" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.29" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" +checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" dependencies = [ "proc-macro2", ] @@ -983,9 +984,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" [[package]] name = "same-file" @@ -998,9 +999,9 @@ dependencies = [ [[package]] name = "scopeguard" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" @@ -1028,14 +1029,14 @@ checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] name = "serde_json" -version = "1.0.102" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5062a995d481b2308b6064e9af76011f2921c35f97b0468811ed9f6cd91dfed" +checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" dependencies = [ "itoa", "ryu", @@ -1079,9 +1080,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.25" +version = "2.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e3fc8c0c74267e2df136e5e5fb656a464158aa57624053375eb9c8c6e25ae2" +checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" dependencies = [ "proc-macro2", "quote", @@ -1111,6 +1112,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" +dependencies = [ + "rustix 0.37.23", + "windows-sys", +] + [[package]] name = "termtree" version = "0.4.1" @@ -1145,7 +1156,7 @@ checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] @@ -1179,7 +1190,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.25", + "syn 2.0.26", ] [[package]] @@ -1229,9 +1240,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.19.12" +version = "0.19.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c500344a19072298cd05a7224b3c0c629348b78692bf48466c5238656e315a78" +checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" dependencies = [ "indexmap", "serde", @@ -1353,9 +1364,9 @@ dependencies = [ [[package]] name = "tree-sitter-ocaml" -version = "0.20.3" +version = "0.20.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0615dd8d80e7f5fe49875aeae795726ca5dc37267f80f82a60dfa375657906b" +checksum = "fd1163abc658cf8ae0ecffbd8f4bd3ee00a2b98729de74f3b08f0e24f3ac208a" dependencies = [ "cc", "tree-sitter", @@ -1408,9 +1419,9 @@ checksum = "ccb97dac3243214f8d8507998906ca3e2e0b900bf9bf4870477f125b82e68f6e" [[package]] name = "unicode-ident" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unicode-width" @@ -1635,9 +1646,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.4.9" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529" +checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7" dependencies = [ "memchr", ] diff --git a/topiary-cli/Cargo.toml b/topiary-cli/Cargo.toml index aab330e1..fff96eb6 100644 --- a/topiary-cli/Cargo.toml +++ b/topiary-cli/Cargo.toml @@ -26,7 +26,8 @@ path = "src/main.rs" [dependencies] # For now we just load the tree-sitter language parsers statically. # Eventually we will want to dynamically load them, like Helix does. -clap = { workspace = true, features = ["derive"] } +# NOTE clap/wrap_help isn't perfect (see clap-rs/clap#5022) +clap = { workspace = true, features = ["derive", "env", "wrap_help"] } env_logger = { workspace = true } futures = { workspace = true } directories = { workspace = true } diff --git a/topiary-cli/src/cli.rs b/topiary-cli/src/cli.rs new file mode 100644 index 00000000..04ff9b0e --- /dev/null +++ b/topiary-cli/src/cli.rs @@ -0,0 +1,198 @@ +//! Command line interface argument parsing. + +use clap::{ArgGroup, Args, Parser, Subcommand}; +use std::path::PathBuf; +use topiary::SupportedLanguage; + +use crate::{ + configuration, + error::{CLIResult, TopiaryError}, + visualisation, +}; + +#[derive(Debug, Parser)] +// NOTE infer_subcommands would be useful, but our heavy use of aliases is problematic (see +// clap-rs/clap#5021) +#[command(about, author, long_about = None, version)] +pub struct Cli { + // Global options + #[command(flatten)] + pub global: GlobalArgs, + + // Subcommands + #[command(subcommand)] + pub command: Commands, +} + +// These are "true" global arguments that are relevant to all subcommands +// NOTE Global arguments must be optional, even when defaults are specified +#[derive(Args, Debug)] +pub struct GlobalArgs { + /// Configuration file + #[arg( + short = 'C', + long, + display_order = 100, + env = "TOPIARY_CONFIG_FILE", + global = true, + hide_env_values = true + )] + pub configuration: Option, + + /// Configuration collation mode + #[arg( + long, + default_value = "coalesce", + display_order = 101, + env = "TOPIARY_CONFIG_COLLATION", + global = true, + hide_env_values = true, + + // FIXME There appears to be a bug with clap: If this argument is specified via its + // environment variable, then the required argument (--configuration) *only* works if it is + // also specified via its environment variable. If you use the CLI argument, it complains + // that the argument doesn't exist. This behaviour only occurs with subcommands, but that + // is exactly our use case, here. (See clap-rs/clap#5020) + requires = "configuration" + )] + pub configuration_collation: Option, +} + +// These are "parser" global arguments; i.e., those that are relevant to all subcommands that will +// parse input. They will need to be added to all such subcommands, with #[command(flatten)]. +#[derive(Args, Debug)] +pub struct ParseArgs { + /// Consume as much as possible in the presence of parsing errors + #[arg(short, long)] + tolerate_parsing_errors: bool, +} + +#[derive(Debug, Subcommand)] +pub enum Commands { + /// Format inputs + // NOTE FILES... => Read input(s) from disk, format in place + // --language | --query => Read input from stdin, output to stdout + #[command( + alias = "format", + display_order = 1, + + // Require exactly one of --language, --query, or FILES... + group = ArgGroup::new("source") + .multiple(false) + .required(true) + .args(&["language", "query", "files"]) + )] + Fmt { + #[command(flatten)] + parse: ParseArgs, + + /// Do not check that formatting twice gives the same output + #[arg(short, long)] + skip_idempotence: bool, + + /// Topiary supported language (for formatting stdin) + #[arg(short, long)] + language: Option, + + /// Topiary query file (for formatting stdin) + #[arg(short, long)] + query: Option, + + /// Input files and directories (omit to read from stdin) + files: Vec, + }, + + /// Visualise the input's Tree-sitter parse tree + // NOTE FILE => Read input from disk, visualisation output to stdout + // --language | --query => Read input from stdin, visualisation output to stdout + #[command( + aliases = &["visualise", "visualize", "view"], + display_order = 2, + + // Require exactly one of --language, --query, or FILE + group = ArgGroup::new("source") + .multiple(false) + .required(true) + .args(&["language", "query", "file"]) + )] + Vis { + #[command(flatten)] + parse: ParseArgs, + + /// Visualisation format + #[arg(short, long, default_value = "dot")] + format: visualisation::Format, + + /// Topiary supported language (for formatting stdin) + #[arg(short, long)] + language: Option, + + /// Topiary query file (for formatting stdin) + #[arg(short, long)] + query: Option, + + /// Input file (omit to read from stdin) + file: Option, + }, + + /// Print the current configuration + #[command(alias = "config", display_order = 3)] + Cfg, +} + +/// Given a vector of paths, recursively expand those that identify as directories, in place +fn traverse_fs(files: &mut Vec) -> CLIResult<()> { + let mut expanded = vec![]; + + for file in &mut *files { + if file.is_dir() { + let mut subfiles = file.read_dir()?.flatten().map(|f| f.path()).collect(); + traverse_fs(&mut subfiles)?; + expanded.append(&mut subfiles); + } else { + expanded.push(file.to_path_buf()); + } + } + + *files = expanded; + Ok(()) +} + +/// Parse CLI arguments and normalise them for the caller +pub fn get_args() -> CLIResult { + let mut args = Cli::parse(); + + // NOTE We do not check that input files are actual files (with Path::is_file), because that + // would break in the case of, for example, named pipes; thus also adding a platform dimension + // to the check, which is simply not worth the complexity. We _could_ check by opening each + // file, but that's going to be done sooner-or-later by Topiary, so there's no need. + + match &mut args.command { + Commands::Fmt { files, .. } => { + // If we're given a list of FILES... then we assume them to all be on disk, even if "-" + // is passed as an argument (i.e., interpret this as a valid filename, rather than as + // stdin). We deduplicate this list to avoid formatting the same file multiple times + // and recursively expand directories until we're left with a list of unique + // (potential) files as input sources. + files.sort_unstable(); + files.dedup(); + traverse_fs(files)?; + } + + Commands::Vis { + file: Some(file), .. + } => { + // Make sure our FILE is not a directory + if file.is_dir() { + return Err(TopiaryError::Bin( + format!("Cannot visualise directory \"{}\"; please provide a single file from disk or stdin.", file.to_string_lossy()), + None, + )); + } + } + + _ => {} + } + + Ok(args) +} diff --git a/topiary-cli/src/configuration.rs b/topiary-cli/src/configuration.rs index 0dcc8781..f0288914 100644 --- a/topiary-cli/src/configuration.rs +++ b/topiary-cli/src/configuration.rs @@ -1,9 +1,23 @@ +use clap::ValueEnum; use directories::ProjectDirs; use std::{env::current_dir, path::PathBuf}; use topiary::{default_configuration_toml, Configuration}; use crate::error::{CLIResult, TopiaryError}; +/// Collation mode for configuration values +#[derive(Clone, Debug, ValueEnum)] +pub enum CollationMode { + /// When multiple sources of configuration are available, values are coalesced. That is, new + /// values are added to the final configuration, whereas existing values are overridden when + /// higher priority versions are specified. + Coalesce, + + /// When multiple sources of configuration are available, the highest priority source is taken. + /// Values from lower priority sources are discarded. + Override, +} + pub fn parse_configuration( config_override: Option, config_file: Option, diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index 9b329d44..12b325eb 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -1,7 +1,8 @@ +mod cli; mod configuration; mod error; mod output; -mod visualise; +mod visualisation; use std::{ eprintln, @@ -12,91 +13,13 @@ use std::{ process::ExitCode, }; -use clap::{ArgGroup, Parser}; use configuration::parse_configuration; use crate::{ error::{CLIError, CLIResult, TopiaryError}, output::OutputFile, - visualise::Visualisation, }; -use topiary::{formatter, Language, Operation, SupportedLanguage, TopiaryQuery}; - -#[derive(Parser, Debug)] -#[command(author, version, about, long_about = None)] -// Require at least one of --language or --input-files (n.b., language > input) -#[command(group(ArgGroup::new("rule").multiple(true).required(true).args(&["language", "input_files"]),))] -struct Args { - /// Which language to parse and format - #[arg(short, long, value_enum, display_order = 1)] - language: Option, - - /// Path to an input file or multiple input files. If omitted, or equal - /// to "-", read from standard input. If multiple files are provided, - /// `in_place` is assumed. - #[arg(short = 'f', long, num_args = 0.., display_order = 2, default_values_t = ["-".to_string()])] - input_files: Vec, - - /// Which query file to use - #[arg(short, long, display_order = 3)] - query: Option, - - /// Path to an output file. If omitted, or equal to "-", write to standard - /// output. - #[arg(short, long, display_order = 4)] - output_file: Option, - - /// Format the input files in place. - #[arg(short, long, requires = "input_files", display_order = 5)] - in_place: bool, - - /// Visualise the syntax tree, rather than format. - #[arg( - short, - long, - value_enum, - aliases = &["view", "visualize"], - value_name = "OUTPUT_FORMAT", - conflicts_with_all = &["in_place", "skip_idempotence"], - require_equals = true, - num_args = 0..=1, - default_missing_value = "json", - display_order = 6 - )] - visualise: Option, - - /// Do not check that formatting twice gives the same output - #[arg(short, long, display_order = 7)] - skip_idempotence: bool, - - /// Output the full configuration to stderr before continuing - #[arg(long, display_order = 8)] - output_configuration: bool, - - /// Format as much as possible even if some of the input causes parsing errors - #[arg(short, long, display_order = 9)] - tolerate_parsing_errors: bool, - - /// Override all configuration with the provided file - #[arg(long, env = "TOPIARY_CONFIGURATION_OVERRIDE", display_order = 10)] - configuration_override: Option, - - /// Add the specified configuration file with the highest prority - #[arg(short, long, env = "TOPIARY_CONFIGURATION_FILE", display_order = 11)] - configuration_file: Option, -} - -// /// Collects all the values needed for the eventual formatting. This helper -// /// struct just makes it easy to collect them all in a Vec. -// /// If `--in-place` was specified or if `--input-files` was -// /// given more than one file, the input and output will be the same file and -// /// the entire FormatStruct will be placed in a Vector. In all other cases the vector -// /// will still be created, but it will be a singleton vector. -// struct FormatStruct<'a> { -// input: &'a dyn Read, -// output: BufWriter, -// language: &'a Language, -// } +use topiary::{formatter, Language, Operation, TopiaryQuery}; #[tokio::main] async fn main() -> ExitCode { @@ -113,7 +36,7 @@ async fn run() -> CLIResult<()> { // Restructure Args to match our expected behaviour let args = { - let mut args = Args::parse(); + let mut args = cli::parse(); // Remove duplicates from the input_files, (among other things, avoids being able to pass "-" twice) args.input_files.sort_unstable(); diff --git a/topiary-cli/src/visualisation.rs b/topiary-cli/src/visualisation.rs new file mode 100644 index 00000000..5defe40f --- /dev/null +++ b/topiary-cli/src/visualisation.rs @@ -0,0 +1,23 @@ +use clap::ValueEnum; +use topiary::Visualisation; + +/// Visualisation output formats for Tree-sitter parse trees +// NOTE While redundant, we cannot implement clap::ValueEnum for topiary::Visualisation without +// breaking the orphan rules. So we have to maintain a local copy for the sake of the CLI. +#[derive(Clone, Debug, ValueEnum)] +pub enum Format { + /// GraphViz DOT serialisation + Dot, + + /// JSON serialisation + Json, +} + +impl From for Visualisation { + fn from(visualisation: Format) -> Self { + match visualisation { + Format::Dot => Self::GraphViz, + Format::Json => Self::Json, + } + } +} diff --git a/topiary-cli/src/visualise.rs b/topiary-cli/src/visualise.rs deleted file mode 100644 index f3686e68..00000000 --- a/topiary-cli/src/visualise.rs +++ /dev/null @@ -1,24 +0,0 @@ -// This is somewhat redundant, but we cannot implement clap::ValueEnum for topiary::Visualisation -// without breaking the orphan rules. So we have to maintain a local copy for the sake of the CLI. - -use clap::ValueEnum; - -#[derive(ValueEnum, Clone, Copy, Debug)] -pub enum Visualisation { - // JSON is first as it's the default and - // we want it displayed first in the help - Json, - - // All other output formats should be listed - // in alphabetical order - Dot, -} - -impl From for topiary::Visualisation { - fn from(visualisation: Visualisation) -> Self { - match visualisation { - Visualisation::Dot => Self::GraphViz, - Visualisation::Json => Self::Json, - } - } -}