From 562330d0e2828ba276b76a753d524b0fba7311f6 Mon Sep 17 00:00:00 2001
From: Alex Crichton <alex@alexcrichton.com>
Date: Thu, 31 Aug 2023 12:58:06 -0700
Subject: [PATCH] Require Wasmtime options come before wasm modules

This commit implements a new behavior for the CLI of the `wasmtime`
executable which will require that options for Wasmtime itself come
before the wasm module being run. Currently they're allowed to come
afterwards, but instead all arguments and flags coming after a module
will be interpreted as arguments for the module itself.

This feature has a bit of a storied history at this point, and the
breadcrumbs are:

* Originally landed in #6737
* Reverted for 12.0.0 in #6830
* Reverted for 13.0.0 in #6944

This PR is intended to be landed as a sibling of #6925, another
independent overhaul of Wasmtime's own options on the CLI, for the
Wasmtime 14.0.0 release. More information about the motivation for this
change, as well as consequences of the fallout, can be found on #6737.
---
 src/bin/wasmtime.rs    |  46 +++++++++++--------
 src/commands/run.rs    | 102 ++++++++++++++++++-----------------------
 tests/all/cli_tests.rs |  28 ++++-------
 3 files changed, 82 insertions(+), 94 deletions(-)

diff --git a/src/bin/wasmtime.rs b/src/bin/wasmtime.rs
index 6d425625dfa4..d32a0b63f373 100644
--- a/src/bin/wasmtime.rs
+++ b/src/bin/wasmtime.rs
@@ -4,7 +4,7 @@
 //! See `wasmtime --help` for usage.
 
 use anyhow::Result;
-use clap::{error::ErrorKind, Parser};
+use clap::Parser;
 use wasmtime_cli::commands::{
     CompileCommand, ConfigCommand, ExploreCommand, RunCommand, SettingsCommand, WastCommand,
 };
@@ -27,10 +27,24 @@ use wasmtime_cli::commands::{
                   \n\
                   Invoking a specific function (e.g. `add`) in a WebAssembly module:\n\
                   \n  \
-                  wasmtime example.wasm --invoke add 1 2\n"
+                  wasmtime example.wasm --invoke add 1 2\n",
+
+    // This option enables the pattern below where we ask clap to parse twice
+    // sorta: once where it's trying to find a subcommand and once assuming
+    // a subcommand doesn't get passed. Clap should then, apparently,
+    // fill in the `subcommand` if found and otherwise fill in the
+    // `RunCommand`.
+    args_conflicts_with_subcommands = true
 )]
-enum Wasmtime {
-    // !!! IMPORTANT: if subcommands are added or removed, update `parse_module` in `src/commands/run.rs`. !!!
+struct Wasmtime {
+    #[clap(subcommand)]
+    subcommand: Option<Subcommand>,
+    #[clap(flatten)]
+    run: RunCommand,
+}
+
+#[derive(Parser)]
+enum Subcommand {
     /// Controls Wasmtime configuration settings
     Config(ConfigCommand),
     /// Compiles a WebAssembly module.
@@ -48,26 +62,20 @@ enum Wasmtime {
 impl Wasmtime {
     /// Executes the command.
     pub fn execute(self) -> Result<()> {
-        match self {
-            Self::Config(c) => c.execute(),
-            Self::Compile(c) => c.execute(),
-            Self::Explore(c) => c.execute(),
-            Self::Run(c) => c.execute(),
-            Self::Settings(c) => c.execute(),
-            Self::Wast(c) => c.execute(),
+        let subcommand = self.subcommand.unwrap_or(Subcommand::Run(self.run));
+        match subcommand {
+            Subcommand::Config(c) => c.execute(),
+            Subcommand::Compile(c) => c.execute(),
+            Subcommand::Explore(c) => c.execute(),
+            Subcommand::Run(c) => c.execute(),
+            Subcommand::Settings(c) => c.execute(),
+            Subcommand::Wast(c) => c.execute(),
         }
     }
 }
 
 fn main() -> Result<()> {
-    Wasmtime::try_parse()
-        .unwrap_or_else(|e| match e.kind() {
-            ErrorKind::InvalidSubcommand | ErrorKind::UnknownArgument => {
-                Wasmtime::Run(RunCommand::parse())
-            }
-            _ => e.exit(),
-        })
-        .execute()
+    Wasmtime::parse().execute()
 }
 
 #[test]
diff --git a/src/commands/run.rs b/src/commands/run.rs
index 398b9bf686dc..3a3f080c9dca 100644
--- a/src/commands/run.rs
+++ b/src/commands/run.rs
@@ -6,11 +6,8 @@
 )]
 
 use anyhow::{anyhow, bail, Context as _, Error, Result};
-use clap::builder::{OsStringValueParser, TypedValueParser};
 use clap::Parser;
 use once_cell::sync::Lazy;
-use std::ffi::OsStr;
-use std::ffi::OsString;
 use std::fs::File;
 use std::io::Write;
 use std::path::{Path, PathBuf};
@@ -38,18 +35,6 @@ use wasmtime_wasi_threads::WasiThreadsCtx;
 // #[cfg(feature = "wasi-http")]
 // use wasmtime_wasi_http::WasiHttpCtx;
 
-fn parse_module(s: OsString) -> anyhow::Result<PathBuf> {
-    // Do not accept wasmtime subcommand names as the module name
-    match s.to_str() {
-        Some("help") | Some("config") | Some("run") | Some("wast") | Some("compile") => {
-            bail!("module name cannot be the same as a subcommand")
-        }
-        #[cfg(unix)]
-        Some("-") => Ok(PathBuf::from("/dev/stdin")),
-        _ => Ok(s.into()),
-    }
-}
-
 fn parse_env_var(s: &str) -> Result<(String, Option<String>)> {
     let mut parts = s.splitn(2, '=');
     Ok((
@@ -118,7 +103,7 @@ static AFTER_HELP: Lazy<String> = Lazy::new(|| crate::FLAG_EXPLANATIONS.to_strin
 
 /// Runs a WebAssembly module
 #[derive(Parser)]
-#[structopt(name = "run", trailing_var_arg = true, after_help = AFTER_HELP.as_str())]
+#[structopt(name = "run", after_help = AFTER_HELP.as_str())]
 pub struct RunCommand {
     #[clap(flatten)]
     common: CommonOptions,
@@ -192,14 +177,6 @@ pub struct RunCommand {
     #[clap(long = "wasi-nn-graph", value_name = "FORMAT::HOST_DIR", value_parser = parse_graphs)]
     graphs: Vec<(String, String)>,
 
-    /// The path of the WebAssembly module to run
-    #[clap(
-        required = true,
-        value_name = "MODULE",
-        value_parser = OsStringValueParser::new().try_map(parse_module),
-    )]
-    module: PathBuf,
-
     /// Load the given WebAssembly module before the main module
     #[clap(
         long = "preload",
@@ -243,11 +220,6 @@ pub struct RunCommand {
     #[clap(long = "coredump-on-trap", value_name = "PATH")]
     coredump_on_trap: Option<String>,
 
-    // NOTE: this must come last for trailing varargs
-    /// The arguments to pass to the module
-    #[clap(value_name = "ARGS")]
-    module_args: Vec<String>,
-
     /// Maximum size, in bytes, that a linear memory is allowed to reach.
     ///
     /// Growth beyond this limit will cause `memory.grow` instructions in
@@ -286,6 +258,14 @@ pub struct RunCommand {
     #[clap(long)]
     wmemcheck: bool,
 
+    /// The WebAssembly module to run and arguments to pass to it.
+    ///
+    /// Arguments passed to the wasm module will be configured as WASI CLI
+    /// arguments unless the `--invoke` CLI argument is passed in which case
+    /// arguments will be interpreted as arguments to the function specified.
+    #[clap(value_name = "WASM", trailing_var_arg = true, required = true)]
+    module_and_args: Vec<PathBuf>,
+
     /// Indicates that the implementation of WASI preview1 should be backed by
     /// the preview2 implementation for components.
     ///
@@ -357,7 +337,7 @@ impl RunCommand {
         let engine = Engine::new(&config)?;
 
         // Read the wasm module binary either as `*.wat` or a raw binary.
-        let main = self.load_module(&engine, &self.module)?;
+        let main = self.load_module(&engine, &self.module_and_args[0])?;
 
         // Validate coredump-on-trap argument
         if let Some(coredump_path) = self.coredump_on_trap.as_ref() {
@@ -449,8 +429,12 @@ impl RunCommand {
         // Load the main wasm module.
         match self
             .load_main_module(&mut store, &mut linker, &main, modules)
-            .with_context(|| format!("failed to run main module `{}`", self.module.display()))
-        {
+            .with_context(|| {
+                format!(
+                    "failed to run main module `{}`",
+                    self.module_and_args[0].display()
+                )
+            }) {
             Ok(()) => (),
             Err(e) => {
                 // Exit the process if Wasmtime understands the error;
@@ -499,27 +483,25 @@ impl RunCommand {
         Ok(listeners)
     }
 
-    fn compute_argv(&self) -> Vec<String> {
+    fn compute_argv(&self) -> Result<Vec<String>> {
         let mut result = Vec::new();
 
-        // Add argv[0], which is the program name. Only include the base name of the
-        // main wasm module, to avoid leaking path information.
-        result.push(
-            self.module
-                .components()
-                .next_back()
-                .map(|c| c.as_os_str())
-                .and_then(OsStr::to_str)
-                .unwrap_or("")
-                .to_owned(),
-        );
-
-        // Add the remaining arguments.
-        for arg in self.module_args.iter() {
-            result.push(arg.clone());
+        for (i, arg) in self.module_and_args.iter().enumerate() {
+            // For argv[0], which is the program name. Only include the base
+            // name of the main wasm module, to avoid leaking path information.
+            let arg = if i == 0 {
+                arg.components().next_back().unwrap().as_os_str()
+            } else {
+                arg.as_ref()
+            };
+            result.push(
+                arg.to_str()
+                    .ok_or_else(|| anyhow!("failed to convert {arg:?} to utf-8"))?
+                    .to_string(),
+            );
         }
 
-        result
+        Ok(result)
     }
 
     fn setup_epoch_handler(
@@ -528,7 +510,7 @@ impl RunCommand {
         modules: Vec<(String, Module)>,
     ) -> Box<dyn FnOnce(&mut Store<Host>)> {
         if let Some(Profile::Guest { path, interval }) = &self.profile {
-            let module_name = self.module.to_str().unwrap_or("<main module>");
+            let module_name = self.module_and_args[0].to_str().unwrap_or("<main module>");
             let interval = *interval;
             store.data_mut().guest_profiler =
                 Some(Arc::new(GuestProfiler::new(module_name, interval, modules)));
@@ -634,9 +616,10 @@ impl RunCommand {
             CliLinker::Core(linker) => {
                 // Use "" as a default module name.
                 let module = module.unwrap_core();
-                linker
-                    .module(&mut *store, "", &module)
-                    .context(format!("failed to instantiate {:?}", self.module))?;
+                linker.module(&mut *store, "", &module).context(format!(
+                    "failed to instantiate {:?}",
+                    self.module_and_args[0]
+                ))?;
 
                 // If a function to invoke was given, invoke it.
                 let func = if let Some(name) = &self.invoke {
@@ -695,7 +678,7 @@ impl RunCommand {
                  is experimental and may break in the future"
             );
         }
-        let mut args = self.module_args.iter();
+        let mut args = self.module_and_args.iter().skip(1);
         let mut values = Vec::new();
         for ty in ty.params() {
             let val = match args.next() {
@@ -708,6 +691,9 @@ impl RunCommand {
                     }
                 }
             };
+            let val = val
+                .to_str()
+                .ok_or_else(|| anyhow!("argument is not valid utf-8: {val:?}"))?;
             values.push(match ty {
                 // TODO: integer parsing here should handle hexadecimal notation
                 // like `0x0...`, but the Rust standard library currently only
@@ -765,7 +751,9 @@ impl RunCommand {
         if !err.is::<wasmtime::Trap>() {
             return err;
         }
-        let source_name = self.module.to_str().unwrap_or_else(|| "unknown");
+        let source_name = self.module_and_args[0]
+            .to_str()
+            .unwrap_or_else(|| "unknown");
 
         if let Err(coredump_err) = generate_coredump(&err, &source_name, coredump_path) {
             eprintln!("warning: coredump failed to generate: {}", coredump_err);
@@ -1002,7 +990,7 @@ impl RunCommand {
 
     fn set_preview1_ctx(&self, store: &mut Store<Host>) -> Result<()> {
         let mut builder = WasiCtxBuilder::new();
-        builder.inherit_stdio().args(&self.compute_argv())?;
+        builder.inherit_stdio().args(&self.compute_argv()?)?;
 
         for (key, value) in self.vars.iter() {
             let value = match value {
@@ -1034,7 +1022,7 @@ impl RunCommand {
 
     fn set_preview2_ctx(&self, store: &mut Store<Host>) -> Result<()> {
         let mut builder = preview2::WasiCtxBuilder::new();
-        builder.inherit_stdio().args(&self.compute_argv());
+        builder.inherit_stdio().args(&self.compute_argv()?);
 
         for (key, value) in self.vars.iter() {
             let value = match value {
diff --git a/tests/all/cli_tests.rs b/tests/all/cli_tests.rs
index f144dd933e66..29561129ce61 100644
--- a/tests/all/cli_tests.rs
+++ b/tests/all/cli_tests.rs
@@ -604,7 +604,6 @@ fn wasm_flags() -> Result<()> {
     let stdout = run_wasmtime(&[
         "run",
         "tests/all/cli_tests/print-arguments.wat",
-        "--",
         "--argument",
         "-for",
         "the",
@@ -620,7 +619,7 @@ fn wasm_flags() -> Result<()> {
             command\n\
         "
     );
-    let stdout = run_wasmtime(&["run", "tests/all/cli_tests/print-arguments.wat", "--", "-"])?;
+    let stdout = run_wasmtime(&["run", "tests/all/cli_tests/print-arguments.wat", "-"])?;
     assert_eq!(
         stdout,
         "\
@@ -628,7 +627,7 @@ fn wasm_flags() -> Result<()> {
             -\n\
         "
     );
-    let stdout = run_wasmtime(&["run", "tests/all/cli_tests/print-arguments.wat", "--", "--"])?;
+    let stdout = run_wasmtime(&["run", "tests/all/cli_tests/print-arguments.wat", "--"])?;
     assert_eq!(
         stdout,
         "\
@@ -649,6 +648,7 @@ fn wasm_flags() -> Result<()> {
         "\
             print-arguments.wat\n\
             --\n\
+            --\n\
             -a\n\
             b\n\
         "
@@ -658,37 +658,30 @@ fn wasm_flags() -> Result<()> {
 
 #[test]
 fn name_same_as_builtin_command() -> Result<()> {
-    // A bare subcommand shouldn't run successfully.
-    //
-    // This is ambiguous between a missing module argument and a module named
-    // `run` with no other options.
+    // a bare subcommand shouldn't run successfully
     let output = get_wasmtime_command()?
         .current_dir("tests/all/cli_tests")
         .arg("run")
         .output()?;
     assert!(!output.status.success());
 
-    // Currently even `--` isn't enough to disambiguate, so this still is an
-    // error.
-    //
-    // NB: this will change in Wasmtime 14 when #6737 is relanded.
+    // a `--` prefix should let everything else get interpreted as a wasm
+    // module and arguments, even if the module has a name like `run`
     let output = get_wasmtime_command()?
         .current_dir("tests/all/cli_tests")
         .arg("--")
         .arg("run")
         .output()?;
-    assert!(!output.status.success(), "expected failure got {output:#?}");
+    assert!(output.status.success(), "expected success got {output:#?}");
 
-    // Passing `--foo` options before the module is also not enough to
-    // disambiguate for now.
-    //
-    // NB: this will change in Wasmtime 14 when #6737 is relanded.
+    // Passing options before the subcommand should work and doesn't require
+    // `--` to disambiguate
     let output = get_wasmtime_command()?
         .current_dir("tests/all/cli_tests")
         .arg("--disable-cache")
         .arg("run")
         .output()?;
-    assert!(!output.status.success(), "expected failure got {output:#?}");
+    assert!(output.status.success(), "expected success got {output:#?}");
     Ok(())
 }
 
@@ -708,7 +701,6 @@ fn wasm_flags_without_subcommand() -> Result<()> {
     let output = get_wasmtime_command()?
         .current_dir("tests/all/cli_tests/")
         .arg("print-arguments.wat")
-        .arg("--")
         .arg("-foo")
         .arg("bar")
         .output()?;