diff --git a/src/build/arg/mod.rs b/src/build/arg/mod.rs index 690cccb4a879..4078a0331091 100644 --- a/src/build/arg/mod.rs +++ b/src/build/arg/mod.rs @@ -7,8 +7,6 @@ use std::cmp::{Ord, Ordering}; use std::env; use std::ffi::{OsStr, OsString}; use std::fmt::{self, Display, Formatter}; -#[cfg(not(any(target_os = "windows", target_arch = "wasm32")))] -use std::os::unix::ffi::OsStrExt; use std::rc::Rc; use std::str; @@ -20,8 +18,6 @@ use yaml_rust; // Internal use crate::build::UsageParser; use crate::util::Key; -#[cfg(any(target_os = "windows", target_arch = "wasm32"))] -use crate::util::OsStrExt3; use crate::INTERNAL_ERROR_MSG; type Validator = Rc Result<(), String>>; @@ -2265,7 +2261,7 @@ impl<'help> Arg<'help> { /// [`ArgMatches::is_present`]: ./struct.ArgMatches.html#method.is_present /// [`Arg::default_value_if`]: ./struct.Arg.html#method.default_value_if pub fn default_value(self, val: &'help str) -> Self { - self.default_value_os(OsStr::from_bytes(val.as_bytes())) + self.default_value_os(OsStr::new(val)) } /// Provides a default value in the exact same manner as [`Arg::default_value`] @@ -2382,8 +2378,8 @@ impl<'help> Arg<'help> { ) -> Self { self.default_value_if_os( arg_id, - val.map(str::as_bytes).map(OsStr::from_bytes), - OsStr::from_bytes(default.as_bytes()), + val.map(|v| OsStr::new(v)), + OsStr::new(default), ) } @@ -2496,13 +2492,13 @@ impl<'help> Arg<'help> { /// [`Arg::default_value`]: ./struct.Arg.html#method.default_value pub fn default_value_ifs( mut self, - ifs: &[(T, Option<&'help str>, &'help str)], + ifs: &'help [(T, std::option::Option<&'help str>, &'help str)], ) -> Self { for (arg, val, default) in ifs { self = self.default_value_if_os( arg, - val.map(str::as_bytes).map(OsStr::from_bytes), - OsStr::from_bytes(default.as_bytes()), + val.map(|v| OsStr::new(v)), + OsStr::new(default), ); } self diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 64072b32de38..22a09bc9bee2 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -23,9 +23,7 @@ use crate::parse::errors::Result as ClapResult; use crate::parse::features::suggestions; use crate::parse::Validator; use crate::parse::{ArgMatcher, SubCommand}; -use crate::util::{self, ChildGraph, Key, OsStrExt2, EMPTY_HASH}; -#[cfg(all(feature = "debug", any(target_os = "windows", target_arch = "wasm32")))] -use crate::util::OsStrExt3; +use crate::util::{self, ChildGraph, Key, OsStrOps, EMPTY_HASH}; use crate::INTERNAL_ERROR_MSG; use crate::INVALID_UTF8; @@ -385,6 +383,7 @@ where let mut pos_counter = 1; while let Some(arg) = it.next() { let arg_os = arg.into(); + let arg_os_ops = OsStrOps::from(&arg_os); debugln!( "Parser::get_matches_with: Begin parsing '{:?}' ({:?})", arg_os, @@ -395,7 +394,7 @@ where // Is this a new argument, or values from a previous option? let starts_new_arg = self.is_new_arg(&arg_os, needs_val_of); if !self.is_set(AS::TrailingValues) - && arg_os.starts_with(b"--") + && arg_os_ops.starts_with("--") && arg_os.len() == 2 && starts_new_arg { @@ -430,7 +429,7 @@ where } if starts_new_arg { - if arg_os.starts_with(b"--") { + if arg_os_ops.starts_with("--") { needs_val_of = self.parse_long_arg(matcher, &arg_os)?; debugln!( "Parser:get_matches_with: After parse_long_arg {:?}", @@ -442,7 +441,7 @@ where } _ => (), } - } else if arg_os.starts_with(b"-") && arg_os.len() != 1 { + } else if arg_os_ops.starts_with("-") && arg_os.len() != 1 { // Try to parse short args like normal, if AllowLeadingHyphen or // AllowNegativeNumbers is set, parse_short_arg will *not* throw // an error, and instead return Ok(None) @@ -647,7 +646,7 @@ where break; } else if !((self.is_set(AS::AllowLeadingHyphen) || self.is_set(AS::AllowNegativeNumbers)) - && arg_os.starts_with(b"-")) + && arg_os_ops.starts_with("-")) && !self.is_set(AS::InferSubcommands) { return Err(ClapError::unknown_argument( @@ -718,6 +717,7 @@ where // Checks if the arg matches a subcommand name, or any of it's aliases (if defined) fn possible_subcommand(&self, arg_os: &OsStr) -> (bool, Option<&str>) { debugln!("Parser::possible_subcommand: arg={:?}", arg_os); + /* fn starts(h: &str, n: &OsStr) -> bool { #[cfg(target_os = "windows")] use crate::util::OsStrExt3; @@ -728,7 +728,13 @@ where let h_bytes = OsStr::new(h).as_bytes(); h_bytes.starts_with(n_bytes) + + let h = OsStr::new(h); + let h_ops = OsStrOps::from(&h); } + */ + + let arg_os_ops = OsStrOps::from(&arg_os); if self.is_set(AS::ArgsNegateSubcommands) && self.is_set(AS::ValidArgFound) { return (false, None); @@ -739,7 +745,7 @@ where } } else { let v = sc_names!(self.app) - .filter(|s| starts(s, &*arg_os)) + .filter(|s| arg_os_ops.arg_starts_with(s)) .collect::>(); if v.len() == 1 { @@ -842,8 +848,10 @@ where }; debugln!("Parser::is_new_arg: arg_allows_tac={:?}", arg_allows_tac); + let arg_os_ops = OsStrOps::from(&arg_os); + // Is this a new argument, or values from a previous option? - let mut ret = if arg_os.starts_with(b"--") { + let mut ret = if arg_os_ops.starts_with("--") { debugln!("Parser::is_new_arg: -- found"); if arg_os.len() == 2 && !arg_allows_tac { return true; // We have to return true so override everything else @@ -851,7 +859,7 @@ where return false; } true - } else if arg_os.starts_with(b"-") { + } else if arg_os_ops.starts_with("-") { debugln!("Parser::is_new_arg: - found"); // a singe '-' by itself is a value and typically means "stdin" on unix systems arg_os.len() != 1 @@ -1011,18 +1019,22 @@ where // Update the curent index self.cur_idx.set(self.cur_idx.get() + 1); + let full_arg_ops = OsStrOps::from(&full_arg); + let mut val = None; debug!("Parser::parse_long_arg: Does it contain '='..."); - let arg = if full_arg.contains_byte(b'=') { - let (p0, p1) = full_arg.trim_left_matches(b'-').split_at_byte(b'='); + let arg = if full_arg_ops.contains_byte(b'=') { + let full_arg_trimmed = full_arg_ops.trim_start_matches(b'-'); + let full_arg_trimmed_ops = OsStrOps::from(&full_arg_trimmed); + let (p0, p1) = full_arg_trimmed_ops.split_at_byte(b'='); sdebugln!("Yes '{:?}'", p1); - val = Some(p1); - p0 + val = p1.map(|s| s.into_owned()); + p0.into_owned() } else { sdebugln!("No"); - full_arg.trim_left_matches(b'-') + full_arg_ops.trim_start_matches(b'-').into_owned() }; - if let Some(opt) = self.app.args.get(&KeyType::Long(arg.into())) { + if let Some(opt) = self.app.args.get(&KeyType::Long(arg.clone().into())) { debugln!( "Parser::parse_long_arg: Found valid opt or flag '{}'", opt.to_string() @@ -1032,9 +1044,10 @@ where self.seen.push(opt.id); if opt.is_set(ArgSettings::TakesValue) { - return Ok(self.parse_opt(val, opt, val.is_some(), matcher)?); + let is_some = val.is_some(); + return Ok(self.parse_opt(val, opt, is_some, matcher)?); } - self.check_for_help_and_version_str(arg)?; + self.check_for_help_and_version_str(&arg)?; self.parse_flag(opt, matcher)?; return Ok(ParseResult::Flag); @@ -1056,7 +1069,9 @@ where full_arg: &OsStr, ) -> ClapResult { debugln!("Parser::parse_short_arg: full_arg={:?}", full_arg); - let arg_os = full_arg.trim_left_matches(b'-'); + let full_arg_ops = OsStrOps::from(&full_arg); + let arg_os = full_arg_ops.trim_start_matches(b'-'); + let arg_os_ops = OsStrOps::from(&arg_os); let arg = arg_os.to_string_lossy(); // If AllowLeadingHyphen is set, we want to ensure `-val` gets parsed as `-val` and not @@ -1116,7 +1131,7 @@ where arg_os.split_at(i).1.as_bytes(), arg_os.split_at(i).1 ); - Some(arg_os.split_at(i).1) + Some(arg_os_ops.split_at(i).1.into_owned()) } else { None }; @@ -1140,7 +1155,7 @@ where fn parse_opt( &self, - val: Option<&OsStr>, + val: Option, opt: &Arg<'b>, had_eq: bool, matcher: &mut ArgMatcher, @@ -1155,8 +1170,9 @@ where debug!("Parser::parse_opt; Checking for val..."); if let Some(fv) = val { - has_eq = fv.starts_with(&[b'=']) || had_eq; - let v = fv.trim_left_matches(b'='); + let fv_ops = OsStrOps::from(&fv); + has_eq = fv_ops.starts_with("=") || had_eq; + let v = fv_ops.trim_start_matches(b'='); if !empty_vals && (v.is_empty() || (needs_eq && !has_eq)) { sdebugln!("Found Empty - Error"); return Err(ClapError::empty_value( @@ -1171,7 +1187,7 @@ where fv, fv.starts_with(&[b'=']) ); - self.add_val_to_arg(opt, v, matcher)?; + self.add_val_to_arg(opt, &v, matcher)?; } else if needs_eq && !(empty_vals || min_vals_zero) { sdebugln!("None, but requires equals...Error"); return Err(ClapError::empty_value( @@ -1221,11 +1237,12 @@ where Ok(self.add_single_val_to_arg(arg, val, matcher)?) } else { let mut iret = ParseResult::ValuesDone; - for v in val.split(delim as u32 as u8) { - iret = self.add_single_val_to_arg(arg, v, matcher)?; + let val_ops = OsStrOps::from(&val); + for v in val_ops.split(delim as u32 as u8) { + iret = self.add_single_val_to_arg(arg, &v, matcher)?; } // If there was a delimiter used, we're not looking for more values - if val.contains_byte(delim as u32 as u8) + if val_ops.contains_byte(delim as u32 as u8) || arg.is_set(ArgSettings::RequireDelimiter) { iret = ParseResult::ValuesDone; @@ -1400,8 +1417,8 @@ where sdebugln!(" has conditional defaults"); let mut done = false; if $m.get($a.id).is_none() { - for &(arg, val, default) in vm.values() { - let add = if let Some(a) = $m.get(arg) { + for (arg, val, default) in vm.values() { + let add = if let Some(a) = $m.get(*arg) { if let Some(v) = val { a.vals.iter().any(|value| v == value) } else { @@ -1411,7 +1428,7 @@ where false }; if add { - $_self.add_val_to_arg($a, OsStr::new(default), $m)?; + $_self.add_val_to_arg($a, OsStr::new(&default), $m)?; done = true; break; } diff --git a/src/util/mod.rs b/src/util/mod.rs index 182937bf2662..04dbe0a2db72 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -7,7 +7,5 @@ mod strext; pub use self::fnv::{Key, EMPTY_HASH, HELP_HASH, VERSION_HASH}; pub use self::graph::ChildGraph; pub use self::map::{Values, VecMap}; -pub use self::osstringext::OsStrExt2; -#[cfg(any(target_os = "windows", target_arch = "wasm32"))] -pub use self::osstringext::OsStrExt3; +pub use self::osstringext::OsStrOps; pub use self::strext::_StrExt; diff --git a/src/util/osstringext.rs b/src/util/osstringext.rs index 29c1b933cd54..5384b558344a 100644 --- a/src/util/osstringext.rs +++ b/src/util/osstringext.rs @@ -1,3 +1,342 @@ +use std::borrow::Cow; +use std::ffi::OsStr; +use std::str; + +#[cfg(unix)] +use std::os::unix::ffi::OsStrExt; + +#[cfg(windows)] +use std::ffi::OsString; +#[cfg(windows)] +use std::os::windows::ffi::{OsStrExt, OsStringExt}; + +#[derive(Debug, Clone)] +pub enum OsStrOps<'a> { + #[cfg(not(unix))] + Str(&'a str), // can be represented as UTF-8, just delegate + #[cfg(unix)] + Bytes(&'a [u8]), // Unix - can work on the raw bytes safely + #[cfg(windows)] + Wide(Vec), // Windows - invalid UTF-8, work on wide chars +} + + +impl<'a, T: ?Sized + AsRef> From<&'a T> for OsStrOps<'a> { + fn from(s: &'a T) -> Self { + let s = s.as_ref(); + + #[cfg(unix)] + return OsStrOps::Bytes(s.as_bytes()); + + #[cfg(not(unix))] + { + if let Some(utf8) = s.to_str() { + return OsStrOps::Str(utf8); + } + } + + #[cfg(windows)] + return OsStrOps::Wide(s.encode_wide().collect()); + + #[cfg(not(any(windows, unix)))] + panic!("Non-Unicode OsString on unsupported platform"); + } +} + +impl OsStrOps<'_> { + pub fn starts_with>(&self, s: S) -> bool { + match &self { + #[cfg(not(unix))] + OsStrOps::Str(v) => v.starts_with(s.as_ref()), + #[cfg(unix)] + OsStrOps::Bytes(v) => v.starts_with(s.as_ref().as_bytes()), + #[cfg(windows)] + OsStrOps::Wide(v) => v.starts_with(&s.as_ref().encode_utf16().collect::>()), + } + } + + pub fn arg_starts_with>(&self, s: S) -> bool { + match &self { + #[cfg(not(unix))] + OsStrOps::Str(v) => s.as_ref().starts_with(v), + #[cfg(unix)] + OsStrOps::Bytes(v) => s.as_ref().as_bytes().starts_with(v), + #[cfg(windows)] + OsStrOps::Wide(v) => s.as_ref().encode_utf16().collect::>().starts_with(v), + } + } + + pub fn contains_byte(&self, b: u8) -> bool { + assert!(b <= 127); + + match &self { + #[cfg(not(unix))] + OsStrOps::Str(v) => v.contains(b as char), + #[cfg(unix)] + OsStrOps::Bytes(v) => v.contains(&b), + #[cfg(windows)] + OsStrOps::Wide(v) => v.contains(&u16::from(b)), + } + } + + pub fn split_at_byte(&self, b: u8) -> (Cow, Option>) { + match &self { + #[cfg(not(unix))] + OsStrOps::Str(v) => { + let c = b as char; + let mut iter = v.splitn(2, |n| n == c); + let before = iter.next(); + let after = iter.next(); + + ( + before.map(|s| Cow::Borrowed(s.as_ref())).unwrap(), + after.map(|s| Cow::Borrowed(s.as_ref())), + ) + } + #[cfg(unix)] + OsStrOps::Bytes(v) => { + let mut iter = v.splitn(2, |n| *n == b); + let before = iter.next(); + let after = iter.next(); + + ( + before.map(|s| Cow::Borrowed(OsStr::from_bytes(s))).unwrap(), + after.map(|s| Cow::Borrowed(OsStr::from_bytes(s))), + ) + } + #[cfg(windows)] + OsStrOps::Wide(v) => { + assert!(b <= 127); + + let mut iter = v.splitn(2, |n| *n == u16::from(b)); + let before = iter.next(); + let after = iter.next(); + + ( + before.map(|s| Cow::Owned(OsString::from_wide(s))).unwrap(), + after.map(|s| Cow::Owned(OsString::from_wide(s))), + ) + } + } + } + + pub fn len(&self) -> usize { + match &self { + #[cfg(not(unix))] + OsStrOps::Str(v) => v.len(), + #[cfg(unix)] + OsStrOps::Bytes(v) => v.len(), + #[cfg(windows)] + OsStrOps::Wide(v) => v.len(), + } + } + + #[allow(dead_code)] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn split_at(&self, i: usize) -> (Cow, Cow) { + match &self { + #[cfg(not(unix))] + OsStrOps::Str(v) => { + let bits = v.split_at(i); + ( + Cow::Borrowed(bits.0.as_ref()), + Cow::Borrowed(bits.1.as_ref()), + ) + } + #[cfg(unix)] + OsStrOps::Bytes(v) => { + let bits = v.split_at(i); + ( + Cow::Borrowed(OsStr::from_bytes(bits.0)), + Cow::Borrowed(OsStr::from_bytes(bits.1)), + ) + } + #[cfg(windows)] + OsStrOps::Wide(v) => { + let bits = v.split_at(i); + ( + Cow::Owned(OsString::from_wide(bits.0)), + Cow::Owned(OsString::from_wide(bits.1)), + ) + } + } + } + + pub fn trim_start_matches(&self, b: u8) -> Cow { + assert!(b <= 127); + + match &self { + #[cfg(not(unix))] + OsStrOps::Str(v) => Cow::Borrowed(v.trim_start_matches(b as char).as_ref()), + #[cfg(unix)] + OsStrOps::Bytes(v) => match v.iter().copied().position(|n| n != b) { + Some(0) => Cow::Borrowed(OsStr::from_bytes(v)), + Some(pos) => Cow::Borrowed(OsStr::from_bytes(&v[pos..])), + None => Cow::Borrowed(OsStr::from_bytes(&v[v.len()..])), + }, + #[cfg(windows)] + OsStrOps::Wide(v) => match v.iter().copied().position(|n| n != u16::from(b)) { + Some(0) => Cow::Owned(OsString::from_wide(v)), + Some(pos) => Cow::Owned(OsString::from_wide(&v[pos..])), + None => Cow::Owned(OsString::from_wide(&v[v.len()..])), + }, + } + } + + pub fn split(&self, b: u8) -> OsSplit { + assert!(b <= 127); + + OsSplit { + sep: b, + val: &self, + pos: 0, + } + } +} + +#[derive(Clone, Debug)] +pub struct OsSplit<'a> { + sep: u8, + val: &'a OsStrOps<'a>, + pos: usize, +} + +impl<'a> Iterator for OsSplit<'a> { + type Item = Cow<'a, OsStr>; + + fn next(&mut self) -> Option> { + if self.pos == self.val.len() { + return None; + } + + let start = self.pos; + + match &self.val { + #[cfg(not(unix))] + OsStrOps::Str(v) => { + for b in &v.as_bytes()[start..] { + self.pos += 1; + // This is safe because sep is asserted < 128 in split() + if *b == self.sep { + return Some(Cow::Borrowed(v[start..self.pos - 1].as_ref())); + } + } + + Some(Cow::Borrowed(v[start..].as_ref())) + } + #[cfg(unix)] + OsStrOps::Bytes(v) => { + for b in &v[start..] { + self.pos += 1; + if *b == self.sep { + return Some(Cow::Borrowed(OsStr::from_bytes(&v[start..self.pos - 1]))); + } + } + + Some(Cow::Borrowed(OsStr::from_bytes(&v[start..]))) + } + #[cfg(windows)] + OsStrOps::Wide(v) => { + for b in &v[start..] { + self.pos += 1; + if *b == u16::from(self.sep) { + return Some(Cow::Owned(OsString::from_wide(&v[start..self.pos - 1]))); + } + } + + Some(Cow::Owned(OsString::from_wide(&v[start..]))) + } + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::ffi::OsString; + + #[test] + fn test_starts_with() { + let s = OsString::from("foo bar baz moop"); + let x = OsStrOps::from(&s); + + assert!(x.starts_with("foo bar")); + assert!(!x.starts_with("oo bar")); + } + + #[test] + fn test_contains_byte() { + let s = OsString::from("foo=bar"); + let x = OsStrOps::from(&s); + + assert!(x.contains_byte(b'=')); + assert!(!x.contains_byte(b'z')); + } + + #[test] + fn test_split_at() { + let s = OsString::from("foo=bar"); + let x = OsStrOps::from(&s); + let y = x.split_at(4); + assert_eq!(y.0, OsString::from("foo=")); + assert_eq!(y.1, OsString::from("bar")); + } + + #[test] + fn test_split_at_byte() { + let s = OsString::from("foo=bar"); + let x = OsStrOps::from(&s); + let y = x.split_at_byte(b'='); + assert_eq!(y.0, OsString::from("foo")); + assert_eq!(y.1.unwrap(), OsString::from("bar")); + + let s = OsString::from("foobar"); + let x = OsStrOps::from(&s); + let y = x.split_at_byte(b'='); + assert_eq!(y.0, OsString::from("foobar")); + assert!(y.1.is_none()); + } + + #[test] + fn test_trim_start_matches() { + let s = OsString::from("--foo"); + let x = OsStrOps::from(&s); + let y = x.trim_start_matches(b'-'); + assert_eq!(y, OsString::from("foo")); + + let s = OsString::from("foo"); + let x = OsStrOps::from(&s); + let y = x.trim_start_matches(b'-'); + assert_eq!(y, OsString::from("foo")); + + let s = OsString::from("----"); + let x = OsStrOps::from(&s); + let y = x.trim_start_matches(b'-'); + assert_eq!(y, OsString::from("")); + } + + #[test] + fn test_split() { + let s = OsString::from("foo/bar/baz"); + let x = OsStrOps::from(&s); + let y: Vec<_> = x.split(b'/').collect(); + + assert_eq!( + vec![ + OsString::from("foo"), + OsString::from("bar"), + OsString::from("baz") + ], + y + ); + } +} + + +/* use std::ffi::OsStr; #[cfg(not(any(target_os = "windows", target_arch = "wasm32")))] use std::os::unix::ffi::OsStrExt; @@ -113,3 +452,4 @@ impl<'a> Iterator for OsSplit<'a> { Some(OsStr::from_bytes(&self.val[start..])) } } +*/