From 772261836b59b0c44e741ef6b533f21b535e15ff Mon Sep 17 00:00:00 2001 From: Kevin K Date: Fri, 22 Jan 2016 12:58:56 -0500 Subject: [PATCH] feat(UTF-8): adds support for invalid utf8 in values Closes #269 --- src/app/macros.rs | 8 +- src/app/parser.rs | 216 ++++++++++++++++++++---------------- src/app/settings.rs | 56 +++++++++- src/args/arg_matches.rs | 10 ++ src/errors.rs | 10 +- src/macros.rs | 4 + src/osstringext.rs | 5 + src/utf8.rs | 34 ------ tests/posix_compatible.rs | 8 +- tests/unicode.rs | 30 ----- tests/utf8.rs | 223 ++++++++++++++++++++++++++++++++++++++ 11 files changed, 434 insertions(+), 170 deletions(-) delete mode 100644 tests/unicode.rs create mode 100644 tests/utf8.rs diff --git a/src/app/macros.rs b/src/app/macros.rs index f982afd3ffb5..ded812767a47 100644 --- a/src/app/macros.rs +++ b/src/app/macros.rs @@ -1,5 +1,6 @@ macro_rules! remove_overriden { ($me:ident, $name:expr) => ({ + debugln!("macro=remove_overriden!;"); if let Some(ref o) = $me.opts.iter().filter(|o| o.name == *$name).next() { if let Some(ref ora) = o.requires { for a in ora { @@ -55,6 +56,7 @@ macro_rules! remove_overriden { macro_rules! arg_post_processing( ($me:ident, $arg:ident, $matcher:ident) => ({ use args::AnyArg; + debugln!("macro=arg_post_processing!;"); // Handle POSIX overrides debug!("Is '{}' in overrides...", $arg.to_string()); if $me.overrides.contains(&$arg.name()) { @@ -78,10 +80,10 @@ macro_rules! arg_post_processing( } else { sdebugln!("No"); } // Handle conflicts - debugln!("Does '{}' have conflicts...", $arg.to_string()); + debug!("Does '{}' have conflicts...", $arg.to_string()); if let Some(bl) = $arg.blacklist() { for name in bl { - sdebugln!("\tYes '{}'", name); + sdebugln!("\n\tYes '{}'", name); $me.blacklist.push(name); vec_remove!($me.overrides, name); vec_remove!($me.required, name); @@ -109,6 +111,7 @@ macro_rules! arg_post_processing( macro_rules! 
_handle_group_reqs{ ($me:ident, $arg:ident) => ({ use args::AnyArg; + debugln!("macro=_handle_group_reqs!;"); for grp in $me.groups.values() { let mut found = false; for name in grp.args.iter() { @@ -142,6 +145,7 @@ macro_rules! _handle_group_reqs{ macro_rules! validate_multiples { ($_self:ident, $a:ident, $m:ident) => { + debugln!("macro=validate_multiples!;"); if $m.contains(&$a.name) && !$a.settings.is_set(ArgSettings::Multiple) { // Not the first time, and we don't allow multiples return Err(Error::unexpected_multiple_usage($a, &*$_self.create_current_usage($m))) diff --git a/src/app/parser.rs b/src/app/parser.rs index 2a692e542dc0..1f776116bd20 100644 --- a/src/app/parser.rs +++ b/src/app/parser.rs @@ -3,6 +3,8 @@ use std::slice::Iter; use std::io::{self, BufWriter, Write}; use std::ffi::{OsStr, OsString}; use std::fmt::Display; +#[cfg(feature = "debug")] +use std::os::unix::ffi::OsStrExt; use vec_map::VecMap; @@ -67,6 +69,28 @@ impl<'a, 'b> Default for Parser<'a, 'b> { } } +macro_rules! 
parse_positional { + ($_self:ident, $p:ident, $arg_os:ident, $pos_only:ident, $pos_counter:ident, $matcher:ident) => { + debugln!("macro=parse_positional!;"); + validate_multiples!($_self, $p, $matcher); + + if let Err(e) = $_self.add_val_to_arg($p, &$arg_os, $matcher) { + return Err(e); + } + if !$pos_only && + ($_self.settings.is_set(AppSettings::TrailingVarArg) && + $pos_counter == $_self.positionals.len()) { + $pos_only = true; + } + arg_post_processing!($_self, $p, $matcher); + // Only increment the positional counter if it doesn't allow multiples + if !$p.settings.is_set(ArgSettings::Multiple) { + $pos_counter += 1; + } + }; +} + + impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { pub fn with_name(n: String) -> Self { Parser { meta: AppMeta::with_name(n), ..Default::default() } @@ -423,6 +447,7 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { where I: Iterator, T: Into { + debugln!("fn=get_matches_with;"); // First we create the `--help` and `--version` arguments and add them if // necessary self.create_help_and_version(); @@ -433,11 +458,15 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { let mut pos_counter = 1; while let Some(arg) = it.next() { let arg_os = arg.into(); + debugln!("Begin parsing '{:?}' ({:?})", arg_os, &*arg_os.as_bytes()); // Is this a new argument, or values from a previous option? 
+ debug!("Starts new arg..."); let starts_new_arg = if arg_os.starts_with(b"-") { + sdebugln!("Yes"); !(arg_os.len() == 1) } else { + sdebugln!("No"); false }; @@ -454,7 +483,6 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { } } } - let mut skip = false; if arg_os.starts_with(b"--") { if arg_os.len() == 2 { // The user has passed '--' which means only positional args follow no matter @@ -464,48 +492,34 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { } needs_val_of = try!(self.parse_long_arg(matcher, &arg_os)); + continue; } else if arg_os.starts_with(b"-") && arg_os.len() != 1 { needs_val_of = try!(self.parse_short_arg(matcher, &arg_os)); - } else { - skip = true; + continue; } - if !skip { continue; } - let arg_str = arg_os.to_str().expect(INVALID_UTF8); + + // let arg_str = arg_os.to_str().expect(INVALID_UTF8); if self.subcommands.iter().any(|s| &s.0.meta.name[..] == &*arg_os) { if &*arg_os == "help" && self.settings.is_set(AppSettings::NeedsSubcommandHelp) { return self._help(); } - subcmd_name = Some(arg_str.to_owned()); + // subcommands only support valid UTF-8 + subcmd_name = Some(arg_os.to_str().expect(INVALID_UTF8).to_owned()); break; } else if let Some(candidate) = suggestions::did_you_mean( - arg_str, + &*arg_os.to_string_lossy(), self.subcommands.iter().map(|s| &s.0.meta.name)) { return Err( - Error::invalid_subcommand(arg_str, + Error::invalid_subcommand(arg_os.to_string_lossy().into_owned(), candidate, self.meta.bin_name.as_ref().unwrap_or(&self.meta.name), &*self.create_current_usage(matcher))); } } - if let Some(p) = self.positionals.get(&pos_counter) { - validate_multiples!(self, p, matcher); - - try!(self.add_val_to_arg(p, &arg_os, matcher)); - - if !pos_only && - (self.settings.is_set(AppSettings::TrailingVarArg) && - pos_counter == self.positionals.len()) { - pos_only = true; - } - arg_post_processing!(self, p, matcher); - - // Only increment the positional counter if it doesn't allow multiples - if !p.settings.is_set(ArgSettings::Multiple) { - 
pos_counter += 1; - } + parse_positional!(self, p, arg_os, pos_only, pos_counter, matcher); } else { if self.settings.is_set(AppSettings::AllowExternalSubcommands) { // let arg_str = arg_os.to_str().expect(INVALID_UTF8); @@ -567,51 +581,7 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { try!(self.validate_required(matcher)); } if let Some(sc_name) = subcmd_name { - use std::fmt::Write; - let mut mid_string = String::new(); - if !self.settings.is_set(AppSettings::SubcommandsNegateReqs) { - let mut hs: Vec<&str> = self.required.iter().map(|n| &**n).collect(); - for k in matcher.arg_names() { - hs.push(k); - } - let reqs = self.get_required_from(&hs, Some(matcher)); - - for s in reqs.iter() { - write!(&mut mid_string, " {}", s).expect(INTERNAL_ERROR_MSG); - } - } - mid_string.push_str(" "); - if let Some(ref mut sc) = self.subcommands - .iter_mut() - .filter(|s| &s.0.meta.name[..] == &sc_name) - .next() { - let mut sc_matcher = ArgMatcher::new(); - // bin_name should be parent's bin_name + [] + the sc's name separated by - // a space - sc.0.meta.usage = Some(format!("{}{}{}", - self.meta.bin_name.as_ref().unwrap_or(&String::new()), - if self.meta.bin_name.is_some() { - &*mid_string - } else { - "" - }, - &*sc.0.meta.name)); - sc.0.meta.bin_name = Some(format!("{}{}{}", - self.meta.bin_name.as_ref().unwrap_or(&String::new()), - if self.meta.bin_name.is_some() { - " " - } else { - "" - }, - &*sc.0.meta.name)); - if let Err(e) = sc.0.get_matches_with(&mut sc_matcher, it) { - e.exit(); - } - matcher.subcommand(SubCommand { - name: sc.0.meta.name.clone(), - matches: sc_matcher.into(), - }); - } + try!(self.parse_subcommand(sc_name, matcher, it)); } else if self.is_set(AppSettings::SubcommandRequired) { let bn = self.meta.bin_name.as_ref().unwrap_or(&self.meta.name); return Err(Error::missing_subcommand(bn, &self.create_current_usage(matcher))); @@ -636,6 +606,60 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { } Ok(()) } + + fn parse_subcommand(&mut self, sc_name: String, 
matcher: &mut ArgMatcher<'a>, it: &mut I) -> ClapResult<()> + where I: Iterator, + T: Into + { + use std::fmt::Write; + debugln!("fn=parse_subcommand;"); + let mut mid_string = String::new(); + if !self.settings.is_set(AppSettings::SubcommandsNegateReqs) { + let mut hs: Vec<&str> = self.required.iter().map(|n| &**n).collect(); + for k in matcher.arg_names() { + hs.push(k); + } + let reqs = self.get_required_from(&hs, Some(matcher)); + + for s in reqs.iter() { + write!(&mut mid_string, " {}", s).expect(INTERNAL_ERROR_MSG); + } + } + mid_string.push_str(" "); + if let Some(ref mut sc) = self.subcommands + .iter_mut() + .filter(|s| &s.0.meta.name[..] == &sc_name) + .next() { + let mut sc_matcher = ArgMatcher::new(); + // bin_name should be parent's bin_name + [] + the sc's name separated by + // a space + sc.0.meta.usage = Some(format!("{}{}{}", + self.meta.bin_name.as_ref().unwrap_or(&String::new()), + if self.meta.bin_name.is_some() { + &*mid_string + } else { + "" + }, + &*sc.0.meta.name)); + sc.0.meta.bin_name = Some(format!("{}{}{}", + self.meta.bin_name.as_ref().unwrap_or(&String::new()), + if self.meta.bin_name.is_some() { + " " + } else { + "" + }, + &*sc.0.meta.name)); + if let Err(e) = sc.0.get_matches_with(&mut sc_matcher, it) { + e.exit(); + } + matcher.subcommand(SubCommand { + name: sc.0.meta.name.clone(), + matches: sc_matcher.into(), + }); + } + Ok(()) + } + fn blacklisted_from(&self, name: &str, matcher: &ArgMatcher) -> Option { for k in matcher.arg_names() { if let Some(f) = self.flags.iter().filter(|f| &f.name == &k).next() { @@ -780,6 +804,7 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { } fn create_help_and_version(&mut self) { + debugln!("fn=create_help_and_version;"); // name is "hclap_help" because flags are sorted by name if !self.flags.iter().any(|a| a.long.is_some() && a.long.unwrap() == "help") { if self.help_short.is_none() && !self.short_list.contains(&'h') { @@ -958,9 +983,12 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { debugln!("Found 
valid short opt -{} in '{}'", c, arg); // Check for trailing concatenated value let p: Vec<_> = arg.splitn(2, c).collect(); - let i = p[0].as_bytes().len(); - let val = if i != 0 { - Some(full_arg.split_at(i + 1).1) + debugln!("arg: {:?}, arg_os: {:?}, full_arg: {:?}", arg, arg_os, full_arg); + debugln!("p[0]: {:?}, p[1]: {:?}", p[0].as_bytes(), p[1].as_bytes()); + let i = p[0].as_bytes().len() + 1; + let val = if p[1].as_bytes().len() > 0 { + debugln!("setting val: {:?} (bytes), {:?} (ascii)", arg_os.split_at(i).1.as_bytes(), arg_os.split_at(i).1); + Some(arg_os.split_at(i).1) } else { None }; @@ -999,7 +1027,8 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { validate_multiples!(self, opt, matcher); debug!("Checking for val..."); - if let Some(v) = val { + if let Some(mut v) = val { + v = v.trim_left_matches(b'='); if !opt.is_set(ArgSettings::EmptyValues) && v.len() == 0 { sdebugln!("Found Empty - Error"); return Err(Error::empty_value(opt, &*self.create_current_usage(matcher))); @@ -1037,9 +1066,13 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { fn validate_value(&self, arg: &A, val: &OsStr, matcher: &ArgMatcher<'a>) -> ClapResult> where A: AnyArg<'a, 'b> { + debugln!("fn=validate_value; val={:?}", val); + if self.is_set(AppSettings::StrictUtf8) && val.to_str().is_none() { + return Err(Error::invalid_utf8(&*self.create_current_usage(matcher))); + } if let Some(ref p_vals) = arg.possible_vals() { - let val_str = val.to_str().expect(INVALID_UTF8); - if !p_vals.contains(&val_str.into()) { + let val_str = val.to_string_lossy(); + if !p_vals.contains(&&*val_str) { return Err( Error::invalid_value(val_str, p_vals, @@ -1048,22 +1081,12 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { } } if !arg.is_set(ArgSettings::EmptyValues) && - val == "" && // .is_empty() doesn't exist for OsStr + val.is_empty() && matcher.contains(&*arg.name()) { return Err(Error::empty_value(arg, &*self.create_current_usage(matcher))); } if let Some(ref vtor) = arg.validator() { - let v = if 
!self.settings.is_set(AppSettings::StrictUtf8) { - val.to_string_lossy().into_owned() - } else { - match val.to_str() { - Some(s) => s.to_owned(), - None => { - return Err(Error::invalid_utf8(&*self.create_current_usage(matcher))); - } - } - }; - if let Err(e) = vtor(v) { + if let Err(e) = vtor(val.to_string_lossy().into_owned()) { return Err(Error::value_validation(e)); } } @@ -1140,8 +1163,10 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { fn validate_blacklist(&self, matcher: &mut ArgMatcher) -> ClapResult<()> { + debugln!("fn=validate_blacklist;"); macro_rules! build_err { ($me:ident, $name:expr, $matcher:ident) => ({ + debugln!("macro=build_err;"); let c_with = $me.blacklisted_from($name, &$matcher); debugln!("'{:?}' conflicts with '{}'", c_with, $name); let usg = $me.create_current_usage($matcher); @@ -1256,14 +1281,16 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { }, &*self.create_current_usage(matcher))); } - } - if let Some(max) = pos.max_vals { + } else if let Some(max) = pos.max_vals { if (ma.vals.len() as u8) > max { - return Err(Error::too_many_values( - ma.vals.get(&ma.vals.keys() - .last() - .expect(INTERNAL_ERROR_MSG)) - .expect(INTERNAL_ERROR_MSG).to_str().expect(INVALID_UTF8), + return Err( + Error::too_many_values( + ma.vals.get(&ma.vals.keys() + .last() + .expect(INTERNAL_ERROR_MSG)) + .expect(INTERNAL_ERROR_MSG) + .to_string_lossy() + .into_owned(), pos, &*self.create_current_usage(matcher))); } @@ -1365,6 +1392,7 @@ impl<'a, 'b> Parser<'a, 'b> where 'a: 'b { // after all arguments were parsed, but before any subcommands have been parsed // (so as to give subcommands their own usage recursively) fn create_usage(&self, used: &[&str]) -> String { + debugln!("fn=create_usage;"); let mut usage = String::with_capacity(75); usage.push_str("USAGE:\n\t"); if let Some(u) = self.meta.usage_str { diff --git a/src/app/settings.rs b/src/app/settings.rs index d3a64555e804..57b65a6d1b63 100644 --- a/src/app/settings.rs +++ b/src/app/settings.rs @@ -29,7 +29,7 @@ 
pub struct AppFlags(Flags); impl AppFlags { pub fn new() -> Self { - AppFlags(NEEDS_LONG_VERSION | NEEDS_LONG_HELP | NEEDS_SC_HELP | UTF8_STRICT) + AppFlags(NEEDS_LONG_VERSION | NEEDS_LONG_HELP | NEEDS_SC_HELP | UTF8_NONE) } pub fn set(&mut self, s: AppSettings) { @@ -329,7 +329,61 @@ pub enum AppSettings { /// } /// ``` AllowExternalSubcommands, + /// Specifies that any invalid UTF-8 code points should be treated as an error and fail + /// with an `ErrorKind::InvalidUtf8` error. + /// + /// **NOTE:** This rule only applies to argument values, as flags, options, and subcommands + /// only allow valid UTF-8 code points. + /// + /// # Examples + /// + /// ```ignore + /// # use clap::{App, Arg, AppSettings, ErrorKind}; + /// use std::ffi::OsString; + /// + /// let m = App::new("myprog") + /// .setting(AppSettings::StrictUtf8) + /// .arg_from_usage(" 'some positional arg'") + /// .get_matches_from_safe( + /// vec![ + /// OsString::from("myprog"), + /// OsString::from_vec(vec![0xe9])]); + /// + /// assert!(m.is_err()); + /// assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); + /// } + /// ``` StrictUtf8, + /// Specifies that any invalid UTF-8 code points should *not* be treated as an error. This is + /// the default behavior of `clap`. + /// + /// **NOTE:** Using argument values with invalid UTF-8 code points requires using either + /// `ArgMatches::os_value(s)_of` or `ArgMatches::lossy_value(s)_of` for those particular + /// arguments which may have invalid UTF-8 values. + /// + /// **NOTE:** This rule only applies to argument values, as flags, options, and subcommands + /// only allow valid UTF-8 code points.
+ /// + /// # Examples + /// + /// ```ignore + /// # use clap::{App, Arg, AppSettings}; + /// use std::ffi::OsString; + /// use std::os::unix::ffi::OsStrExt; + /// + /// let r = App::new("myprog") + /// .setting(AppSettings::AllowInvalidUtf8) + /// .arg_from_usage(" 'some positional arg'") + /// .get_matches_from_safe( + /// vec![ + /// OsString::from("myprog"), + /// OsString::from_vec(vec![0xe9])]); + /// + /// assert!(r.is_ok()); + /// let m = r.unwrap(); + /// assert_eq!(m.os_value_of("arg").unwrap().as_bytes(), &[0xe9]); + /// } + /// ``` AllowInvalidUtf8, #[doc(hidden)] NeedsLongVersion, diff --git a/src/args/arg_matches.rs b/src/args/arg_matches.rs index 003fd0f7ed5e..ddefbce0eaf2 100644 --- a/src/args/arg_matches.rs +++ b/src/args/arg_matches.rs @@ -2,6 +2,7 @@ use std::ffi::{OsString, OsStr}; use std::collections::HashMap; use std::iter::Map; use std::slice; +use std::borrow::Cow; use vec_map; @@ -119,6 +120,15 @@ impl<'a> ArgMatches<'a> { None } + pub fn lossy_value_of>(&'a self, name: S) -> Option> { + if let Some(arg) = self.args.get(name.as_ref()) { + if let Some(v) = arg.vals.values().nth(0) { + return Some(v.to_string_lossy()); + } + } + None + } + pub fn os_value_of>(&self, name: S) -> Option<&OsStr> { self.args.get(name.as_ref()).map(|arg| arg.vals.values().nth(0).map(|v| v.as_os_str())).unwrap_or(None) } diff --git a/src/errors.rs b/src/errors.rs index 0662d8b0ca68..d8cc8ada909d 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -234,9 +234,6 @@ pub enum ErrorKind { /// Occurs when the user provides a value containing invalid UTF-8 for an argument and /// `AppSettings::StrictUtf8` is set. /// - /// **Note:** This is the default setting and behavior. If you wish to *allow* invalid UTF-8 in - /// argument values, use `AppSettings::AllowInvalidUtf8` - /// /// # Platform Speicific /// /// Non-Windows platforms only (such as Linux, Unix, OSX, etc.)
@@ -244,16 +241,17 @@ pub enum ErrorKind { /// # Examples /// /// ```ignore - /// # use clap::{App, Arg, ErrorKind}; + /// # use clap::{App, Arg, ErrorKind, AppSettings}; /// # use std::os::unix::ffi::OsStringExt; /// # use std::ffi::OsString; /// let result = App::new("myprog") - /// .arg(Arg::with_name("debug") + /// .setting(AppSettings::StrictUtf8) + /// .arg(Arg::with_name("utf8") /// .short("u") /// .takes_value(true)) /// .get_matches_from_safe(vec![OsString::from("myprog"), /// OsString::from("-u") - /// OsString::from_vec(vec![0x20, 0xE9])]); + /// OsString::from_vec(vec![0xE9])]); /// assert!(result.is_err()); /// assert_eq!(result.unwrap_err().kind, ErrorKind::InvalidUtf8); /// ``` diff --git a/src/macros.rs b/src/macros.rs index f30eb920b6d8..697fe95b98b6 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -65,6 +65,7 @@ macro_rules! load_yaml { // used in src/args/arg_builder/option.rs macro_rules! print_opt_help { ($opt:ident, $spc:expr, $w:ident) => { + debugln!("macro=print_opt_help!;"); if let Some(h) = $opt.help { if h.contains("{n}") { let mut hel = h.split("{n}"); @@ -96,6 +97,7 @@ macro_rules! print_opt_help { // src/app/mod.rs macro_rules! write_spaces { ($num:expr, $w:ident) => ({ + debugln!("macro=write_spaces!;"); for _ in 0..$num { try!(write!($w, " ")); } @@ -105,6 +107,7 @@ macro_rules! write_spaces { // convenience macro for remove an item from a vec macro_rules! vec_remove { ($vec:expr, $to_rem:ident) => { + debugln!("macro=write_spaces!;"); { let mut ix = None; $vec.dedup(); @@ -127,6 +130,7 @@ macro_rules! vec_remove { // item. macro_rules! 
for_match { ($it:ident, $($p:pat => $($e:expr);+),*) => { + debugln!("macro=for_match!;"); for i in $it { match i { $( diff --git a/src/osstringext.rs b/src/osstringext.rs index 75dcaac8080b..c38936c98baf 100644 --- a/src/osstringext.rs +++ b/src/osstringext.rs @@ -8,6 +8,7 @@ pub trait OsStrExt2 { fn trim_left_matches(&self, b: u8) -> &OsStr; fn len(&self) -> usize; fn contains_byte(&self, b: u8) -> bool; + fn is_empty(&self) -> bool; } impl OsStrExt2 for OsStr { @@ -21,6 +22,10 @@ impl OsStrExt2 for OsStr { return true; } + fn is_empty(&self) -> bool { + self.as_bytes().is_empty() + } + fn contains_byte(&self, byte: u8) -> bool { for b in self.as_bytes() { if b == &byte { return true; } diff --git a/src/utf8.rs b/src/utf8.rs index ddb8ef67872b..5b0935f308f2 100644 --- a/src/utf8.rs +++ b/src/utf8.rs @@ -1,35 +1 @@ -// use std::ffi::OsStr; -// use std::borrow::Cow; -// -// pub trait Utf8Rule { type Out; fn into(&OsStr) -> ::Out; } -// -// #[derive(Copy, Clone, Debug, PartialEq)] -// pub struct Strict<'a>; -// impl<'a> Utf8Rule for Strict<'a> { type Out = &'a str; } -// -// #[derive(Copy, Clone, Debug, PartialEq)] -// pub struct Lossy<'a>; -// impl<'a> Utf8Rule for Lossy<'a> { type Out = Cow<'a, str>; } -// -// #[derive(Copy, Clone, Debug, PartialEq)] -// pub struct AllowInvalid<'a>; -// impl<'a> Utf8Rule for AllowInvalid<'a> { type Out = &'a OsStr; } -// -// #[derive(Copy, Clone, Debug, PartialEq)] -// pub enum Utf8 { -// Strict, -// Lossy, -// AllowInvalid, -// } -// -// impl Utf8 { -// pub fn into(&self) -> U::Out { -// match *self { -// Utf::Strict => Strict::, -// Utf::Lossy =>, -// Utf::AllowInvalid =>, -// } -// } -// } - pub const INVALID_UTF8: &'static str = "unexpected invalid UTF-8 code point"; diff --git a/tests/posix_compatible.rs b/tests/posix_compatible.rs index c0ebaebb48c0..12577a779196 100644 --- a/tests/posix_compatible.rs +++ b/tests/posix_compatible.rs @@ -116,9 +116,11 @@ fn conflict_overriden_2() { .arg(Arg::from_usage("-c, --color 'third 
flag'") .mutually_overrides_with("flag")) .get_matches_from_safe(vec!["myprog", "-f", "-d", "-c"]); - assert!(result.is_err()); - let err = result.err().unwrap(); - assert_eq!(err.kind, ErrorKind::ArgumentConflict); + assert!(result.is_ok()); + let m = result.unwrap(); + assert!(m.is_present("color")); + assert!(m.is_present("debug")); + assert!(!m.is_present("flag")); } #[test] diff --git a/tests/unicode.rs b/tests/unicode.rs deleted file mode 100644 index 7e5868ff223c..000000000000 --- a/tests/unicode.rs +++ /dev/null @@ -1,30 +0,0 @@ -#![cfg(not(windows))] - -extern crate clap; - -use std::ffi::OsString; -use std::os::unix::ffi::OsStringExt; -use clap::{App, Arg, AppSettings, ErrorKind}; - -#[test] -fn invalid_unicode_safe() { - let m = App::new("bad_unicode") - .arg(Arg::from_usage(" 'some arg'")) - .get_matches_from_safe(vec![OsString::from_vec(vec![0x20]), - OsString::from_vec(vec![0xe9])]); - assert!(m.is_err()); - if let Err(err) = m { - assert_eq!(err.kind, ErrorKind::InvalidUtf8); - } -} - -#[test] -fn invalid_unicode_lossy() { - let m = App::new("bad_unicode") - .arg(Arg::from_usage(" 'some arg'")) - .setting(AppSettings::AllowInvalidUtf8) - .get_matches_from(vec![OsString::from_vec(vec![0x20]), - OsString::from_vec(vec![0xe9])]); - assert!(m.is_present("arg")); - assert_eq!(m.value_of("arg").unwrap(), "\u{FFFD}"); -} diff --git a/tests/utf8.rs b/tests/utf8.rs new file mode 100644 index 000000000000..373bbd58ae09 --- /dev/null +++ b/tests/utf8.rs @@ -0,0 +1,223 @@ +#![cfg(not(windows))] + +extern crate clap; + +use std::ffi::OsString; +use std::os::unix::ffi::OsStringExt; +use clap::{App, Arg, AppSettings, ErrorKind}; + +#[test] +fn invalid_utf8_strict_positional() { + let m = App::new("bad_utf8") + .arg(Arg::from_usage(" 'some arg'")) + .setting(AppSettings::StrictUtf8) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0xe9])]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn 
invalid_utf8_strict_option_short_space() { + let m = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .setting(AppSettings::StrictUtf8) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from("-a"), + OsString::from_vec(vec![0xe9])]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn invalid_utf8_strict_option_short_equals() { + let m = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .setting(AppSettings::StrictUtf8) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9])]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn invalid_utf8_strict_option_short_no_space() { + let m = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .setting(AppSettings::StrictUtf8) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0x2d, 0x61, 0xe9])]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn invalid_utf8_strict_option_long_space() { + let m = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .setting(AppSettings::StrictUtf8) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from("--arg"), + OsString::from_vec(vec![0xe9])]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn invalid_utf8_strict_option_long_equals() { + let m = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .setting(AppSettings::StrictUtf8) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9])]); + assert!(m.is_err()); + assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8); +} + +#[test] +fn invalid_utf8_lossy_positional() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage(" 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + 
OsString::from_vec(vec![0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}"); +} + +#[test] +fn invalid_utf8_lossy_option_short_space() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from("-a"), + OsString::from_vec(vec![0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}"); +} + +#[test] +fn invalid_utf8_lossy_option_short_equals() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}"); +} + +#[test] +fn invalid_utf8_lossy_option_short_no_space() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0x2d, 0x61, 0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}"); +} + +#[test] +fn invalid_utf8_lossy_option_long_space() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from("--arg"), + OsString::from_vec(vec![0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}"); +} + +#[test] +fn invalid_utf8_lossy_option_long_equals() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9])]); + assert!(r.is_ok()); + let m 
= r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.lossy_value_of("arg").unwrap(), "\u{FFFD}"); +} + +#[test] +fn invalid_utf8_positional() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage(" 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9])); +} + +#[test] +fn invalid_utf8_option_short_space() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from("-a"), + OsString::from_vec(vec![0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9])); +} + +#[test] +fn invalid_utf8_option_short_equals() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0x2d, 0x61, 0x3d, 0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9])); +} + +#[test] +fn invalid_utf8_option_short_no_space() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0x2d, 0x61, 0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9])); +} + +#[test] +fn invalid_utf8_option_long_space() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from("--arg"), + OsString::from_vec(vec![0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + 
assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9])); +} + +#[test] +fn invalid_utf8_option_long_equals() { + let r = App::new("bad_utf8") + .arg(Arg::from_usage("-a, --arg 'some arg'")) + .get_matches_from_safe(vec![OsString::from(""), + OsString::from_vec(vec![0x2d, 0x2d, 0x61, 0x72, 0x67, 0x3d, 0xe9])]); + assert!(r.is_ok()); + let m = r.unwrap(); + assert!(m.is_present("arg")); + assert_eq!(&*m.os_value_of("arg").unwrap(), &*OsString::from_vec(vec![0xe9])); +}