# Patch summary (descriptive text before the first "diff --git" header;
# patch tools conventionally skip leading text like this):
#
#   * src/libstd/build.rs: stop emitting `cargo:rustc-link-lib=shell32` in the
#     branch that also links advapi32, ws2_32 and userenv.
#   * src/libstd/sys/windows/args.rs: `args()` no longer calls
#     `c::CommandLineToArgvW`. It passes the pointer returned by
#     `c::GetCommandLineW()` to a new in-tree parser, `parse_lp_cmd_line`,
#     which returns a vector of `OsString`s. When the pointer is null or the
#     command line is empty, the parser's only entry is the value produced by
#     the `exe_name` callback (in `args()`: `current_exe()`, or an empty
#     `OsString` if that fails). `Args` now wraps a `vec::IntoIter`, so the
#     `Drop` impl that called `c::LocalFree` and the `os_string_from_ptr`
#     helper are removed. A `#[cfg(test)]` module exercising the parser is
#     added.
#   * src/libstd/sys/windows/c.rs: drop the now-unused `LocalFree` and
#     `CommandLineToArgvW` declarations.
#   * src/test/run-make-fulldeps/tools.mk: drop `shell32.lib` from the MSVC
#     `EXTRACFLAGS`.
#
# Motivation, as stated in the doc comment this patch adds: linking with
# shell32.dll causes the process to be registered as a GUI application, which
# adds overhead even if no windows are drawn.
#
# NOTE(review): this copy of the patch looks like it went through a lossy
# text extraction -- confirm against the upstream commit before applying:
#   - text between angle brackets appears to be missing (generic parameters
#     such as the ones expected after `Vec`, `Option`, `Range`,
#     `vec::IntoIter` and on `parse_lp_cmd_line`, plus the link targets in
#     the `///` comments, which now read "can be found at" / "." only);
#   - runs of whitespace inside lines appear to have been collapsed, so
#     several `whitespace_behavior` test inputs now look identical;
#   - line breaks and indentation below are reconstructed so that every
#     hunk's line counts agree with its `@@` header; exact indentation, and
#     the position of the blank context line in the c.rs hunk, are
#     assumptions.
diff --git a/src/libstd/build.rs b/src/libstd/build.rs
index 9d6e8c4cafdc..8641f28b23fa 100644
--- a/src/libstd/build.rs
+++ b/src/libstd/build.rs
@@ -68,7 +68,6 @@ fn main() {
         println!("cargo:rustc-link-lib=advapi32");
         println!("cargo:rustc-link-lib=ws2_32");
         println!("cargo:rustc-link-lib=userenv");
-        println!("cargo:rustc-link-lib=shell32");
     } else if target.contains("fuchsia") {
         println!("cargo:rustc-link-lib=zircon");
         println!("cargo:rustc-link-lib=fdio");
diff --git a/src/libstd/sys/windows/args.rs b/src/libstd/sys/windows/args.rs
index 4784633edc14..9e9198e05ee0 100644
--- a/src/libstd/sys/windows/args.rs
+++ b/src/libstd/sys/windows/args.rs
@@ -11,12 +11,14 @@
 #![allow(dead_code)] // runtime init functions not used during testing
 
 use os::windows::prelude::*;
+use sys::windows::os::current_exe;
 use sys::c;
-use slice;
-use ops::Range;
 use ffi::OsString;
-use libc::{c_int, c_void};
 use fmt;
+use vec;
+use core::iter;
+use slice;
+use path::PathBuf;
 
 pub unsafe fn init(_argc: isize, _argv: *const *const u8) { }
 
@@ -24,20 +26,146 @@ pub unsafe fn cleanup() { }
 
 pub fn args() -> Args {
     unsafe {
-        let mut nArgs: c_int = 0;
-        let lpCmdLine = c::GetCommandLineW();
-        let szArgList = c::CommandLineToArgvW(lpCmdLine, &mut nArgs);
-
-        // szArcList can be NULL if CommandLinToArgvW failed,
-        // but in that case nArgs is 0 so we won't actually
-        // try to read a null pointer
-        Args { cur: szArgList, range: 0..(nArgs as isize) }
+        let lp_cmd_line = c::GetCommandLineW();
+        let parsed_args_list = parse_lp_cmd_line(
+            lp_cmd_line as *const u16,
+            || current_exe().map(PathBuf::into_os_string).unwrap_or_else(|_| OsString::new()));
+
+        Args { parsed_args_list: parsed_args_list.into_iter() }
     }
 }
 
+/// Implements the Windows command-line argument parsing algorithm.
+///
+/// Microsoft's documentation for the Windows CLI argument format can be found at
+/// .
+///
+/// Windows includes a function to do this in shell32.dll,
+/// but linking with that DLL causes the process to be registered as a GUI application.
+/// GUI applications add a bunch of overhead, even if no windows are drawn. See
+/// .
+///
+/// This function was tested for equivalence to the shell32.dll implementation in
+/// Windows 10 Pro v1803, using an exhaustive test suite available at
+/// or
+/// .
+unsafe fn parse_lp_cmd_line OsString>(lp_cmd_line: *const u16, exe_name: F)
+                                                 -> Vec {
+    const BACKSLASH: u16 = '\\' as u16;
+    const QUOTE: u16 = '"' as u16;
+    const TAB: u16 = '\t' as u16;
+    const SPACE: u16 = ' ' as u16;
+    let mut ret_val = Vec::new();
+    if lp_cmd_line.is_null() || *lp_cmd_line == 0 {
+        ret_val.push(exe_name());
+        return ret_val;
+    }
+    let mut cmd_line = {
+        let mut end = 0;
+        while *lp_cmd_line.offset(end) != 0 {
+            end += 1;
+        }
+        slice::from_raw_parts(lp_cmd_line, end as usize)
+    };
+    // The executable name at the beginning is special.
+    cmd_line = match cmd_line[0] {
+        // The executable name ends at the next quote mark,
+        // no matter what.
+        QUOTE => {
+            let args = {
+                let mut cut = cmd_line[1..].splitn(2, |&c| c == QUOTE);
+                if let Some(exe) = cut.next() {
+                    ret_val.push(OsString::from_wide(exe));
+                }
+                cut.next()
+            };
+            if let Some(args) = args {
+                args
+            } else {
+                return ret_val;
+            }
+        }
+        // Implement quirk: when they say whitespace here,
+        // they include the entire ASCII control plane:
+        // "However, if lpCmdLine starts with any amount of whitespace, CommandLineToArgvW
+        // will consider the first argument to be an empty string. Excess whitespace at the
+        // end of lpCmdLine is ignored."
+        0...SPACE => {
+            ret_val.push(OsString::new());
+            &cmd_line[1..]
+        },
+        // The executable name ends at the next whitespace,
+        // no matter what.
+        _ => {
+            let args = {
+                let mut cut = cmd_line.splitn(2, |&c| c > 0 && c <= SPACE);
+                if let Some(exe) = cut.next() {
+                    ret_val.push(OsString::from_wide(exe));
+                }
+                cut.next()
+            };
+            if let Some(args) = args {
+                args
+            } else {
+                return ret_val;
+            }
+        }
+    };
+    let mut cur = Vec::new();
+    let mut in_quotes = false;
+    let mut was_in_quotes = false;
+    let mut backslash_count: usize = 0;
+    for &c in cmd_line {
+        match c {
+            // backslash
+            BACKSLASH => {
+                backslash_count += 1;
+                was_in_quotes = false;
+            },
+            QUOTE if backslash_count % 2 == 0 => {
+                cur.extend(iter::repeat(b'\\' as u16).take(backslash_count / 2));
+                backslash_count = 0;
+                if was_in_quotes {
+                    cur.push('"' as u16);
+                    was_in_quotes = false;
+                } else {
+                    was_in_quotes = in_quotes;
+                    in_quotes = !in_quotes;
+                }
+            }
+            QUOTE if backslash_count % 2 != 0 => {
+                cur.extend(iter::repeat(b'\\' as u16).take(backslash_count / 2));
+                backslash_count = 0;
+                was_in_quotes = false;
+                cur.push(b'"' as u16);
+            }
+            SPACE | TAB if !in_quotes => {
+                cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
+                if !cur.is_empty() || was_in_quotes {
+                    ret_val.push(OsString::from_wide(&cur[..]));
+                    cur.truncate(0);
+                }
+                backslash_count = 0;
+                was_in_quotes = false;
+            }
+            _ => {
+                cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
+                backslash_count = 0;
+                was_in_quotes = false;
+                cur.push(c);
+            }
+        }
+    }
+    cur.extend(iter::repeat(b'\\' as u16).take(backslash_count));
+    // include empty quoted strings at the end of the arguments list
+    if !cur.is_empty() || was_in_quotes || in_quotes {
+        ret_val.push(OsString::from_wide(&cur[..]));
+    }
+    ret_val
+}
+
 pub struct Args {
-    range: Range,
-    cur: *mut *mut u16,
+    parsed_args_list: vec::IntoIter,
 }
 
 pub struct ArgsInnerDebug<'a> {
@@ -46,19 +174,7 @@ pub struct ArgsInnerDebug<'a> {
 
 impl<'a> fmt::Debug for ArgsInnerDebug<'a> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.write_str("[")?;
-        let mut first = true;
-        for i in self.args.range.clone() {
-            if !first {
-                f.write_str(", ")?;
-            }
-            first = false;
-
-            // Here we do allocation which could be avoided.
-            fmt::Debug::fmt(&unsafe { os_string_from_ptr(*self.args.cur.offset(i)) }, f)?;
-        }
-        f.write_str("]")?;
-        Ok(())
+        self.args.parsed_args_list.as_slice().fmt(f)
     }
 }
 
@@ -70,38 +186,82 @@ impl Args {
     }
 }
 
-unsafe fn os_string_from_ptr(ptr: *mut u16) -> OsString {
-    let mut len = 0;
-    while *ptr.offset(len) != 0 { len += 1; }
-
-    // Push it onto the list.
-    let ptr = ptr as *const u16;
-    let buf = slice::from_raw_parts(ptr, len as usize);
-    OsStringExt::from_wide(buf)
-}
-
 impl Iterator for Args {
     type Item = OsString;
-    fn next(&mut self) -> Option {
-        self.range.next().map(|i| unsafe { os_string_from_ptr(*self.cur.offset(i)) } )
-    }
-    fn size_hint(&self) -> (usize, Option) { self.range.size_hint() }
+    fn next(&mut self) -> Option { self.parsed_args_list.next() }
+    fn size_hint(&self) -> (usize, Option) { self.parsed_args_list.size_hint() }
 }
 
 impl DoubleEndedIterator for Args {
-    fn next_back(&mut self) -> Option {
-        self.range.next_back().map(|i| unsafe { os_string_from_ptr(*self.cur.offset(i)) } )
-    }
+    fn next_back(&mut self) -> Option { self.parsed_args_list.next_back() }
 }
 
 impl ExactSizeIterator for Args {
-    fn len(&self) -> usize { self.range.len() }
+    fn len(&self) -> usize { self.parsed_args_list.len() }
 }
 
-impl Drop for Args {
-    fn drop(&mut self) {
-        // self.cur can be null if CommandLineToArgvW previously failed,
-        // but LocalFree ignores NULL pointers
-        unsafe { c::LocalFree(self.cur as *mut c_void); }
+#[cfg(test)]
+mod tests {
+    use sys::windows::args::*;
+    use ffi::OsString;
+
+    fn chk(string: &str, parts: &[&str]) {
+        let mut wide: Vec = OsString::from(string).encode_wide().collect();
+        wide.push(0);
+        let parsed = unsafe {
+            parse_lp_cmd_line(wide.as_ptr() as *const u16, || OsString::from("TEST.EXE"))
+        };
+        let expected: Vec = parts.iter().map(|k| OsString::from(k)).collect();
+        assert_eq!(parsed.as_slice(), expected.as_slice());
+    }
+
+    #[test]
+    fn empty() {
+        chk("", &["TEST.EXE"]);
+        chk("\0", &["TEST.EXE"]);
+    }
+
+    #[test]
+    fn single_words() {
+        chk("EXE one_word", &["EXE", "one_word"]);
+        chk("EXE a", &["EXE", "a"]);
+        chk("EXE 😅", &["EXE", "😅"]);
+        chk("EXE 😅🤦", &["EXE", "😅🤦"]);
+    }
+
+    #[test]
+    fn official_examples() {
+        chk(r#"EXE "abc" d e"#, &["EXE", "abc", "d", "e"]);
+        chk(r#"EXE a\\\b d"e f"g h"#, &["EXE", r#"a\\\b"#, "de fg", "h"]);
+        chk(r#"EXE a\\\"b c d"#, &["EXE", r#"a\"b"#, "c", "d"]);
+        chk(r#"EXE a\\\\"b c" d e"#, &["EXE", r#"a\\b c"#, "d", "e"]);
+    }
+
+    #[test]
+    fn whitespace_behavior() {
+        chk(r#" test"#, &["", "test"]);
+        chk(r#" test"#, &["", "test"]);
+        chk(r#" test test2"#, &["", "test", "test2"]);
+        chk(r#" test test2"#, &["", "test", "test2"]);
+        chk(r#"test test2 "#, &["test", "test2"]);
+        chk(r#"test test2 "#, &["test", "test2"]);
+        chk(r#"test "#, &["test"]);
+    }
+
+    #[test]
+    fn genius_quotes() {
+        chk(r#"EXE "" """#, &["EXE", "", ""]);
+        chk(r#"EXE "" """"#, &["EXE", "", "\""]);
+        chk(
+            r#"EXE "this is """all""" in the same argument""#,
+            &["EXE", "this is \"all\" in the same argument"]
+        );
+        chk(r#"EXE "a"""#, &["EXE", "a\""]);
+        chk(r#"EXE "a"" a"#, &["EXE", "a\"", "a"]);
+        // quotes cannot be escaped in command names
+        chk(r#""EXE" check"#, &["EXE", "check"]);
+        chk(r#""EXE check""#, &["EXE check"]);
+        chk(r#""EXE """for""" check"#, &["EXE ", r#"for""#, "check"]);
+        chk(r#""EXE \"for\" check"#, &[r#"EXE \"#, r#"for""#, "check"]);
     }
 }
diff --git a/src/libstd/sys/windows/c.rs b/src/libstd/sys/windows/c.rs
index c84874a3e880..fa21f459a8a8 100644
--- a/src/libstd/sys/windows/c.rs
+++ b/src/libstd/sys/windows/c.rs
@@ -1035,9 +1035,6 @@ extern "system" {
 
     pub fn SetLastError(dwErrCode: DWORD);
     pub fn GetCommandLineW() -> *mut LPCWSTR;
-    pub fn LocalFree(ptr: *mut c_void);
-    pub fn CommandLineToArgvW(lpCmdLine: *mut LPCWSTR,
-                              pNumArgs: *mut c_int) -> *mut *mut u16;
     pub fn GetTempPathW(nBufferLength: DWORD,
                         lpBuffer: LPCWSTR) -> DWORD;
     pub fn OpenProcessToken(ProcessHandle: HANDLE,
diff --git a/src/test/run-make-fulldeps/tools.mk b/src/test/run-make-fulldeps/tools.mk
index 3de358fa5000..79399281804f 100644
--- a/src/test/run-make-fulldeps/tools.mk
+++ b/src/test/run-make-fulldeps/tools.mk
@@ -76,7 +76,7 @@ endif
 # Extra flags needed to compile a working executable with the standard library
 ifdef IS_WINDOWS
 ifdef IS_MSVC
-	EXTRACFLAGS := ws2_32.lib userenv.lib shell32.lib advapi32.lib
+	EXTRACFLAGS := ws2_32.lib userenv.lib advapi32.lib
 else
 	EXTRACFLAGS := -lws2_32 -luserenv
 endif