From 9db5511ab26f35b4cc4fce1579e122b33a6e5644 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 11 Dec 2022 19:49:29 +0100 Subject: [PATCH] make unix path handling on Windows hosts preserve absoluteness --- README.md | 2 +- src/shims/os_str.rs | 51 ++++++++++++++++++++++++-------- src/shims/unix/fs.rs | 2 +- test-cargo-miri/src/main.rs | 27 ++++++++++++++--- test-cargo-miri/subcrate/main.rs | 29 ++++++++++++++---- test-cargo-miri/subcrate/test.rs | 27 +++++++++++++++-- tests/pass/shims/fs.rs | 14 +++++---- 7 files changed, 119 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 8cc7e84695..9a4dea949d 100644 --- a/README.md +++ b/README.md @@ -581,7 +581,7 @@ extern "Rust" { /// Performs conversion of path separators as needed. /// /// Usually Miri performs this kind of conversion automatically. However, manual conversion - /// might be necessary when reading an environment variable that was set of the host + /// might be necessary when reading an environment variable that was set on the host /// (such as TMPDIR) and using it as a target path. /// /// Only works with isolation disabled. diff --git a/src/shims/os_str.rs b/src/shims/os_str.rs index bc7ca82997..e42ebc187b 100644 --- a/src/shims/os_str.rs +++ b/src/shims/os_str.rs @@ -174,7 +174,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let this = self.eval_context_ref(); let os_str = this.read_os_str_from_c_str(ptr)?; - Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) { + Ok(match this.convert_path(Cow::Borrowed(os_str), PathConversion::TargetToHost) { Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)), Cow::Owned(y) => Cow::Owned(PathBuf::from(y)), }) @@ -188,10 +188,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let this = self.eval_context_ref(); let os_str = this.read_os_str_from_wide_str(ptr)?; - Ok(this - .convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost) - .into_owned() - .into()) + Ok(this.convert_path(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into()) } /// Write a Path to the machine memory (as a null-terminated sequence of bytes), @@ -203,8 +200,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { size: u64, ) -> InterpResult<'tcx, (bool, u64)> { let this = self.eval_context_mut(); - let os_str = this - .convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); + let os_str = + this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); this.write_os_str_to_c_str(&os_str, ptr, size) } @@ -217,8 +214,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { size: u64, ) -> InterpResult<'tcx, (bool, u64)> { let this = self.eval_context_mut(); - let os_str = this - .convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); + let os_str = + this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); this.write_os_str_to_wide_str(&os_str, ptr, size) } @@ -230,18 +227,19 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { memkind: MemoryKind, ) -> InterpResult<'tcx, Pointer>> { let this = self.eval_context_mut(); - let os_str = this - .convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); + let os_str = + this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); this.alloc_os_str_as_c_str(&os_str, memkind) } - fn convert_path_separator<'a>( + fn convert_path<'a>( &self, os_str: Cow<'a, OsStr>, direction: PathConversion, ) -> Cow<'a, OsStr> { let this = self.eval_context_ref(); let target_os = &this.tcx.sess.target.os; + #[cfg(windows)] return if target_os == "windows" { // Windows-on-Windows, all fine. @@ -252,10 +250,35 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { PathConversion::HostToTarget => ('\\', '/'), PathConversion::TargetToHost => ('/', '\\'), }; - let converted = os_str + let mut converted = os_str .encode_wide() .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar }) .collect::>(); + // We also have to ensure that absolute paths remain absolute. + match direction { + PathConversion::HostToTarget => { + // If this is an absolute Windows path that starts with a drive letter (`C:/...` + // after separator conversion), it would not be considered absolute by Unix + // target code. + if converted.get(1).copied() == Some(':' as u16) + && converted.get(2).copied() == Some('/' as u16) + { + // We add a `/` at the beginning, to store the absolute Windows + // path in something that looks like an absolute Unix path. + converted.insert(0, '/' as u16); + } + } + PathConversion::TargetToHost => { + // If the path is `\C:\`, the leading backslash was probably added by the above code + // and we should get rid of it again. + if converted.get(0).copied() == Some('\\' as u16) + && converted.get(2).copied() == Some(':' as u16) + && converted.get(3).copied() == Some('\\' as u16) + { + converted.remove(0); + } + } + } Cow::Owned(OsString::from_wide(&converted)) }; #[cfg(unix)] @@ -270,6 +293,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { .iter() .map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar }) .collect::>(); + // TODO: Once we actually support file system things on Windows targets, we'll probably + // have to also do something clever for absolute path preservation here, like above. Cow::Owned(OsString::from_vec(converted)) } else { // Unix-on-Unix, all is fine. diff --git a/src/shims/unix/fs.rs b/src/shims/unix/fs.rs index bf99412af6..8b869a6525 100644 --- a/src/shims/unix/fs.rs +++ b/src/shims/unix/fs.rs @@ -1667,7 +1667,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { // 'readlink' truncates the resolved path if the provided buffer is not large // enough, and does *not* add a null terminator. That means we cannot use the usual // `write_path_to_c_str` and have to re-implement parts of it ourselves. - let resolved = this.convert_path_separator( + let resolved = this.convert_path( Cow::Borrowed(resolved.as_ref()), crate::shims::os_str::PathConversion::HostToTarget, ); diff --git a/test-cargo-miri/src/main.rs b/test-cargo-miri/src/main.rs index 41c52b7017..048dbbbaa0 100644 --- a/test-cargo-miri/src/main.rs +++ b/test-cargo-miri/src/main.rs @@ -2,6 +2,7 @@ use byteorder::{BigEndian, ByteOrder}; use std::env; #[cfg(unix)] use std::io::{self, BufRead}; +use std::path::PathBuf; fn main() { // Check env var set by `build.rs`. @@ -21,12 +22,30 @@ fn main() { // If there were no arguments, access stdin and test working dir. // (We rely on the test runner to always disable isolation when passing no arguments.) if std::env::args().len() <= 1 { + fn host_to_target_path(path: String) -> PathBuf { + use std::ffi::{CStr, CString}; + + let path = CString::new(path).unwrap(); + let mut out = Vec::with_capacity(1024); + + unsafe { + extern "Rust" { + fn miri_host_to_target_path( + path: *const i8, + out: *mut i8, + out_size: usize, + ) -> usize; + } + let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity()); + assert_eq!(ret, 0); + let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap(); + PathBuf::from(out) + } + } + // CWD should be crate root. - // We have to normalize slashes, as the env var might be set for a different target's conventions. let env_dir = env::current_dir().unwrap(); - let env_dir = env_dir.to_string_lossy().replace("\\", "/"); - let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap(); - let crate_dir = crate_dir.to_string_lossy().replace("\\", "/"); + let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap()); assert_eq!(env_dir, crate_dir); #[cfg(unix)] diff --git a/test-cargo-miri/subcrate/main.rs b/test-cargo-miri/subcrate/main.rs index 4ce80b3707..1cb8091f87 100644 --- a/test-cargo-miri/subcrate/main.rs +++ b/test-cargo-miri/subcrate/main.rs @@ -4,13 +4,30 @@ use std::path::PathBuf; fn main() { println!("subcrate running"); + fn host_to_target_path(path: String) -> PathBuf { + use std::ffi::{CStr, CString}; + + let path = CString::new(path).unwrap(); + let mut out = Vec::with_capacity(1024); + + unsafe { + extern "Rust" { + fn miri_host_to_target_path( + path: *const i8, + out: *mut i8, + out_size: usize, + ) -> usize; + } + let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity()); + assert_eq!(ret, 0); + let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap(); + PathBuf::from(out) + } + } + // CWD should be workspace root, i.e., one level up from crate root. - // We have to normalize slashes, as the env var might be set for a different target's conventions. let env_dir = env::current_dir().unwrap(); - let env_dir = env_dir.to_string_lossy().replace("\\", "/"); - let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap(); - let crate_dir = crate_dir.to_string_lossy().replace("\\", "/"); - let crate_dir = PathBuf::from(crate_dir); - let crate_dir = crate_dir.parent().unwrap().to_string_lossy(); + let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap()); + let crate_dir = crate_dir.parent().unwrap(); assert_eq!(env_dir, crate_dir); } diff --git a/test-cargo-miri/subcrate/test.rs b/test-cargo-miri/subcrate/test.rs index 77e3c2878c..619d8c72fd 100644 --- a/test-cargo-miri/subcrate/test.rs +++ b/test-cargo-miri/subcrate/test.rs @@ -1,16 +1,37 @@ use std::env; +use std::path::PathBuf; + use byteorder::{ByteOrder, LittleEndian}; fn main() { println!("subcrate testing"); + fn host_to_target_path(path: String) -> PathBuf { + use std::ffi::{CStr, CString}; + + let path = CString::new(path).unwrap(); + let mut out = Vec::with_capacity(1024); + + unsafe { + extern "Rust" { + fn miri_host_to_target_path( + path: *const i8, + out: *mut i8, + out_size: usize, + ) -> usize; + } + let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity()); + assert_eq!(ret, 0); + let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap(); + PathBuf::from(out) + } + } + // CWD should be crate root. // We have to normalize slashes, as the env var might be set for a different target's conventions. let env_dir = env::current_dir().unwrap(); - let env_dir = env_dir.to_string_lossy().replace("\\", "/"); - let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap(); - let crate_dir = crate_dir.to_string_lossy().replace("\\", "/"); + let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap()); assert_eq!(env_dir, crate_dir); // Make sure we can call dev-dependencies. diff --git a/tests/pass/shims/fs.rs b/tests/pass/shims/fs.rs index e3ebbc8c8d..a7d4800fae 100644 --- a/tests/pass/shims/fs.rs +++ b/tests/pass/shims/fs.rs @@ -15,6 +15,7 @@ use std::io::{Error, ErrorKind, IsTerminal, Read, Result, Seek, SeekFrom, Write} use std::path::{Path, PathBuf}; fn main() { + test_path_conversion(); test_file(); test_file_clone(); test_file_create_new(); @@ -28,15 +29,11 @@ fn main() { test_directory(); test_canonicalize(); test_from_raw_os_error(); - test_path_conversion(); } -fn tmp() -> PathBuf { +fn host_to_target_path(path: String) -> PathBuf { use std::ffi::{CStr, CString}; - let path = std::env::var("MIRI_TEMP") - .unwrap_or_else(|_| std::env::temp_dir().into_os_string().into_string().unwrap()); - // These are host paths. We need to convert them to the target. let path = CString::new(path).unwrap(); let mut out = Vec::with_capacity(1024); @@ -51,6 +48,13 @@ fn tmp() -> PathBuf { } } +fn tmp() -> PathBuf { + let path = std::env::var("MIRI_TEMP") + .unwrap_or_else(|_| std::env::temp_dir().into_os_string().into_string().unwrap()); + // These are host paths. We need to convert them to the target. + host_to_target_path(path) +} + /// Prepare: compute filename and make sure the file does not exist. fn prepare(filename: &str) -> PathBuf { let path = tmp().join(filename);