diff --git a/README.md b/README.md index 909e5bd510..9a4dea949d 100644 --- a/README.md +++ b/README.md @@ -576,6 +576,21 @@ extern "Rust" { /// Miri-provided extern function to deallocate memory. fn miri_dealloc(ptr: *mut u8, size: usize, align: usize); + + /// Convert a path from the host Miri runs on to the target Miri interprets. + /// Performs conversion of path separators as needed. + /// + /// Usually Miri performs this kind of conversion automatically. However, manual conversion + /// might be necessary when reading an environment variable that was set on the host + /// (such as TMPDIR) and using it as a target path. + /// + /// Only works with isolation disabled. + /// + /// `in` must point to a null-terminated string, and will be read as the input host path. + /// `out` must point to at least `out_size` many bytes, and the result will be stored there + /// with a null terminator. + /// Returns 0 if the `out` buffer was large enough, and the required size otherwise. + fn miri_host_to_target_path(path: *const i8, out: *mut i8, out_size: usize) -> usize; } ``` diff --git a/src/shims/foreign_items.rs b/src/shims/foreign_items.rs index 8370e02b58..abfa73db64 100644 --- a/src/shims/foreign_items.rs +++ b/src/shims/foreign_items.rs @@ -1,4 +1,4 @@ -use std::{collections::hash_map::Entry, io::Write, iter}; +use std::{collections::hash_map::Entry, io::Write, iter, path::Path}; use log::trace; @@ -442,6 +442,21 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } this.machine.static_roots.push(alloc_id); } + "miri_host_to_target_path" => { + let [ptr, out, out_size] = this.check_shim(abi, Abi::Rust, link_name, args)?; + let ptr = this.read_pointer(ptr)?; + let out = this.read_pointer(out)?; + let out_size = this.read_scalar(out_size)?.to_machine_usize(this)?; + + // The host affects program behavior here, so this requires isolation to be disabled. + this.check_no_isolation("`miri_host_to_target_path`")?; + + // We read this as a plain OsStr and write it as a path, which will convert it to the target. + let path = this.read_os_str_from_c_str(ptr)?.to_owned(); + let (success, needed_size) = this.write_path_to_c_str(Path::new(&path), out, out_size)?; + // Return value: 0 on success, otherwise the size it would have needed. + this.write_int(if success { 0 } else { needed_size }, dest)?; + } // Obtains the size of a Miri backtrace. See the README for details. "miri_backtrace_size" => { diff --git a/src/shims/os_str.rs b/src/shims/os_str.rs index bc7ca82997..0375a228a2 100644 --- a/src/shims/os_str.rs +++ b/src/shims/os_str.rs @@ -174,7 +174,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let this = self.eval_context_ref(); let os_str = this.read_os_str_from_c_str(ptr)?; - Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) { + Ok(match this.convert_path(Cow::Borrowed(os_str), PathConversion::TargetToHost) { Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)), Cow::Owned(y) => Cow::Owned(PathBuf::from(y)), }) @@ -188,10 +188,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { let this = self.eval_context_ref(); let os_str = this.read_os_str_from_wide_str(ptr)?; - Ok(this - .convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost) - .into_owned() - .into()) + Ok(this.convert_path(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into()) } /// Write a Path to the machine memory (as a null-terminated sequence of bytes), @@ -203,8 +200,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { size: u64, ) -> InterpResult<'tcx, (bool, u64)> { let this = self.eval_context_mut(); - let os_str = this - .convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); + let os_str = + this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); this.write_os_str_to_c_str(&os_str, ptr, size) } @@ -217,8 +214,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { size: u64, ) -> InterpResult<'tcx, (bool, u64)> { let this = self.eval_context_mut(); - let os_str = this - .convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); + let os_str = + this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); this.write_os_str_to_wide_str(&os_str, ptr, size) } @@ -230,18 +227,20 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { memkind: MemoryKind, ) -> InterpResult<'tcx, Pointer>> { let this = self.eval_context_mut(); - let os_str = this - .convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); + let os_str = + this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); this.alloc_os_str_as_c_str(&os_str, memkind) } - fn convert_path_separator<'a>( + #[allow(clippy::get_first)] + fn convert_path<'a>( &self, os_str: Cow<'a, OsStr>, direction: PathConversion, ) -> Cow<'a, OsStr> { let this = self.eval_context_ref(); let target_os = &this.tcx.sess.target.os; + #[cfg(windows)] return if target_os == "windows" { // Windows-on-Windows, all fine. @@ -252,24 +251,71 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { PathConversion::HostToTarget => ('\\', '/'), PathConversion::TargetToHost => ('/', '\\'), }; - let converted = os_str + let mut converted = os_str .encode_wide() .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar }) .collect::>(); + // We also have to ensure that absolute paths remain absolute. + match direction { + PathConversion::HostToTarget => { + // If this is an absolute Windows path that starts with a drive letter (`C:/...` + // after separator conversion), it would not be considered absolute by Unix + // target code. + if converted.get(1).copied() == Some(b':' as u16) + && converted.get(2).copied() == Some(b'/' as u16) + { + // We add a `/` at the beginning, to store the absolute Windows + // path in something that looks like an absolute Unix path. + converted.insert(0, b'/' as u16); + } + } + PathConversion::TargetToHost => { + // If the path is `\C:\`, the leading backslash was probably added by the above code + // and we should get rid of it again. + if converted.get(0).copied() == Some(b'\\' as u16) + && converted.get(2).copied() == Some(b':' as u16) + && converted.get(3).copied() == Some(b'\\' as u16) + { + converted.remove(0); + } + } + } Cow::Owned(OsString::from_wide(&converted)) }; #[cfg(unix)] return if target_os == "windows" { // Windows target, Unix host. let (from, to) = match direction { - PathConversion::HostToTarget => ('/', '\\'), - PathConversion::TargetToHost => ('\\', '/'), + PathConversion::HostToTarget => (b'/', b'\\'), + PathConversion::TargetToHost => (b'\\', b'/'), }; - let converted = os_str + let mut converted = os_str .as_bytes() .iter() - .map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar }) + .map(|&wchar| if wchar == from { to } else { wchar }) .collect::>(); + // We also have to ensure that absolute paths remain absolute. + match direction { + PathConversion::HostToTarget => { + // If this start withs a `\`, we add `\\?` so it starts with `\\?\` which is + // some magic path on Windos that *is* considered absolute. + if converted.get(0).copied() == Some(b'\\') { + converted.splice(0..0, b"\\\\?".iter().copied()); + } + } + PathConversion::TargetToHost => { + // If this starts with `//?/`, it was probably produced by the above code and we + // remove the `//?` that got added to get the Unix path back out. + if converted.get(0).copied() == Some(b'/') + && converted.get(1).copied() == Some(b'/') + && converted.get(2).copied() == Some(b'?') + && converted.get(3).copied() == Some(b'/') + { + // Remove first 3 characters + converted.splice(0..3, std::iter::empty()); + } + } + } Cow::Owned(OsString::from_vec(converted)) } else { // Unix-on-Unix, all is fine. diff --git a/src/shims/unix/fs.rs b/src/shims/unix/fs.rs index bf99412af6..8b869a6525 100644 --- a/src/shims/unix/fs.rs +++ b/src/shims/unix/fs.rs @@ -1667,7 +1667,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { // 'readlink' truncates the resolved path if the provided buffer is not large // enough, and does *not* add a null terminator. That means we cannot use the usual // `write_path_to_c_str` and have to re-implement parts of it ourselves. - let resolved = this.convert_path_separator( + let resolved = this.convert_path( Cow::Borrowed(resolved.as_ref()), crate::shims::os_str::PathConversion::HostToTarget, ); diff --git a/test-cargo-miri/src/main.rs b/test-cargo-miri/src/main.rs index 41c52b7017..048dbbbaa0 100644 --- a/test-cargo-miri/src/main.rs +++ b/test-cargo-miri/src/main.rs @@ -2,6 +2,7 @@ use byteorder::{BigEndian, ByteOrder}; use std::env; #[cfg(unix)] use std::io::{self, BufRead}; +use std::path::PathBuf; fn main() { // Check env var set by `build.rs`. @@ -21,12 +22,30 @@ fn main() { // If there were no arguments, access stdin and test working dir. // (We rely on the test runner to always disable isolation when passing no arguments.) if std::env::args().len() <= 1 { + fn host_to_target_path(path: String) -> PathBuf { + use std::ffi::{CStr, CString}; + + let path = CString::new(path).unwrap(); + let mut out = Vec::with_capacity(1024); + + unsafe { + extern "Rust" { + fn miri_host_to_target_path( + path: *const i8, + out: *mut i8, + out_size: usize, + ) -> usize; + } + let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity()); + assert_eq!(ret, 0); + let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap(); + PathBuf::from(out) + } + } + // CWD should be crate root. - // We have to normalize slashes, as the env var might be set for a different target's conventions. let env_dir = env::current_dir().unwrap(); - let env_dir = env_dir.to_string_lossy().replace("\\", "/"); - let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap(); - let crate_dir = crate_dir.to_string_lossy().replace("\\", "/"); + let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap()); assert_eq!(env_dir, crate_dir); #[cfg(unix)] diff --git a/test-cargo-miri/subcrate/main.rs b/test-cargo-miri/subcrate/main.rs index 4ce80b3707..1cb8091f87 100644 --- a/test-cargo-miri/subcrate/main.rs +++ b/test-cargo-miri/subcrate/main.rs @@ -4,13 +4,30 @@ use std::path::PathBuf; fn main() { println!("subcrate running"); + fn host_to_target_path(path: String) -> PathBuf { + use std::ffi::{CStr, CString}; + + let path = CString::new(path).unwrap(); + let mut out = Vec::with_capacity(1024); + + unsafe { + extern "Rust" { + fn miri_host_to_target_path( + path: *const i8, + out: *mut i8, + out_size: usize, + ) -> usize; + } + let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity()); + assert_eq!(ret, 0); + let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap(); + PathBuf::from(out) + } + } + // CWD should be workspace root, i.e., one level up from crate root. - // We have to normalize slashes, as the env var might be set for a different target's conventions. let env_dir = env::current_dir().unwrap(); - let env_dir = env_dir.to_string_lossy().replace("\\", "/"); - let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap(); - let crate_dir = crate_dir.to_string_lossy().replace("\\", "/"); - let crate_dir = PathBuf::from(crate_dir); - let crate_dir = crate_dir.parent().unwrap().to_string_lossy(); + let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap()); + let crate_dir = crate_dir.parent().unwrap(); assert_eq!(env_dir, crate_dir); } diff --git a/test-cargo-miri/subcrate/test.rs b/test-cargo-miri/subcrate/test.rs index 77e3c2878c..619d8c72fd 100644 --- a/test-cargo-miri/subcrate/test.rs +++ b/test-cargo-miri/subcrate/test.rs @@ -1,16 +1,37 @@ use std::env; +use std::path::PathBuf; + use byteorder::{ByteOrder, LittleEndian}; fn main() { println!("subcrate testing"); + fn host_to_target_path(path: String) -> PathBuf { + use std::ffi::{CStr, CString}; + + let path = CString::new(path).unwrap(); + let mut out = Vec::with_capacity(1024); + + unsafe { + extern "Rust" { + fn miri_host_to_target_path( + path: *const i8, + out: *mut i8, + out_size: usize, + ) -> usize; + } + let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity()); + assert_eq!(ret, 0); + let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap(); + PathBuf::from(out) + } + } + // CWD should be crate root. // We have to normalize slashes, as the env var might be set for a different target's conventions. let env_dir = env::current_dir().unwrap(); - let env_dir = env_dir.to_string_lossy().replace("\\", "/"); - let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap(); - let crate_dir = crate_dir.to_string_lossy().replace("\\", "/"); + let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap()); assert_eq!(env_dir, crate_dir); // Make sure we can call dev-dependencies. diff --git a/tests/pass-dep/shims/libc-fs.rs b/tests/pass-dep/shims/libc-fs.rs index 93c0fad9c1..ba5b269f65 100644 --- a/tests/pass-dep/shims/libc-fs.rs +++ b/tests/pass-dep/shims/libc-fs.rs @@ -5,7 +5,7 @@ #![feature(io_error_uncategorized)] use std::convert::TryInto; -use std::ffi::CString; +use std::ffi::{CStr, CString}; use std::fs::{canonicalize, remove_dir_all, remove_file, File}; use std::io::{Error, ErrorKind, Write}; use std::os::unix::ffi::OsStrExt; @@ -23,20 +23,21 @@ fn main() { } fn tmp() -> PathBuf { - std::env::var("MIRI_TEMP") - .map(|tmp| { - // MIRI_TEMP is set outside of our emulated - // program, so it may have path separators that don't - // correspond to our target platform. We normalize them here - // before constructing a `PathBuf` - - #[cfg(windows)] - return PathBuf::from(tmp.replace("/", "\\")); - - #[cfg(not(windows))] - return PathBuf::from(tmp.replace("\\", "/")); - }) - .unwrap_or_else(|_| std::env::temp_dir()) + let path = std::env::var("MIRI_TEMP") + .unwrap_or_else(|_| std::env::temp_dir().into_os_string().into_string().unwrap()); + // These are host paths. We need to convert them to the target. + let path = CString::new(path).unwrap(); + let mut out = Vec::with_capacity(1024); + + unsafe { + extern "Rust" { + fn miri_host_to_target_path(path: *const i8, out: *mut i8, out_size: usize) -> usize; + } + let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity()); + assert_eq!(ret, 0); + let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap(); + PathBuf::from(out) + } } /// Prepare: compute filename and make sure the file does not exist. diff --git a/tests/pass-dep/shims/libc-misc.rs b/tests/pass-dep/shims/libc-misc.rs index 2a4300fcd0..20e96a92c7 100644 --- a/tests/pass-dep/shims/libc-misc.rs +++ b/tests/pass-dep/shims/libc-misc.rs @@ -7,15 +7,23 @@ use std::os::unix::io::AsRawFd; use std::path::PathBuf; fn tmp() -> PathBuf { - std::env::var("MIRI_TEMP") - .map(|tmp| { - // MIRI_TEMP is set outside of our emulated - // program, so it may have path separators that don't - // correspond to our target platform. We normalize them here - // before constructing a `PathBuf` - return PathBuf::from(tmp.replace("\\", "/")); - }) - .unwrap_or_else(|_| std::env::temp_dir()) + use std::ffi::{CStr, CString}; + + let path = std::env::var("MIRI_TEMP") + .unwrap_or_else(|_| std::env::temp_dir().into_os_string().into_string().unwrap()); + // These are host paths. We need to convert them to the target. + let path = CString::new(path).unwrap(); + let mut out = Vec::with_capacity(1024); + + unsafe { + extern "Rust" { + fn miri_host_to_target_path(path: *const i8, out: *mut i8, out_size: usize) -> usize; + } + let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity()); + assert_eq!(ret, 0); + let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap(); + PathBuf::from(out) + } } /// Test allocating variant of `realpath`. diff --git a/tests/pass/shims/env/current_dir.rs b/tests/pass/shims/env/current_dir.rs index 069b462ab3..ca90912eab 100644 --- a/tests/pass/shims/env/current_dir.rs +++ b/tests/pass/shims/env/current_dir.rs @@ -3,8 +3,9 @@ use std::env; use std::io::ErrorKind; fn main() { - // Test that `getcwd` is available + // Test that `getcwd` is available and an absolute path let cwd = env::current_dir().unwrap(); + assert!(cwd.is_absolute(), "cwd {:?} is not absolute", cwd); // Test that changing dir to `..` actually sets the current directory to the parent of `cwd`. // The only exception here is if `cwd` is the root directory, then changing directory must // keep the current directory equal to `cwd`. diff --git a/tests/pass/shims/fs.rs b/tests/pass/shims/fs.rs index 65cf6fe66b..a7d4800fae 100644 --- a/tests/pass/shims/fs.rs +++ b/tests/pass/shims/fs.rs @@ -15,6 +15,7 @@ use std::io::{Error, ErrorKind, IsTerminal, Read, Result, Seek, SeekFrom, Write} use std::path::{Path, PathBuf}; fn main() { + test_path_conversion(); test_file(); test_file_clone(); test_file_create_new(); @@ -30,21 +31,28 @@ fn main() { test_from_raw_os_error(); } +fn host_to_target_path(path: String) -> PathBuf { + use std::ffi::{CStr, CString}; + + let path = CString::new(path).unwrap(); + let mut out = Vec::with_capacity(1024); + + unsafe { + extern "Rust" { + fn miri_host_to_target_path(path: *const i8, out: *mut i8, out_size: usize) -> usize; + } + let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity()); + assert_eq!(ret, 0); + let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap(); + PathBuf::from(out) + } +} + fn tmp() -> PathBuf { - std::env::var("MIRI_TEMP") - .map(|tmp| { - // MIRI_TEMP is set outside of our emulated - // program, so it may have path separators that don't - // correspond to our target platform. We normalize them here - // before constructing a `PathBuf` - - #[cfg(windows)] - return PathBuf::from(tmp.replace("/", "\\")); - - #[cfg(not(windows))] - return PathBuf::from(tmp.replace("\\", "/")); - }) - .unwrap_or_else(|_| std::env::temp_dir()) + let path = std::env::var("MIRI_TEMP") + .unwrap_or_else(|_| std::env::temp_dir().into_os_string().into_string().unwrap()); + // These are host paths. We need to convert them to the target. + host_to_target_path(path) } /// Prepare: compute filename and make sure the file does not exist. @@ -71,6 +79,12 @@ fn prepare_with_content(filename: &str, content: &[u8]) -> PathBuf { path } +fn test_path_conversion() { + let tmp = tmp(); + assert!(tmp.is_absolute(), "{:?} is not absolute", tmp); + assert!(tmp.is_dir(), "{:?} is not a directory", tmp); +} + fn test_file() { let bytes = b"Hello, World!\n"; let path = prepare("miri_test_fs_file.txt");