Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make unix path handling on Windows hosts (and vice versa) preserve absoluteness #2725

Merged
merged 4 commits into from
Dec 12, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -576,6 +576,21 @@ extern "Rust" {

/// Miri-provided extern function to deallocate memory.
fn miri_dealloc(ptr: *mut u8, size: usize, align: usize);

/// Convert a path from the host Miri runs on to the target Miri interprets.
/// Performs conversion of path separators as needed.
///
/// Usually Miri performs this kind of conversion automatically. However, manual conversion
/// might be necessary when reading an environment variable that was set on the host
/// (such as TMPDIR) and using it as a target path.
///
/// Only works with isolation disabled.
///
/// `in` must point to a null-terminated string, and will be read as the input host path.
/// `out` must point to at least `out_size` many bytes, and the result will be stored there
/// with a null terminator.
/// Returns 0 if the `out` buffer was large enough, and the required size otherwise.
fn miri_host_to_target_path(path: *const i8, out: *mut i8, out_size: usize) -> usize;
}
```

17 changes: 16 additions & 1 deletion src/shims/foreign_items.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::hash_map::Entry, io::Write, iter};
use std::{collections::hash_map::Entry, io::Write, iter, path::Path};

use log::trace;

@@ -442,6 +442,21 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
}
this.machine.static_roots.push(alloc_id);
}
"miri_host_to_target_path" => {
let [ptr, out, out_size] = this.check_shim(abi, Abi::Rust, link_name, args)?;
let ptr = this.read_pointer(ptr)?;
let out = this.read_pointer(out)?;
let out_size = this.read_scalar(out_size)?.to_machine_usize(this)?;

// The host affects program behavior here, so this requires isolation to be disabled.
this.check_no_isolation("`miri_host_to_target_path`")?;

// We read this as a plain OsStr and write it as a path, which will convert it to the target.
let path = this.read_os_str_from_c_str(ptr)?.to_owned();
let (success, needed_size) = this.write_path_to_c_str(Path::new(&path), out, out_size)?;
// Return value: 0 on success, otherwise the size it would have needed.
this.write_int(if success { 0 } else { needed_size }, dest)?;
}

// Obtains the size of a Miri backtrace. See the README for details.
"miri_backtrace_size" => {
80 changes: 63 additions & 17 deletions src/shims/os_str.rs
Original file line number Diff line number Diff line change
@@ -174,7 +174,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
let this = self.eval_context_ref();
let os_str = this.read_os_str_from_c_str(ptr)?;

Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
Ok(match this.convert_path(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
Cow::Owned(y) => Cow::Owned(PathBuf::from(y)),
})
@@ -188,10 +188,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
let this = self.eval_context_ref();
let os_str = this.read_os_str_from_wide_str(ptr)?;

Ok(this
.convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost)
.into_owned()
.into())
Ok(this.convert_path(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into())
}

/// Write a Path to the machine memory (as a null-terminated sequence of bytes),
@@ -203,8 +200,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
size: u64,
) -> InterpResult<'tcx, (bool, u64)> {
let this = self.eval_context_mut();
let os_str = this
.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
let os_str =
this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
this.write_os_str_to_c_str(&os_str, ptr, size)
}

@@ -217,8 +214,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
size: u64,
) -> InterpResult<'tcx, (bool, u64)> {
let this = self.eval_context_mut();
let os_str = this
.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
let os_str =
this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
this.write_os_str_to_wide_str(&os_str, ptr, size)
}

@@ -230,18 +227,20 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
memkind: MemoryKind<MiriMemoryKind>,
) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
let this = self.eval_context_mut();
let os_str = this
.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
let os_str =
this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
this.alloc_os_str_as_c_str(&os_str, memkind)
}

fn convert_path_separator<'a>(
#[allow(clippy::get_first)]
fn convert_path<'a>(
&self,
os_str: Cow<'a, OsStr>,
direction: PathConversion,
) -> Cow<'a, OsStr> {
let this = self.eval_context_ref();
let target_os = &this.tcx.sess.target.os;

#[cfg(windows)]
return if target_os == "windows" {
// Windows-on-Windows, all fine.
@@ -252,24 +251,71 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
PathConversion::HostToTarget => ('\\', '/'),
PathConversion::TargetToHost => ('/', '\\'),
};
let converted = os_str
let mut converted = os_str
.encode_wide()
.map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
.collect::<Vec<_>>();
// We also have to ensure that absolute paths remain absolute.
match direction {
PathConversion::HostToTarget => {
// If this is an absolute Windows path that starts with a drive letter (`C:/...`
// after separator conversion), it would not be considered absolute by Unix
// target code.
if converted.get(1).copied() == Some(b':' as u16)
&& converted.get(2).copied() == Some(b'/' as u16)
{
// We add a `/` at the beginning, to store the absolute Windows
// path in something that looks like an absolute Unix path.
converted.insert(0, b'/' as u16);
}
}
PathConversion::TargetToHost => {
// If the path is `\C:\`, the leading backslash was probably added by the above code
// and we should get rid of it again.
if converted.get(0).copied() == Some(b'\\' as u16)
&& converted.get(2).copied() == Some(b':' as u16)
&& converted.get(3).copied() == Some(b'\\' as u16)
{
converted.remove(0);
}
}
}
Cow::Owned(OsString::from_wide(&converted))
};
#[cfg(unix)]
return if target_os == "windows" {
// Windows target, Unix host.
let (from, to) = match direction {
PathConversion::HostToTarget => ('/', '\\'),
PathConversion::TargetToHost => ('\\', '/'),
PathConversion::HostToTarget => (b'/', b'\\'),
PathConversion::TargetToHost => (b'\\', b'/'),
};
let converted = os_str
let mut converted = os_str
.as_bytes()
.iter()
.map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
.map(|&wchar| if wchar == from { to } else { wchar })
.collect::<Vec<_>>();
// We also have to ensure that absolute paths remain absolute.
match direction {
PathConversion::HostToTarget => {
// If this start withs a `\`, we add `\\?` so it starts with `\\?\` which is
// some magic path on Windos that *is* considered absolute.
if converted.get(0).copied() == Some(b'\\') {
converted.splice(0..0, b"\\\\?".iter().copied());
}
}
PathConversion::TargetToHost => {
// If this starts with `//?/`, it was probably produced by the above code and we
// remove the `//?` that got added to get the Unix path back out.
if converted.get(0).copied() == Some(b'/')
&& converted.get(1).copied() == Some(b'/')
&& converted.get(2).copied() == Some(b'?')
&& converted.get(3).copied() == Some(b'/')
{
// Remove first 3 characters
converted.splice(0..3, std::iter::empty());
}
}
}
Cow::Owned(OsString::from_vec(converted))
} else {
// Unix-on-Unix, all is fine.
2 changes: 1 addition & 1 deletion src/shims/unix/fs.rs
Original file line number Diff line number Diff line change
@@ -1667,7 +1667,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// 'readlink' truncates the resolved path if the provided buffer is not large
// enough, and does *not* add a null terminator. That means we cannot use the usual
// `write_path_to_c_str` and have to re-implement parts of it ourselves.
let resolved = this.convert_path_separator(
let resolved = this.convert_path(
Cow::Borrowed(resolved.as_ref()),
crate::shims::os_str::PathConversion::HostToTarget,
);
27 changes: 23 additions & 4 deletions test-cargo-miri/src/main.rs
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@ use byteorder::{BigEndian, ByteOrder};
use std::env;
#[cfg(unix)]
use std::io::{self, BufRead};
use std::path::PathBuf;

fn main() {
// Check env var set by `build.rs`.
@@ -21,12 +22,30 @@ fn main() {
// If there were no arguments, access stdin and test working dir.
// (We rely on the test runner to always disable isolation when passing no arguments.)
if std::env::args().len() <= 1 {
fn host_to_target_path(path: String) -> PathBuf {
use std::ffi::{CStr, CString};

let path = CString::new(path).unwrap();
let mut out = Vec::with_capacity(1024);

unsafe {
extern "Rust" {
fn miri_host_to_target_path(
path: *const i8,
out: *mut i8,
out_size: usize,
) -> usize;
}
let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity());
assert_eq!(ret, 0);
let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap();
PathBuf::from(out)
}
}

// CWD should be crate root.
// We have to normalize slashes, as the env var might be set for a different target's conventions.
let env_dir = env::current_dir().unwrap();
let env_dir = env_dir.to_string_lossy().replace("\\", "/");
let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap();
let crate_dir = crate_dir.to_string_lossy().replace("\\", "/");
let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap());
assert_eq!(env_dir, crate_dir);

#[cfg(unix)]
29 changes: 23 additions & 6 deletions test-cargo-miri/subcrate/main.rs
Original file line number Diff line number Diff line change
@@ -4,13 +4,30 @@ use std::path::PathBuf;
fn main() {
println!("subcrate running");

fn host_to_target_path(path: String) -> PathBuf {
use std::ffi::{CStr, CString};

let path = CString::new(path).unwrap();
let mut out = Vec::with_capacity(1024);

unsafe {
extern "Rust" {
fn miri_host_to_target_path(
path: *const i8,
out: *mut i8,
out_size: usize,
) -> usize;
}
let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity());
assert_eq!(ret, 0);
let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap();
PathBuf::from(out)
}
}

// CWD should be workspace root, i.e., one level up from crate root.
// We have to normalize slashes, as the env var might be set for a different target's conventions.
let env_dir = env::current_dir().unwrap();
let env_dir = env_dir.to_string_lossy().replace("\\", "/");
let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap();
let crate_dir = crate_dir.to_string_lossy().replace("\\", "/");
let crate_dir = PathBuf::from(crate_dir);
let crate_dir = crate_dir.parent().unwrap().to_string_lossy();
let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap());
let crate_dir = crate_dir.parent().unwrap();
assert_eq!(env_dir, crate_dir);
}
27 changes: 24 additions & 3 deletions test-cargo-miri/subcrate/test.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,37 @@
use std::env;

use std::path::PathBuf;

use byteorder::{ByteOrder, LittleEndian};

fn main() {
println!("subcrate testing");

fn host_to_target_path(path: String) -> PathBuf {
use std::ffi::{CStr, CString};

let path = CString::new(path).unwrap();
let mut out = Vec::with_capacity(1024);

unsafe {
extern "Rust" {
fn miri_host_to_target_path(
path: *const i8,
out: *mut i8,
out_size: usize,
) -> usize;
}
let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity());
assert_eq!(ret, 0);
let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap();
PathBuf::from(out)
}
}

// CWD should be crate root.
// We have to normalize slashes, as the env var might be set for a different target's conventions.
let env_dir = env::current_dir().unwrap();
let env_dir = env_dir.to_string_lossy().replace("\\", "/");
let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap();
let crate_dir = crate_dir.to_string_lossy().replace("\\", "/");
let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap());
assert_eq!(env_dir, crate_dir);

// Make sure we can call dev-dependencies.
31 changes: 16 additions & 15 deletions tests/pass-dep/shims/libc-fs.rs
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
#![feature(io_error_uncategorized)]

use std::convert::TryInto;
use std::ffi::CString;
use std::ffi::{CStr, CString};
use std::fs::{canonicalize, remove_dir_all, remove_file, File};
use std::io::{Error, ErrorKind, Write};
use std::os::unix::ffi::OsStrExt;
@@ -23,20 +23,21 @@ fn main() {
}

fn tmp() -> PathBuf {
std::env::var("MIRI_TEMP")
.map(|tmp| {
// MIRI_TEMP is set outside of our emulated
// program, so it may have path separators that don't
// correspond to our target platform. We normalize them here
// before constructing a `PathBuf`

#[cfg(windows)]
return PathBuf::from(tmp.replace("/", "\\"));

#[cfg(not(windows))]
return PathBuf::from(tmp.replace("\\", "/"));
})
.unwrap_or_else(|_| std::env::temp_dir())
let path = std::env::var("MIRI_TEMP")
.unwrap_or_else(|_| std::env::temp_dir().into_os_string().into_string().unwrap());
// These are host paths. We need to convert them to the target.
let path = CString::new(path).unwrap();
let mut out = Vec::with_capacity(1024);

unsafe {
extern "Rust" {
fn miri_host_to_target_path(path: *const i8, out: *mut i8, out_size: usize) -> usize;
}
let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity());
assert_eq!(ret, 0);
let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap();
PathBuf::from(out)
}
}

/// Prepare: compute filename and make sure the file does not exist.
Loading