Skip to content

Commit

Permalink
Auto merge of rust-lang#2725 - RalfJung:host-to-target-path, r=RalfJung
Browse files Browse the repository at this point in the history
make unix path handling on Windows hosts (and vice versa) preserve absoluteness

Also adds a magic Miri extern function so this conversion can be invoked by the program. That avoids having to duplicate that logic.
  • Loading branch information
bors committed Dec 12, 2022
2 parents 8da3bce + 03f1ce4 commit 1fca5a9
Show file tree
Hide file tree
Showing 11 changed files with 228 additions and 71 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,21 @@ extern "Rust" {

/// Miri-provided extern function to deallocate memory.
fn miri_dealloc(ptr: *mut u8, size: usize, align: usize);

/// Convert a path from the host Miri runs on to the target Miri interprets.
/// Performs conversion of path separators as needed.
///
/// Usually Miri performs this kind of conversion automatically. However, manual conversion
/// might be necessary when reading an environment variable that was set on the host
/// (such as TMPDIR) and using it as a target path.
///
/// Only works with isolation disabled.
///
/// `in` must point to a null-terminated string, and will be read as the input host path.
/// `out` must point to at least `out_size` many bytes, and the result will be stored there
/// with a null terminator.
/// Returns 0 if the `out` buffer was large enough, and the required size otherwise.
fn miri_host_to_target_path(path: *const i8, out: *mut i8, out_size: usize) -> usize;
}
```

Expand Down
17 changes: 16 additions & 1 deletion src/shims/foreign_items.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::hash_map::Entry, io::Write, iter};
use std::{collections::hash_map::Entry, io::Write, iter, path::Path};

use log::trace;

Expand Down Expand Up @@ -442,6 +442,21 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
}
this.machine.static_roots.push(alloc_id);
}
"miri_host_to_target_path" => {
let [ptr, out, out_size] = this.check_shim(abi, Abi::Rust, link_name, args)?;
let ptr = this.read_pointer(ptr)?;
let out = this.read_pointer(out)?;
let out_size = this.read_scalar(out_size)?.to_machine_usize(this)?;

// The host affects program behavior here, so this requires isolation to be disabled.
this.check_no_isolation("`miri_host_to_target_path`")?;

// We read this as a plain OsStr and write it as a path, which will convert it to the target.
let path = this.read_os_str_from_c_str(ptr)?.to_owned();
let (success, needed_size) = this.write_path_to_c_str(Path::new(&path), out, out_size)?;
// Return value: 0 on success, otherwise the size it would have needed.
this.write_int(if success { 0 } else { needed_size }, dest)?;
}

// Obtains the size of a Miri backtrace. See the README for details.
"miri_backtrace_size" => {
Expand Down
80 changes: 63 additions & 17 deletions src/shims/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
let this = self.eval_context_ref();
let os_str = this.read_os_str_from_c_str(ptr)?;

Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
Ok(match this.convert_path(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
Cow::Owned(y) => Cow::Owned(PathBuf::from(y)),
})
Expand All @@ -188,10 +188,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
let this = self.eval_context_ref();
let os_str = this.read_os_str_from_wide_str(ptr)?;

Ok(this
.convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost)
.into_owned()
.into())
Ok(this.convert_path(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into())
}

/// Write a Path to the machine memory (as a null-terminated sequence of bytes),
Expand All @@ -203,8 +200,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
size: u64,
) -> InterpResult<'tcx, (bool, u64)> {
let this = self.eval_context_mut();
let os_str = this
.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
let os_str =
this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
this.write_os_str_to_c_str(&os_str, ptr, size)
}

Expand All @@ -217,8 +214,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
size: u64,
) -> InterpResult<'tcx, (bool, u64)> {
let this = self.eval_context_mut();
let os_str = this
.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
let os_str =
this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
this.write_os_str_to_wide_str(&os_str, ptr, size)
}

Expand All @@ -230,18 +227,20 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
memkind: MemoryKind<MiriMemoryKind>,
) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
let this = self.eval_context_mut();
let os_str = this
.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
let os_str =
this.convert_path(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
this.alloc_os_str_as_c_str(&os_str, memkind)
}

fn convert_path_separator<'a>(
#[allow(clippy::get_first)]
fn convert_path<'a>(
&self,
os_str: Cow<'a, OsStr>,
direction: PathConversion,
) -> Cow<'a, OsStr> {
let this = self.eval_context_ref();
let target_os = &this.tcx.sess.target.os;

#[cfg(windows)]
return if target_os == "windows" {
// Windows-on-Windows, all fine.
Expand All @@ -252,24 +251,71 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
PathConversion::HostToTarget => ('\\', '/'),
PathConversion::TargetToHost => ('/', '\\'),
};
let converted = os_str
let mut converted = os_str
.encode_wide()
.map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
.collect::<Vec<_>>();
// We also have to ensure that absolute paths remain absolute.
match direction {
PathConversion::HostToTarget => {
// If this is an absolute Windows path that starts with a drive letter (`C:/...`
// after separator conversion), it would not be considered absolute by Unix
// target code.
if converted.get(1).copied() == Some(b':' as u16)
&& converted.get(2).copied() == Some(b'/' as u16)
{
// We add a `/` at the beginning, to store the absolute Windows
// path in something that looks like an absolute Unix path.
converted.insert(0, b'/' as u16);
}
}
PathConversion::TargetToHost => {
// If the path is `\C:\`, the leading backslash was probably added by the above code
// and we should get rid of it again.
if converted.get(0).copied() == Some(b'\\' as u16)
&& converted.get(2).copied() == Some(b':' as u16)
&& converted.get(3).copied() == Some(b'\\' as u16)
{
converted.remove(0);
}
}
}
Cow::Owned(OsString::from_wide(&converted))
};
#[cfg(unix)]
return if target_os == "windows" {
// Windows target, Unix host.
let (from, to) = match direction {
PathConversion::HostToTarget => ('/', '\\'),
PathConversion::TargetToHost => ('\\', '/'),
PathConversion::HostToTarget => (b'/', b'\\'),
PathConversion::TargetToHost => (b'\\', b'/'),
};
let converted = os_str
let mut converted = os_str
.as_bytes()
.iter()
.map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
.map(|&wchar| if wchar == from { to } else { wchar })
.collect::<Vec<_>>();
// We also have to ensure that absolute paths remain absolute.
match direction {
PathConversion::HostToTarget => {
// If this start withs a `\`, we add `\\?` so it starts with `\\?\` which is
// some magic path on Windos that *is* considered absolute.
if converted.get(0).copied() == Some(b'\\') {
converted.splice(0..0, b"\\\\?".iter().copied());
}
}
PathConversion::TargetToHost => {
// If this starts with `//?/`, it was probably produced by the above code and we
// remove the `//?` that got added to get the Unix path back out.
if converted.get(0).copied() == Some(b'/')
&& converted.get(1).copied() == Some(b'/')
&& converted.get(2).copied() == Some(b'?')
&& converted.get(3).copied() == Some(b'/')
{
// Remove first 3 characters
converted.splice(0..3, std::iter::empty());
}
}
}
Cow::Owned(OsString::from_vec(converted))
} else {
// Unix-on-Unix, all is fine.
Expand Down
2 changes: 1 addition & 1 deletion src/shims/unix/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1667,7 +1667,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// 'readlink' truncates the resolved path if the provided buffer is not large
// enough, and does *not* add a null terminator. That means we cannot use the usual
// `write_path_to_c_str` and have to re-implement parts of it ourselves.
let resolved = this.convert_path_separator(
let resolved = this.convert_path(
Cow::Borrowed(resolved.as_ref()),
crate::shims::os_str::PathConversion::HostToTarget,
);
Expand Down
27 changes: 23 additions & 4 deletions test-cargo-miri/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use byteorder::{BigEndian, ByteOrder};
use std::env;
#[cfg(unix)]
use std::io::{self, BufRead};
use std::path::PathBuf;

fn main() {
// Check env var set by `build.rs`.
Expand All @@ -21,12 +22,30 @@ fn main() {
// If there were no arguments, access stdin and test working dir.
// (We rely on the test runner to always disable isolation when passing no arguments.)
if std::env::args().len() <= 1 {
fn host_to_target_path(path: String) -> PathBuf {
use std::ffi::{CStr, CString};

let path = CString::new(path).unwrap();
let mut out = Vec::with_capacity(1024);

unsafe {
extern "Rust" {
fn miri_host_to_target_path(
path: *const i8,
out: *mut i8,
out_size: usize,
) -> usize;
}
let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity());
assert_eq!(ret, 0);
let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap();
PathBuf::from(out)
}
}

// CWD should be crate root.
// We have to normalize slashes, as the env var might be set for a different target's conventions.
let env_dir = env::current_dir().unwrap();
let env_dir = env_dir.to_string_lossy().replace("\\", "/");
let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap();
let crate_dir = crate_dir.to_string_lossy().replace("\\", "/");
let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap());
assert_eq!(env_dir, crate_dir);

#[cfg(unix)]
Expand Down
29 changes: 23 additions & 6 deletions test-cargo-miri/subcrate/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,30 @@ use std::path::PathBuf;
fn main() {
println!("subcrate running");

fn host_to_target_path(path: String) -> PathBuf {
use std::ffi::{CStr, CString};

let path = CString::new(path).unwrap();
let mut out = Vec::with_capacity(1024);

unsafe {
extern "Rust" {
fn miri_host_to_target_path(
path: *const i8,
out: *mut i8,
out_size: usize,
) -> usize;
}
let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity());
assert_eq!(ret, 0);
let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap();
PathBuf::from(out)
}
}

// CWD should be workspace root, i.e., one level up from crate root.
// We have to normalize slashes, as the env var might be set for a different target's conventions.
let env_dir = env::current_dir().unwrap();
let env_dir = env_dir.to_string_lossy().replace("\\", "/");
let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap();
let crate_dir = crate_dir.to_string_lossy().replace("\\", "/");
let crate_dir = PathBuf::from(crate_dir);
let crate_dir = crate_dir.parent().unwrap().to_string_lossy();
let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap());
let crate_dir = crate_dir.parent().unwrap();
assert_eq!(env_dir, crate_dir);
}
27 changes: 24 additions & 3 deletions test-cargo-miri/subcrate/test.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,37 @@
use std::env;

use std::path::PathBuf;

use byteorder::{ByteOrder, LittleEndian};

fn main() {
println!("subcrate testing");

fn host_to_target_path(path: String) -> PathBuf {
use std::ffi::{CStr, CString};

let path = CString::new(path).unwrap();
let mut out = Vec::with_capacity(1024);

unsafe {
extern "Rust" {
fn miri_host_to_target_path(
path: *const i8,
out: *mut i8,
out_size: usize,
) -> usize;
}
let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity());
assert_eq!(ret, 0);
let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap();
PathBuf::from(out)
}
}

// CWD should be crate root.
// We have to normalize slashes, as the env var might be set for a different target's conventions.
let env_dir = env::current_dir().unwrap();
let env_dir = env_dir.to_string_lossy().replace("\\", "/");
let crate_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap();
let crate_dir = crate_dir.to_string_lossy().replace("\\", "/");
let crate_dir = host_to_target_path(env::var("CARGO_MANIFEST_DIR").unwrap());
assert_eq!(env_dir, crate_dir);

// Make sure we can call dev-dependencies.
Expand Down
31 changes: 16 additions & 15 deletions tests/pass-dep/shims/libc-fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#![feature(io_error_uncategorized)]

use std::convert::TryInto;
use std::ffi::CString;
use std::ffi::{CStr, CString};
use std::fs::{canonicalize, remove_dir_all, remove_file, File};
use std::io::{Error, ErrorKind, Write};
use std::os::unix::ffi::OsStrExt;
Expand All @@ -23,20 +23,21 @@ fn main() {
}

fn tmp() -> PathBuf {
std::env::var("MIRI_TEMP")
.map(|tmp| {
// MIRI_TEMP is set outside of our emulated
// program, so it may have path separators that don't
// correspond to our target platform. We normalize them here
// before constructing a `PathBuf`

#[cfg(windows)]
return PathBuf::from(tmp.replace("/", "\\"));

#[cfg(not(windows))]
return PathBuf::from(tmp.replace("\\", "/"));
})
.unwrap_or_else(|_| std::env::temp_dir())
let path = std::env::var("MIRI_TEMP")
.unwrap_or_else(|_| std::env::temp_dir().into_os_string().into_string().unwrap());
// These are host paths. We need to convert them to the target.
let path = CString::new(path).unwrap();
let mut out = Vec::with_capacity(1024);

unsafe {
extern "Rust" {
fn miri_host_to_target_path(path: *const i8, out: *mut i8, out_size: usize) -> usize;
}
let ret = miri_host_to_target_path(path.as_ptr(), out.as_mut_ptr(), out.capacity());
assert_eq!(ret, 0);
let out = CStr::from_ptr(out.as_ptr()).to_str().unwrap();
PathBuf::from(out)
}
}

/// Prepare: compute filename and make sure the file does not exist.
Expand Down
Loading

0 comments on commit 1fca5a9

Please sign in to comment.