diff --git a/src/helpers.rs b/src/helpers.rs index 9e1fa34370..0245540deb 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -1,4 +1,5 @@ use std::mem; +use std::ffi::{OsStr, OsString}; use rustc::hir::def_id::{DefId, CRATE_DEF_INDEX}; use rustc::mir; @@ -402,4 +403,60 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx } } } + + /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what + /// the Unix APIs usually handle. + fn read_os_string_from_c_string(&mut self, scalar: Scalar) -> InterpResult<'tcx, OsString> { + let bytes = self.eval_context_mut().memory.read_c_str(scalar)?; + Ok(bytes_to_os_str(bytes)?.into()) + } + + /// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what + /// the Unix APIs usually handle. This function returns `Ok(false)` without trying to write if + /// `size` is not large enough to fit the contents of `os_string` plus a null terminator. It + /// returns `Ok(true)` if the writing process was successful. + fn write_os_str_to_c_string( + &mut self, + os_str: &OsStr, + scalar: Scalar, + size: u64 + ) -> InterpResult<'tcx, bool> { + let bytes = os_str_to_bytes(os_str)?; + // If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required null + // terminator to memory using the `ptr` pointer would cause an overflow. + if size <= bytes.len() as u64 { + return Ok(false); + } + // FIXME: We should use `Iterator::chain` instead when rust-lang/rust#65704 lands. + self.eval_context_mut().memory.write_bytes(scalar, [bytes, &[0]].concat())?; + Ok(true) + } +} + +#[cfg(target_os = "unix")] +fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { + std::os::unix::ffi::OsStringExt::into_bytes(os_str) +} + +#[cfg(target_os = "unix")] +fn bytes_to_os_str<'tcx, 'a>(bytes: &'a[u8]) -> InterpResult<'tcx, &'a OsStr> { + Ok(std::os::unix::ffi::OsStringExt::from_bytes(bytes)) +} + +// On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the +// intermediate transformation into strings. Which invalidates non-utf8 paths that are actually +// valid. +#[cfg(not(target_os = "unix"))] +fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { + os_str + .to_str() + .map(|s| s.as_bytes()) + .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into()) +} + +#[cfg(not(target_os = "unix"))] +fn bytes_to_os_str<'tcx, 'a>(bytes: &'a[u8]) -> InterpResult<'tcx, &'a OsStr> { + let s = std::str::from_utf8(bytes) + .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?; + Ok(&OsStr::new(s)) } diff --git a/src/shims/env.rs b/src/shims/env.rs index 28f1c7d2ef..2dc47d74ff 100644 --- a/src/shims/env.rs +++ b/src/shims/env.rs @@ -1,9 +1,10 @@ use std::collections::HashMap; +use std::ffi::OsString; use std::env; -use std::path::Path; use crate::stacked_borrows::Tag; use crate::*; + use rustc::ty::layout::Size; use rustc_mir::interpret::{Memory, Pointer}; @@ -127,18 +128,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx // If we cannot get the current directory, we return null match env::current_dir() { Ok(cwd) => { - // It is not clear what happens with non-utf8 paths here - let mut bytes = cwd.display().to_string().into_bytes(); - // If `size` is smaller or equal than the `bytes.len()`, writing `bytes` plus the - // required null terminator to memory using the `buf` pointer would cause an - // overflow. The desired behavior in this case is to return null. - if (bytes.len() as u64) < size { - // We add a `/0` terminator - bytes.push(0); - // This is ok because the buffer was strictly larger than `bytes`, so after - // adding the null terminator, the buffer size is larger or equal to - // `bytes.len()`, meaning that `bytes` actually fit inside tbe buffer. - this.memory.write_bytes(buf, bytes.iter().copied())?; + if this.write_os_str_to_c_string(&OsString::from(cwd), buf, size)? { return Ok(buf); } let erange = this.eval_libc("ERANGE")?; @@ -154,14 +144,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx this.check_no_isolation("chdir")?; - let path_bytes = this - .memory - .read_c_str(this.read_scalar(path_op)?.not_undef()?)?; - - let path = Path::new( - std::str::from_utf8(path_bytes) - .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", path_bytes))?, - ); + let path = this.read_os_string_from_c_string(this.read_scalar(path_op)?.not_undef()?)?; match env::set_current_dir(path) { Ok(()) => Ok(0), diff --git a/src/shims/fs.rs b/src/shims/fs.rs index 902f5f609d..08c9f3ec06 100644 --- a/src/shims/fs.rs +++ b/src/shims/fs.rs @@ -95,11 +95,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx throw_unsup_format!("unsupported flags {:#x}", flag & !mirror); } - let path_bytes = this - .memory - .read_c_str(this.read_scalar(path_op)?.not_undef()?)?; - let path = std::str::from_utf8(path_bytes) - .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", path_bytes))?; + let path: std::path::PathBuf = this.read_os_string_from_c_string(this.read_scalar(path_op)?.not_undef()?)?.into(); let fd = options.open(path).map(|file| { let mut fh = &mut this.machine.file_handler; @@ -214,11 +210,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx this.check_no_isolation("unlink")?; - let path_bytes = this - .memory - .read_c_str(this.read_scalar(path_op)?.not_undef()?)?; - let path = std::str::from_utf8(path_bytes) - .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", path_bytes))?; + let path = this.read_os_string_from_c_string(this.read_scalar(path_op)?.not_undef()?)?; let result = remove_file(path).map(|_| 0); diff --git a/tests/compile-fail/chdir_invalid_path.rs b/tests/compile-fail/chdir_invalid_path.rs deleted file mode 100644 index 22b0d723aa..0000000000 --- a/tests/compile-fail/chdir_invalid_path.rs +++ /dev/null @@ -1,11 +0,0 @@ -// compile-flags: -Zmiri-disable-isolation - -extern { - pub fn chdir(dir: *const u8) -> i32; -} - -fn main() { - let path = vec![0xc3u8, 0x28, 0]; - // test that `chdir` errors with invalid utf-8 path - unsafe { chdir(path.as_ptr()) }; //~ ERROR is not a valid utf-8 string -}