diff --git a/crates/wasi-common/src/fdentry.rs b/crates/wasi-common/src/fdentry.rs index 2bad6f79ab92..73027cda1d4b 100644 --- a/crates/wasi-common/src/fdentry.rs +++ b/crates/wasi-common/src/fdentry.rs @@ -162,6 +162,14 @@ impl FdEntry { Ok(()) } } + + /// Test whether this descriptor is considered a tty within WASI. + /// Note that since WASI itself lacks an `isatty` syscall and relies + /// on a conservative approximation, we use the same approximation here. + pub(crate) fn isatty(&self) -> bool { + self.file_type == wasi::__WASI_FILETYPE_CHARACTER_DEVICE + && (self.rights_base & (wasi::__WASI_RIGHTS_FD_SEEK | wasi::__WASI_RIGHTS_FD_TELL)) == 0 + } } /// This allows an `OsHandle` to be temporarily borrowed from a diff --git a/crates/wasi-common/src/hostcalls_impl/fs.rs b/crates/wasi-common/src/hostcalls_impl/fs.rs index 7cfd6ff427f2..d59668a9ab8a 100644 --- a/crates/wasi-common/src/hostcalls_impl/fs.rs +++ b/crates/wasi-common/src/hostcalls_impl/fs.rs @@ -4,6 +4,7 @@ use crate::ctx::WasiCtx; use crate::fdentry::{Descriptor, FdEntry}; use crate::helpers::*; use crate::memory::*; +use crate::sandboxed_tty_writer::SandboxedTTYWriter; use crate::sys::fdentry_impl::determine_type_rights; use crate::sys::hostcalls_impl::fs_helpers::path_open_rights; use crate::sys::{host_impl, hostcalls_impl}; @@ -12,6 +13,7 @@ use filetime::{set_file_handle_times, FileTime}; use log::trace; use std::fs::File; use std::io::{self, Read, Seek, SeekFrom, Write}; +use std::ops::DerefMut; use std::time::{Duration, SystemTime, UNIX_EPOCH}; pub(crate) unsafe fn fd_close( @@ -168,7 +170,7 @@ pub(crate) unsafe fn fd_read( .as_descriptor_mut(wasi::__WASI_RIGHTS_FD_READ, 0)? { Descriptor::OsHandle(file) => file.read_vectored(&mut iovs), - Descriptor::Stdin => io::stdin().lock().read_vectored(&mut iovs), + Descriptor::Stdin => io::stdin().read_vectored(&mut iovs), _ => return Err(Error::EBADF), }; @@ -372,21 +374,35 @@ pub(crate) unsafe fn fd_write( let iovs: Vec = iovs.iter().map(|vec| host::ciovec_to_host(vec)).collect(); // perform unbuffered writes - let host_nwritten = match wasi_ctx - .get_fd_entry_mut(fd)? - .as_descriptor_mut(wasi::__WASI_RIGHTS_FD_WRITE, 0)? - { - Descriptor::OsHandle(file) => file.write_vectored(&iovs)?, + let entry = wasi_ctx.get_fd_entry_mut(fd)?; + let isatty = entry.isatty(); + let desc = entry.as_descriptor_mut(wasi::__WASI_RIGHTS_FD_WRITE, 0)?; + let host_nwritten = match desc { + Descriptor::OsHandle(file) => { + if isatty { + SandboxedTTYWriter::new(file.deref_mut()).write_vectored(&iovs)? + } else { + file.write_vectored(&iovs)? + } + } Descriptor::Stdin => return Err(Error::EBADF), Descriptor::Stdout => { // lock for the duration of the scope let stdout = io::stdout(); let mut stdout = stdout.lock(); - let nwritten = stdout.write_vectored(&iovs)?; + let nwritten = if isatty { + SandboxedTTYWriter::new(&mut stdout).write_vectored(&iovs)? + } else { + stdout.write_vectored(&iovs)? + }; stdout.flush()?; nwritten } - Descriptor::Stderr => io::stderr().lock().write_vectored(&iovs)?, + // Always sanitize stderr, even if it's not directly connected to a tty, + // because stderr is meant for diagnostics rather than binary output, + // and may be redirected to a file which could end up being displayed + // on a tty later. + Descriptor::Stderr => SandboxedTTYWriter::new(&mut io::stderr()).write_vectored(&iovs)?, }; trace!(" | *nwritten={:?}", host_nwritten); diff --git a/crates/wasi-common/src/lib.rs b/crates/wasi-common/src/lib.rs index bad4770b1e8f..3834f253d0e2 100644 --- a/crates/wasi-common/src/lib.rs +++ b/crates/wasi-common/src/lib.rs @@ -26,6 +26,7 @@ mod error; mod fdentry; mod helpers; mod hostcalls_impl; +mod sandboxed_tty_writer; mod sys; #[macro_use] mod macros; diff --git a/crates/wasi-common/src/old/snapshot_0/fdentry.rs b/crates/wasi-common/src/old/snapshot_0/fdentry.rs index d6b69eed47fd..44cc4ce40324 100644 --- a/crates/wasi-common/src/old/snapshot_0/fdentry.rs +++ b/crates/wasi-common/src/old/snapshot_0/fdentry.rs @@ -162,6 +162,14 @@ impl FdEntry { Ok(()) } } + + /// Test whether this descriptor is considered a tty within WASI. + /// Note that since WASI itself lacks an `isatty` syscall and relies + /// on a conservative approximation, we use the same approximation here. + pub(crate) fn isatty(&self) -> bool { + self.file_type == wasi::__WASI_FILETYPE_CHARACTER_DEVICE + && (self.rights_base & (wasi::__WASI_RIGHTS_FD_SEEK | wasi::__WASI_RIGHTS_FD_TELL)) == 0 + } } /// This allows an `OsHandle` to be temporarily borrowed from a diff --git a/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs.rs b/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs.rs index da51e01f2eb9..002e9d6749eb 100644 --- a/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs.rs +++ b/crates/wasi-common/src/old/snapshot_0/hostcalls_impl/fs.rs @@ -8,10 +8,12 @@ use crate::old::snapshot_0::sys::fdentry_impl::determine_type_rights; use crate::old::snapshot_0::sys::hostcalls_impl::fs_helpers::path_open_rights; use crate::old::snapshot_0::sys::{host_impl, hostcalls_impl}; use crate::old::snapshot_0::{helpers, host, wasi, wasi32, Error, Result}; +use crate::sandboxed_tty_writer::SandboxedTTYWriter; use filetime::{set_file_handle_times, FileTime}; use log::trace; use std::fs::File; use std::io::{self, Read, Seek, SeekFrom, Write}; +use std::ops::DerefMut; use std::time::{Duration, SystemTime, UNIX_EPOCH}; pub(crate) unsafe fn fd_close(wasi_ctx: &mut WasiCtx, fd: wasi::__wasi_fd_t) -> Result<()> { @@ -160,7 +162,7 @@ pub(crate) unsafe fn fd_read( .as_descriptor_mut(wasi::__WASI_RIGHTS_FD_READ, 0)? { Descriptor::OsHandle(file) => file.read_vectored(&mut iovs), - Descriptor::Stdin => io::stdin().lock().read_vectored(&mut iovs), + Descriptor::Stdin => io::stdin().read_vectored(&mut iovs), _ => return Err(Error::EBADF), }; @@ -357,21 +359,35 @@ pub(crate) unsafe fn fd_write( let iovs: Vec = iovs.iter().map(|vec| host::ciovec_to_host(vec)).collect(); // perform unbuffered writes - let host_nwritten = match wasi_ctx - .get_fd_entry_mut(fd)? - .as_descriptor_mut(wasi::__WASI_RIGHTS_FD_WRITE, 0)? - { - Descriptor::OsHandle(file) => file.write_vectored(&iovs)?, + let entry = wasi_ctx.get_fd_entry_mut(fd)?; + let isatty = entry.isatty(); + let desc = entry.as_descriptor_mut(wasi::__WASI_RIGHTS_FD_WRITE, 0)?; + let host_nwritten = match desc { + Descriptor::OsHandle(file) => { + if isatty { + SandboxedTTYWriter::new(file.deref_mut()).write_vectored(&iovs)? + } else { + file.write_vectored(&iovs)? + } + } Descriptor::Stdin => return Err(Error::EBADF), Descriptor::Stdout => { // lock for the duration of the scope let stdout = io::stdout(); let mut stdout = stdout.lock(); - let nwritten = stdout.write_vectored(&iovs)?; + let nwritten = if isatty { + SandboxedTTYWriter::new(&mut stdout).write_vectored(&iovs)? + } else { + stdout.write_vectored(&iovs)? + }; stdout.flush()?; nwritten } - Descriptor::Stderr => io::stderr().lock().write_vectored(&iovs)?, + // Always sanitize stderr, even if it's not directly connected to a tty, + // because stderr is meant for diagnostics rather than binary output, + // and may be redirected to a file which could end up being displayed + // on a tty later. + Descriptor::Stderr => SandboxedTTYWriter::new(&mut io::stderr()).write_vectored(&iovs)?, }; trace!(" | *nwritten={:?}", host_nwritten); diff --git a/crates/wasi-common/src/sandboxed_tty_writer.rs b/crates/wasi-common/src/sandboxed_tty_writer.rs new file mode 100644 index 000000000000..2f927d0d2f90 --- /dev/null +++ b/crates/wasi-common/src/sandboxed_tty_writer.rs @@ -0,0 +1,178 @@ +use std::io::{Result, Write}; + +/// An adapter around a `Write` stream that guarantees that its output +/// is valid UTF-8 and contains no control characters. It does this by +/// replacing characters with inert control pictures and replacement +/// characters. +pub(crate) struct SandboxedTTYWriter<'writer, Writer> +where + Writer: Write, +{ + inner: &'writer mut Writer, +} + +impl<'writer, Writer> SandboxedTTYWriter<'writer, Writer> +where + Writer: Write, +{ + /// Construct a new `SandboxedTTYWriter` with the given inner `Writer`. + pub(crate) fn new(inner: &'writer mut Writer) -> Self { + Self { inner } + } + + /// Write a single character to the output. + pub(crate) fn write_char(&mut self, c: char) -> Result<()> { + self.inner.write( + match c { + '\u{0000}' => '␀', + '\u{0001}' => '␁', + '\u{0002}' => '␂', + '\u{0003}' => '␃', + '\u{0004}' => '␄', + '\u{0005}' => '␅', + '\u{0006}' => '␆', + '\u{0007}' => '␇', + '\u{0008}' => '␈', + '\u{0009}' => '\t', + '\u{000A}' => '\n', + '\u{000B}' => '␋', + '\u{000C}' => '␌', + '\u{000D}' => '\r', + '\u{000E}' => '␎', + '\u{000F}' => '␏', + '\u{0010}' => '␐', + '\u{0011}' => '␑', + '\u{0012}' => '␒', + '\u{0013}' => '␓', + '\u{0014}' => '␔', + '\u{0015}' => '␕', + '\u{0016}' => '␖', + '\u{0017}' => '␗', + '\u{0018}' => '␘', + '\u{0019}' => '␙', + '\u{001A}' => '␚', + '\u{001B}' => '␛', + '\u{001C}' => '␜', + '\u{001D}' => '␝', + '\u{001E}' => '␞', + '\u{001F}' => '␟', + '\u{007F}' => '␡', + x if x.is_control() => '�', + x => x, + } + .encode_utf8(&mut [0; 4]) // UTF-8 encoding of a `char` is at most 4 bytes. + .as_bytes(), + )?; + + Ok(()) + } + + /// Write a string to the output. + pub(crate) fn write_str(&mut self, s: &str) -> Result { + let mut result = 0; + + for c in s.chars() { + self.write_char(c)?; + // Note that we use the encoding length of the given char, rather than + // how many bytes we actually wrote, because our users don't know about + // what's really being written. + result += c.len_utf8(); + } + + Ok(result) + } +} + +impl<'writer, Writer> Write for SandboxedTTYWriter<'writer, Writer> +where + Writer: Write, +{ + fn write(&mut self, buf: &[u8]) -> Result { + let mut input = buf; + let mut result = 0; + + // Decode the string without heap-allocating it. See the example here + // for more details: + // https://doc.rust-lang.org/std/str/struct.Utf8Error.html#examples + loop { + match std::str::from_utf8(input) { + Ok(valid) => { + result += self.write_str(valid)?; + break; + } + Err(error) => { + let (valid, after_valid) = input.split_at(error.valid_up_to()); + result += self.write_str(unsafe { std::str::from_utf8_unchecked(valid) })?; + self.write_char('�')?; + + if let Some(invalid_sequence_length) = error.error_len() { + // An invalid sequence was encountered. Tell the application we've + // written those bytes (though actually, we replaced them with U+FFFD). + result += invalid_sequence_length; + // Set up `input` to resume writing after the end of the sequence. + input = &after_valid[invalid_sequence_length..]; + } else { + // The end of the buffer was encountered unexpectedly. Tell the application + // we've written out the remainder of the buffer. + result += after_valid.len(); + break; + } + } + } + } + + return Ok(result); + } + + fn flush(&mut self) -> Result<()> { + self.inner.flush() + } +} + +#[cfg(test)] +mod tests { + use super::SandboxedTTYWriter; + use std::io::{Result, Write}; + + #[test] + fn basic() -> Result<()> { + let mut buffer = Vec::new(); + let mut safe = SandboxedTTYWriter::new(&mut buffer); + safe.write_str("a\0b\u{0080}")?; + safe.write_char('\u{0007}')?; + safe.write(&[0x80])?; + safe.write(&[0xed, 0xa0, 0x80, 0xff, 0xfe])?; + assert_eq!( + buffer, + "a\u{2400}b\u{FFFD}\u{2407}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}".as_bytes() + ); + Ok(()) + } + + #[test] + fn how_many_replacements() -> Result<()> { + // See https://hsivonen.fi/broken-utf-8/ for background. + + let mut buffer = Vec::new(); + let mut safe = SandboxedTTYWriter::new(&mut buffer); + safe.write(&[0x80, 0x80, 0x80, 0x80])?; + assert_eq!(buffer, "\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}".as_bytes()); + + let mut buffer = Vec::new(); + let mut safe = SandboxedTTYWriter::new(&mut buffer); + safe.write(&[0xF0, 0x80, 0x80, 0x41])?; + assert_eq!(buffer, "\u{FFFD}\u{FFFD}\u{FFFD}A".as_bytes()); + + let mut buffer = Vec::new(); + let mut safe = SandboxedTTYWriter::new(&mut buffer); + safe.write(&[0xF0, 0x80, 0x80])?; + assert_eq!(buffer, "\u{FFFD}\u{FFFD}\u{FFFD}".as_bytes()); + + let mut buffer = Vec::new(); + let mut safe = SandboxedTTYWriter::new(&mut buffer); + safe.write(&[0xF4, 0x80, 0x80, 0xC0])?; + assert_eq!(buffer, "\u{FFFD}\u{FFFD}".as_bytes()); + + Ok(()) + } +}