Skip to content

Commit

Permalink
fix(stream): Don't break out of UTF8 character part-way
Browse files Browse the repository at this point in the history
This fixes a soundness issue where we create invalid UTF-8 data and then
call `str::from_utf8_unchecked` on it in release builds.

This ensures we skip ahead only up to the start of a UTF-8 sequence, never
stopping midway through one.

Fixes #156
  • Loading branch information
epage committed Jan 12, 2024
1 parent 51ffc2d commit a54cb5b
Showing 1 changed file with 4 additions and 18 deletions.
22 changes: 4 additions & 18 deletions crates/anstream/src/adapter/strip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,19 +114,19 @@ impl<'s> Iterator for StripStrIter<'s> {
#[inline]
fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> {
let offset = bytes.iter().copied().position(|b| {
let (next_state, action) = state_change(*state, b);
let (next_state, action) = dbg!(state_change(*state, b));
if next_state != State::Anywhere {
*state = next_state;
}
is_printable_str(action, b)
is_printable_bytes(action, b)
});
let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
*bytes = next;
*state = State::Ground;

let offset = bytes.iter().copied().position(|b| {
let (_next_state, action) = state_change(State::Ground, b);
!is_printable_str(action, b)
!(is_printable_bytes(action, b) || is_utf8_continuation(b))
});
let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len()));
*bytes = next;
Expand All @@ -153,19 +153,6 @@ unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'stati
}
}

/// Reports whether `byte`, classified by the parser as `action`, counts as printable text.
///
/// A byte is accepted when any of the following holds:
/// - the parser would print it and it is not DEL,
/// - it begins a UTF-8 sequence,
/// - it is a UTF-8 continuation byte,
/// - the parser would execute it and it is ASCII whitespace.
#[inline]
fn is_printable_str(action: Action, byte: u8) -> bool {
    // VT320 considered 0x7f to be `Print`able, but we expect to be working in UTF-8 systems
    // and not ISO Latin-1, which makes it DEL and therefore non-printable.
    const DEL: u8 = 0x7f;

    if action == Action::Print && byte != DEL {
        return true;
    }
    if action == Action::BeginUtf8 {
        return true;
    }
    // The input is known to be valid UTF-8, so the only thing that can be done with a
    // continuation byte is to print it.
    if is_utf8_continuation(byte) {
        return true;
    }
    action == Action::Execute && byte.is_ascii_whitespace()
}

#[inline]
fn is_utf8_continuation(b: u8) -> bool {
matches!(b, 0x80..=0xbf)
Expand Down Expand Up @@ -474,13 +461,12 @@ mod test {
}

// Regression test: an escape sequence that is interrupted by a multi-byte character must not
// make the stripped output begin part-way through a UTF-8 character.
//
// NOTE(review): this is a rendered diff hunk, so pre- and post-commit lines appear together;
// `#[should_panic]` and the first of the two long assertions look like the removed side —
// confirm against the repository before copying this test.
#[test]
#[should_panic]
fn test_strip_str_handles_broken_sequence() {
    // valid utf8: \xc3\xb6 then \x1b then \xf0\x9f\x98\x80
    let s = "ö\x1b😀hello😀goodbye";
    let mut it = strip_str(s);
    // Text before the escape byte is yielded untouched.
    assert_eq!("ö", it.next().unwrap());
    assert_eq!("😀hello😀goodbye", it.next().unwrap());
    assert_eq!("ello😀goodbye", it.next().unwrap());
}

proptest! {
Expand Down

0 comments on commit a54cb5b

Please sign in to comment.