Skip to content

Commit

Permalink
Add comment to unsafe block in decode_utf8_lossy
Browse files Browse the repository at this point in the history
  • Loading branch information
JoshMcguigan committed Nov 3, 2019
1 parent 3144f86 commit f332e0c
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 14 deletions.
25 changes: 18 additions & 7 deletions percent_encoding/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,15 +430,26 @@ fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
match input {
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
Cow::Owned(bytes) => {
let raw_utf8: *const [u8];
match String::from_utf8_lossy(&bytes) {
Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(),
Cow::Owned(s) => return s.into(),
Cow::Borrowed(utf8) => {
// If from_utf8_lossy returns a Cow::Borrowed, then we can
// be sure our original bytes were valid UTF-8. This is because
// if the bytes were invalid UTF-8 from_utf8_lossy would have
// to allocate a new owned string to back the Cow so it could
// replace invalid bytes with a placeholder.

// First we do a debug_assert to confirm our description above.
let raw_utf8: *const [u8];
raw_utf8 = utf8.as_bytes();
debug_assert!(raw_utf8 == &*bytes as *const [u8]);

// Given we know the original input bytes are valid UTF-8,
// and we have ownership of those bytes, we re-use them and
// return a Cow::Owned here.
Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
},
Cow::Owned(s) => Cow::Owned(s),
}
// from_utf8_lossy returned a borrow of `bytes` unchanged.
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
// Reuse the existing `Vec` allocation.
unsafe { String::from_utf8_unchecked(bytes) }.into()
}
}
}
26 changes: 19 additions & 7 deletions src/query_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,30 @@ pub(crate) fn encode<'a>(encoding_override: EncodingOverride, input: &'a str) ->
}

pub(crate) fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
// Note: This function is duplicated in `percent_encoding/lib.rs`.
match input {
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
Cow::Owned(bytes) => {
let raw_utf8: *const [u8];
match String::from_utf8_lossy(&bytes) {
Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(),
Cow::Owned(s) => return s.into(),
Cow::Borrowed(utf8) => {
// If from_utf8_lossy returns a Cow::Borrowed, then we can
// be sure our original bytes were valid UTF-8. This is because
// if the bytes were invalid UTF-8 from_utf8_lossy would have
// to allocate a new owned string to back the Cow so it could
// replace invalid bytes with a placeholder.

// First we do a debug_assert to confirm our description above.
let raw_utf8: *const [u8];
raw_utf8 = utf8.as_bytes();
debug_assert!(raw_utf8 == &*bytes as *const [u8]);

// Given we know the original input bytes are valid UTF-8,
// and we have ownership of those bytes, we re-use them and
// return a Cow::Owned here.
Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
},
Cow::Owned(s) => Cow::Owned(s),
}
// from_utf8_lossy returned a borrow of `bytes` unchanged.
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
// Reuse the existing `Vec` allocation.
unsafe { String::from_utf8_unchecked(bytes) }.into()
}
}
}

0 comments on commit f332e0c

Please sign in to comment.