From 714075439e57c4a1f7d1c2d4234eea918490c2d3 Mon Sep 17 00:00:00 2001 From: dylni <46035563+dylni@users.noreply.github.com> Date: Fri, 3 Nov 2023 21:03:49 -0400 Subject: [PATCH] Rearrange files --- src/common/convert.rs | 41 +++++++ src/common/mod.rs | 44 +------ src/lib.rs | 23 ++-- src/raw_str.rs | 14 +-- src/wasm/convert.rs | 54 ++++++++ src/wasm/mod.rs | 57 +-------- src/wasm/raw.rs | 1 + src/windows/convert/mod.rs | 116 ++++++++++++++++++ src/windows/{ => convert}/tests.rs | 0 src/windows/{ => convert}/wtf8/code_points.rs | 0 src/windows/{ => convert}/wtf8/convert.rs | 0 src/windows/{ => convert}/wtf8/mod.rs | 0 src/windows/{ => convert}/wtf8/string.rs | 0 src/windows/mod.rs | 112 +---------------- src/windows/raw.rs | 4 +- 15 files changed, 243 insertions(+), 223 deletions(-) create mode 100644 src/common/convert.rs create mode 100644 src/wasm/convert.rs create mode 100644 src/windows/convert/mod.rs rename src/windows/{ => convert}/tests.rs (100%) rename src/windows/{ => convert}/wtf8/code_points.rs (100%) rename src/windows/{ => convert}/wtf8/convert.rs (100%) rename src/windows/{ => convert}/wtf8/mod.rs (100%) rename src/windows/{ => convert}/wtf8/string.rs (100%) diff --git a/src/common/convert.rs b/src/common/convert.rs new file mode 100644 index 0000000..7c5b656 --- /dev/null +++ b/src/common/convert.rs @@ -0,0 +1,41 @@ +use std::borrow::Cow; +use std::convert::Infallible; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::result; + +#[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] +use std::os::fortanix_sgx as os; +#[cfg(target_os = "hermit")] +use std::os::hermit as os; +#[cfg(target_os = "solid_asp3")] +use std::os::solid as os; +#[cfg(unix)] +use std::os::unix as os; +#[cfg(target_os = "wasi")] +use std::os::wasi as os; +#[cfg(target_os = "xous")] +use std::os::xous as os; + +use os::ffi::OsStrExt; +use os::ffi::OsStringExt; + +pub(crate) type EncodingError = Infallible; + +pub(crate) type Result = result::Result; + +pub(crate) fn os_str_from_bytes(string: &[u8]) -> Result> { + Ok(Cow::Borrowed(OsStrExt::from_bytes(string))) +} + +pub(crate) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { + Cow::Borrowed(OsStrExt::as_bytes(os_string)) +} + +pub(crate) fn os_string_from_vec(string: Vec) -> Result { + Ok(OsStringExt::from_vec(string)) +} + +pub(crate) fn os_string_into_vec(os_string: OsString) -> Vec { + OsStringExt::into_vec(os_string) +} diff --git a/src/common/mod.rs b/src/common/mod.rs index 8a0e8b6..73111c5 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,45 +1,7 @@ -use std::borrow::Cow; -use std::convert::Infallible; -use std::ffi::OsStr; -use std::ffi::OsString; -use std::result; - -#[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] -use std::os::fortanix_sgx as os; -#[cfg(target_os = "hermit")] -use std::os::hermit as os; -#[cfg(target_os = "solid_asp3")] -use std::os::solid as os; -#[cfg(unix)] -use std::os::unix as os; -#[cfg(target_os = "wasi")] -use std::os::wasi as os; -#[cfg(target_os = "xous")] -use std::os::xous as os; - -use os::ffi::OsStrExt; -use os::ffi::OsStringExt; +if_conversions! { + pub(super) mod convert; +} if_raw_str! { pub(super) mod raw; } - -pub(super) type EncodingError = Infallible; - -pub(super) type Result = result::Result; - -pub(super) fn os_str_from_bytes(string: &[u8]) -> Result> { - Ok(Cow::Borrowed(OsStrExt::from_bytes(string))) -} - -pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { - Cow::Borrowed(OsStrExt::as_bytes(os_string)) -} - -pub(super) fn os_string_from_vec(string: Vec) -> Result { - Ok(OsStringExt::from_vec(string)) -} - -pub(super) fn os_string_into_vec(os_string: OsString) -> Vec { - OsStringExt::into_vec(os_string) -} diff --git a/src/lib.rs b/src/lib.rs index 66c077b..6613a8e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -275,7 +275,6 @@ if_conversions! { } } -#[allow(dead_code)] #[cfg_attr( all(target_family = "wasm", target_os = "unknown"), path = "wasm/mod.rs" @@ -287,7 +286,11 @@ if_conversions! { )] mod imp; -#[cfg(any(feature = "raw_os_str", windows))] +if_conversions! { + use imp::convert; +} + +#[cfg(any(all(feature = "conversions", windows), feature = "raw_os_str"))] mod util; if_raw_str! { @@ -325,7 +328,7 @@ if_checked_conversions! { os_str_bytes_docs_rs, doc(cfg(feature = "checked_conversions")) )] - pub struct EncodingError(imp::EncodingError); + pub struct EncodingError(convert::EncodingError); impl Display for EncodingError { #[inline] @@ -342,14 +345,14 @@ if_checked_conversions! { } if_conversions! { - fn from_raw_bytes<'a, S>(string: S) -> imp::Result> + fn from_raw_bytes<'a, S>(string: S) -> convert::Result> where S: Into>, { match string.into() { - Cow::Borrowed(string) => imp::os_str_from_bytes(string), + Cow::Borrowed(string) => convert::os_str_from_bytes(string), Cow::Owned(string) => { - imp::os_string_from_vec(string).map(Cow::Owned) + convert::os_string_from_vec(string).map(Cow::Owned) } } } @@ -482,7 +485,7 @@ if_conversions! { #[inline] fn to_raw_bytes(&self) -> Cow<'_, [u8]> { - imp::os_str_to_bytes(self) + convert::os_str_to_bytes(self) } } @@ -1055,19 +1058,19 @@ if_conversions! { impl OsStringBytes for OsString { #[inline] fn assert_from_raw_vec(string: Vec) -> Self { - expect_encoded!(imp::os_string_from_vec(string)) + expect_encoded!(convert::os_string_from_vec(string)) } if_checked_conversions! { #[inline] fn from_raw_vec(string: Vec) -> Result { - imp::os_string_from_vec(string).map_err(EncodingError) + convert::os_string_from_vec(string).map_err(EncodingError) } } #[inline] fn into_raw_vec(self) -> Vec { - imp::os_string_into_vec(self) + convert::os_string_into_vec(self) } } diff --git a/src/raw_str.rs b/src/raw_str.rs index 09c0112..6a05d08 100644 --- a/src/raw_str.rs +++ b/src/raw_str.rs @@ -37,7 +37,7 @@ if_checked_conversions! { } if_conversions! { - use super::imp; + use super::convert; } #[cfg(not(feature = "memchr"))] @@ -181,8 +181,8 @@ impl RawOsStr { if_conversions! { fn cow_from_raw_bytes_checked( string: &[u8], - ) -> imp::Result> { - imp::os_str_from_bytes(string).map(RawOsStrCow::from_os_str) + ) -> convert::Result> { + convert::os_str_from_bytes(string).map(RawOsStrCow::from_os_str) } } @@ -730,7 +730,7 @@ impl RawOsStr { #[inline] #[must_use] pub fn to_raw_bytes(&self) -> Cow<'_, [u8]> { - imp::os_str_to_bytes(self.as_os_str()) + convert::os_str_to_bytes(self.as_os_str()) } } @@ -1087,8 +1087,8 @@ impl RawOsString { } if_conversions! { - fn from_raw_vec_checked(string: Vec) -> imp::Result { - imp::os_string_from_vec(string).map(Self::new) + fn from_raw_vec_checked(string: Vec) -> convert::Result { + convert::os_string_from_vec(string).map(Self::new) } } @@ -1271,7 +1271,7 @@ impl RawOsString { #[inline] #[must_use] pub fn into_raw_vec(self) -> Vec { - imp::os_string_into_vec(self.into_os_string()) + convert::os_string_into_vec(self.into_os_string()) } } diff --git a/src/wasm/convert.rs b/src/wasm/convert.rs new file mode 100644 index 0000000..e2cfe20 --- /dev/null +++ b/src/wasm/convert.rs @@ -0,0 +1,54 @@ +use std::borrow::Cow; +use std::error::Error; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::fmt; +use std::fmt::Display; +use std::fmt::Formatter; +use std::result; +use std::str; +use std::str::Utf8Error; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) struct EncodingError(Utf8Error); + +impl Display for EncodingError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "os_str_bytes: {}", self.0) + } +} + +impl Error for EncodingError {} + +pub(crate) type Result = result::Result; + +macro_rules! expect_utf8 { + ( $result:expr ) => { + $result.expect( + "platform string contains invalid UTF-8, which should not be \ + possible", + ) + }; +} + +fn from_bytes(string: &[u8]) -> Result<&str> { + str::from_utf8(string).map_err(EncodingError) +} + +pub(crate) fn os_str_from_bytes(string: &[u8]) -> Result> { + from_bytes(string).map(|x| Cow::Borrowed(OsStr::new(x))) +} + +pub(crate) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { + Cow::Borrowed(expect_utf8!(os_string.to_str()).as_bytes()) +} + +pub(crate) fn os_string_from_vec(string: Vec) -> Result { + String::from_utf8(string) + .map(Into::into) + .map_err(|x| EncodingError(x.utf8_error())) +} + +pub(crate) fn os_string_into_vec(os_string: OsString) -> Vec { + expect_utf8!(os_string.into_string()).into_bytes() +} diff --git a/src/wasm/mod.rs b/src/wasm/mod.rs index 366e950..73111c5 100644 --- a/src/wasm/mod.rs +++ b/src/wasm/mod.rs @@ -1,58 +1,7 @@ -use std::borrow::Cow; -use std::error::Error; -use std::ffi::OsStr; -use std::ffi::OsString; -use std::fmt; -use std::fmt::Display; -use std::fmt::Formatter; -use std::result; -use std::str; -use std::str::Utf8Error; +if_conversions! { + pub(super) mod convert; +} if_raw_str! { pub(super) mod raw; } - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(super) struct EncodingError(Utf8Error); - -impl Display for EncodingError { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "os_str_bytes: {}", self.0) - } -} - -impl Error for EncodingError {} - -pub(super) type Result = result::Result; - -macro_rules! expect_utf8 { - ( $result:expr ) => { - $result.expect( - "platform string contains invalid UTF-8, which should not be \ - possible", - ) - }; -} - -fn from_bytes(string: &[u8]) -> Result<&str> { - str::from_utf8(string).map_err(EncodingError) -} - -pub(super) fn os_str_from_bytes(string: &[u8]) -> Result> { - from_bytes(string).map(|x| Cow::Borrowed(OsStr::new(x))) -} - -pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { - Cow::Borrowed(expect_utf8!(os_string.to_str()).as_bytes()) -} - -pub(super) fn os_string_from_vec(string: Vec) -> Result { - String::from_utf8(string) - .map(Into::into) - .map_err(|x| EncodingError(x.utf8_error())) -} - -pub(super) fn os_string_into_vec(os_string: OsString) -> Vec { - expect_utf8!(os_string.into_string()).into_bytes() -} diff --git a/src/wasm/raw.rs b/src/wasm/raw.rs index 3492dfb..d155498 100644 --- a/src/wasm/raw.rs +++ b/src/wasm/raw.rs @@ -3,6 +3,7 @@ use std::fmt::Formatter; use crate::RawOsStr; +#[allow(dead_code)] #[path = "../common/raw.rs"] mod common_raw; #[cfg(feature = "uniquote")] diff --git a/src/windows/convert/mod.rs b/src/windows/convert/mod.rs new file mode 100644 index 0000000..35d145a --- /dev/null +++ b/src/windows/convert/mod.rs @@ -0,0 +1,116 @@ +// These functions are necessarily inefficient, because they must revert +// encoding conversions performed by the standard library. However, there is +// currently no better alternative. + +use std::borrow::Cow; +use std::error::Error; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::fmt; +use std::fmt::Display; +use std::fmt::Formatter; +use std::ops::Not; +use std::os::windows::ffi::OsStrExt; +use std::os::windows::ffi::OsStringExt; +use std::result; +use std::str; + +mod wtf8; +use wtf8::DecodeWide; + +if_raw_str! { + if_conversions! { + pub(crate) use wtf8::ends_with; + pub(crate) use wtf8::starts_with; + } +} + +#[cfg(test)] +mod tests; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum EncodingError { + Byte(u8), + CodePoint(u32), + End(), +} + +impl EncodingError { + fn position(&self) -> Cow<'_, str> { + match self { + Self::Byte(byte) => Cow::Owned(format!("byte b'\\x{:02X}'", byte)), + Self::CodePoint(code_point) => { + Cow::Owned(format!("code point U+{:04X}", code_point)) + } + Self::End() => Cow::Borrowed("end of string"), + } + } +} + +impl Display for EncodingError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "byte sequence is not representable in the platform encoding; \ + error at {}", + self.position(), + ) + } +} + +impl Error for EncodingError {} + +pub(crate) type Result = result::Result; + +fn from_bytes(string: &[u8]) -> Result> { + let mut encoder = wtf8::encode_wide(string); + + // Collecting an iterator into a result ignores the size hint: + // https://github.com/rust-lang/rust/issues/48994 + let mut encoded_string = Vec::with_capacity(encoder.size_hint().0); + for wchar in &mut encoder { + encoded_string.push(wchar?); + } + + debug_assert_eq!(str::from_utf8(string).is_ok(), encoder.is_still_utf8()); + Ok(encoder + .is_still_utf8() + .not() + .then(|| OsStringExt::from_wide(&encoded_string))) +} + +fn to_bytes(os_string: &OsStr) -> Vec { + let encoder = OsStrExt::encode_wide(os_string); + + let mut string = Vec::with_capacity(encoder.size_hint().0); + string.extend(DecodeWide::new(encoder)); + string +} + +pub(crate) fn os_str_from_bytes(string: &[u8]) -> Result> { + from_bytes(string).map(|os_string| { + os_string.map(Cow::Owned).unwrap_or_else(|| { + // SAFETY: This slice was validated to be UTF-8. + Cow::Borrowed(OsStr::new(unsafe { + str::from_utf8_unchecked(string) + })) + }) + }) +} + +pub(crate) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { + Cow::Owned(to_bytes(os_string)) +} + +pub(crate) fn os_string_from_vec(string: Vec) -> Result { + from_bytes(&string).map(|os_string| { + os_string.unwrap_or_else(|| { + // SAFETY: This slice was validated to be UTF-8. + unsafe { String::from_utf8_unchecked(string) }.into() + }) + }) +} + +pub(crate) fn os_string_into_vec(os_string: OsString) -> Vec { + to_bytes(&os_string) +} diff --git a/src/windows/tests.rs b/src/windows/convert/tests.rs similarity index 100% rename from src/windows/tests.rs rename to src/windows/convert/tests.rs diff --git a/src/windows/wtf8/code_points.rs b/src/windows/convert/wtf8/code_points.rs similarity index 100% rename from src/windows/wtf8/code_points.rs rename to src/windows/convert/wtf8/code_points.rs diff --git a/src/windows/wtf8/convert.rs b/src/windows/convert/wtf8/convert.rs similarity index 100% rename from src/windows/wtf8/convert.rs rename to src/windows/convert/wtf8/convert.rs diff --git a/src/windows/wtf8/mod.rs b/src/windows/convert/wtf8/mod.rs similarity index 100% rename from src/windows/wtf8/mod.rs rename to src/windows/convert/wtf8/mod.rs diff --git a/src/windows/wtf8/string.rs b/src/windows/convert/wtf8/string.rs similarity index 100% rename from src/windows/wtf8/string.rs rename to src/windows/convert/wtf8/string.rs diff --git a/src/windows/mod.rs b/src/windows/mod.rs index 7315e2b..73111c5 100644 --- a/src/windows/mod.rs +++ b/src/windows/mod.rs @@ -1,113 +1,7 @@ -// These functions are necessarily inefficient, because they must revert -// encoding conversions performed by the standard library. However, there is -// currently no better alternative. - -use std::borrow::Cow; -use std::error::Error; -use std::ffi::OsStr; -use std::ffi::OsString; -use std::fmt; -use std::fmt::Display; -use std::fmt::Formatter; -use std::ops::Not; -use std::os::windows::ffi::OsStrExt; -use std::os::windows::ffi::OsStringExt; -use std::result; -use std::str; +if_conversions! { + pub(super) mod convert; +} if_raw_str! { pub(super) mod raw; } - -mod wtf8; -use wtf8::DecodeWide; - -#[cfg(test)] -mod tests; - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(super) enum EncodingError { - Byte(u8), - CodePoint(u32), - End(), -} - -impl EncodingError { - fn position(&self) -> Cow<'_, str> { - match self { - Self::Byte(byte) => Cow::Owned(format!("byte b'\\x{:02X}'", byte)), - Self::CodePoint(code_point) => { - Cow::Owned(format!("code point U+{:04X}", code_point)) - } - Self::End() => Cow::Borrowed("end of string"), - } - } -} - -impl Display for EncodingError { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!( - f, - "byte sequence is not representable in the platform encoding; \ - error at {}", - self.position(), - ) - } -} - -impl Error for EncodingError {} - -pub(super) type Result = result::Result; - -fn from_bytes(string: &[u8]) -> Result> { - let mut encoder = wtf8::encode_wide(string); - - // Collecting an iterator into a result ignores the size hint: - // https://github.com/rust-lang/rust/issues/48994 - let mut encoded_string = Vec::with_capacity(encoder.size_hint().0); - for wchar in &mut encoder { - encoded_string.push(wchar?); - } - - debug_assert_eq!(str::from_utf8(string).is_ok(), encoder.is_still_utf8()); - Ok(encoder - .is_still_utf8() - .not() - .then(|| OsStringExt::from_wide(&encoded_string))) -} - -fn to_bytes(os_string: &OsStr) -> Vec { - let encoder = OsStrExt::encode_wide(os_string); - - let mut string = Vec::with_capacity(encoder.size_hint().0); - string.extend(DecodeWide::new(encoder)); - string -} - -pub(super) fn os_str_from_bytes(string: &[u8]) -> Result> { - from_bytes(string).map(|os_string| { - os_string.map(Cow::Owned).unwrap_or_else(|| { - // SAFETY: This slice was validated to be UTF-8. - Cow::Borrowed(OsStr::new(unsafe { - str::from_utf8_unchecked(string) - })) - }) - }) -} - -pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { - Cow::Owned(to_bytes(os_string)) -} - -pub(super) fn os_string_from_vec(string: Vec) -> Result { - from_bytes(&string).map(|os_string| { - os_string.unwrap_or_else(|| { - // SAFETY: This slice was validated to be UTF-8. - unsafe { String::from_utf8_unchecked(string) }.into() - }) - }) -} - -pub(super) fn os_string_into_vec(os_string: OsString) -> Vec { - to_bytes(&os_string) -} diff --git a/src/windows/raw.rs b/src/windows/raw.rs index 59f4732..7db4ff2 100644 --- a/src/windows/raw.rs +++ b/src/windows/raw.rs @@ -5,8 +5,8 @@ use std::os::windows::ffi::OsStrExt; use crate::RawOsStr; if_conversions! { - pub(crate) use super::wtf8::ends_with; - pub(crate) use super::wtf8::starts_with; + pub(crate) use super::convert::ends_with; + pub(crate) use super::convert::starts_with; } pub(crate) fn encode_wide(