From 79761d5867aa74ff58c376efd9eacf10e1c5f486 Mon Sep 17 00:00:00 2001 From: fschutt Date: Wed, 3 Feb 2021 17:09:14 +0100 Subject: [PATCH 1/2] Make crate no_std compatible - Use BTreeSet instead of HashSet in test, so that test can run on no_std - Replace all imports of Vec and String with respective alloc types - use core instead of std --- src/ascii.rs | 19 +++++----- src/handles.rs | 36 +++++++++--------- src/lib.rs | 93 ++++++++++++++++++++++++++++------------------ src/mem.rs | 32 ++++++++-------- src/simd_funcs.rs | 34 ++++++++--------- src/single_byte.rs | 2 +- src/utf_8.rs | 6 +-- 7 files changed, 123 insertions(+), 99 deletions(-) diff --git a/src/ascii.rs b/src/ascii.rs index 86481314..4ce12309 100644 --- a/src/ascii.rs +++ b/src/ascii.rs @@ -34,9 +34,9 @@ use crate::simd_funcs::*; cfg_if! { if #[cfg(feature = "simd-accel")] { #[allow(unused_imports)] - use ::std::intrinsics::unlikely; + use ::core::intrinsics::unlikely; #[allow(unused_imports)] - use ::std::intrinsics::likely; + use ::core::intrinsics::likely; } else { #[allow(dead_code)] #[inline(always)] @@ -103,7 +103,7 @@ macro_rules! ascii_alu { let mut until_alignment = { // Check if the other unit aligns if we move the narrower unit // to alignment. - // if ::std::mem::size_of::<$src_unit>() == ::std::mem::size_of::<$dst_unit>() { + // if ::core::mem::size_of::<$src_unit>() == ::core::mem::size_of::<$dst_unit>() { // ascii_to_ascii let src_alignment = (src as usize) & ALU_ALIGNMENT_MASK; let dst_alignment = (dst as usize) & ALU_ALIGNMENT_MASK; @@ -111,7 +111,7 @@ macro_rules! ascii_alu { break; } (ALU_ALIGNMENT - src_alignment) & ALU_ALIGNMENT_MASK - // } else if ::std::mem::size_of::<$src_unit>() < ::std::mem::size_of::<$dst_unit>() { + // } else if ::core::mem::size_of::<$src_unit>() < ::core::mem::size_of::<$dst_unit>() { // ascii_to_basic_latin // let src_until_alignment = (ALIGNMENT - ((src as usize) & ALIGNMENT_MASK)) & ALIGNMENT_MASK; // if (dst.add(src_until_alignment) as usize) & ALIGNMENT_MASK != 0 { @@ -197,7 +197,7 @@ macro_rules! basic_latin_alu { let mut until_alignment = { // Check if the other unit aligns if we move the narrower unit // to alignment. - // if ::std::mem::size_of::<$src_unit>() == ::std::mem::size_of::<$dst_unit>() { + // if ::core::mem::size_of::<$src_unit>() == ::core::mem::size_of::<$dst_unit>() { // ascii_to_ascii // let src_alignment = (src as usize) & ALIGNMENT_MASK; // let dst_alignment = (dst as usize) & ALIGNMENT_MASK; @@ -206,7 +206,7 @@ macro_rules! basic_latin_alu { // } // (ALIGNMENT - src_alignment) & ALIGNMENT_MASK // } else - if ::std::mem::size_of::<$src_unit>() < ::std::mem::size_of::<$dst_unit>() { + if ::core::mem::size_of::<$src_unit>() < ::core::mem::size_of::<$dst_unit>() { // ascii_to_basic_latin let src_until_alignment = (ALU_ALIGNMENT - ((src as usize) & ALU_ALIGNMENT_MASK)) @@ -290,7 +290,7 @@ macro_rules! latin1_alu { // This loop is only broken out of as a `goto` forward loop { let mut until_alignment = { - if ::std::mem::size_of::<$src_unit>() < ::std::mem::size_of::<$dst_unit>() { + if ::core::mem::size_of::<$src_unit>() < ::core::mem::size_of::<$dst_unit>() { // unpack let src_until_alignment = (ALU_ALIGNMENT - ((src as usize) & ALU_ALIGNMENT_MASK)) @@ -447,7 +447,7 @@ macro_rules! ascii_simd_check_align_unrolled { dst: *mut $dst_unit, len: usize, ) -> Option<($src_unit, usize)> { - let unit_size = ::std::mem::size_of::<$src_unit>(); + let unit_size = ::core::mem::size_of::<$src_unit>(); let mut offset = 0usize; // This loop is only broken out of as a goto forward without // actually looping @@ -629,7 +629,7 @@ macro_rules! latin1_simd_check_align_unrolled { ) => { #[inline(always)] pub unsafe fn $name(src: *const $src_unit, dst: *mut $dst_unit, len: usize) { - let unit_size = ::std::mem::size_of::<$src_unit>(); + let unit_size = ::core::mem::size_of::<$src_unit>(); let mut offset = 0usize; if SIMD_STRIDE_SIZE <= len { let mut until_alignment = ((SIMD_STRIDE_SIZE @@ -1511,6 +1511,7 @@ pub fn iso_2022_jp_ascii_valid_up_to(bytes: &[u8]) -> usize { #[cfg(test)] mod tests { use super::*; + use alloc::vec::Vec; macro_rules! test_ascii { ($test_name:ident, $fn_tested:ident, $src_unit:ty, $dst_unit:ty) => { diff --git a/src/handles.rs b/src/handles.rs index d40a54fc..b5404c01 100644 --- a/src/handles.rs +++ b/src/handles.rs @@ -108,12 +108,12 @@ impl UnalignedU16Slice { #[inline(always)] pub fn at(&self, i: usize) -> u16 { - use std::mem::MaybeUninit; + use core::mem::MaybeUninit; assert!(i < self.len); unsafe { let mut u: MaybeUninit = MaybeUninit::uninit(); - ::std::ptr::copy_nonoverlapping(self.ptr.add(i * 2), u.as_mut_ptr() as *mut u8, 2); + ::core::ptr::copy_nonoverlapping(self.ptr.add(i * 2), u.as_mut_ptr() as *mut u8, 2); u.assume_init() } } @@ -195,7 +195,7 @@ fn copy_unaligned_basic_latin_to_ascii_alu( dst: &mut [u8], offset: usize, ) -> CopyAsciiResult { - let len = ::std::cmp::min(src.len(), dst.len()); + let len = ::core::cmp::min(src.len(), dst.len()); let mut i = 0usize; loop { if i == len { @@ -234,7 +234,7 @@ fn copy_unaligned_basic_latin_to_ascii( src: UnalignedU16Slice, dst: &mut [u8], ) -> CopyAsciiResult { - let len = ::std::cmp::min(src.len(), dst.len()); + let len = ::core::cmp::min(src.len(), dst.len()); let mut offset = 0; if SIMD_STRIDE_SIZE <= len { let len_minus_stride = len - SIMD_STRIDE_SIZE; @@ -736,7 +736,7 @@ impl<'a> Utf16Destination<'a> { let mut src_unaligned = unsafe { UnalignedU16Slice::new( src_remaining.as_ptr(), - ::std::cmp::min(src_remaining.len() / 2, dst_remaining.len()), + ::core::cmp::min(src_remaining.len() / 2, dst_remaining.len()), ) }; if src_unaligned.len() == 0 { @@ -1080,7 +1080,7 @@ impl<'a> Utf8Destination<'a> { pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) { let src_remaining = &source.slice[source.pos..]; let dst_remaining = &mut self.slice[self.pos..]; - let min_len = ::std::cmp::min(src_remaining.len(), dst_remaining.len()); + let min_len = ::core::cmp::min(src_remaining.len(), dst_remaining.len()); // Validate first, then memcpy to let memcpy do its thing even for // non-ASCII. (And potentially do something better than SSE2 for ASCII.) let valid_len = utf8_valid_up_to(&src_remaining[..min_len]); @@ -1156,7 +1156,7 @@ impl<'a> Utf16Source<'a> { self.pos += 1; let unit_minus_surrogate_start = unit.wrapping_sub(0xD800); if unit_minus_surrogate_start > (0xDFFF - 0xD800) { - return unsafe { ::std::char::from_u32_unchecked(u32::from(unit)) }; + return unsafe { ::core::char::from_u32_unchecked(u32::from(unit)) }; } if unit_minus_surrogate_start <= (0xDBFF - 0xD800) { // high surrogate @@ -1167,7 +1167,7 @@ impl<'a> Utf16Source<'a> { // The next code unit is a low surrogate. Advance position. self.pos += 1; return unsafe { - ::std::char::from_u32_unchecked( + ::core::char::from_u32_unchecked( (u32::from(unit) << 10) + u32::from(second) - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32), ) @@ -1204,7 +1204,7 @@ impl<'a> Utf16Source<'a> { // The next code unit is a low surrogate. Advance position. self.pos += 1; return Unicode::NonAscii(NonAscii::Astral(unsafe { - ::std::char::from_u32_unchecked( + ::core::char::from_u32_unchecked( (u32::from(unit) << 10) + u32::from(second) - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32), ) @@ -1268,7 +1268,7 @@ impl<'a> Utf16Source<'a> { // The next code unit is a low surrogate. Advance position. self.pos += 1; NonAscii::Astral(unsafe { - ::std::char::from_u32_unchecked( + ::core::char::from_u32_unchecked( (u32::from(unit) << 10) + u32::from(second) - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32), ) @@ -1341,7 +1341,7 @@ impl<'a> Utf16Source<'a> { // The next code unit is a low surrogate. Advance position. self.pos += 1; NonAscii::Astral(unsafe { - ::std::char::from_u32_unchecked( + ::core::char::from_u32_unchecked( (u32::from(unit) << 10) + u32::from(second) - (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32), ) @@ -1469,21 +1469,21 @@ impl<'a> Utf8Source<'a> { let point = ((u32::from(unit) & 0x1F) << 6) | (u32::from(self.slice[self.pos + 1]) & 0x3F); self.pos += 2; - return unsafe { ::std::char::from_u32_unchecked(point) }; + return unsafe { ::core::char::from_u32_unchecked(point) }; } if unit < 0xF0 { let point = ((u32::from(unit) & 0xF) << 12) | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 6) | (u32::from(self.slice[self.pos + 2]) & 0x3F); self.pos += 3; - return unsafe { ::std::char::from_u32_unchecked(point) }; + return unsafe { ::core::char::from_u32_unchecked(point) }; } let point = ((u32::from(unit) & 0x7) << 18) | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12) | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6) | (u32::from(self.slice[self.pos + 3]) & 0x3F); self.pos += 4; - unsafe { ::std::char::from_u32_unchecked(point) } + unsafe { ::core::char::from_u32_unchecked(point) } } #[inline(always)] fn read_enum(&mut self) -> Unicode { @@ -1512,7 +1512,7 @@ impl<'a> Utf8Source<'a> { | (u32::from(self.slice[self.pos + 3]) & 0x3F); self.pos += 4; Unicode::NonAscii(NonAscii::Astral(unsafe { - ::std::char::from_u32_unchecked(point) + ::core::char::from_u32_unchecked(point) })) } #[inline(always)] @@ -1567,7 +1567,7 @@ impl<'a> Utf8Source<'a> { | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6) | (u32::from(self.slice[self.pos + 3]) & 0x3F); self.pos += 4; - NonAscii::Astral(unsafe { ::std::char::from_u32_unchecked(point) }) + NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) }) } } } @@ -1617,7 +1617,7 @@ impl<'a> Utf8Source<'a> { | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6) | (u32::from(self.slice[self.pos + 3]) & 0x3F); self.pos += 4; - NonAscii::Astral(unsafe { ::std::char::from_u32_unchecked(point) }) + NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) }) } } else { return CopyAsciiResult::Stop(( @@ -1674,7 +1674,7 @@ impl<'a> Utf8Source<'a> { | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6) | (u32::from(self.slice[self.pos + 3]) & 0x3F); self.pos += 4; - NonAscii::Astral(unsafe { ::std::char::from_u32_unchecked(point) }) + NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) }) } } else { return CopyAsciiResult::Stop(( diff --git a/src/lib.rs b/src/lib.rs index 9a2c5942..02f6a368 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,11 +41,11 @@ //! The [repository is on GitHub](https://github.com/hsivonen/encoding_rs). The //! [crate is available on crates.io](https://crates.io/crates/encoding_rs). //! -//! # Integration with `std::io` +//! # Integration with `core::io` //! -//! This crate doesn't implement traits from `std::io`. However, for the case of -//! wrapping a `std::io::Read` in a decoder that implements `std::io::Read` and -//! presents the data from the wrapped `std::io::Read` as UTF-8 is addressed by +//! This crate doesn't implement traits from `core::io`. However, for the case of +//! wrapping a `core::io::Read` in a decoder that implements `core::io::Read` and +//! presents the data from the wrapped `core::io::Read` as UTF-8 is addressed by //! the [`encoding_rs_io`](https://docs.rs/encoding_rs_io/) crate. //! //! # Examples @@ -87,7 +87,7 @@ //! // Very short output buffer to demonstrate the output buffer getting full. //! // Normally, you'd use something like `[0u8; 2048]`. //! let mut buffer_bytes = [0u8; 8]; -//! let mut buffer: &mut str = std::str::from_utf8_mut(&mut buffer_bytes[..]).unwrap(); +//! let mut buffer: &mut str = core::str::from_utf8_mut(&mut buffer_bytes[..]).unwrap(); //! //! // How many bytes in the buffer currently hold significant data. //! let mut bytes_in_buffer = 0usize; @@ -679,8 +679,13 @@ //! See the section [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes) //! for discussion about the UTF-16 family. +#![no_std] + #![cfg_attr(feature = "simd-accel", feature(stdsimd, core_intrinsics))] +extern crate core; +#[cfg_attr(test, macro_use)] +extern crate alloc; #[macro_use] extern crate cfg_if; @@ -746,10 +751,12 @@ use crate::ascii::iso_2022_jp_ascii_valid_up_to; use crate::utf_8::utf8_valid_up_to; use crate::variant::*; -use std::borrow::Cow; -use std::cmp::Ordering; -use std::hash::Hash; -use std::hash::Hasher; +use alloc::borrow::Cow; +use alloc::vec::Vec; +use alloc::string::String; +use core::cmp::Ordering; +use core::hash::Hash; +use core::hash::Hasher; #[cfg(feature = "serde")] use serde::de::Visitor; @@ -3022,7 +3029,7 @@ impl Encoding { ascii_valid_up_to(bytes) }; if valid_up_to == bytes.len() { - let str: &str = unsafe { std::str::from_utf8_unchecked(bytes) }; + let str: &str = unsafe { core::str::from_utf8_unchecked(bytes) }; return (Cow::Borrowed(str), false); } let decoder = self.new_decoder_without_bom_handling(); @@ -3041,7 +3048,7 @@ impl Encoding { unsafe { let vec = string.as_mut_vec(); vec.set_len(valid_up_to); - std::ptr::copy_nonoverlapping(bytes.as_ptr(), vec.as_mut_ptr(), valid_up_to); + core::ptr::copy_nonoverlapping(bytes.as_ptr(), vec.as_mut_ptr(), valid_up_to); } (decoder, string, valid_up_to) } else { @@ -3114,7 +3121,7 @@ impl Encoding { if self == UTF_8 { let valid_up_to = utf8_valid_up_to(bytes); if valid_up_to == bytes.len() { - let str: &str = unsafe { std::str::from_utf8_unchecked(bytes) }; + let str: &str = unsafe { core::str::from_utf8_unchecked(bytes) }; return Some(Cow::Borrowed(str)); } return None; @@ -3126,7 +3133,7 @@ impl Encoding { ascii_valid_up_to(bytes) }; if valid_up_to == bytes.len() { - let str: &str = unsafe { std::str::from_utf8_unchecked(bytes) }; + let str: &str = unsafe { core::str::from_utf8_unchecked(bytes) }; return Some(Cow::Borrowed(str)); } let decoder = self.new_decoder_without_bom_handling(); @@ -3140,7 +3147,7 @@ impl Encoding { unsafe { let vec = string.as_mut_vec(); vec.set_len(valid_up_to); - std::ptr::copy_nonoverlapping(bytes.as_ptr(), vec.as_mut_ptr(), valid_up_to); + core::ptr::copy_nonoverlapping(bytes.as_ptr(), vec.as_mut_ptr(), valid_up_to); } (decoder, string, &bytes[valid_up_to..]) } else { @@ -3228,7 +3235,7 @@ impl Encoding { ); unsafe { vec.set_len(valid_up_to); - std::ptr::copy_nonoverlapping(bytes.as_ptr(), vec.as_mut_ptr(), valid_up_to); + core::ptr::copy_nonoverlapping(bytes.as_ptr(), vec.as_mut_ptr(), valid_up_to); } let mut total_read = valid_up_to; let mut total_had_errors = false; @@ -3357,6 +3364,20 @@ impl PartialEq for Encoding { impl Eq for Encoding {} +#[cfg(test)] +impl PartialOrd for Encoding { + fn partial_cmp(&self, other: &Self) -> Option { + (self as *const Encoding as usize).partial_cmp(&(other as *const Encoding as usize)) + } +} + +#[cfg(test)] +impl Ord for Encoding { + fn cmp(&self, other: &Self) -> Ordering { + (self as *const Encoding as usize).cmp(&(other as *const Encoding as usize)) + } +} + impl Hash for Encoding { #[inline] fn hash(&self, state: &mut H) { @@ -3364,9 +3385,9 @@ impl Hash for Encoding { } } -impl std::fmt::Debug for Encoding { +impl core::fmt::Debug for Encoding { #[inline] - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!(f, "Encoding {{ {} }}", self.name) } } @@ -3389,7 +3410,7 @@ struct EncodingVisitor; impl<'de> Visitor<'de> for EncodingVisitor { type Value = &'static Encoding; - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result { formatter.write_str("a valid encoding label") } @@ -3674,7 +3695,7 @@ impl Decoder { 1, checked_mul(3, checked_div(byte_length.checked_add(1), 2)), ) { - let utf_bom = std::cmp::max(utf8_bom, utf16_bom); + let utf_bom = core::cmp::max(utf8_bom, utf16_bom); let encoding = self.encoding(); if encoding == UTF_8 || encoding == UTF_16LE || encoding == UTF_16BE { // No need to consider the internal state of the underlying decoder, @@ -3683,7 +3704,7 @@ impl Decoder { } else if let Some(non_bom) = self.variant.max_utf8_buffer_length(byte_length) { - return Some(std::cmp::max(utf_bom, non_bom)); + return Some(core::cmp::max(utf_bom, non_bom)); } } } @@ -3701,7 +3722,7 @@ impl Decoder { // because it is at start, because no data has reached it yet. return Some(utf8_bom); } else if let Some(non_bom) = self.variant.max_utf8_buffer_length(sum) { - return Some(std::cmp::max(utf8_bom, non_bom)); + return Some(core::cmp::max(utf8_bom, non_bom)); } } } @@ -3727,7 +3748,7 @@ impl Decoder { // because it is at start, because no data has reached it yet. return Some(utf16_bom); } else if let Some(non_bom) = self.variant.max_utf8_buffer_length(sum) { - return Some(std::cmp::max(utf16_bom, non_bom)); + return Some(core::cmp::max(utf16_bom, non_bom)); } } } @@ -3766,7 +3787,7 @@ impl Decoder { 1, checked_mul(3, checked_div(byte_length.checked_add(1), 2)), ) { - let utf_bom = std::cmp::max(utf8_bom, utf16_bom); + let utf_bom = core::cmp::max(utf8_bom, utf16_bom); let encoding = self.encoding(); if encoding == UTF_8 || encoding == UTF_16LE || encoding == UTF_16BE { // No need to consider the internal state of the underlying decoder, @@ -3776,7 +3797,7 @@ impl Decoder { .variant .max_utf8_buffer_length_without_replacement(byte_length) { - return Some(std::cmp::max(utf_bom, non_bom)); + return Some(core::cmp::max(utf_bom, non_bom)); } } } @@ -3796,7 +3817,7 @@ impl Decoder { } else if let Some(non_bom) = self.variant.max_utf8_buffer_length_without_replacement(sum) { - return Some(std::cmp::max(utf8_bom, non_bom)); + return Some(core::cmp::max(utf8_bom, non_bom)); } } } @@ -3824,7 +3845,7 @@ impl Decoder { } else if let Some(non_bom) = self.variant.max_utf8_buffer_length_without_replacement(sum) { - return Some(std::cmp::max(utf16_bom, non_bom)); + return Some(core::cmp::max(utf16_bom, non_bom)); } } } @@ -3918,7 +3939,7 @@ impl Decoder { // bytes of trailing garbage. No need to optimize non-ASCII-compatible // encodings to avoid overwriting here. if self.encoding != UTF_8 { - let max = std::cmp::min(len, trail + ascii::MAX_STRIDE_SIZE); + let max = core::cmp::min(len, trail + ascii::MAX_STRIDE_SIZE); while trail < max { bytes[trail] = 0; trail += 1; @@ -4008,7 +4029,7 @@ impl Decoder { // bytes of trailing garbage. No need to optimize non-ASCII-compatible // encodings to avoid overwriting here. if self.encoding != UTF_8 { - let max = std::cmp::min(len, trail + ascii::MAX_STRIDE_SIZE); + let max = core::cmp::min(len, trail + ascii::MAX_STRIDE_SIZE); while trail < max { bytes[trail] = 0; trail += 1; @@ -4081,7 +4102,7 @@ impl Decoder { if let Some(utf16_bom) = checked_add(1, checked_div(byte_length.checked_add(1), 2)) { - let utf_bom = std::cmp::max(utf8_bom, utf16_bom); + let utf_bom = core::cmp::max(utf8_bom, utf16_bom); let encoding = self.encoding(); if encoding == UTF_8 || encoding == UTF_16LE || encoding == UTF_16BE { // No need to consider the internal state of the underlying decoder, @@ -4090,7 +4111,7 @@ impl Decoder { } else if let Some(non_bom) = self.variant.max_utf16_buffer_length(byte_length) { - return Some(std::cmp::max(utf_bom, non_bom)); + return Some(core::cmp::max(utf_bom, non_bom)); } } } @@ -4108,7 +4129,7 @@ impl Decoder { // because it is at start, because no data has reached it yet. return Some(utf8_bom); } else if let Some(non_bom) = self.variant.max_utf16_buffer_length(sum) { - return Some(std::cmp::max(utf8_bom, non_bom)); + return Some(core::cmp::max(utf8_bom, non_bom)); } } } @@ -4132,7 +4153,7 @@ impl Decoder { // because it is at start, because no data has reached it yet. return Some(utf16_bom); } else if let Some(non_bom) = self.variant.max_utf16_buffer_length(sum) { - return Some(std::cmp::max(utf16_bom, non_bom)); + return Some(core::cmp::max(utf16_bom, non_bom)); } } } @@ -4267,7 +4288,7 @@ pub enum EncoderResult { impl EncoderResult { fn unmappable_from_bmp(bmp: u16) -> EncoderResult { - EncoderResult::Unmappable(::std::char::from_u32(u32::from(bmp)).unwrap()) + EncoderResult::Unmappable(::core::char::from_u32(u32::from(bmp)).unwrap()) } } @@ -4847,7 +4868,7 @@ fn checked_next_power_of_two(opt: Option) -> Option { fn checked_min(one: Option, other: Option) -> Option { if let Some(a) = one { if let Some(b) = other { - Some(::std::cmp::min(a, b)) + Some(::core::cmp::min(a, b)) } else { Some(a) } @@ -4872,7 +4893,7 @@ mod test_labels_names; #[cfg(test)] mod tests { use super::*; - use std::borrow::Cow; + use alloc::borrow::Cow; fn sniff_to_utf16( initial_encoding: &'static Encoding, @@ -5625,7 +5646,7 @@ mod tests { #[test] fn test_hash() { - let mut encodings = ::std::collections::HashSet::new(); + let mut encodings = ::alloc::collections::btree_set::BTreeSet::new(); encodings.insert(UTF_8); encodings.insert(ISO_2022_JP); assert!(encodings.contains(UTF_8)); diff --git a/src/mem.rs b/src/mem.rs index 92f57df6..e112d1d1 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -24,7 +24,9 @@ //! The FFI binding for this module are in the //! [encoding_c_mem crate](https://github.com/hsivonen/encoding_c_mem). -use std::borrow::Cow; +use alloc::borrow::Cow; +use alloc::vec::Vec; +use alloc::string::String; use super::in_inclusive_range16; use super::in_inclusive_range32; @@ -41,8 +43,8 @@ macro_rules! non_fuzz_debug_assert { cfg_if! { if #[cfg(feature = "simd-accel")] { - use ::std::intrinsics::likely; - use ::std::intrinsics::unlikely; + use ::core::intrinsics::likely; + use ::core::intrinsics::unlikely; } else { #[inline(always)] // Unsafe to match the intrinsic, which is needlessly unsafe. @@ -85,7 +87,7 @@ macro_rules! by_unit_check_alu { fn $name(buffer: &[$unit]) -> bool { let mut offset = 0usize; let mut accu = 0usize; - let unit_size = ::std::mem::size_of::<$unit>(); + let unit_size = ::core::mem::size_of::<$unit>(); let len = buffer.len(); if len >= ALU_ALIGNMENT / unit_size { // The most common reason to return `false` is for the first code @@ -157,7 +159,7 @@ macro_rules! by_unit_check_simd { fn $name(buffer: &[$unit]) -> bool { let mut offset = 0usize; let mut accu = 0usize; - let unit_size = ::std::mem::size_of::<$unit>(); + let unit_size = ::core::mem::size_of::<$unit>(); let len = buffer.len(); if len >= SIMD_STRIDE_SIZE / unit_size { // The most common reason to return `false` is for the first code @@ -248,7 +250,7 @@ cfg_if! { // only aligned SIMD (perhaps misguidedly) and needs to deal with // the last code unit in a SIMD stride being part of a valid // surrogate pair. - let unit_size = ::std::mem::size_of::(); + let unit_size = ::core::mem::size_of::(); let src = buffer.as_ptr(); let len = buffer.len(); let mut offset = 0usize; @@ -1781,7 +1783,7 @@ pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usi // src can't advance more than dst let src_left = src_len - total_read; let dst_left = dst_len - total_written; - let min_left = ::std::cmp::min(src_left, dst_left); + let min_left = ::core::cmp::min(src_left, dst_left); if let Some((non_ascii, consumed)) = unsafe { ascii_to_ascii( src_ptr.add(total_read), @@ -1850,7 +1852,7 @@ pub fn convert_latin1_to_str_partial(src: &[u8], dst: &mut str) -> (usize, usize let (read, written) = convert_latin1_to_utf8_partial(src, bytes); let len = bytes.len(); let mut trail = written; - let max = ::std::cmp::min(len, trail + MAX_STRIDE_SIZE); + let max = ::core::cmp::min(len, trail + MAX_STRIDE_SIZE); while trail < max { bytes[trail] = 0; trail += 1; @@ -1991,7 +1993,7 @@ pub fn decode_latin1<'a>(bytes: &'a [u8]) -> Cow<'a, str> { // >= makes later things optimize better than == if up_to >= bytes.len() { debug_assert_eq!(up_to, bytes.len()); - let s: &str = unsafe { ::std::str::from_utf8_unchecked(bytes) }; + let s: &str = unsafe { ::core::str::from_utf8_unchecked(bytes) }; return Cow::Borrowed(s); } let (head, tail) = bytes.split_at(up_to); @@ -3152,11 +3154,11 @@ mod tests { #[cfg_attr(miri, ignore)] // Miri is too slow fn test_is_char_bidi_thoroughly() { for i in 0..0xD800u32 { - let c: char = ::std::char::from_u32(i).unwrap(); + let c: char = ::core::char::from_u32(i).unwrap(); assert_eq!(is_char_bidi(c), reference_is_char_bidi(c)); } for i in 0xE000..0x110000u32 { - let c: char = ::std::char::from_u32(i).unwrap(); + let c: char = ::core::char::from_u32(i).unwrap(); assert_eq!(is_char_bidi(c), reference_is_char_bidi(c)); } } @@ -3178,14 +3180,14 @@ mod tests { fn test_is_str_bidi_thoroughly() { let mut buf = [0; 4]; for i in 0..0xD800u32 { - let c: char = ::std::char::from_u32(i).unwrap(); + let c: char = ::core::char::from_u32(i).unwrap(); assert_eq!( is_str_bidi(c.encode_utf8(&mut buf[..])), reference_is_char_bidi(c) ); } for i in 0xE000..0x110000u32 { - let c: char = ::std::char::from_u32(i).unwrap(); + let c: char = ::core::char::from_u32(i).unwrap(); assert_eq!( is_str_bidi(c.encode_utf8(&mut buf[..])), reference_is_char_bidi(c) @@ -3198,7 +3200,7 @@ mod tests { fn test_is_utf8_bidi_thoroughly() { let mut buf = [0; 8]; for i in 0..0xD800u32 { - let c: char = ::std::char::from_u32(i).unwrap(); + let c: char = ::core::char::from_u32(i).unwrap(); let expect = reference_is_char_bidi(c); { let len = { @@ -3216,7 +3218,7 @@ mod tests { assert_eq!(is_utf8_bidi(&buf[..]), expect); } for i in 0xE000..0x110000u32 { - let c: char = ::std::char::from_u32(i).unwrap(); + let c: char = ::core::char::from_u32(i).unwrap(); let expect = reference_is_char_bidi(c); { let len = { diff --git a/src/simd_funcs.rs b/src/simd_funcs.rs index 54716f05..e752f476 100644 --- a/src/simd_funcs.rs +++ b/src/simd_funcs.rs @@ -16,8 +16,8 @@ use packed_simd::FromBits; #[inline(always)] pub unsafe fn load16_unaligned(ptr: *const u8) -> u8x16 { - let mut simd = ::std::mem::uninitialized(); - ::std::ptr::copy_nonoverlapping(ptr, &mut simd as *mut u8x16 as *mut u8, 16); + let mut simd = ::core::mem::uninitialized(); + ::core::ptr::copy_nonoverlapping(ptr, &mut simd as *mut u8x16 as *mut u8, 16); simd } @@ -29,7 +29,7 @@ pub unsafe fn load16_aligned(ptr: *const u8) -> u8x16 { #[inline(always)] pub unsafe fn store16_unaligned(ptr: *mut u8, s: u8x16) { - ::std::ptr::copy_nonoverlapping(&s as *const u8x16 as *const u8, ptr, 16); + ::core::ptr::copy_nonoverlapping(&s as *const u8x16 as *const u8, ptr, 16); } #[allow(dead_code)] @@ -40,8 +40,8 @@ pub unsafe fn store16_aligned(ptr: *mut u8, s: u8x16) { #[inline(always)] pub unsafe fn load8_unaligned(ptr: *const u16) -> u16x8 { - let mut simd = ::std::mem::uninitialized(); - ::std::ptr::copy_nonoverlapping(ptr as *const u8, &mut simd as *mut u16x8 as *mut u8, 16); + let mut simd = ::core::mem::uninitialized(); + ::core::ptr::copy_nonoverlapping(ptr as *const u8, &mut simd as *mut u16x8 as *mut u8, 16); simd } @@ -53,7 +53,7 @@ pub unsafe fn load8_aligned(ptr: *const u16) -> u16x8 { #[inline(always)] pub unsafe fn store8_unaligned(ptr: *mut u16, s: u16x8) { - ::std::ptr::copy_nonoverlapping(&s as *const u16x8 as *const u8, ptr as *mut u8, 16); + ::core::ptr::copy_nonoverlapping(&s as *const u16x8 as *const u8, ptr as *mut u8, 16); } #[allow(dead_code)] @@ -64,18 +64,18 @@ pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) { cfg_if! { if #[cfg(all(target_feature = "sse2", target_arch = "x86_64"))] { - use std::arch::x86_64::__m128i; - use std::arch::x86_64::_mm_movemask_epi8; - use std::arch::x86_64::_mm_packus_epi16; + use core::arch::x86_64::__m128i; + use core::arch::x86_64::_mm_movemask_epi8; + use core::arch::x86_64::_mm_packus_epi16; } else if #[cfg(all(target_feature = "sse2", target_arch = "x86"))] { - use std::arch::x86::__m128i; - use std::arch::x86::_mm_movemask_epi8; - use std::arch::x86::_mm_packus_epi16; + use core::arch::x86::__m128i; + use core::arch::x86::_mm_movemask_epi8; + use core::arch::x86::_mm_packus_epi16; } else if #[cfg(target_arch = "aarch64")]{ - use std::arch::aarch64::uint8x16_t; - use std::arch::aarch64::uint16x8_t; - use std::arch::aarch64::vmaxvq_u8; - use std::arch::aarch64::vmaxvq_u16; + use core::arch::aarch64::uint8x16_t; + use core::arch::aarch64::uint16x8_t; + use core::arch::aarch64::vmaxvq_u8; + use core::arch::aarch64::vmaxvq_u16; } else { } @@ -446,7 +446,7 @@ mod tests { ]; let mut alu = 0u64; unsafe { - ::std::ptr::copy_nonoverlapping(input.as_ptr(), &mut alu as *mut u64 as *mut u8, 8); + ::core::ptr::copy_nonoverlapping(input.as_ptr(), &mut alu as *mut u64 as *mut u8, 8); } let masked = alu & 0x8080808080808080; assert_eq!(masked.trailing_zeros(), 39); diff --git a/src/single_byte.rs b/src/single_byte.rs index b5e75584..f94884c4 100644 --- a/src/single_byte.rs +++ b/src/single_byte.rs @@ -432,7 +432,7 @@ impl SingleByteEncoder { } // The next code unit is a low surrogate. let astral: char = unsafe { - ::std::char::from_u32_unchecked( + ::core::char::from_u32_unchecked( (u32::from(non_ascii) << 10) + second - (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32), ) diff --git a/src/utf_8.rs b/src/utf_8.rs index e9c92672..d0355074 100644 --- a/src/utf_8.rs +++ b/src/utf_8.rs @@ -17,8 +17,8 @@ use crate::variant::*; cfg_if! { if #[cfg(feature = "simd-accel")] { - use ::std::intrinsics::unlikely; - use ::std::intrinsics::likely; + use ::core::intrinsics::unlikely; + use ::core::intrinsics::likely; } else { #[inline(always)] // Unsafe to match the intrinsic, which is needlessly unsafe. @@ -236,7 +236,7 @@ pub fn convert_utf8_to_utf16_up_to_invalid(src: &[u8], dst: &mut [u16]) -> (usiz let mut byte = { let src_remaining = &src[read..]; let dst_remaining = &mut dst[written..]; - let length = ::std::cmp::min(src_remaining.len(), dst_remaining.len()); + let length = ::core::cmp::min(src_remaining.len(), dst_remaining.len()); match unsafe { ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length) } { From 2ae49dabedd1c0301578645489dd16ec3dbaf357 Mon Sep 17 00:00:00 2001 From: fschutt Date: Thu, 4 Feb 2021 10:56:42 +0100 Subject: [PATCH 2/2] Revert wrong replacements of "std::io" with "core::io" in docs --- src/lib.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 02f6a368..e0f280f2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,11 +41,11 @@ //! The [repository is on GitHub](https://github.com/hsivonen/encoding_rs). The //! [crate is available on crates.io](https://crates.io/crates/encoding_rs). //! -//! # Integration with `core::io` +//! # Integration with `std::io` //! -//! This crate doesn't implement traits from `core::io`. However, for the case of -//! wrapping a `core::io::Read` in a decoder that implements `core::io::Read` and -//! presents the data from the wrapped `core::io::Read` as UTF-8 is addressed by +//! This crate doesn't implement traits from `std::io`. However, for the case of +//! wrapping a `std::io::Read` in a decoder that implements `std::io::Read` and +//! presents the data from the wrapped `std::io::Read` as UTF-8 is addressed by //! the [`encoding_rs_io`](https://docs.rs/encoding_rs_io/) crate. //! //! # Examples @@ -87,7 +87,7 @@ //! // Very short output buffer to demonstrate the output buffer getting full. //! // Normally, you'd use something like `[0u8; 2048]`. //! let mut buffer_bytes = [0u8; 8]; -//! let mut buffer: &mut str = core::str::from_utf8_mut(&mut buffer_bytes[..]).unwrap(); +//! let mut buffer: &mut str = std::str::from_utf8_mut(&mut buffer_bytes[..]).unwrap(); //! //! // How many bytes in the buffer currently hold significant data. //! let mut bytes_in_buffer = 0usize;