Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize core::char::CaseMappingIter #122616

Merged
merged 2 commits into from
Mar 29, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
309 changes: 181 additions & 128 deletions library/core/src/char/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ use crate::ascii;
use crate::error::Error;
use crate::escape;
use crate::fmt::{self, Write};
use crate::iter::FusedIterator;
use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce};
use crate::num::NonZero;

pub(crate) use self::methods::EscapeDebugExtArgs;
Expand Down Expand Up @@ -373,176 +373,229 @@ impl fmt::Display for EscapeDebug {
}
}

/// Returns an iterator that yields the lowercase equivalent of a `char`.
///
/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
/// its documentation for more.
///
/// [`to_lowercase`]: char::to_lowercase
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Debug, Clone)]
pub struct ToLowercase(CaseMappingIter);
macro_rules! casemappingiter_impls {
($(#[$attr:meta])* $ITER_NAME:ident) => {
$(#[$attr])*
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Debug, Clone)]
pub struct $ITER_NAME(CaseMappingIter);

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for $ITER_NAME {
type Item = char;
fn next(&mut self) -> Option<char> {
self.0.next()
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for ToLowercase {
type Item = char;
fn next(&mut self) -> Option<char> {
self.0.next()
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}

#[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
impl DoubleEndedIterator for ToLowercase {
fn next_back(&mut self) -> Option<char> {
self.0.next_back()
}
}
fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
where
Fold: FnMut(Acc, Self::Item) -> Acc,
{
self.0.fold(init, fold)
}

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for ToLowercase {}
fn count(self) -> usize {
self.0.count()
}

#[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
impl ExactSizeIterator for ToLowercase {}
fn last(self) -> Option<Self::Item> {
self.0.last()
}

/// Returns an iterator that yields the uppercase equivalent of a `char`.
///
/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
/// its documentation for more.
///
/// [`to_uppercase`]: char::to_uppercase
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Debug, Clone)]
pub struct ToUppercase(CaseMappingIter);
fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
self.0.advance_by(n)
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for ToUppercase {
type Item = char;
fn next(&mut self) -> Option<char> {
self.0.next()
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
// SAFETY: just forwarding requirements to caller
unsafe { self.0.__iterator_get_unchecked(idx) }
}
}

#[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
impl DoubleEndedIterator for ToUppercase {
fn next_back(&mut self) -> Option<char> {
self.0.next_back()
#[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
impl DoubleEndedIterator for $ITER_NAME {
fn next_back(&mut self) -> Option<char> {
self.0.next_back()
}

fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
where
Fold: FnMut(Acc, Self::Item) -> Acc,
{
self.0.rfold(init, rfold)
}

fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
self.0.advance_back_by(n)
}
}

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for $ITER_NAME {}

#[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
impl ExactSizeIterator for $ITER_NAME {
fn len(&self) -> usize {
self.0.len()
}

fn is_empty(&self) -> bool {
self.0.is_empty()
}
}

// SAFETY: forwards to inner `array::IntoIter`
#[unstable(feature = "trusted_len", issue = "37572")]
unsafe impl TrustedLen for $ITER_NAME {}

// SAFETY: forwards to inner `array::IntoIter`
#[doc(hidden)]
#[unstable(feature = "std_internals", issue = "none")]
unsafe impl TrustedRandomAccessNoCoerce for $ITER_NAME {
const MAY_HAVE_SIDE_EFFECT: bool = false;
}

// SAFETY: this iter has no subtypes/supertypes
#[doc(hidden)]
#[unstable(feature = "std_internals", issue = "none")]
unsafe impl TrustedRandomAccess for $ITER_NAME {}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for $ITER_NAME {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
}
}
}
}

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for ToUppercase {}
casemappingiter_impls! {
/// Returns an iterator that yields the lowercase equivalent of a `char`.
///
/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
/// its documentation for more.
///
/// [`to_lowercase`]: char::to_lowercase
ToLowercase
}

#[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
impl ExactSizeIterator for ToUppercase {}
casemappingiter_impls! {
/// Returns an iterator that yields the uppercase equivalent of a `char`.
///
/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
/// its documentation for more.
///
/// [`to_uppercase`]: char::to_uppercase
ToUppercase
}

#[derive(Debug, Clone)]
enum CaseMappingIter {
Three(char, char, char),
Two(char, char),
One(char),
Zero,
}
// Shared implementation behind the public case-mapping iterators: a newtype
// over an owning iterator on a fixed `[char; 3]` array. `new` drops trailing
// '\0' padding from the back, so only the real mapped chars are yielded.
struct CaseMappingIter(core::array::IntoIter<char, 3>);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unsure: are these something that people would commonly keep in a struct? I do like the `array::IntoIter` phrasing of this, but it means the iterator will be 2×`usize` bigger than it used to be.

Feels like probably not, more likely something they'd just use directly and toss away, rather than use partially and keep around, but figured I'd mention it just in case.

(I wish we had an ArraySize<N> type that could be smaller for small N.)


impl CaseMappingIter {
#[inline]
fn new(chars: [char; 3]) -> CaseMappingIter {
let mut iter = chars.into_iter();
if chars[2] == '\0' {
iter.next_back();
if chars[1] == '\0' {
CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
} else {
CaseMappingIter::Two(chars[0], chars[1])
iter.next_back();

// Deliberately don't check `chars[0]`,
// as '\0' lowercases to itself
}
} else {
CaseMappingIter::Three(chars[0], chars[1], chars[2])
}
CaseMappingIter(iter)
}
}

impl Iterator for CaseMappingIter {
type Item = char;

fn next(&mut self) -> Option<char> {
match *self {
CaseMappingIter::Three(a, b, c) => {
*self = CaseMappingIter::Two(b, c);
Some(a)
}
CaseMappingIter::Two(b, c) => {
*self = CaseMappingIter::One(c);
Some(b)
}
CaseMappingIter::One(c) => {
*self = CaseMappingIter::Zero;
Some(c)
}
CaseMappingIter::Zero => None,
}
self.0.next()
}

fn size_hint(&self) -> (usize, Option<usize>) {
let size = match self {
CaseMappingIter::Three(..) => 3,
CaseMappingIter::Two(..) => 2,
CaseMappingIter::One(_) => 1,
CaseMappingIter::Zero => 0,
};
(size, Some(size))
self.0.size_hint()
}

fn fold<Acc, Fold>(self, init: Acc, fold: Fold) -> Acc
where
Fold: FnMut(Acc, Self::Item) -> Acc,
{
self.0.fold(init, fold)
}

fn count(self) -> usize {
self.0.count()
}

fn last(self) -> Option<Self::Item> {
self.0.last()
}

fn advance_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
self.0.advance_by(n)
}

unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
// SAFETY: just forwarding requirements to caller
unsafe { self.0.__iterator_get_unchecked(idx) }
}
}

impl DoubleEndedIterator for CaseMappingIter {
fn next_back(&mut self) -> Option<char> {
match *self {
CaseMappingIter::Three(a, b, c) => {
*self = CaseMappingIter::Two(a, b);
Some(c)
}
CaseMappingIter::Two(b, c) => {
*self = CaseMappingIter::One(b);
Some(c)
}
CaseMappingIter::One(c) => {
*self = CaseMappingIter::Zero;
Some(c)
}
CaseMappingIter::Zero => None,
}
self.0.next_back()
}
}

impl fmt::Display for CaseMappingIter {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
CaseMappingIter::Three(a, b, c) => {
f.write_char(a)?;
f.write_char(b)?;
f.write_char(c)
}
CaseMappingIter::Two(b, c) => {
f.write_char(b)?;
f.write_char(c)
}
CaseMappingIter::One(c) => f.write_char(c),
CaseMappingIter::Zero => Ok(()),
}
fn rfold<Acc, Fold>(self, init: Acc, rfold: Fold) -> Acc
where
Fold: FnMut(Acc, Self::Item) -> Acc,
{
self.0.rfold(init, rfold)
}

fn advance_back_by(&mut self, n: usize) -> Result<(), NonZero<usize>> {
self.0.advance_back_by(n)
}
}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for ToLowercase {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
// Exact-length support for `CaseMappingIter`. Both methods are overridden
// only to forward to the wrapped iterator in field `.0`.
impl ExactSizeIterator for CaseMappingIter {
// Number of items still to be yielded, as reported by the inner iterator.
fn len(&self) -> usize {
self.0.len()
}

// Whether the inner iterator reports itself as exhausted.
fn is_empty(&self) -> bool {
self.0.is_empty()
}
}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for ToUppercase {
// Marker impl with an empty body. `next` on this type forwards straight to the
// wrapped iterator, so any fused guarantee comes from that inner iterator.
// NOTE(review): confirm `array::IntoIter` is itself `FusedIterator`.
impl FusedIterator for CaseMappingIter {}

// SAFETY: forwards to inner `array::IntoIter`
// (`size_hint` and `len` on this type both delegate to `self.0` unchanged).
unsafe impl TrustedLen for CaseMappingIter {}

// SAFETY: forwards to inner `array::IntoIter`
// (`__iterator_get_unchecked` on this type delegates to `self.0` unchanged).
unsafe impl TrustedRandomAccessNoCoerce for CaseMappingIter {
// Declares that fetching an element has no side effects.
// NOTE(review): presumably sound because the items are plain `char`s — confirm.
const MAY_HAVE_SIDE_EFFECT: bool = false;
}

// SAFETY: `CaseMappingIter` has no subtypes/supertypes
unsafe impl TrustedRandomAccess for CaseMappingIter {}

impl fmt::Display for CaseMappingIter {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
for c in self.0.clone() {
f.write_char(c)?;
}
Ok(())
}
}

Expand Down
Loading