From c30fa92a0aac87ba27df261ece44602f027a1800 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Wed, 30 Dec 2015 10:08:28 +0100 Subject: [PATCH 1/5] `EscapeUnicode` and `EscapeDefault` are `ExactSizeIterator`s In #28662, `size_hint` was made exact for `EscapeUnicode` and `EscapeDefault`, but neither was marked as `ExactSizeIterator`. --- src/libcore/char.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 6a2331dddcf0e..25d90cc6f3a4a 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -483,6 +483,9 @@ impl Iterator for EscapeUnicode { } } +#[stable(feature = "exact_size_escape", since = "1.11.0")] +impl ExactSizeIterator for EscapeUnicode { } + /// An iterator that yields the literal escape code of a `char`. /// /// This `struct` is created by the [`escape_default()`] method on [`char`]. See @@ -578,6 +581,9 @@ impl Iterator for EscapeDefault { } } +#[stable(feature = "exact_size_escape", since = "1.11.0")] +impl ExactSizeIterator for EscapeDefault { } + /// An iterator over `u8` entries represending the UTF-8 encoding of a `char` /// value. /// From baa9680a3449a585481bd4b124f3e1f108262877 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Wed, 30 Dec 2015 16:42:52 +0100 Subject: [PATCH 2/5] Implement `count` for `EscapeDefault` and `EscapeUnicode` Trivial implementation, as both are `ExactSizeIterator`s. Part of #24214. 
--- src/libcore/char.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 25d90cc6f3a4a..f803b36cede96 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -470,6 +470,11 @@ impl Iterator for EscapeUnicode { (n, Some(n)) } + #[inline] + fn count(self) -> usize { + self.len() + } + fn last(self) -> Option<char> { match self.state { EscapeUnicodeState::Done => None, @@ -535,13 +540,9 @@ impl Iterator for EscapeDefault { } } + #[inline] fn count(self) -> usize { - match self.state { - EscapeDefaultState::Char(_) => 1, - EscapeDefaultState::Unicode(iter) => iter.count(), - EscapeDefaultState::Done => 0, - EscapeDefaultState::Backslash(_) => 2, - } + self.len() } fn nth(&mut self, n: usize) -> Option<char> { From da03950f62a43ff3ca32f931a3edecc71a214f3b Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Mon, 18 Jan 2016 17:36:12 +0100 Subject: [PATCH 3/5] Move length computation to `ExactSizeIterator` impls and reuse it in `size_hint`. 
--- src/libcore/char.rs | 60 +++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index f803b36cede96..38337c7493eef 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -413,12 +413,12 @@ pub struct EscapeUnicode { #[derive(Clone, Debug)] enum EscapeUnicodeState { - Backslash, - Type, - LeftBrace, - Value, - RightBrace, Done, + RightBrace, + Value, + LeftBrace, + Type, + Backslash, } #[stable(feature = "rust1", since = "1.0.0")] @@ -457,16 +457,9 @@ impl Iterator for EscapeUnicode { } } + #[inline] fn size_hint(&self) -> (usize, Option<usize>) { - let n = match self.state { - EscapeUnicodeState::Backslash => 5, - EscapeUnicodeState::Type => 4, - EscapeUnicodeState::LeftBrace => 3, - EscapeUnicodeState::Value => 2, - EscapeUnicodeState::RightBrace => 1, - EscapeUnicodeState::Done => 0, - }; - let n = n + self.hex_digit_idx; + let n = self.len(); (n, Some(n)) } @@ -489,7 +482,20 @@ impl Iterator for EscapeUnicode { } #[stable(feature = "exact_size_escape", since = "1.11.0")] -impl ExactSizeIterator for EscapeUnicode { } +impl ExactSizeIterator for EscapeUnicode { + #[inline] + fn len(&self) -> usize { + // The match is a single memory access with no branching + self.hex_digit_idx + match self.state { + EscapeUnicodeState::Done => 0, + EscapeUnicodeState::RightBrace => 1, + EscapeUnicodeState::Value => 2, + EscapeUnicodeState::LeftBrace => 3, + EscapeUnicodeState::Type => 4, + EscapeUnicodeState::Backslash => 5, + } + } +} /// An iterator that yields the literal escape code of a `char`. 
/// @@ -506,9 +512,9 @@ pub struct EscapeDefault { #[derive(Clone, Debug)] enum EscapeDefaultState { - Backslash(char), - Char(char), Done, + Char(char), + Backslash(char), Unicode(EscapeUnicode), } @@ -531,13 +537,10 @@ impl Iterator for EscapeDefault { } } + #[inline] fn size_hint(&self) -> (usize, Option<usize>) { - match self.state { - EscapeDefaultState::Char(_) => (1, Some(1)), - EscapeDefaultState::Backslash(_) => (2, Some(2)), - EscapeDefaultState::Unicode(ref iter) => iter.size_hint(), - EscapeDefaultState::Done => (0, Some(0)), - } + let n = self.len(); + (n, Some(n)) } #[inline] @@ -583,7 +586,16 @@ impl Iterator for EscapeDefault { } #[stable(feature = "exact_size_escape", since = "1.11.0")] -impl ExactSizeIterator for EscapeDefault { } +impl ExactSizeIterator for EscapeDefault { + fn len(&self) -> usize { + match self.state { + EscapeDefaultState::Done => 0, + EscapeDefaultState::Char(_) => 1, + EscapeDefaultState::Backslash(_) => 2, + EscapeDefaultState::Unicode(ref iter) => iter.len(), + } + } +} /// An iterator over `u8` entries represending the UTF-8 encoding of a `char` /// value. From 41950c64a1bcb7025b42dde05c3cec4c3993f293 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Thu, 26 May 2016 10:04:05 +0200 Subject: [PATCH 4/5] Explain the order of the enumeration items Simply a micro-optimization to reduce code size and to open up inlining opportunities. --- src/libcore/char.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 38337c7493eef..d80b456181ae4 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -411,6 +411,9 @@ pub struct EscapeUnicode { hex_digit_idx: usize, } +// The enum values are ordered so that their representation is the +// same as the remaining length (besides the hexadecimal digits). This +// likely makes `len()` a single load from memory and worth inlining. 
#[derive(Clone, Debug)] enum EscapeUnicodeState { Done, From 6b5e86b0ce543c60e201f95d57d720181281f1da Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Thu, 26 May 2016 10:54:58 +0200 Subject: [PATCH 5/5] Extend the test for `EscapeUnicode` to also check that it is legitimately an `ExactSizeIterator`. --- src/libcoretest/char.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index e959e71daf73f..7da876b945947 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -276,6 +276,12 @@ fn eu_iterator_specializations() { // Check last assert_eq!(iter.clone().last(), Some('}')); + // Check len + assert_eq!(iter.len(), len - offset); + + // Check size_hint (= len in ExactSizeIterator) + assert_eq!(iter.size_hint(), (iter.len(), Some(iter.len()))); + // Check counting assert_eq!(iter.clone().count(), len - offset);