Skip to content

Commit

Permalink
par_iter for RangeInclusive<char>
Browse files Browse the repository at this point in the history
  • Loading branch information
CAD97 committed Jun 18, 2020
1 parent a04c20a commit 4c99c56
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 0 deletions.
88 changes: 88 additions & 0 deletions src/range_inclusive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
use crate::iter::plumbing::*;
use crate::iter::*;
use std::char;
use std::ops::RangeInclusive;

/// Parallel iterator over an inclusive range, implemented for all integer types.
Expand Down Expand Up @@ -147,6 +148,93 @@ parallel_range_impl! {i64}
parallel_range_impl! {u128}
parallel_range_impl! {i128}

// char is special
impl ParallelIterator for Iter<char> {
type Item = char;

fn drive_unindexed<C>(self, consumer: C) -> C::Result
where
C: UnindexedConsumer<Self::Item>,
{
self.drive(consumer)
}

fn opt_len(&self) -> Option<usize> {
Some(self.len())
}
}

// Range<u32> is broken on 16 bit platforms, may as well benefit from it
impl IndexedParallelIterator for Iter<char> {
// Split at the surrogate range first if we're allowed to
fn drive<C>(self, consumer: C) -> C::Result
where
C: Consumer<Self::Item>,
{
if let Some((start, end)) = self.bounds() {
let start = start as u32;
let end = end as u32;
if start < 0xD800 && 0xE000 <= end {
// chain the before and after surrogate range fragments
(start..0xD800)
.into_par_iter()
.chain(0xE000..end + 1) // cannot use RangeInclusive, so add one to end
.map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
.drive(consumer)
} else {
// no surrogate range to worry about
(start..end + 1) // cannot use RangeInclusive, so add one to end
.into_par_iter()
.map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
.drive(consumer)
}
} else {
empty().into_par_iter().drive(consumer)
}
}

fn len(&self) -> usize {
if let Some((start, end)) = self.bounds() {
// Taken from <char as Step>::steps_between
let start = start as u32;
let end = end as u32;
let mut count = end - start;
if start < 0xD800 && 0xE000 <= end {
count -= 0x800
}
(count + 1) as usize // add one for inclusive
} else {
0
}
}

fn with_producer<CB>(self, callback: CB) -> CB::Output
where
CB: ProducerCallback<Self::Item>,
{
if let Some((start, end)) = self.bounds() {
let start = start as u32;
let end = end as u32;
if start < 0xD800 && 0xE000 <= end {
// chain the before and after surrogate range fragments
(start..0xD800)
.into_par_iter()
.chain(0xE000..end + 1) // cannot use RangeInclusive, so add one to end
.map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
.with_producer(callback)
} else {
// no surrogate range to worry about
(start..end + 1) // cannot use RangeInclusive, so add one to end
.into_par_iter()
.map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
.with_producer(callback)
}
} else {
empty().into_par_iter().with_producer(callback)
}
}
}

#[test]
#[cfg(target_pointer_width = "64")]
fn test_u32_opt_len() {
Expand Down
13 changes: 13 additions & 0 deletions tests/chars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,16 @@ fn half_open_correctness() {

assert_eq!(std_iter, par_iter);
}

// An inclusive `char` range that starts 10 below 0xD800 and ends 10 above
// 0xE000 must yield the same set of characters whether it is iterated
// sequentially or in parallel.
#[test]
fn closed_correctness() {
    let first = char::from_u32(0xD800 - 10).unwrap();
    let last = char::from_u32(0xE000 + 10).unwrap();

    let sequential: HashSet<char> = (first..=last).collect();
    let parallel: HashSet<char> = (first..=last).into_par_iter().collect();

    assert_eq!(sequential, parallel);
}

0 comments on commit 4c99c56

Please sign in to comment.