Skip to content

Commit

Permalink
par_iter for Range<char>
Browse files Browse the repository at this point in the history
  • Loading branch information
CAD97 committed Jun 18, 2020
1 parent a798276 commit b878116
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 0 deletions.
79 changes: 79 additions & 0 deletions src/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
use crate::iter::plumbing::*;
use crate::iter::*;
use std::char;
use std::ops::Range;
use std::usize;

Expand Down Expand Up @@ -223,6 +224,84 @@ unindexed_range_impl! {i64, u64}
unindexed_range_impl! {u128, u128}
unindexed_range_impl! {i128, u128}

// char is special because of the surrogate range hole
impl ParallelIterator for Iter<char> {
type Item = char;

fn drive_unindexed<C>(self, consumer: C) -> C::Result
where
C: UnindexedConsumer<Self::Item>,
{
self.drive(consumer)
}

fn opt_len(&self) -> Option<usize> {
Some(self.len())
}
}

impl IndexedParallelIterator for Iter<char> {
    /// Drives `consumer` over every `char` in the half-open range.
    ///
    /// The range is processed as `u32` code points. When it straddles the
    /// surrogate hole (0xD800..0xE000), the fragments before and after the
    /// hole are chained so that no surrogate value is ever turned into a `char`.
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        let start = self.range.start as u32;
        let end = self.range.end as u32;
        // `end` is exclusive and is itself a `char`, so it is never a surrogate.
        // The hole must be skipped whenever `start` is below it and `end` is at
        // or above 0xE000 -- including `end == 0xE000` exactly. This is the same
        // condition `len` uses, which keeps the reported length in sync.
        if start < 0xD800 && 0xE000 <= end {
            // chain the before and after surrogate range fragments
            (start..0xD800)
                .into_par_iter()
                .chain(0xE000..end)
                // SAFETY: both fragments exclude 0xD800..0xE000 and stay below
                // `end`, which came from a valid `char`.
                .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
                .drive(consumer)
        } else {
            // no surrogate range to worry about
            (start..end)
                .into_par_iter()
                // SAFETY: `start` and `end` are valid `char`s on the same side
                // of the surrogate hole, so every value in between is a valid
                // code point as well.
                .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
                .drive(consumer)
        }
    }

    /// Returns the number of `char`s in the half-open range, not counting the
    /// 0x800 surrogate values when the range spans them.
    fn len(&self) -> usize {
        // Taken from <char as Step>::steps_between
        let start = self.range.start as u32;
        let end = self.range.end as u32;
        if start < end {
            let mut count = end - start;
            if start < 0xD800 && 0xE000 <= end {
                // the surrogate hole lies inside the range; it holds 0x800 values
                count -= 0x800
            }
            count as usize
        } else {
            // empty (or reversed) range
            0
        }
    }

    /// Hands `callback` a producer over every `char` in the half-open range.
    ///
    /// Uses the same surrogate-hole split as `drive`, so the producer yields
    /// exactly `len()` items.
    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        let start = self.range.start as u32;
        let end = self.range.end as u32;
        // Same condition as in `drive` and `len`: `end == 0xE000` must also
        // take the split path, otherwise surrogates would be produced.
        if start < 0xD800 && 0xE000 <= end {
            // chain the before and after surrogate range fragments
            (start..0xD800)
                .into_par_iter()
                .chain(0xE000..end)
                // SAFETY: both fragments exclude 0xD800..0xE000 and stay below
                // `end`, which came from a valid `char`.
                .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
                .with_producer(callback)
        } else {
            // no surrogate range to worry about
            (start..end)
                .into_par_iter()
                // SAFETY: `start` and `end` are valid `char`s on the same side
                // of the surrogate hole, so every value in between is a valid
                // code point as well.
                .map(|codepoint| unsafe { char::from_u32_unchecked(codepoint) })
                .with_producer(callback)
        }
    }
}

#[test]
fn check_range_split_at_overflow() {
// Note, this split index overflows i8!
Expand Down
16 changes: 16 additions & 0 deletions tests/chars.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use rayon::prelude::*;
use std::char;
use std::collections::HashSet;

/// Checks that a half-open `char` range spanning the surrogate hole yields the
/// same set of characters through `into_par_iter` as through sequential
/// iteration.
#[test]
fn half_open_correctness() {
    let low = char::from_u32(0xD800 - 10).unwrap();
    // The upper bound must lie above the surrogate hole; with `high == low`
    // the range is empty and the comparison below checks nothing.
    let high = char::from_u32(0xE000 + 10).unwrap();

    let range = low..high;

    let std_iter: HashSet<char> = range.clone().collect();
    let par_iter: HashSet<char> = range.into_par_iter().collect();

    // 10 chars below the hole plus 10 above it; guards against a vacuous test.
    assert_eq!(std_iter.len(), 20);
    assert_eq!(std_iter, par_iter);
}

0 comments on commit b878116

Please sign in to comment.