Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved performance of bitmap from_trusted.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Nov 5, 2021
1 parent 1f95def commit c9f5e9c
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 29 deletions.
88 changes: 59 additions & 29 deletions src/bitmap/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::hint::unreachable_unchecked;
use std::iter::FromIterator;

use crate::bitmap::utils::merge_reversed;
Expand Down Expand Up @@ -309,23 +310,58 @@ impl FromIterator<bool> for MutableBitmap {
}
}

/// # Safety
/// The iterator must be trustedLen and its len must be least `len`.
#[inline]
fn extend<I: Iterator<Item = bool>>(buffer: &mut [u8], length: usize, mut iterator: I) {
let chunks = length / 8;
let reminder = length % 8;

buffer[..chunks].iter_mut().for_each(|byte| {
(0..8).for_each(|i| {
*byte = set(*byte, i, iterator.next().unwrap());
})
});

if reminder != 0 {
let last = &mut buffer[chunks];
iterator.enumerate().for_each(|(i, value)| {
*last = set(*last, i, value);
});
unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator<Item = bool>) -> u8 {
let mut byte_accum: u8 = 0;
let mut mask: u8 = 1;
for _ in 0..len {
let value = match iterator.next() {
Some(value) => value,
None => unsafe { unreachable_unchecked() },
};

byte_accum |= match value {
true => mask,
false => 0,
};
mask <<= 1;
}
byte_accum
}

/// Extends the [`MutableBuffer`] from `iterator`
/// # Safety
/// The iterator MUST be [`TrustedLen`].
#[inline]
unsafe fn extend_aligned_trusted_iter_unchecked(
buffer: &mut MutableBuffer<u8>,
mut iterator: impl Iterator<Item = bool>,
) -> usize {
let additional_bits = iterator.size_hint().1.unwrap();
let chunks = additional_bits / 8;
let remainder = additional_bits % 8;

let additional = chunks + (remainder > 0) as usize;
buffer.reserve(additional);

if chunks > 0 {
for _ in 0..chunks {
// Soundness: iterator lenght is at least chunks * 8
let byte = unsafe { get_byte_unchecked(8, &mut iterator) };
// Soundness: capacity was allocated above
unsafe { buffer.push_unchecked(byte) };
}
}

if remainder > 0 {
// Soundness: iterator has exactly remainder items.
let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) };
// Soundness: reserve above took remainder into account
unsafe { buffer.push_unchecked(byte) };
}
additional_bits
}

impl MutableBitmap {
Expand All @@ -339,6 +375,7 @@ impl MutableBitmap {
/// Extends `self` from an iterator of trusted len.
/// # Safety
/// The caller must guarantee that the iterator has a trusted len.
#[inline]
pub unsafe fn extend_from_trusted_len_iter_unchecked<I: Iterator<Item = bool>>(
&mut self,
mut iterator: I,
Expand Down Expand Up @@ -379,40 +416,33 @@ impl MutableBitmap {
// everything is aligned; proceed with the bulk operation
debug_assert_eq!(self.length % 8, 0);

self.buffer.extend_constant((length + 7) / 8, 0);

extend(&mut self.buffer[self.length / 8..], length, iterator);
unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) };
self.length += length;
}

/// Creates a new [`MutableBitmap`] from an iterator of booleans.
/// # Safety
/// The iterator must report an accurate length.
#[inline]
pub unsafe fn from_trusted_len_iter_unchecked<I>(iterator: I) -> Self
where
I: Iterator<Item = bool>,
{
let length = iterator.size_hint().1.unwrap();
let mut buffer = MutableBuffer::<u8>::new();

let mut buffer = MutableBuffer::<u8>::from_len_zeroed((length + 7) / 8);

extend(&mut buffer, length, iterator);
let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator);

Self { buffer, length }
}

/// Creates a new [`MutableBitmap`] from an iterator of booleans.
#[inline]
pub fn from_trusted_len_iter<I>(iterator: I) -> Self
where
I: TrustedLen<Item = bool>,
{
let length = iterator.size_hint().1.unwrap();

let mut buffer = MutableBuffer::<u8>::from_len_zeroed((length + 7) / 8);

extend(&mut buffer, length, iterator);

Self { buffer, length }
// Safety: Iterator is `TrustedLen`
unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
}

/// Creates a new [`MutableBitmap`] from an iterator of booleans.
Expand Down
7 changes: 7 additions & 0 deletions tests/it/bitmap/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@ use arrow2::{
buffer::MutableBuffer,
};

#[test]
fn from_slice() {
let slice = &[true, false, true];
let a = MutableBitmap::from(slice);
assert_eq!(a.iter().collect::<Vec<_>>(), slice);
}

#[test]
fn trusted_len() {
let data = vec![true; 65];
Expand Down

0 comments on commit c9f5e9c

Please sign in to comment.