diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index 9805703120b..5989e30491c 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -5,8 +5,14 @@ use crate::{trusted_len::TrustedLen, types::NativeType}; use super::bytes::Bytes; use super::mutable::MutableBuffer; -/// Buffer represents a contiguous memory region that can be shared with other buffers and across -/// thread boundaries. +/// [`Buffer`] is a contiguous memory region that can +/// be shared across thread boundaries. +/// The easiest way to think about `Buffer` is as being equivalent to +/// an immutable `Vec`, with the following differences: +/// * `T` must be [`NativeType`] +/// * clone is `O(1)` +/// * memory is sharable across thread boundaries (it is under an `Arc`) +/// * it supports externally allocated memory (FFI) #[derive(Clone, PartialEq)] pub struct Buffer { /// the internal byte buffer. diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs index 1969b5a982c..a146c6fae62 100644 --- a/src/buffer/mod.rs +++ b/src/buffer/mod.rs @@ -1,11 +1,5 @@ -//! This module contains core functionality to handle memory in this crate. -//! -//! The core containers of this module are [`MutableBuffer`] and [`Buffer`]. -//! [`MutableBuffer`] is like [`Vec`], with the following main differences: -//! * it only supports types that implement [`super::types::NativeType`] -//! * it allocates memory along cache lines. -//! * it is not clonable. -//! [`Buffer`] is the immutable counterpart of [`MutableBuffer`]. +//! Contains the containers for all Arrow sized types (e.g. `i32`): +//! [`Buffer`] and [`MutableBuffer`]. 
mod immutable; mod mutable; diff --git a/src/buffer/mutable.rs b/src/buffer/mutable.rs index 43439161bf3..81d43b16d55 100644 --- a/src/buffer/mutable.rs +++ b/src/buffer/mutable.rs @@ -18,7 +18,7 @@ fn capacity_multiple_of_64(capacity: usize) -> usize { util::round_upto_multiple_of_64(capacity * size_of::()) / size_of::() } -/// A [`MutableBuffer`] is this crates' interface to store types that are byte-like, such as `i32`. +/// A [`MutableBuffer`] is this crate's interface to store types that are byte-like such as `i32`. /// It behaves like a [`Vec`], with the following differences: /// * memory is allocated along cache lines and in multiple of 64 bytes. /// * it can only hold types supported by the arrow format (`u8-u64`, `i8-i128`, `f32,f64` and [`crate::types::days_ms`]) @@ -29,6 +29,7 @@ fn capacity_multiple_of_64(capacity: usize) -> usize { /// let mut buffer = MutableBuffer::::new(); /// buffer.push(256); /// buffer.extend_from_slice(&[1]); +/// assert_eq!(buffer.as_slice(), &[256, 1]); /// let buffer: Buffer = buffer.into(); /// assert_eq!(buffer.as_slice(), &[256, 1]) /// ``` @@ -76,8 +77,8 @@ impl MutableBuffer { } } - /// Allocates a new [MutableBuffer] with `len` and capacity to be at least `len` where - /// all bytes are guaranteed to be `0u8`. + /// Allocates a new [MutableBuffer] with `len` and capacity to be at least `len` + /// where data is zeroed. /// # Example /// ``` /// # use arrow2::buffer::{Buffer, MutableBuffer}; @@ -165,13 +166,13 @@ impl MutableBuffer { self.len = new_len; } - /// Returns whether this buffer is empty or not. + /// Returns whether this buffer is empty. #[inline] pub fn is_empty(&self) -> bool { self.len == 0 } - /// Returns the length (the number of bytes written) in this buffer. + /// Returns the length (the number of items) in this buffer. /// The invariant `buffer.len() <= buffer.capacity()` is always upheld. 
#[inline] pub fn len(&self) -> usize { @@ -226,7 +227,7 @@ impl MutableBuffer { self.ptr.as_ptr() } - /// Extends this buffer from a slice of items that can be represented in bytes, increasing its capacity if needed. + /// Extends this buffer from a slice of items, increasing its capacity if needed. /// # Example /// ``` /// # use arrow2::buffer::MutableBuffer; @@ -246,7 +247,7 @@ impl MutableBuffer { self.len += additional; } - /// Extends the buffer with a new item, increasing its capacity if needed. + /// Pushes a new item to the buffer, increasing its capacity if needed. /// # Example /// ``` /// # use arrow2::buffer::MutableBuffer; @@ -264,9 +265,9 @@ impl MutableBuffer { self.len += 1; } - /// Extends the buffer with a new item, without checking for sufficient capacity + /// Extends the buffer with a new item without checking for sufficient capacity /// Safety - /// Caller must ensure that the capacity()-len()>=size_of() + /// Caller must ensure that `self.capacity() - self.len() >= 1` #[inline] pub(crate) unsafe fn push_unchecked(&mut self, item: T) { let dst = self.ptr.as_ptr().add(self.len); @@ -274,14 +275,19 @@ impl MutableBuffer { self.len += 1; } + /// Sets the length of this buffer. + /// # Panics + /// Panics iff `len > capacity`. /// # Safety - /// The caller must ensure that the buffer was properly initialized up to `len`. + /// The caller must ensure no reads are performed on any + /// item within `[0, len)` that has not been initialized #[inline] pub unsafe fn set_len(&mut self, len: usize) { assert!(len <= self.capacity()); self.len = len; } + /// Extends this buffer by `additional` items of value `value`. 
#[inline] pub fn extend_constant(&mut self, additional: usize, value: T) { self.resize(self.len() + additional, value) @@ -334,21 +340,14 @@ unsafe fn reallocate( impl Extend for MutableBuffer { fn extend>(&mut self, iter: T) { - let iterator = iter.into_iter(); - self.extend_from_iter(iterator) - } -} - -impl MutableBuffer { - #[inline] - fn extend_from_iter>(&mut self, mut iterator: I) { + let mut iterator = iter.into_iter(); let (lower, _) = iterator.size_hint(); let additional = lower; self.reserve(additional); // this is necessary because of https://github.com/rust-lang/rust/issues/32155 let mut len = SetLenOnDrop::new(&mut self.len); - let mut dst = unsafe { self.ptr.as_ptr().add(len.local_len) as *mut T }; + let mut dst = unsafe { self.ptr.as_ptr().add(len.local_len) as *mut A }; let capacity = self.capacity; while len.local_len < capacity { @@ -366,7 +365,9 @@ impl MutableBuffer { iterator.for_each(|item| self.push(item)); } +} +impl MutableBuffer { /// Extends `self` from a [`TrustedLen`] iterator. #[inline] pub fn extend_from_trusted_len_iter>(&mut self, iterator: I) { @@ -409,7 +410,7 @@ impl MutableBuffer { /// # use arrow2::buffer::MutableBuffer; /// let v = vec![1u32]; /// let iter = v.iter().map(|x| x * 2); - /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) }; + /// let buffer = MutableBuffer::from_trusted_len_iter(iter); /// assert_eq!(buffer.len(), 1) /// ``` /// # Safety @@ -428,14 +429,6 @@ impl MutableBuffer { /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length. /// Prefer this to `collect` whenever possible, as it is faster ~60% faster. 
- /// # Example - /// ``` - /// # use arrow2::buffer::MutableBuffer; - /// let v = vec![1u32]; - /// let iter = v.iter().map(|x| x * 2); - /// let buffer = unsafe { MutableBuffer::from_trusted_len_iter(iter) }; - /// assert_eq!(buffer.len(), 1) - /// ``` /// # Safety /// This method assumes that the iterator's size is correct and is undefined behavior /// to use it on an iterator that reports an incorrect length. @@ -450,8 +443,7 @@ impl MutableBuffer { buffer } - /// Creates a [`MutableBuffer`] from an [`Iterator`] with a [`TrustedLen`] iterator, or errors - /// if any of the items of the iterator is an error. + /// Creates a [`MutableBuffer`] from a fallible [`TrustedLen`] iterator. #[inline] pub fn try_from_trusted_len_iter>>( iterator: I, @@ -515,7 +507,7 @@ impl FromIterator for MutableBuffer { } }; - buffer.extend_from_iter(iterator); + buffer.extend(iterator); buffer } }