diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs index 966b55b2f42f5..b4de40f3ee3b5 100644 --- a/rust/arrow/src/array/array.rs +++ b/rust/arrow/src/array/array.rs @@ -2821,7 +2821,7 @@ mod tests { #[should_panic(expected = "memory is not aligned")] fn test_primitive_array_alignment() { let ptr = memory::allocate_aligned(8); - let buf = unsafe { Buffer::from_raw_parts(ptr, 8) }; + let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) }; let buf2 = buf.slice(1); let array_data = ArrayData::builder(DataType::Int32).add_buffer(buf2).build(); Int32Array::from(array_data); @@ -2831,7 +2831,7 @@ mod tests { #[should_panic(expected = "memory is not aligned")] fn test_list_array_alignment() { let ptr = memory::allocate_aligned(8); - let buf = unsafe { Buffer::from_raw_parts(ptr, 8) }; + let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) }; let buf2 = buf.slice(1); let values: [i32; 8] = [0; 8]; @@ -2851,7 +2851,7 @@ mod tests { #[should_panic(expected = "memory is not aligned")] fn test_binary_array_alignment() { let ptr = memory::allocate_aligned(8); - let buf = unsafe { Buffer::from_raw_parts(ptr, 8) }; + let buf = unsafe { Buffer::from_raw_parts(ptr, 8, 8) }; let buf2 = buf.slice(1); let values: [u8; 12] = [0; 12]; diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs index 9c8cf62bc8b2c..c0d1044ef60ee 100644 --- a/rust/arrow/src/array/builder.rs +++ b/rust/arrow/src/array/builder.rs @@ -986,7 +986,7 @@ mod tests { assert_eq!(0, b.len()); assert_eq!(16, b.capacity()); let a = b.finish(); - assert_eq!(64, a.len()); + assert_eq!(0, a.len()); } #[test] @@ -994,7 +994,7 @@ mod tests { let mut b = Int32BufferBuilder::new(0); b.append(123).unwrap(); let a = b.finish(); - assert_eq!(64, a.len()); + assert_eq!(4, a.len()); } #[test] @@ -1005,7 +1005,7 @@ mod tests { } assert_eq!(16, b.capacity()); let a = b.finish(); - assert_eq!(64, a.len()); // 16 * size_of(i32) + assert_eq!(20, a.len()); } #[test] @@ -1017,7 +1017,7 @@ mod tests { } assert_eq!(32, b.capacity()); let a = b.finish(); - assert_eq!(128, a.len()); // 32 * size_of(i32) + assert_eq!(80, a.len()); } #[test] @@ -1028,7 +1028,7 @@ mod tests { b.append(i).unwrap(); } let mut a = b.finish(); - assert_eq!(64, a.len()); + assert_eq!(40, a.len()); assert_eq!(0, b.len()); assert_eq!(0, b.capacity()); @@ -1038,7 +1038,7 @@ mod tests { } assert_eq!(32, b.capacity()); a = b.finish(); - assert_eq!(128, a.len()); + assert_eq!(80, a.len()); } #[test] @@ -1064,12 +1064,12 @@ mod tests { b.append_slice("Hello, ".as_bytes()).unwrap(); b.append_slice("World!".as_bytes()).unwrap(); let buffer = b.finish(); - assert_eq!(64, buffer.len()); + assert_eq!(13, buffer.len()); let mut b = Int32BufferBuilder::new(0); b.append_slice(&[32, 54]).unwrap(); let buffer = b.finish(); - assert_eq!(64, buffer.len()); + assert_eq!(8, buffer.len()); } #[test] @@ -1082,14 +1082,14 @@ mod tests { assert_eq!(4, b.len()); assert_eq!(512, b.capacity()); let buffer = b.finish(); - assert_eq!(64, buffer.len()); + assert_eq!(1, buffer.len()); let mut b = BooleanBufferBuilder::new(4); b.append_slice(&[false, true, false, true]).unwrap(); assert_eq!(4, b.len()); assert_eq!(512, b.capacity()); let buffer = b.finish(); - assert_eq!(64, buffer.len()); + assert_eq!(1, buffer.len()); } #[test] @@ -1100,7 +1100,7 @@ mod tests { assert_eq!(4, b.len()); assert_eq!(16, b.capacity()); let buffer = b.finish(); - assert_eq!(64, buffer.len()); + assert_eq!(16, buffer.len()); } #[test] diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs index 24429cacec5bf..876c2d554f5f7 100644 --- a/rust/arrow/src/array/data.rs +++ b/rust/arrow/src/array/data.rs @@ -280,7 +280,7 @@ mod tests { assert_eq!(10, arr_data.null_count()); assert_eq!(5, arr_data.offset()); assert_eq!(1, arr_data.buffers().len()); - assert_eq!([0, 1, 2, 3], arr_data.buffers()[0].data()[..4]); + assert_eq!(&[0, 1, 2, 3], arr_data.buffers()[0].data()); assert_eq!(1, arr_data.child_data().len()); assert_eq!(child_arr_data, arr_data.child_data()[0]); } diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index 58412f84a129c..f9e7f27d7ee31 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -53,11 +53,17 @@ struct BufferData { /// The raw pointer into the buffer bytes ptr: *const u8, - /// The length (num of bytes) of the buffer + /// The length (num of bytes) of the buffer. The region `[0, len)` of the buffer + /// is occupied with meaningful data, while the rest `[len, capacity)` is the + /// unoccupied region. len: usize, /// Whether this piece of memory is owned by this object owned: bool, + + /// The capacity (num of bytes) of the buffer + /// Invariant: len <= capacity + capacity: usize, } impl PartialEq for BufferData { @@ -65,6 +71,10 @@ impl PartialEq for BufferData { if self.len != other.len { return false; } + if self.capacity != other.capacity { + return false; + } + unsafe { memory::memcmp(self.ptr, other.ptr, self.len) == 0 } } } @@ -73,7 +83,7 @@ impl PartialEq for BufferData { impl Drop for BufferData { fn drop(&mut self) { if !self.ptr.is_null() && self.owned { - memory::free_aligned(self.ptr as *mut u8, self.len); + memory::free_aligned(self.ptr as *mut u8, self.capacity); } } } @@ -82,8 +92,8 @@ impl Debug for BufferData { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { write!( f, - "BufferData {{ ptr: {:?}, len: {}, data: ", - self.ptr, self.len + "BufferData {{ ptr: {:?}, len: {}, capacity: {}, data: ", + self.ptr, self.len, self.capacity )?; unsafe { @@ -104,15 +114,17 @@ impl Buffer { /// /// * `ptr` - Pointer to raw parts /// * `len` - Length of raw parts in **bytes** + /// * `capacity` - Total allocated memory for the pointer `ptr`, in **bytes** /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// bytes. - pub unsafe fn from_raw_parts(ptr: *const u8, len: usize) -> Self { - Buffer::build_with_arguments(ptr, len, true) + pub unsafe fn from_raw_parts(ptr: *const u8, len: usize, capacity: usize) -> Self { + Buffer::build_with_arguments(ptr, len, capacity, true) } + /// Creates a buffer from an existing memory region (must already be byte-aligned), this /// `Buffer` **does not** free this piece of memory when dropped. /// @@ -120,13 +132,14 @@ impl Buffer { /// /// * `ptr` - Pointer to raw parts /// * `len` - Length of raw parts in **bytes** + /// * `capacity` - Total allocated memory for the pointer `ptr`, in **bytes** /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// bytes. - pub unsafe fn from_unowned(ptr: *const u8, len: usize) -> Self { - Buffer::build_with_arguments(ptr, len, false) + pub unsafe fn from_unowned(ptr: *const u8, len: usize, capacity: usize) -> Self { + Buffer::build_with_arguments(ptr, len, capacity, false) } /// Creates a buffer from an existing memory region (must already be byte-aligned). @@ -135,6 +148,7 @@ impl Buffer { /// /// * `ptr` - Pointer to raw parts /// * `len` - Length of raw parts in bytes + /// * `capacity` - Total allocated memory for the pointer `ptr`, in **bytes** /// * `owned` - Whether the raw parts is owned by this `Buffer`. If true, this `Buffer` will /// free this memory when dropped, otherwise it will skip freeing the raw parts. /// @@ -142,12 +156,14 @@ impl Buffer { /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// bytes. - unsafe fn build_with_arguments(ptr: *const u8, len: usize, owned: bool) -> Self { + unsafe fn build_with_arguments(ptr: *const u8, len: usize, capacity: usize, owned: bool) -> Self { + /// Creates a buffer from an existing memory region (must already be byte-aligned) + pub fn from_raw_parts(ptr: *const u8, len: usize, capacity: usize) -> Self { assert!( memory::is_aligned(ptr, memory::ALIGNMENT), "memory not aligned" ); - let buf_data = BufferData { ptr, len, owned }; + let buf_data = BufferData { ptr, len, capacity, owned }; Buffer { data: Arc::new(buf_data), offset: 0, @@ -159,6 +175,11 @@ impl Buffer { self.data.len - self.offset } + /// Returns the capacity of this buffer + pub fn capacity(&self) -> usize { + self.data.capacity + } + /// Returns whether the buffer is empty. pub fn is_empty(&self) -> bool { self.data.len - self.offset == 0 @@ -210,7 +231,7 @@ impl Buffer { /// Returns an empty buffer. pub fn empty() -> Self { - unsafe { Self::from_raw_parts(::std::ptr::null(), 0) } + unsafe { Self::from_raw_parts(::std::ptr::null(), 0, 0) } } } @@ -234,9 +255,9 @@ impl> From for Buffer { let buffer = memory::allocate_aligned(capacity); unsafe { memory::memcpy(buffer, slice.as_ptr(), len); - Buffer::from_raw_parts(buffer, capacity) + Buffer::from_raw_parts(buffer, len, capacity) } - } + } } /// Helper function for SIMD `BitAnd` and `BitOr` implementations @@ -503,7 +524,8 @@ impl MutableBuffer { pub fn freeze(self) -> Buffer { let buffer_data = BufferData { ptr: self.data, - len: self.capacity, + len: self.len, + capacity: self.capacity, owned: true, }; std::mem::forget(self); @@ -527,6 +549,9 @@ impl PartialEq for MutableBuffer { if self.len != other.len { return false; } + if self.capacity != other.capacity { + return false; + } unsafe { memory::memcmp(self.data, other.data, self.len) == 0 } } } @@ -584,32 +609,34 @@ mod tests { #[test] fn test_from_raw_parts() { - let buf = unsafe { Buffer::from_raw_parts(null_mut(), 0) }; + let buf = unsafe { Buffer::from_raw_parts(null_mut(), 0, 0) }; assert_eq!(0, buf.len()); assert_eq!(0, buf.data().len()); + assert_eq!(0, buf.capacity()); assert!(buf.raw_data().is_null()); let buf = Buffer::from(&[0, 1, 2, 3, 4]); - assert_eq!(64, buf.len()); + assert_eq!(5, buf.len()); assert!(!buf.raw_data().is_null()); - assert_eq!([0, 1, 2, 3, 4], buf.data()[..5]); + assert_eq!([0, 1, 2, 3, 4], buf.data()); } #[test] fn test_from_vec() { let buf = Buffer::from(&[0, 1, 2, 3, 4]); - assert_eq!(64, buf.len()); + assert_eq!(5, buf.len()); assert!(!buf.raw_data().is_null()); - assert_eq!([0, 1, 2, 3, 4], buf.data()[..5]); + assert_eq!([0, 1, 2, 3, 4], buf.data()); } #[test] fn test_copy() { let buf = Buffer::from(&[0, 1, 2, 3, 4]); let buf2 = buf.clone(); - assert_eq!(64, buf2.len()); + assert_eq!(5, buf2.len()); + assert_eq!(64, buf2.capacity()); assert!(!buf2.raw_data().is_null()); - assert_eq!([0, 1, 2, 3, 4], buf2.data()[..5]); + assert_eq!([0, 1, 2, 3, 4], buf2.data()); } #[test] @@ -617,17 +644,20 @@ mod tests { let buf = Buffer::from(&[2, 4, 6, 8, 10]); let buf2 = buf.slice(2); - assert_eq!([6, 8, 10], buf2.data()[0..3]); - assert_eq!(62, buf2.len()); + assert_eq!([6, 8, 10], buf2.data()); + assert_eq!(3, buf2.len()); assert_eq!(unsafe { buf.raw_data().offset(2) }, buf2.raw_data()); let buf3 = buf2.slice(1); - assert_eq!([8, 10], buf3.data()[..2]); - assert_eq!(61, buf3.len()); + assert_eq!([8, 10], buf3.data()); + assert_eq!(2, buf3.len()); assert_eq!(unsafe { buf.raw_data().offset(3) }, buf3.raw_data()); let buf4 = buf.slice(5); - assert_eq!(59, buf4.len()); + let empty_slice: [u8; 0] = []; + assert_eq!(empty_slice, buf4.data()); + assert_eq!(0, buf4.len()); + assert!(buf4.is_empty()); } #[test] @@ -636,7 +666,7 @@ mod tests { )] fn test_slice_offset_out_of_bound() { let buf = Buffer::from(&[2, 4, 6, 8, 10]); - buf.slice(65); + buf.slice(6); } #[test] @@ -680,15 +710,14 @@ mod tests { #[test] fn test_bitwise_not() { let buf = Buffer::from([0b01101010]); - assert_eq!(Buffer::from([0b10010101]).data()[..1], (!&buf).data()[..1]); + assert_eq!(Buffer::from([0b10010101]), !&buf); } #[test] #[should_panic(expected = "Buffers must be the same size to apply Bitwise OR.")] fn test_buffer_bitand_different_sizes() { - let size = memory::ALIGNMENT * 2; - let buf1 = Buffer::from_raw_parts(memory::allocate_aligned(size), size); - let buf2 = Buffer::from([0x00]); + let buf1 = Buffer::from([1_u8, 1_u8]); + let buf2 = Buffer::from([0b01001110]); let _buf3 = (&buf1 | &buf2).unwrap(); } @@ -780,18 +809,34 @@ mod tests { assert_eq!("aaaa bbbb cccc dddd".as_bytes(), buf.data()); let immutable_buf = buf.freeze(); - assert_eq!(64, immutable_buf.len()); - assert_eq!( - "aaaa bbbb cccc dddd".as_bytes(), - &immutable_buf.data()[..19] - ); + assert_eq!(19, immutable_buf.len()); + assert_eq!(64, immutable_buf.capacity()); + assert_eq!("aaaa bbbb cccc dddd".as_bytes(), immutable_buf.data()); + } + + #[test] + fn test_mutable_equal() -> Result<()> { + let mut buf = MutableBuffer::new(1); + let mut buf2 = MutableBuffer::new(1); + + buf.write(&[0xaa])?; + buf2.write(&[0xaa, 0xbb])?; + assert!(buf != buf2); + + buf.write(&[0xbb])?; + assert_eq!(buf, buf2); + + buf2.reserve(65)?; + assert!(buf != buf2); + + Ok(()) } #[test] fn test_access_concurrently() { let buffer = Buffer::from(vec![1, 2, 3, 4, 5]); let buffer2 = buffer.clone(); - assert_eq!([1, 2, 3, 4, 5], buffer.data()[..5]); + assert_eq!([1, 2, 3, 4, 5], buffer.data()); let buffer_copy = thread::spawn(move || { // access buffer in another thread. diff --git a/rust/arrow/src/tensor.rs b/rust/arrow/src/tensor.rs index 426a8ba046da2..a37062c4a8b62 100644 --- a/rust/arrow/src/tensor.rs +++ b/rust/arrow/src/tensor.rs @@ -86,6 +86,11 @@ impl<'a, T: ArrowPrimitiveType> Tensor<'a, T> { ) -> Self { match &shape { None => { + assert_eq!( + buffer.len(), + mem::size_of::(), + "underlying buffer should only contain a single tensor element" + ); assert_eq!(None, strides); assert_eq!(None, names); }