Skip to content

Commit

Permalink
Validate arguments to ArrayData::new: null bit buffer and buffers
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Nov 2, 2021
1 parent 1d3d5e3 commit 4c5e3be
Show file tree
Hide file tree
Showing 11 changed files with 994 additions and 98 deletions.
41 changes: 28 additions & 13 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -891,10 +891,18 @@ mod tests {
assert!(binary_array.is_valid(i));
assert!(!binary_array.is_null(i));
}
}

#[test]
fn test_binary_array_with_offsets() {
let values: [u8; 12] = [
b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
];
let offsets: [i32; 4] = [0, 5, 5, 12];

// Test binary array with offset
let array_data = ArrayData::builder(DataType::Binary)
.len(4)
.len(2)
.offset(1)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
Expand Down Expand Up @@ -947,10 +955,18 @@ mod tests {
assert!(binary_array.is_valid(i));
assert!(!binary_array.is_null(i));
}
}

#[test]
fn test_large_binary_array_with_offsets() {
let values: [u8; 12] = [
b'h', b'e', b'l', b'l', b'o', b'p', b'a', b'r', b'q', b'u', b'e', b't',
];
let offsets: [i64; 4] = [0, 5, 5, 12];

// Test binary array with offset
let array_data = ArrayData::builder(DataType::LargeBinary)
.len(4)
.len(2)
.offset(1)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_buffer(Buffer::from_slice_ref(&values))
Expand Down Expand Up @@ -1196,26 +1212,25 @@ mod tests {

#[test]
#[should_panic(
expected = "FixedSizeBinaryArray can only be created from list array of u8 values \
(i.e. FixedSizeList<PrimitiveArray<u8>>)."
expected = "FixedSizeBinaryArray can only be created from FixedSizeList<u8> arrays"
)]
fn test_fixed_size_binary_array_from_incorrect_list_array() {
let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let values_data = ArrayData::builder(DataType::UInt32)
.len(12)
.add_buffer(Buffer::from_slice_ref(&values))
.add_child_data(ArrayData::builder(DataType::Boolean).build().unwrap())
.build()
.unwrap();

let array_data = ArrayData::builder(DataType::FixedSizeList(
Box::new(Field::new("item", DataType::Binary, false)),
4,
))
.len(3)
.add_child_data(values_data)
.build()
.unwrap();
let array_data = unsafe {
ArrayData::builder(DataType::FixedSizeList(
Box::new(Field::new("item", DataType::Binary, false)),
4,
))
.len(3)
.add_child_data(values_data)
.build_unchecked()
};
let list_array = FixedSizeListArray::from(array_data);
drop(FixedSizeBinaryArray::from(list_array));
}
Expand Down
9 changes: 5 additions & 4 deletions arrow/src/array/array_boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,10 +332,11 @@ mod tests {
#[should_panic(expected = "BooleanArray data should contain a single buffer only \
(values buffer)")]
fn test_boolean_array_invalid_buffer_len() {
let data = ArrayData::builder(DataType::Boolean)
.len(5)
.build()
.unwrap();
let data = unsafe {
ArrayData::builder(DataType::Boolean)
.len(5)
.build_unchecked()
};
drop(BooleanArray::from(data));
}
}
76 changes: 42 additions & 34 deletions arrow/src/array/array_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,9 +552,10 @@ mod tests {
assert!(!list_array.is_null(i));
}

// Now test with a non-zero offset
// Now test with a non-zero offset (skip first element)
// [[3, 4, 5], [6, 7]]
let list_data = ArrayData::builder(list_data_type)
.len(3)
.len(2)
.offset(1)
.add_buffer(value_offsets)
.add_child_data(value_data.clone())
Expand All @@ -565,7 +566,7 @@ mod tests {
let values = list_array.values();
assert_eq!(&value_data, values.data());
assert_eq!(DataType::Int32, list_array.value_type());
assert_eq!(3, list_array.len());
assert_eq!(2, list_array.len());
assert_eq!(0, list_array.null_count());
assert_eq!(6, list_array.value_offsets()[1]);
assert_eq!(2, list_array.value_length(1));
Expand Down Expand Up @@ -642,8 +643,9 @@ mod tests {
}

// Now test with a non-zero offset
// [[3, 4, 5], [6, 7]]
let list_data = ArrayData::builder(list_data_type)
.len(3)
.len(2)
.offset(1)
.add_buffer(value_offsets)
.add_child_data(value_data.clone())
Expand All @@ -654,7 +656,7 @@ mod tests {
let values = list_array.values();
assert_eq!(&value_data, values.data());
assert_eq!(DataType::Int32, list_array.value_type());
assert_eq!(3, list_array.len());
assert_eq!(2, list_array.len());
assert_eq!(0, list_array.null_count());
assert_eq!(6, list_array.value_offsets()[1]);
assert_eq!(2, list_array.value_length(1));
Expand Down Expand Up @@ -763,11 +765,12 @@ mod tests {
Box::new(Field::new("item", DataType::Int32, false)),
3,
);
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_child_data(value_data)
.build()
.unwrap();
let list_data = unsafe {
ArrayData::builder(list_data_type)
.len(3)
.add_child_data(value_data)
.build_unchecked()
};
drop(FixedSizeListArray::from(list_data));
}

Expand Down Expand Up @@ -1038,18 +1041,20 @@ mod tests {
expected = "ListArray data should contain a single buffer only (value offsets)"
)]
fn test_list_array_invalid_buffer_len() {
let value_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
.build()
.unwrap();
let value_data = unsafe {
ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7]))
.build_unchecked()
};
let list_data_type =
DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_child_data(value_data)
.build()
.unwrap();
let list_data = unsafe {
ArrayData::builder(list_data_type)
.len(3)
.add_child_data(value_data)
.build_unchecked()
};
drop(ListArray::from(list_data));
}

Expand All @@ -1061,11 +1066,12 @@ mod tests {
let value_offsets = Buffer::from_slice_ref(&[0, 2, 5, 7]);
let list_data_type =
DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
.build()
.unwrap();
let list_data = unsafe {
ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
.build_unchecked()
};
drop(ListArray::from(list_data));
}

Expand Down Expand Up @@ -1112,18 +1118,20 @@ mod tests {
let buf2 = buf.slice(1);

let values: [i32; 8] = [0; 8];
let value_data = ArrayData::builder(DataType::Int32)
.add_buffer(Buffer::from_slice_ref(&values))
.build()
.unwrap();
let value_data = unsafe {
ArrayData::builder(DataType::Int32)
.add_buffer(Buffer::from_slice_ref(&values))
.build_unchecked()
};

let list_data_type =
DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.add_buffer(buf2)
.add_child_data(value_data)
.build()
.unwrap();
let list_data = unsafe {
ArrayData::builder(list_data_type)
.add_buffer(buf2)
.add_child_data(value_data)
.build_unchecked()
};
drop(ListArray::from(list_data));
}

Expand Down
4 changes: 2 additions & 2 deletions arrow/src/array/array_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ mod tests {

// Now test with a non-zero offset
let map_data = ArrayData::builder(map_array.data_type().clone())
.len(3)
.len(2)
.offset(1)
.add_buffer(map_array.data().buffers()[0].clone())
.add_child_data(map_array.data().child_data()[0].clone())
Expand All @@ -331,7 +331,7 @@ mod tests {
let values = map_array.values();
assert_eq!(&value_data, values.data());
assert_eq!(DataType::UInt32, map_array.value_type());
assert_eq!(3, map_array.len());
assert_eq!(2, map_array.len());
assert_eq!(0, map_array.null_count());
assert_eq!(6, map_array.value_offsets()[1]);
assert_eq!(2, map_array.value_length(1));
Expand Down
12 changes: 10 additions & 2 deletions arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@ mod tests {
#[test]
fn test_primitive_array_builder() {
// Test building a primitive array with ArrayData builder and offset
let buf = Buffer::from_slice_ref(&[0, 1, 2, 3, 4]);
let buf = Buffer::from_slice_ref(&[0i32, 1, 2, 3, 4, 5, 6]);
let buf2 = buf.clone();
let data = ArrayData::builder(DataType::Int32)
.len(5)
Expand Down Expand Up @@ -950,7 +950,15 @@ mod tests {
#[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
(values buffer)")]
fn test_primitive_array_invalid_buffer_len() {
let data = ArrayData::builder(DataType::Int32).len(5).build().unwrap();
let buffer = Buffer::from_slice_ref(&[0i32, 1, 2, 3, 4]);
let data = unsafe {
ArrayData::builder(DataType::Int32)
.add_buffer(buffer.clone())
.add_buffer(buffer)
.len(5)
.build_unchecked()
};

drop(Int32Array::from(data));
}

Expand Down
5 changes: 4 additions & 1 deletion arrow/src/array/array_union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ impl UnionArray {
}
}

Ok(Self::new(type_ids, value_offsets, child_arrays, bitmap))
let new_self = Self::new(type_ids, value_offsets, child_arrays, bitmap);
new_self.data().validate()?;

Ok(new_self)
}

/// Accesses the child array for `type_id`.
Expand Down
Loading

0 comments on commit 4c5e3be

Please sign in to comment.