-
Notifications
You must be signed in to change notification settings - Fork 821
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Validate arguments to ArrayData::new and null bit buffer and buffers #810
Changes from all commits
4c5e3be
5c8da87
8acd8d2
88a41b3
1446674
4136241
92041b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -332,10 +332,11 @@ mod tests { | |
#[should_panic(expected = "BooleanArray data should contain a single buffer only \ | ||
(values buffer)")] | ||
fn test_boolean_array_invalid_buffer_len() { | ||
let data = ArrayData::builder(DataType::Boolean) | ||
.len(5) | ||
.build() | ||
.unwrap(); | ||
let data = unsafe { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Note that it might be best to eventually remove all Array specific checks (which I think will be redundant) in favor of consolidated checks in ArrayData.rs but I don't want to consider doing that until I have the validation checks completed |
||
ArrayData::builder(DataType::Boolean) | ||
.len(5) | ||
.build_unchecked() | ||
}; | ||
drop(BooleanArray::from(data)); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -552,9 +552,10 @@ mod tests { | |
assert!(!list_array.is_null(i)); | ||
} | ||
|
||
// Now test with a non-zero offset | ||
// Now test with a non-zero offset (skip first element) | ||
// [[3, 4, 5], [6, 7]] | ||
let list_data = ArrayData::builder(list_data_type) | ||
.len(3) | ||
.len(2) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a similar invalid test that the new checks identified -- There is a very similar (and thus fixed) test in array_list.rs and one in array_map.rs |
||
.offset(1) | ||
.add_buffer(value_offsets) | ||
.add_child_data(value_data.clone()) | ||
|
@@ -565,7 +566,7 @@ mod tests { | |
let values = list_array.values(); | ||
assert_eq!(&value_data, values.data()); | ||
assert_eq!(DataType::Int32, list_array.value_type()); | ||
assert_eq!(3, list_array.len()); | ||
assert_eq!(2, list_array.len()); | ||
assert_eq!(0, list_array.null_count()); | ||
assert_eq!(6, list_array.value_offsets()[1]); | ||
assert_eq!(2, list_array.value_length(1)); | ||
|
@@ -642,8 +643,9 @@ mod tests { | |
} | ||
|
||
// Now test with a non-zero offset | ||
// [[3, 4, 5], [6, 7]] | ||
let list_data = ArrayData::builder(list_data_type) | ||
.len(3) | ||
.len(2) | ||
.offset(1) | ||
.add_buffer(value_offsets) | ||
.add_child_data(value_data.clone()) | ||
|
@@ -654,7 +656,7 @@ mod tests { | |
let values = list_array.values(); | ||
assert_eq!(&value_data, values.data()); | ||
assert_eq!(DataType::Int32, list_array.value_type()); | ||
assert_eq!(3, list_array.len()); | ||
assert_eq!(2, list_array.len()); | ||
assert_eq!(0, list_array.null_count()); | ||
assert_eq!(6, list_array.value_offsets()[1]); | ||
assert_eq!(2, list_array.value_length(1)); | ||
|
@@ -763,11 +765,12 @@ mod tests { | |
Box::new(Field::new("item", DataType::Int32, false)), | ||
3, | ||
); | ||
let list_data = ArrayData::builder(list_data_type) | ||
.len(3) | ||
.add_child_data(value_data) | ||
.build() | ||
.unwrap(); | ||
let list_data = unsafe { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this test is checking the ListArray specific error, but since the new ArrayData validation checks now catch the problem we need to switch to using |
||
ArrayData::builder(list_data_type) | ||
.len(3) | ||
.add_child_data(value_data) | ||
.build_unchecked() | ||
}; | ||
drop(FixedSizeListArray::from(list_data)); | ||
} | ||
|
||
|
@@ -1038,18 +1041,20 @@ mod tests { | |
expected = "ListArray data should contain a single buffer only (value offsets)" | ||
)] | ||
fn test_list_array_invalid_buffer_len() { | ||
let value_data = ArrayData::builder(DataType::Int32) | ||
.len(8) | ||
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7])) | ||
.build() | ||
.unwrap(); | ||
let value_data = unsafe { | ||
ArrayData::builder(DataType::Int32) | ||
.len(8) | ||
.add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7])) | ||
.build_unchecked() | ||
}; | ||
let list_data_type = | ||
DataType::List(Box::new(Field::new("item", DataType::Int32, false))); | ||
let list_data = ArrayData::builder(list_data_type) | ||
.len(3) | ||
.add_child_data(value_data) | ||
.build() | ||
.unwrap(); | ||
let list_data = unsafe { | ||
ArrayData::builder(list_data_type) | ||
.len(3) | ||
.add_child_data(value_data) | ||
.build_unchecked() | ||
}; | ||
drop(ListArray::from(list_data)); | ||
} | ||
|
||
|
@@ -1061,11 +1066,12 @@ mod tests { | |
let value_offsets = Buffer::from_slice_ref(&[0, 2, 5, 7]); | ||
let list_data_type = | ||
DataType::List(Box::new(Field::new("item", DataType::Int32, false))); | ||
let list_data = ArrayData::builder(list_data_type) | ||
.len(3) | ||
.add_buffer(value_offsets) | ||
.build() | ||
.unwrap(); | ||
let list_data = unsafe { | ||
ArrayData::builder(list_data_type) | ||
.len(3) | ||
.add_buffer(value_offsets) | ||
.build_unchecked() | ||
}; | ||
drop(ListArray::from(list_data)); | ||
} | ||
|
||
|
@@ -1112,18 +1118,20 @@ mod tests { | |
let buf2 = buf.slice(1); | ||
|
||
let values: [i32; 8] = [0; 8]; | ||
let value_data = ArrayData::builder(DataType::Int32) | ||
.add_buffer(Buffer::from_slice_ref(&values)) | ||
.build() | ||
.unwrap(); | ||
let value_data = unsafe { | ||
ArrayData::builder(DataType::Int32) | ||
.add_buffer(Buffer::from_slice_ref(&values)) | ||
.build_unchecked() | ||
}; | ||
|
||
let list_data_type = | ||
DataType::List(Box::new(Field::new("item", DataType::Int32, false))); | ||
let list_data = ArrayData::builder(list_data_type) | ||
.add_buffer(buf2) | ||
.add_child_data(value_data) | ||
.build() | ||
.unwrap(); | ||
let list_data = unsafe { | ||
ArrayData::builder(list_data_type) | ||
.add_buffer(buf2) | ||
.add_child_data(value_data) | ||
.build_unchecked() | ||
}; | ||
drop(ListArray::from(list_data)); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -894,7 +894,7 @@ mod tests { | |
#[test] | ||
fn test_primitive_array_builder() { | ||
// Test building a primitive array with ArrayData builder and offset | ||
let buf = Buffer::from_slice_ref(&[0, 1, 2, 3, 4]); | ||
let buf = Buffer::from_slice_ref(&[0i32, 1, 2, 3, 4, 5, 6]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These tests need to be updated because they were creating invalid Buffers (this one for example had an len of 5 and offset of 2 but only passed in an array 5 long) |
||
let buf2 = buf.clone(); | ||
let data = ArrayData::builder(DataType::Int32) | ||
.len(5) | ||
|
@@ -950,7 +950,15 @@ mod tests { | |
#[should_panic(expected = "PrimitiveArray data should contain a single buffer only \ | ||
(values buffer)")] | ||
fn test_primitive_array_invalid_buffer_len() { | ||
let data = ArrayData::builder(DataType::Int32).len(5).build().unwrap(); | ||
let buffer = Buffer::from_slice_ref(&[0i32, 1, 2, 3, 4]); | ||
let data = unsafe { | ||
ArrayData::builder(DataType::Int32) | ||
.add_buffer(buffer.clone()) | ||
.add_buffer(buffer) | ||
.len(5) | ||
.build_unchecked() | ||
}; | ||
|
||
drop(Int32Array::from(data)); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -137,7 +137,10 @@ impl UnionArray { | |
} | ||
} | ||
|
||
Ok(Self::new(type_ids, value_offsets, child_arrays, bitmap)) | ||
let new_self = Self::new(type_ids, value_offsets, child_arrays, bitmap); | ||
new_self.data().validate()?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As part of #85 I will clean up the union array validation |
||
|
||
Ok(new_self) | ||
} | ||
|
||
/// Accesses the child array for `type_id`. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the array data in this case is only 4 elements long, so using an offset of 1 with len 4 is incorrect (goes off the end of the array data). The same with the test below