Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into parquet-tempfile
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Jan 15, 2022
2 parents 0a76321 + 66b84f3 commit cbe7815
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 8 deletions.
1 change: 1 addition & 0 deletions arrow/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ impl std::ops::Deref for Buffer {
}

unsafe impl Sync for Buffer {}
#[allow(clippy::non_send_fields_in_send_ty)]
unsafe impl Send for Buffer {}

impl From<MutableBuffer> for Buffer {
Expand Down
12 changes: 10 additions & 2 deletions arrow/src/compute/kernels/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,11 @@ where
#[cfg(feature = "simd")]
{
let scalar_vector = T::init(scalar);
return simd_unary_math_op(array, |x| x - scalar_vector, |x| x - scalar);
return Ok(simd_unary_math_op(
array,
|x| x - scalar_vector,
|x| x - scalar,
));
}
#[cfg(not(feature = "simd"))]
return Ok(unary(array, |value| value - scalar));
Expand Down Expand Up @@ -706,7 +710,11 @@ where
#[cfg(feature = "simd")]
{
let scalar_vector = T::init(scalar);
return simd_unary_math_op(array, |x| x * scalar_vector, |x| x * scalar);
return Ok(simd_unary_math_op(
array,
|x| x * scalar_vector,
|x| x * scalar,
));
}
#[cfg(not(feature = "simd"))]
return Ok(unary(array, |value| value * scalar));
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/util/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ fn get_data_dir(udf_env: &str, submodule_data: &str) -> Result<PathBuf, Box<dyn
"env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\
HINT: try running `git submodule update --init`",
udf_env,
pb.display().to_string(),
pb.display(),
).into())
}
}
Expand Down
100 changes: 99 additions & 1 deletion parquet/src/arrow/levels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,11 +287,18 @@ impl LevelInfo {
.as_any()
.downcast_ref::<StructArray>()
.expect("Unable to get struct array");
let struct_level = self.calculate_child_levels(
let mut struct_level = self.calculate_child_levels(
array_offsets,
array_mask,
LevelType::Struct(field.is_nullable()),
);

// If the parent field is a list, calculate the children of the struct as if it
// were a list as well.
if matches!(self.level_type, LevelType::List(_)) {
struct_level.level_type = LevelType::List(false);
}

let mut struct_levels = vec![];
struct_array
.columns()
Expand Down Expand Up @@ -1675,4 +1682,95 @@ mod tests {
};
assert_eq!(list_level, &expected_level);
}

#[test]
fn test_list_of_struct() {
// define schema
let int_field = Field::new("a", DataType::Int32, true);
let item_field =
Field::new("item", DataType::Struct(vec![int_field.clone()]), true);
let list_field = Field::new("list", DataType::List(Box::new(item_field)), true);

let int_builder = Int32Builder::new(10);
let struct_builder =
StructBuilder::new(vec![int_field], vec![Box::new(int_builder)]);
let mut list_builder = ListBuilder::new(struct_builder);

// [{a: 1}], [], null, [null, null], [{a: null}], [{a: 2}]
//
// [{a: 1}]
let values = list_builder.values();
values
.field_builder::<Int32Builder>(0)
.unwrap()
.append_value(1)
.unwrap();
values.append(true).unwrap();
list_builder.append(true).unwrap();

// []
list_builder.append(true).unwrap();

// null
list_builder.append(false).unwrap();

// [null, null]
let values = list_builder.values();
values
.field_builder::<Int32Builder>(0)
.unwrap()
.append_null()
.unwrap();
values.append(false).unwrap();
values
.field_builder::<Int32Builder>(0)
.unwrap()
.append_null()
.unwrap();
values.append(false).unwrap();
list_builder.append(true).unwrap();

// [{a: null}]
let values = list_builder.values();
values
.field_builder::<Int32Builder>(0)
.unwrap()
.append_null()
.unwrap();
values.append(true).unwrap();
list_builder.append(true).unwrap();

// [{a: 2}]
let values = list_builder.values();
values
.field_builder::<Int32Builder>(0)
.unwrap()
.append_value(2)
.unwrap();
values.append(true).unwrap();
list_builder.append(true).unwrap();

let array = Arc::new(list_builder.finish());

let schema = Arc::new(Schema::new(vec![list_field]));

let rb = RecordBatch::try_new(schema, vec![array]).unwrap();

let batch_level = LevelInfo::new(0, rb.num_rows());
let list_level =
&batch_level.calculate_array_levels(rb.column(0), rb.schema().field(0))[0];

let expected_level = LevelInfo {
definition: vec![4, 1, 0, 2, 2, 3, 4],
repetition: Some(vec![0, 0, 0, 0, 1, 0, 0]),
array_offsets: vec![0, 1, 1, 1, 3, 4, 5],
array_mask: vec![true, true, false, false, false, false, true],
max_definition: 4,
level_type: LevelType::Primitive(true),
offset: 0,
length: 5,
};

assert_eq!(list_level, &expected_level);
}
}
8 changes: 6 additions & 2 deletions parquet/src/record/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -716,15 +716,19 @@ impl fmt::Display for Field {
Field::Float(value) => {
if !(1e-15..=1e19).contains(&value) {
write!(f, "{:E}", value)
} else if value.trunc() == value {
write!(f, "{}.0", value)
} else {
write!(f, "{:?}", value)
write!(f, "{}", value)
}
}
Field::Double(value) => {
if !(1e-15..=1e19).contains(&value) {
write!(f, "{:E}", value)
} else if value.trunc() == value {
write!(f, "{}.0", value)
} else {
write!(f, "{:?}", value)
write!(f, "{}", value)
}
}
Field::Decimal(ref value) => {
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/schema/printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ fn print_logical_and_converted(
None => {
// Also print converted type if it is available
match converted_type {
ConvertedType::NONE => format!(""),
ConvertedType::NONE => String::new(),
decimal @ ConvertedType::DECIMAL => {
// For decimal type we should print precision and scale if they
// are > 0, e.g. DECIMAL(9,2) -
Expand All @@ -256,7 +256,7 @@ fn print_logical_and_converted(
format!("({},{})", p, s)
}
(p, 0) if p > 0 => format!("({})", p),
_ => format!(""),
_ => String::new(),
};
format!("{}{}", decimal, precision_scale)
}
Expand Down

0 comments on commit cbe7815

Please sign in to comment.