From e9be49d962560ce5b87544a2933d8b207322cf60 Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Wed, 1 Dec 2021 09:33:14 -0500 Subject: [PATCH 1/2] Change `pretty_format_batches` to return `Result` (#975) * Create write_batches function * Update pretty_format_batches and pretty_format_columns to return Display * fix import warning * Fix compile error Co-authored-by: Andrew Lamb --- arrow/src/util/pretty.rs | 79 +++++++++++++++++++++++++++++++++------- 1 file changed, 66 insertions(+), 13 deletions(-) diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs index 8e9cc5feafa2..ac7caa8ac39a 100644 --- a/arrow/src/util/pretty.rs +++ b/arrow/src/util/pretty.rs @@ -19,6 +19,7 @@ //! available unless `feature = "prettyprint"` is enabled. use crate::{array::ArrayRef, record_batch::RecordBatch}; +use std::fmt::Display; use comfy_table::{Cell, Table}; @@ -27,13 +28,16 @@ use crate::error::Result; use super::display::array_value_to_string; ///! Create a visual representation of record batches -pub fn pretty_format_batches(results: &[RecordBatch]) -> Result { - Ok(create_table(results)?.to_string()) +pub fn pretty_format_batches(results: &[RecordBatch]) -> Result { + create_table(results) } ///! Create a visual representation of columns -pub fn pretty_format_columns(col_name: &str, results: &[ArrayRef]) -> Result { - Ok(create_column(col_name, results)?.to_string()) +pub fn pretty_format_columns( + col_name: &str, + results: &[ArrayRef], +) -> Result { + create_column(col_name, results) } ///! Prints a visual representation of record batches to stdout @@ -115,6 +119,7 @@ mod tests { use super::*; use crate::array::{DecimalBuilder, FixedSizeListBuilder, Int32Array}; + use std::fmt::Write; use std::sync::Arc; #[test] @@ -144,7 +149,7 @@ mod tests { ], )?; - let table = pretty_format_batches(&[batch])?; + let table = pretty_format_batches(&[batch])?.to_string(); let expected = vec![ "+---+-----+", @@ -176,7 +181,7 @@ mod tests { Arc::new(array::StringArray::from(vec![Some("e"), None, Some("g")])), ]; - let table = pretty_format_columns("a", &columns)?; + let table = pretty_format_columns("a", &columns)?.to_string(); let expected = vec![ "+---+", "| a |", "+---+", "| a |", "| b |", "| |", "| d |", "| e |", @@ -208,7 +213,7 @@ mod tests { // define data (null) let batch = RecordBatch::try_new(schema, arrays).unwrap(); - let table = pretty_format_batches(&[batch]).unwrap(); + let table = pretty_format_batches(&[batch]).unwrap().to_string(); let expected = vec![ "+---+---+---+", @@ -244,7 +249,7 @@ mod tests { let batch = RecordBatch::try_new(schema, vec![array])?; - let table = pretty_format_batches(&[batch])?; + let table = pretty_format_batches(&[batch])?.to_string(); let expected = vec![ "+-------+", @@ -285,7 +290,7 @@ mod tests { let array = Arc::new(builder.finish()); let batch = RecordBatch::try_new(schema, vec![array])?; - let table = pretty_format_batches(&[batch])?; + let table = pretty_format_batches(&[batch])?.to_string(); let expected = vec![ "+-----------+", "| d1 |", @@ -320,7 +325,9 @@ mod tests { )])); let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap(); - let table = pretty_format_batches(&[batch]).expect("formatting batches"); + let table = pretty_format_batches(&[batch]) + .expect("formatting batches") + .to_string(); let expected = $EXPECTED_RESULT; let actual: Vec<&str> = table.lines().collect(); @@ -494,7 +501,7 @@ mod tests { let batch = RecordBatch::try_new(schema, vec![dm])?; - let table = pretty_format_batches(&[batch])?; + let table = pretty_format_batches(&[batch])?.to_string(); let expected = vec![ "+-------+", @@ -535,7 +542,7 @@ mod tests { let batch = RecordBatch::try_new(schema, vec![dm])?; - let table = pretty_format_batches(&[batch])?; + let table = pretty_format_batches(&[batch])?.to_string(); let expected = vec![ "+------+", "| f |", "+------+", "| 101 |", "| |", "| 200 |", "| 3040 |", "+------+", @@ -589,7 +596,7 @@ mod tests { RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]) .unwrap(); - let table = pretty_format_batches(&[batch])?; + let table = pretty_format_batches(&[batch])?.to_string(); let expected = vec![ r#"+-------------------------------------+----+"#, r#"| c1 | c2 |"#, @@ -605,4 +612,50 @@ mod tests { Ok(()) } + + #[test] + fn test_writing_formatted_batches() -> Result<()> { + // define a schema. + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Utf8, true), + Field::new("b", DataType::Int32, true), + ])); + + // define data. + let batch = RecordBatch::try_new( + schema, + vec![ + Arc::new(array::StringArray::from(vec![ + Some("a"), + Some("b"), + None, + Some("d"), + ])), + Arc::new(array::Int32Array::from(vec![ + Some(1), + None, + Some(10), + Some(100), + ])), + ], + )?; + + let mut buf = String::new(); + write!(&mut buf, "{}", pretty_format_batches(&[batch])?.to_string()).unwrap(); + + let s = vec![ + "+---+-----+", + "| a | b |", + "+---+-----+", + "| a | 1 |", + "| b | |", + "| | 10 |", + "| d | 100 |", + "+---+-----+", + ]; + let expected = String::from(s.join("\n")); + assert_eq!(expected, buf); + + Ok(()) + } } From 9fb2a5fab32d50feacc1f600b45606ba402017df Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" <193874+carols10cents@users.noreply.github.com> Date: Fri, 3 Dec 2021 10:14:32 -0500 Subject: [PATCH 2/2] Fix warnings introduced by Rust/Clippy 1.57.0 (#992) * Remove needless borrows identified by clippy https://rust-lang.github.io/rust-clippy/master/index.html#needless_borrow * Remove muts that are no longer needed * Derive Default instead of using an equivalent manual impl Identified by clippy. https://rust-lang.github.io/rust-clippy/master/index.html#derivable_impls * Remove redundant closures Identified by clippy. https://rust-lang.github.io/rust-clippy/master/index.html#redundant_closure * Allow dead code on a field Rust now identifies as never read --- arrow/src/array/equal/utils.rs | 12 ++++++------ arrow/src/array/transform/boolean.rs | 2 +- arrow/src/compute/kernels/take.rs | 10 +--------- arrow/src/compute/util.rs | 4 ++-- arrow/src/datatypes/field.rs | 2 +- arrow/src/datatypes/schema.rs | 5 +---- arrow/src/ipc/writer.rs | 16 ++++++++-------- .../flight_server_scenarios/auth_basic_proto.rs | 1 + parquet/src/arrow/arrow_writer.rs | 16 ++++++++-------- parquet/src/record/reader.rs | 14 +++++++------- parquet/src/util/bit_util.rs | 4 ++-- parquet_derive/src/parquet_field.rs | 15 ++++++--------- 12 files changed, 44 insertions(+), 57 deletions(-) diff --git a/arrow/src/array/equal/utils.rs b/arrow/src/array/equal/utils.rs index 8eb988cb9a98..7ce8e14993e7 100644 --- a/arrow/src/array/equal/utils.rs +++ b/arrow/src/array/equal/utils.rs @@ -121,14 +121,14 @@ pub(super) fn child_logical_null_buffer( let array_offset = parent_data.offset(); let bitmap_len = bit_util::ceil(parent_len * len, 8); let mut buffer = MutableBuffer::from_len_zeroed(bitmap_len); - let mut null_slice = buffer.as_slice_mut(); + let null_slice = buffer.as_slice_mut(); (array_offset..parent_len + array_offset).for_each(|index| { let start = index * len; let end = start + len; let mask = parent_bitmap.is_set(index); (start..end).for_each(|child_index| { if mask && self_null_bitmap.is_set(child_index) { - bit_util::set_bit(&mut null_slice, child_index); + bit_util::set_bit(null_slice, child_index); } }); }); @@ -151,12 +151,12 @@ pub(super) fn child_logical_null_buffer( // slow path let array_offset = parent_data.offset(); let mut buffer = MutableBuffer::new_null(parent_len); - let mut null_slice = buffer.as_slice_mut(); + let null_slice = buffer.as_slice_mut(); (0..parent_len).for_each(|index| { if parent_bitmap.is_set(index + array_offset) && self_null_bitmap.is_set(index + array_offset) { - bit_util::set_bit(&mut null_slice, index); + bit_util::set_bit(null_slice, index); } }); Some(buffer.into()) @@ -182,7 +182,7 @@ fn logical_list_bitmap( let offset_start = offsets.first().unwrap().to_usize().unwrap(); let offset_len = offsets.get(parent_data.len()).unwrap().to_usize().unwrap(); let mut buffer = MutableBuffer::new_null(offset_len - offset_start); - let mut null_slice = buffer.as_slice_mut(); + let null_slice = buffer.as_slice_mut(); offsets .windows(2) @@ -194,7 +194,7 @@ fn logical_list_bitmap( let mask = parent_bitmap.is_set(index); (start..end).for_each(|child_index| { if mask && child_bitmap.is_set(child_index) { - bit_util::set_bit(&mut null_slice, child_index - offset_start); + bit_util::set_bit(null_slice, child_index - offset_start); } }); }); diff --git a/arrow/src/array/transform/boolean.rs b/arrow/src/array/transform/boolean.rs index 182914971732..456fb6e5e14e 100644 --- a/arrow/src/array/transform/boolean.rs +++ b/arrow/src/array/transform/boolean.rs @@ -29,7 +29,7 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend { let buffer = &mut mutable.buffer1; resize_for_bits(buffer, mutable.len + len); set_bits( - &mut buffer.as_slice_mut(), + buffer.as_slice_mut(), values, mutable.len, array.offset() + start, diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index 8d9b4cb3cf07..9fe00ea9a7b9 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -296,7 +296,7 @@ where } /// Options that define how `take` should behave -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub struct TakeOptions { /// Perform bounds check before taking indices from values. /// If enabled, an `ArrowError` is returned if the indices are out of bounds. @@ -304,14 +304,6 @@ pub struct TakeOptions { pub check_bounds: bool, } -impl Default for TakeOptions { - fn default() -> Self { - Self { - check_bounds: false, - } - } -} - #[inline(always)] fn maybe_usize(index: I) -> Result { index diff --git a/arrow/src/compute/util.rs b/arrow/src/compute/util.rs index e4808e25199c..6778be3f50a9 100644 --- a/arrow/src/compute/util.rs +++ b/arrow/src/compute/util.rs @@ -301,7 +301,7 @@ pub(super) mod tests { values.append(&mut array); } else { list_null_count += 1; - bit_util::unset_bit(&mut list_bitmap.as_slice_mut(), idx); + bit_util::unset_bit(list_bitmap.as_slice_mut(), idx); } offset.push(values.len() as i64); } @@ -386,7 +386,7 @@ pub(super) mod tests { values.extend(items.into_iter()); } else { list_null_count += 1; - bit_util::unset_bit(&mut list_bitmap.as_slice_mut(), idx); + bit_util::unset_bit(list_bitmap.as_slice_mut(), idx); values.extend(vec![None; length as usize].into_iter()); } } diff --git a/arrow/src/datatypes/field.rs b/arrow/src/datatypes/field.rs index 4ed06610e0dc..22e23faa63fd 100644 --- a/arrow/src/datatypes/field.rs +++ b/arrow/src/datatypes/field.rs @@ -286,7 +286,7 @@ impl Field { DataType::Struct(mut fields) => match map.get("children") { Some(Value::Array(values)) => { let struct_fields: Result> = - values.iter().map(|v| Field::from(v)).collect(); + values.iter().map(Field::from).collect(); fields.append(&mut struct_fields?); DataType::Struct(fields) } diff --git a/arrow/src/datatypes/schema.rs b/arrow/src/datatypes/schema.rs index 83bdf7d6e265..22b1ceb39a76 100644 --- a/arrow/src/datatypes/schema.rs +++ b/arrow/src/datatypes/schema.rs @@ -227,10 +227,7 @@ impl Schema { match *json { Value::Object(ref schema) => { let fields = if let Some(Value::Array(fields)) = schema.get("fields") { - fields - .iter() - .map(|f| Field::from(f)) - .collect::>()? + fields.iter().map(Field::from).collect::>()? } else { return Err(ArrowError::ParseError( "Schema fields should be an array".to_string(), diff --git a/arrow/src/ipc/writer.rs b/arrow/src/ipc/writer.rs index 853fc0fc970b..c354eb4890a2 100644 --- a/arrow/src/ipc/writer.rs +++ b/arrow/src/ipc/writer.rs @@ -752,9 +752,9 @@ fn write_continuation( /// Write array data to a vector of bytes fn write_array_data( array_data: &ArrayData, - mut buffers: &mut Vec, - mut arrow_data: &mut Vec, - mut nodes: &mut Vec, + buffers: &mut Vec, + arrow_data: &mut Vec, + nodes: &mut Vec, offset: i64, num_rows: usize, null_count: usize, @@ -775,11 +775,11 @@ fn write_array_data( Some(buffer) => buffer.clone(), }; - offset = write_buffer(&null_buffer, &mut buffers, &mut arrow_data, offset); + offset = write_buffer(&null_buffer, buffers, arrow_data, offset); } array_data.buffers().iter().for_each(|buffer| { - offset = write_buffer(buffer, &mut buffers, &mut arrow_data, offset); + offset = write_buffer(buffer, buffers, arrow_data, offset); }); if !matches!(array_data.data_type(), DataType::Dictionary(_, _)) { @@ -788,9 +788,9 @@ fn write_array_data( // write the nested data (e.g list data) offset = write_array_data( data_ref, - &mut buffers, - &mut arrow_data, - &mut nodes, + buffers, + arrow_data, + nodes, offset, data_ref.len(), data_ref.null_count(), diff --git a/integration-testing/src/flight_server_scenarios/auth_basic_proto.rs b/integration-testing/src/flight_server_scenarios/auth_basic_proto.rs index b6c11dc8e1a5..5607b46a9986 100644 --- a/integration-testing/src/flight_server_scenarios/auth_basic_proto.rs +++ b/integration-testing/src/flight_server_scenarios/auth_basic_proto.rs @@ -58,6 +58,7 @@ pub async fn scenario_setup(port: &str) -> Result { pub struct AuthBasicProtoScenarioImpl { username: Arc, password: Arc, + #[allow(dead_code)] peer_identity: Arc>>, } diff --git a/parquet/src/arrow/arrow_writer.rs b/parquet/src/arrow/arrow_writer.rs index 8600eb0b5101..643f5a29f488 100644 --- a/parquet/src/arrow/arrow_writer.rs +++ b/parquet/src/arrow/arrow_writer.rs @@ -143,9 +143,9 @@ fn get_col_writer( #[allow(clippy::borrowed_box)] fn write_leaves( - mut row_group_writer: &mut Box, + row_group_writer: &mut Box, array: &arrow_array::ArrayRef, - mut levels: &mut Vec, + levels: &mut Vec, ) -> Result<()> { match array.data_type() { ArrowDataType::Null @@ -173,7 +173,7 @@ fn write_leaves( | ArrowDataType::LargeUtf8 | ArrowDataType::Decimal(_, _) | ArrowDataType::FixedSizeBinary(_) => { - let mut col_writer = get_col_writer(&mut row_group_writer)?; + let mut col_writer = get_col_writer(row_group_writer)?; write_leaf( &mut col_writer, array, @@ -186,7 +186,7 @@ fn write_leaves( // write the child list let data = array.data(); let child_array = arrow_array::make_array(data.child_data()[0].clone()); - write_leaves(&mut row_group_writer, &child_array, &mut levels)?; + write_leaves(row_group_writer, &child_array, levels)?; Ok(()) } ArrowDataType::Struct(_) => { @@ -195,7 +195,7 @@ fn write_leaves( .downcast_ref::() .expect("Unable to get struct array"); for field in struct_array.columns() { - write_leaves(&mut row_group_writer, field, &mut levels)?; + write_leaves(row_group_writer, field, levels)?; } Ok(()) } @@ -204,15 +204,15 @@ fn write_leaves( .as_any() .downcast_ref::() .expect("Unable to get map array"); - write_leaves(&mut row_group_writer, &map_array.keys(), &mut levels)?; - write_leaves(&mut row_group_writer, &map_array.values(), &mut levels)?; + write_leaves(row_group_writer, &map_array.keys(), levels)?; + write_leaves(row_group_writer, &map_array.values(), levels)?; Ok(()) } ArrowDataType::Dictionary(_, value_type) => { // cast dictionary to a primitive let array = arrow::compute::cast(array, value_type)?; - let mut col_writer = get_col_writer(&mut row_group_writer)?; + let mut col_writer = get_col_writer(row_group_writer)?; write_leaf( &mut col_writer, &array, diff --git a/parquet/src/record/reader.rs b/parquet/src/record/reader.rs index c45e09738b07..475da44b3acc 100644 --- a/parquet/src/record/reader.rs +++ b/parquet/src/record/reader.rs @@ -106,7 +106,7 @@ impl TreeBuilder { fn reader_tree( &self, field: TypePtr, - mut path: &mut Vec, + path: &mut Vec, mut curr_def_level: i16, mut curr_rep_level: i16, paths: &HashMap, @@ -160,7 +160,7 @@ impl TreeBuilder { // Support for backward compatible lists let reader = self.reader_tree( repeated_field, - &mut path, + path, curr_def_level, curr_rep_level, paths, @@ -180,7 +180,7 @@ impl TreeBuilder { let reader = self.reader_tree( child_field, - &mut path, + path, curr_def_level + 1, curr_rep_level + 1, paths, @@ -235,7 +235,7 @@ impl TreeBuilder { ); let key_reader = self.reader_tree( key_type.clone(), - &mut path, + path, curr_def_level + 1, curr_rep_level + 1, paths, @@ -245,7 +245,7 @@ impl TreeBuilder { let value_type = &key_value_type.get_fields()[1]; let value_reader = self.reader_tree( value_type.clone(), - &mut path, + path, curr_def_level + 1, curr_rep_level + 1, paths, @@ -278,7 +278,7 @@ impl TreeBuilder { let reader = self.reader_tree( Arc::new(required_field), - &mut path, + path, curr_def_level, curr_rep_level, paths, @@ -298,7 +298,7 @@ impl TreeBuilder { for child in field.get_fields() { let reader = self.reader_tree( child.clone(), - &mut path, + path, curr_def_level, curr_rep_level, paths, diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs index fa8b9ddac1ea..162cfd8b5813 100644 --- a/parquet/src/util/bit_util.rs +++ b/parquet/src/util/bit_util.rs @@ -383,8 +383,8 @@ impl BitWriter { // TODO: should we return `Result` for this func? return false; } - let mut ptr = result.unwrap(); - memcpy_value(&val, num_bytes, &mut ptr); + let ptr = result.unwrap(); + memcpy_value(&val, num_bytes, ptr); true } diff --git a/parquet_derive/src/parquet_field.rs b/parquet_derive/src/parquet_field.rs index 36730c7713c5..8658f59b9cf0 100644 --- a/parquet_derive/src/parquet_field.rs +++ b/parquet_derive/src/parquet_field.rs @@ -769,7 +769,7 @@ mod test { }; let fields = extract_fields(snippet); - let processed: Vec<_> = fields.iter().map(|field| Field::from(field)).collect(); + let processed: Vec<_> = fields.iter().map(Field::from).collect(); let column_writers: Vec<_> = processed .iter() @@ -800,7 +800,7 @@ mod test { }; let fields = extract_fields(snippet); - let processed: Vec<_> = fields.iter().map(|field| Field::from(field)).collect(); + let processed: Vec<_> = fields.iter().map(Field::from).collect(); assert_eq!(processed.len(), 3); assert_eq!( @@ -840,8 +840,7 @@ mod test { }; let fields = extract_fields(snippet); - let converted_fields: Vec<_> = - fields.iter().map(|field| Type::from(field)).collect(); + let converted_fields: Vec<_> = fields.iter().map(Type::from).collect(); let inner_types: Vec<_> = converted_fields .iter() .map(|field| field.inner_type()) @@ -878,8 +877,7 @@ mod test { }; let fields = extract_fields(snippet); - let converted_fields: Vec<_> = - fields.iter().map(|field| Type::from(field)).collect(); + let converted_fields: Vec<_> = fields.iter().map(Type::from).collect(); let physical_types: Vec<_> = converted_fields .iter() .map(|ty| ty.physical_type()) @@ -911,8 +909,7 @@ mod test { }; let fields = extract_fields(snippet); - let converted_fields: Vec<_> = - fields.iter().map(|field| Type::from(field)).collect(); + let converted_fields: Vec<_> = fields.iter().map(Type::from).collect(); assert_eq!( converted_fields, @@ -938,7 +935,7 @@ mod test { }; let fields = extract_fields(snippet); - let types: Vec<_> = fields.iter().map(|field| Type::from(field)).collect(); + let types: Vec<_> = fields.iter().map(Type::from).collect(); assert_eq!( types,