From d2a3623d11e6079e97ecc47ee69fa71f1a018cfd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 27 Jun 2024 20:26:46 -0400 Subject: [PATCH] Minor: reduce replication for nested comparison (#11149) --- datafusion/physical-expr-common/src/datum.rs | 2 +- datafusion/physical-plan/src/joins/hash_join.rs | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/datafusion/physical-expr-common/src/datum.rs b/datafusion/physical-expr-common/src/datum.rs index fd3f2587e2ff5..96c903180ed98 100644 --- a/datafusion/physical-expr-common/src/datum.rs +++ b/datafusion/physical-expr-common/src/datum.rs @@ -87,7 +87,7 @@ pub fn apply_cmp_for_nested( } /// Compare on nested type List, Struct, and so on -fn compare_op_for_nested( +pub fn compare_op_for_nested( op: &Operator, lhs: &dyn Datum, rhs: &dyn Datum, diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 7d268839df127..b2f9ef5607458 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -52,15 +52,13 @@ use arrow::array::{ Array, ArrayRef, BooleanArray, BooleanBufferBuilder, PrimitiveArray, UInt32Array, UInt64Array, }; -use arrow::buffer::NullBuffer; use arrow::compute::kernels::cmp::{eq, not_distinct}; use arrow::compute::{and, concat_batches, take, FilterBuilder}; use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow::util::bit_util; use arrow_array::cast::downcast_array; -use arrow_ord::ord::make_comparator; -use arrow_schema::{ArrowError, SortOptions}; +use arrow_schema::ArrowError; use datafusion_common::utils::memory::estimate_memory_size; use datafusion_common::{ internal_datafusion_err, internal_err, plan_err, project_schema, DataFusionError, @@ -75,6 +73,8 @@ use datafusion_physical_expr::expressions::UnKnownColumn; use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef}; use ahash::RandomState; +use datafusion_expr::Operator; +use datafusion_physical_expr_common::datum::compare_op_for_nested; use futures::{ready, Stream, StreamExt, TryStreamExt}; use parking_lot::Mutex; @@ -1216,11 +1216,7 @@ fn eq_dyn_null( // implementation // if left.data_type().is_nested() && null_equals_null { - let cmp = make_comparator(left, right, SortOptions::default())?; - let len = left.len().min(right.len()); - let values = (0..len).map(|i| cmp(i, i).is_eq()).collect(); - let nulls = NullBuffer::union(left.nulls(), right.nulls()); - return Ok(BooleanArray::new(values, nulls)); + return Ok(compare_op_for_nested(&Operator::Eq, &left, &right)?); } match (left.data_type(), right.data_type()) { _ if null_equals_null => not_distinct(&left, &right),