Skip to content

Commit

Permalink
check null in filter
Browse files Browse the repository at this point in the history
  • Loading branch information
xinlifoobar committed Jul 5, 2024
1 parent d387335 commit 3d3571b
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 17 deletions.
11 changes: 3 additions & 8 deletions datafusion/expr/src/expr_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition};
use arrow::compute::can_cast_types;
use arrow::datatypes::{DataType, Field};
use datafusion_common::{
internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema, Result, ScalarValue, TableReference
internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema,
Result, TableReference,
};
use std::collections::HashMap;
use std::sync::Arc;
Expand Down Expand Up @@ -110,13 +111,7 @@ impl ExprSchemable for Expr {
Expr::Column(c) => Ok(schema.data_type(c)?.clone()),
Expr::OuterReferenceColumn(ty, _) => Ok(ty.clone()),
Expr::ScalarVariable(ty, _) => Ok(ty.clone()),
Expr::Literal(l) => {
match l {
// Interpret NULL as a missing boolean value.
ScalarValue::Null => Ok(DataType::Boolean),
_ => Ok(l.data_type())
}
},
Expr::Literal(l) => Ok(l.data_type()),
Expr::Case(case) => case.when_then_expr[0].1.get_type(schema),
Expr::Cast(Cast { data_type, .. })
| Expr::TryCast(TryCast { data_type, .. }) => Ok(data_type.clone()),
Expand Down
3 changes: 2 additions & 1 deletion datafusion/expr/src/logical_plan/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2123,7 +2123,8 @@ impl Filter {
// construction (such as with correlated subqueries) so we make a best effort here and
// ignore errors resolving the expression against the schema.
if let Ok(predicate_type) = predicate.get_type(input.schema()) {
if predicate_type != DataType::Boolean {
// Interpret NULL as a missing boolean value.
if predicate_type != DataType::Boolean && predicate_type != DataType::Null {
return plan_err!(
"Cannot create filter with non-boolean predicate '{predicate}' returning {predicate_type}"
);
Expand Down
21 changes: 13 additions & 8 deletions datafusion/physical-plan/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ use arrow::record_batch::RecordBatch;
use arrow_array::{Array, BooleanArray};
use datafusion_common::cast::{as_boolean_array, as_null_array};
use datafusion_common::stats::Precision;
use datafusion_common::{plan_err, DataFusionError, Result};
use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
use datafusion_execution::TaskContext;
use datafusion_expr::Operator;
use datafusion_physical_expr::expressions::BinaryExpr;
Expand Down Expand Up @@ -370,14 +370,19 @@ pub(crate) fn batch_filter(
.evaluate(batch)
.and_then(|v| v.into_array(batch.num_rows()))
.and_then(|array| {
let filter_array: BooleanArray = match as_null_array(&array) {
Ok(null_array) => {
// if the predicate is null, then the result is also null
Ok::<BooleanArray, DataFusionError>(BooleanArray::new_null(null_array.len()))
}
let filter_array = match as_boolean_array(&array) {
Ok(boolean_array) => {
Ok(boolean_array.to_owned())
},
Err(_) => {
// if the predicate is not null, then the result is a boolean array
Ok(as_boolean_array(&array)?.to_owned())
let Ok(null_array) = as_null_array(&array) else {
return internal_err!("Cannot create filter_array from non-boolean predicates, unable to continute");
};

// if the predicate is null, then the result is also null
Ok::<BooleanArray, DataFusionError>(BooleanArray::new_null(
null_array.len(),
))
}
}?;

Expand Down
9 changes: 9 additions & 0 deletions datafusion/sqllogictest/test_files/misc.slt
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,12 @@ foo (empty) NULL
query I
select 1 where NULL
----

query I
select 1 where NULL and 1 = 1
----

query I
select 1 where NULL or 1 = 1
----
1

0 comments on commit 3d3571b

Please sign in to comment.