diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml index f0f368a75c6e..390dab43160d 100644 --- a/datafusion/Cargo.toml +++ b/datafusion/Cargo.toml @@ -52,8 +52,8 @@ avro = ["avro-rs", "num-traits"] [dependencies] ahash = "0.7" hashbrown = { version = "0.11", features = ["raw"] } -arrow = { version = "6.1.0", features = ["prettyprint"] } -parquet = { version = "6.1.0", features = ["arrow"] } +arrow = { version = "6.2.0", features = ["prettyprint"] } +parquet = { version = "6.2.0", features = ["arrow"] } sqlparser = "0.12" paste = "^1.0" num_cpus = "1.13.0" diff --git a/datafusion/src/physical_optimizer/pruning.rs b/datafusion/src/physical_optimizer/pruning.rs index ba0186513a45..24334d7983d5 100644 --- a/datafusion/src/physical_optimizer/pruning.rs +++ b/datafusion/src/physical_optimizer/pruning.rs @@ -1374,36 +1374,24 @@ mod tests { #[test] fn prune_bool_column_eq_true() { - let (schema, statistics, _, _) = bool_setup(); + let (schema, statistics, expected_true, _) = bool_setup(); // b1 = true let expr = col("b1").eq(lit(true)); let p = PruningPredicate::try_new(&expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap_err(); - assert!( - result.to_string().contains( - "Data type Boolean not supported for scalar operation 'lt_eq' on dyn array" - ), - "{}", - result - ) + let result = p.prune(&statistics).unwrap(); + assert_eq!(result, expected_true); } #[test] fn prune_bool_not_column_eq_true() { - let (schema, statistics, _, _) = bool_setup(); + let (schema, statistics, _, expected_false) = bool_setup(); // !b1 = true let expr = col("b1").not().eq(lit(true)); let p = PruningPredicate::try_new(&expr, schema).unwrap(); - let result = p.prune(&statistics).unwrap_err(); - assert!( - result.to_string().contains( - "Data type Boolean not supported for scalar operation 'lt_eq' on dyn array" - ), - "{}", - result - ) + let result = p.prune(&statistics).unwrap(); + assert_eq!(result, expected_false); } /// Creates setup for int32 chunk pruning diff --git 
a/datafusion/src/physical_plan/expressions/binary.rs b/datafusion/src/physical_plan/expressions/binary.rs index 456e8d42a5df..8bd12fabfaa3 100644 --- a/datafusion/src/physical_plan/expressions/binary.rs +++ b/datafusion/src/physical_plan/expressions/binary.rs @@ -24,6 +24,10 @@ use arrow::compute::kernels::arithmetic::{ }; use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene}; use arrow::compute::kernels::comparison::{eq, gt, gt_eq, lt, lt_eq, neq}; +use arrow::compute::kernels::comparison::{ + eq_bool, eq_bool_scalar, gt_bool, gt_eq_bool, lt_bool, lt_eq_bool, neq_bool, + neq_bool_scalar, +}; use arrow::compute::kernels::comparison::{ eq_scalar, gt_eq_scalar, gt_scalar, lt_eq_scalar, lt_scalar, neq_scalar, }; @@ -49,6 +53,62 @@ use super::coercion::{ eq_coercion, like_coercion, numerical_coercion, order_coercion, string_coercion, }; +// Simple (low performance) kernels until optimized kernels are added to arrow +// TODO: file arrow-rs ticket to track this feature + +fn is_distinct_from_bool( + left: &BooleanArray, + right: &BooleanArray, +) -> Result { + // Unlike `neq_bool`, a null is compared as a value here (null == null), so the result is never null + Ok(left + .iter() + .zip(right.iter()) + .map(|(left, right)| Some(left != right)) + .collect()) +} + +fn is_not_distinct_from_bool( + left: &BooleanArray, + right: &BooleanArray, +) -> Result { + Ok(left + .iter() + .zip(right.iter()) + .map(|(left, right)| Some(left == right)) + .collect()) +} + +#[allow(clippy::bool_comparison)] +fn lt_bool_scalar(left: &BooleanArray, right: bool) -> Result { + Ok(left + .iter() + .map(|left| left.map(|left| left < right)) + .collect()) +} + +fn lt_eq_bool_scalar(left: &BooleanArray, right: bool) -> Result { + Ok(left + .iter() + .map(|left| left.map(|left| left <= right)) + .collect()) +} + +#[allow(clippy::bool_comparison)] +fn gt_bool_scalar(left: &BooleanArray, right: bool) -> Result { + Ok(left + .iter() + .map(|left| left.map(|left| left > right)) + .collect()) +} + +fn gt_eq_bool_scalar(left: 
&BooleanArray, right: bool) -> Result { + Ok(left + .iter() + .map(|left| left.map(|left| left >= right)) + .collect()) +} + /// Binary expression #[derive(Debug)] pub struct BinaryExpr { @@ -126,6 +186,47 @@ macro_rules! compute_utf8_op_scalar { }}; } +/// Invoke a compute kernel on a boolean data array and a scalar value +macro_rules! compute_bool_op_scalar { + ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ + use std::convert::TryInto; + let ll = $LEFT + .as_any() + .downcast_ref::<$DT>() + .expect("compute_op failed to downcast array"); + // generate the scalar function name, such as lt_bool_scalar, from the $OP parameter + // (which could have a value of lt) and the suffix _bool_scalar + Ok(Arc::new(paste::expr! {[<$OP _bool_scalar>]}( + &ll, + $RIGHT.try_into()?, + )?)) + }}; +} + +/// Invoke a bool compute kernel on array(s) +macro_rules! compute_bool_op { + // invoke binary operator + ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ + let ll = $LEFT + .as_any() + .downcast_ref::<$DT>() + .expect("compute_op failed to downcast array"); + let rr = $RIGHT + .as_any() + .downcast_ref::<$DT>() + .expect("compute_op failed to downcast array"); + Ok(Arc::new(paste::expr! {[<$OP _bool>]}(&ll, &rr)?)) + }}; + // invoke unary operator + ($OPERAND:expr, $OP:ident, $DT:ident) => {{ + let operand = $OPERAND + .as_any() + .downcast_ref::<$DT>() + .expect("compute_op failed to downcast array"); + Ok(Arc::new(paste::expr! {[<$OP _bool>]}(&operand)?)) + }}; +} + /// Invoke a compute kernel on a data array and a scalar value macro_rules! compute_op_scalar { ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ @@ -276,6 +377,7 @@ macro_rules! 
binary_array_op_scalar { DataType::Date64 => { compute_op_scalar!($LEFT, $RIGHT, $OP, Date64Array) } + DataType::Boolean => compute_bool_op_scalar!($LEFT, $RIGHT, $OP, BooleanArray), other => Err(DataFusionError::Internal(format!( "Data type {:?} not supported for scalar operation '{}' on dyn array", other, stringify!($OP) @@ -320,6 +422,7 @@ macro_rules! binary_array_op { DataType::Date64 => { compute_op!($LEFT, $RIGHT, $OP, Date64Array) } + DataType::Boolean => compute_bool_op!($LEFT, $RIGHT, $OP, BooleanArray), other => Err(DataFusionError::Internal(format!( "Data type {:?} not supported for binary operation '{}' on dyn arrays", other, stringify!($OP) @@ -822,7 +925,7 @@ mod tests { use super::*; use crate::error::Result; - use crate::physical_plan::expressions::col; + use crate::physical_plan::expressions::{col, lit}; // Create a binary expression without coercion. Used here when we do not want to coerce the expressions // to valid types. Usage can result in an execution (after plan) error. 
@@ -1372,6 +1475,42 @@ mod tests { Ok(()) } + // Test `scalar arr` produces expected + fn apply_logic_op_scalar_arr( + schema: &SchemaRef, + scalar: bool, + arr: &ArrayRef, + op: Operator, + expected: &BooleanArray, + ) -> Result<()> { + let scalar = lit(scalar.into()); + + let arithmetic_op = binary_simple(scalar, op, col("a", schema)?); + let batch = RecordBatch::try_new(Arc::clone(schema), vec![Arc::clone(arr)])?; + let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows()); + assert_eq!(result.as_ref(), expected); + + Ok(()) + } + + // Test `arr scalar` produces expected + fn apply_logic_op_arr_scalar( + schema: &SchemaRef, + arr: &ArrayRef, + scalar: bool, + op: Operator, + expected: &BooleanArray, + ) -> Result<()> { + let scalar = lit(scalar.into()); + + let arithmetic_op = binary_simple(col("a", schema)?, op, scalar); + let batch = RecordBatch::try_new(Arc::clone(schema), vec![Arc::clone(arr)])?; + let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows()); + assert_eq!(result.as_ref(), expected); + + Ok(()) + } + #[test] fn and_with_nulls_op() -> Result<()> { let schema = Schema::new(vec![ @@ -1462,6 +1601,293 @@ mod tests { Ok(()) } + /// Returns (schema, a: BooleanArray, b: BooleanArray) with all possible inputs + /// + /// a: [true, true, true, NULL, NULL, NULL, false, false, false] + /// b: [true, NULL, false, true, NULL, false, true, NULL, false] + fn bool_test_arrays() -> (SchemaRef, BooleanArray, BooleanArray) { + let schema = Schema::new(vec![ + Field::new("a", DataType::Boolean, false), + Field::new("b", DataType::Boolean, false), + ]); + let a = [ + Some(true), + Some(true), + Some(true), + None, + None, + None, + Some(false), + Some(false), + Some(false), + ] + .iter() + .collect(); + let b = [ + Some(true), + None, + Some(false), + Some(true), + None, + Some(false), + Some(true), + None, + Some(false), + ] + .iter() + .collect(); + (Arc::new(schema), a, b) + } + + /// Returns (schema, BooleanArray) with [true, 
NULL, false] + fn scalar_bool_test_array() -> (SchemaRef, ArrayRef) { + let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); + let a: BooleanArray = vec![Some(true), None, Some(false)].iter().collect(); + (Arc::new(schema), Arc::new(a)) + } + + #[test] + fn eq_op_bool() { + let (schema, a, b) = bool_test_arrays(); + let expected = vec![ + Some(true), + None, + Some(false), + None, + None, + None, + Some(false), + None, + Some(true), + ] + .iter() + .collect(); + apply_logic_op(schema, a, b, Operator::Eq, expected).unwrap(); + } + + #[test] + fn eq_op_bool_scalar() { + let (schema, a) = scalar_bool_test_array(); + let expected = [Some(true), None, Some(false)].iter().collect(); + apply_logic_op_scalar_arr(&schema, true, &a, Operator::Eq, &expected).unwrap(); + apply_logic_op_arr_scalar(&schema, &a, true, Operator::Eq, &expected).unwrap(); + + let expected = [Some(false), None, Some(true)].iter().collect(); + apply_logic_op_scalar_arr(&schema, false, &a, Operator::Eq, &expected).unwrap(); + apply_logic_op_arr_scalar(&schema, &a, false, Operator::Eq, &expected).unwrap(); + } + + #[test] + fn neq_op_bool() { + let (schema, a, b) = bool_test_arrays(); + let expected = [ + Some(false), + None, + Some(true), + None, + None, + None, + Some(true), + None, + Some(false), + ] + .iter() + .collect(); + apply_logic_op(schema, a, b, Operator::NotEq, expected).unwrap(); + } + + #[test] + fn neq_op_bool_scalar() { + let (schema, a) = scalar_bool_test_array(); + let expected = [Some(false), None, Some(true)].iter().collect(); + apply_logic_op_scalar_arr(&schema, true, &a, Operator::NotEq, &expected).unwrap(); + apply_logic_op_arr_scalar(&schema, &a, true, Operator::NotEq, &expected).unwrap(); + + let expected = [Some(true), None, Some(false)].iter().collect(); + apply_logic_op_scalar_arr(&schema, false, &a, Operator::NotEq, &expected) + .unwrap(); + apply_logic_op_arr_scalar(&schema, &a, false, Operator::NotEq, &expected) + .unwrap(); + } + + #[test] + fn 
lt_op_bool() { + let (schema, a, b) = bool_test_arrays(); + let expected = [ + Some(false), + None, + Some(false), + None, + None, + None, + Some(true), + None, + Some(false), + ] + .iter() + .collect(); + apply_logic_op(schema, a, b, Operator::Lt, expected).unwrap(); + } + + #[test] + fn lt_op_bool_scalar() { + let (schema, a) = scalar_bool_test_array(); + let expected = [Some(false), None, Some(false)].iter().collect(); + apply_logic_op_scalar_arr(&schema, true, &a, Operator::Lt, &expected).unwrap(); + + let expected = [Some(false), None, Some(true)].iter().collect(); + apply_logic_op_arr_scalar(&schema, &a, true, Operator::Lt, &expected).unwrap(); + + let expected = [Some(true), None, Some(false)].iter().collect(); + apply_logic_op_scalar_arr(&schema, false, &a, Operator::Lt, &expected).unwrap(); + + let expected = [Some(false), None, Some(false)].iter().collect(); + apply_logic_op_arr_scalar(&schema, &a, false, Operator::Lt, &expected).unwrap(); + } + + #[test] + fn lt_eq_op_bool() { + let (schema, a, b) = bool_test_arrays(); + let expected = [ + Some(true), + None, + Some(false), + None, + None, + None, + Some(true), + None, + Some(true), + ] + .iter() + .collect(); + apply_logic_op(schema, a, b, Operator::LtEq, expected).unwrap(); + } + + #[test] + fn lt_eq_op_bool_scalar() { + let (schema, a) = scalar_bool_test_array(); + let expected = [Some(true), None, Some(false)].iter().collect(); + apply_logic_op_scalar_arr(&schema, true, &a, Operator::LtEq, &expected).unwrap(); + + let expected = [Some(true), None, Some(true)].iter().collect(); + apply_logic_op_arr_scalar(&schema, &a, true, Operator::LtEq, &expected).unwrap(); + + let expected = [Some(true), None, Some(true)].iter().collect(); + apply_logic_op_scalar_arr(&schema, false, &a, Operator::LtEq, &expected).unwrap(); + + let expected = [Some(false), None, Some(true)].iter().collect(); + apply_logic_op_arr_scalar(&schema, &a, false, Operator::LtEq, &expected).unwrap(); + } + + #[test] + fn gt_op_bool() { + 
let (schema, a, b) = bool_test_arrays(); + let expected = [ + Some(false), + None, + Some(true), + None, + None, + None, + Some(false), + None, + Some(false), + ] + .iter() + .collect(); + apply_logic_op(schema, a, b, Operator::Gt, expected).unwrap(); + } + + #[test] + fn gt_op_bool_scalar() { + let (schema, a) = scalar_bool_test_array(); + let expected = [Some(false), None, Some(true)].iter().collect(); + apply_logic_op_scalar_arr(&schema, true, &a, Operator::Gt, &expected).unwrap(); + + let expected = [Some(false), None, Some(false)].iter().collect(); + apply_logic_op_arr_scalar(&schema, &a, true, Operator::Gt, &expected).unwrap(); + + let expected = [Some(false), None, Some(false)].iter().collect(); + apply_logic_op_scalar_arr(&schema, false, &a, Operator::Gt, &expected).unwrap(); + + let expected = [Some(true), None, Some(false)].iter().collect(); + apply_logic_op_arr_scalar(&schema, &a, false, Operator::Gt, &expected).unwrap(); + } + + #[test] + fn gt_eq_op_bool() { + let (schema, a, b) = bool_test_arrays(); + let expected = [ + Some(true), + None, + Some(true), + None, + None, + None, + Some(false), + None, + Some(true), + ] + .iter() + .collect(); + apply_logic_op(schema, a, b, Operator::GtEq, expected).unwrap(); + } + + #[test] + fn gt_eq_op_bool_scalar() { + let (schema, a) = scalar_bool_test_array(); + let expected = [Some(true), None, Some(true)].iter().collect(); + apply_logic_op_scalar_arr(&schema, true, &a, Operator::GtEq, &expected).unwrap(); + + let expected = [Some(true), None, Some(false)].iter().collect(); + apply_logic_op_arr_scalar(&schema, &a, true, Operator::GtEq, &expected).unwrap(); + + let expected = [Some(false), None, Some(true)].iter().collect(); + apply_logic_op_scalar_arr(&schema, false, &a, Operator::GtEq, &expected).unwrap(); + + let expected = [Some(true), None, Some(true)].iter().collect(); + apply_logic_op_arr_scalar(&schema, &a, false, Operator::GtEq, &expected).unwrap(); + } + + #[test] + fn is_distinct_from_op_bool() { + let 
(schema, a, b) = bool_test_arrays(); + let expected = [ + Some(false), + Some(true), + Some(true), + Some(true), + Some(false), + Some(true), + Some(true), + Some(true), + Some(false), + ] + .iter() + .collect(); + apply_logic_op(schema, a, b, Operator::IsDistinctFrom, expected).unwrap(); + } + + #[test] + fn is_not_distinct_from_op_bool() { + let (schema, a, b) = bool_test_arrays(); + let expected = [ + Some(true), + Some(false), + Some(false), + Some(false), + Some(true), + Some(false), + Some(false), + Some(false), + Some(true), + ] + .iter() + .collect(); + apply_logic_op(schema, a, b, Operator::IsNotDistinctFrom, expected).unwrap(); + } + #[test] fn test_coersion_error() -> Result<()> { let expr = diff --git a/datafusion/src/physical_plan/expressions/nullif.rs b/datafusion/src/physical_plan/expressions/nullif.rs index 55e7bda40f83..1d915998480a 100644 --- a/datafusion/src/physical_plan/expressions/nullif.rs +++ b/datafusion/src/physical_plan/expressions/nullif.rs @@ -23,7 +23,9 @@ use crate::scalar::ScalarValue; use arrow::array::Array; use arrow::array::*; use arrow::compute::kernels::boolean::nullif; -use arrow::compute::kernels::comparison::{eq, eq_scalar, eq_utf8, eq_utf8_scalar}; +use arrow::compute::kernels::comparison::{ + eq, eq_bool, eq_bool_scalar, eq_scalar, eq_utf8, eq_utf8_scalar, +}; use arrow::datatypes::{DataType, TimeUnit}; /// Invoke a compute kernel on a primitive array and a Boolean Array diff --git a/datafusion/src/physical_plan/file_format/parquet.rs b/datafusion/src/physical_plan/file_format/parquet.rs index e7980d9aa6d3..52dc8e9bce85 100644 --- a/datafusion/src/physical_plan/file_format/parquet.rs +++ b/datafusion/src/physical_plan/file_format/parquet.rs @@ -714,12 +714,14 @@ mod tests { } #[test] - fn row_group_predicate_builder_unsupported_type() -> Result<()> { + fn row_group_predicate_builder_null_expr() -> Result<()> { use crate::logical_plan::{col, lit}; - // test row group predicate with unsupported statistics type (boolean) - // 
where a null array is generated for some statistics columns - // int > 1 and bool = true => c1_max > 1 and null - let expr = col("c1").gt(lit(15)).and(col("c2").eq(lit(true))); + // test row group predicate with an unknown (Null) expr + // + // int > 15 and bool = NULL => c1_max > 15 and null + let expr = col("c1") + .gt(lit(15)) + .and(col("c2").eq(lit(ScalarValue::Boolean(None)))); let schema = Arc::new(Schema::new(vec![ Field::new("c1", DataType::Int32, false), Field::new("c2", DataType::Boolean, false), diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index eeb6c10926b1..0e7c9e5ad02f 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -1051,6 +1051,115 @@ async fn csv_query_having_without_group_by() -> Result<()> { Ok(()) } +#[tokio::test] +async fn csv_query_boolean_eq_neq() { + let mut ctx = ExecutionContext::new(); + register_boolean(&mut ctx).await.unwrap(); + // verify the plumbing is all hooked up for eq and neq + let sql = "SELECT a, b, a = b as eq, b = true as eq_scalar, a != b as neq, a != true as neq_scalar FROM t1"; + let actual = execute_to_batches(&mut ctx, sql).await; + + let expected = vec![ + "+-------+-------+-------+-----------+-------+------------+", + "| a | b | eq | eq_scalar | neq | neq_scalar |", + "+-------+-------+-------+-----------+-------+------------+", + "| true | true | true | true | false | false |", + "| true | | | | | false |", + "| true | false | false | false | true | false |", + "| | true | | true | | |", + "| | | | | | |", + "| | false | | false | | |", + "| false | true | false | true | true | true |", + "| false | | | | | true |", + "| false | false | true | false | false | true |", + "+-------+-------+-------+-----------+-------+------------+", + ]; + assert_batches_eq!(expected, &actual); +} + +#[tokio::test] +async fn csv_query_boolean_lt_lt_eq() { + let mut ctx = ExecutionContext::new(); + register_boolean(&mut ctx).await.unwrap(); + // verify the plumbing is all hooked up for < and <= + 
let sql = "SELECT a, b, a < b as lt, b = true as lt_scalar, a <= b as lt_eq, a <= true as lt_eq_scalar FROM t1"; + let actual = execute_to_batches(&mut ctx, sql).await; + + let expected = vec![ + "+-------+-------+-------+-----------+-------+--------------+", + "| a | b | lt | lt_scalar | lt_eq | lt_eq_scalar |", + "+-------+-------+-------+-----------+-------+--------------+", + "| true | true | false | true | true | true |", + "| true | | | | | true |", + "| true | false | false | false | false | true |", + "| | true | | true | | |", + "| | | | | | |", + "| | false | | false | | |", + "| false | true | true | true | true | true |", + "| false | | | | | true |", + "| false | false | false | false | true | true |", + "+-------+-------+-------+-----------+-------+--------------+", + ]; + assert_batches_eq!(expected, &actual); +} + +#[tokio::test] +async fn csv_query_boolean_gt_gt_eq() { + let mut ctx = ExecutionContext::new(); + register_boolean(&mut ctx).await.unwrap(); + // verify the plumbing is all hooked up for > and >= + let sql = "SELECT a, b, a > b as gt, b = true as gt_scalar, a >= b as gt_eq, a >= true as gt_eq_scalar FROM t1"; + let actual = execute_to_batches(&mut ctx, sql).await; + + let expected = vec![ + "+-------+-------+-------+-----------+-------+--------------+", + "| a | b | gt | gt_scalar | gt_eq | gt_eq_scalar |", + "+-------+-------+-------+-----------+-------+--------------+", + "| true | true | false | true | true | true |", + "| true | | | | | true |", + "| true | false | true | false | true | true |", + "| | true | | true | | |", + "| | | | | | |", + "| | false | | false | | |", + "| false | true | false | true | false | false |", + "| false | | | | | false |", + "| false | false | false | false | true | false |", + "+-------+-------+-------+-----------+-------+--------------+", + ]; + assert_batches_eq!(expected, &actual); +} + +#[tokio::test] +async fn csv_query_boolean_distinct_from() { + let mut ctx = ExecutionContext::new(); + 
register_boolean(&mut ctx).await.unwrap(); + // verify the plumbing is all hooked up for is distinct from and is not distinct from + let sql = "SELECT a, b, \ + a is distinct from b as df, \ + b is distinct from true as df_scalar, \ + a is not distinct from b as ndf, \ + a is not distinct from true as ndf_scalar \ + FROM t1"; + let actual = execute_to_batches(&mut ctx, sql).await; + + let expected = vec![ + "+-------+-------+-------+-----------+-------+------------+", + "| a | b | df | df_scalar | ndf | ndf_scalar |", + "+-------+-------+-------+-----------+-------+------------+", + "| true | true | false | false | true | true |", + "| true | | true | true | false | true |", + "| true | false | true | true | false | true |", + "| | true | true | false | false | false |", + "| | | false | true | true | false |", + "| | false | true | true | false | false |", + "| false | true | true | false | false | false |", + "| false | | true | true | false | false |", + "| false | false | false | true | true | false |", + "+-------+-------+-------+-----------+-------+------------+", + ]; + assert_batches_eq!(expected, &actual); +} + #[tokio::test] async fn csv_query_avg_sqrt() -> Result<()> { let mut ctx = create_ctx()?; @@ -3404,6 +3513,42 @@ async fn register_aggregate_csv_by_sql(ctx: &mut ExecutionContext) { ); } +/// Create table "t1" with two boolean columns "a" and "b" +async fn register_boolean(ctx: &mut ExecutionContext) -> Result<()> { + let a: BooleanArray = [ + Some(true), + Some(true), + Some(true), + None, + None, + None, + Some(false), + Some(false), + Some(false), + ] + .iter() + .collect(); + let b: BooleanArray = [ + Some(true), + None, + Some(false), + Some(true), + None, + Some(false), + Some(true), + None, + Some(false), + ] + .iter() + .collect(); + + let data = + RecordBatch::try_from_iter([("a", Arc::new(a) as _), ("b", Arc::new(b) as _)])?; + let table = MemTable::try_new(data.schema(), vec![vec![data]])?; + ctx.register_table("t1", Arc::new(table))?; + 
Ok(()) +} + async fn register_aggregate_csv(ctx: &mut ExecutionContext) -> Result<()> { let testdata = datafusion::test_util::arrow_test_data(); let schema = common::aggr_test_schema(); @@ -4487,6 +4632,8 @@ macro_rules! test_expression { async fn test_boolean_expressions() -> Result<()> { test_expression!("true", "true"); test_expression!("false", "false"); + test_expression!("false = false", "true"); + test_expression!("true = false", "false"); Ok(()) }