From f3c487cb7e05faee450a901b3ea793182111c427 Mon Sep 17 00:00:00 2001 From: zjuwangg Date: Thu, 9 May 2024 11:42:16 +0800 Subject: [PATCH 1/6] [WIP][VL]isNull and isNotNull coexsit causing result not correct --- .../org/apache/gluten/execution/TestOperator.scala | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala index a14a5b7e78de..3df12e5d0b2e 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala @@ -106,6 +106,17 @@ class TestOperator extends VeloxWholeStageTransformerSuite { checkLengthAndPlan(df, 6) } + test("is_null and is_not_null coexist") { + val data = Seq(Row(null), Row("data"), Row(null)) + val schema = StructType(Array(StructField("col1", StringType, nullable = true))) + spark + .createDataFrame(JavaConverters.seqAsJavaList(data), schema) + .createOrReplaceTempView("temp_test_is_null") + val df = runQueryAndCompare( + "select * from temp_test_is_null where col1 is null and col1 is not null") { _ => } + checkLengthAndPlan(df, 2) + } + test("and pushdown") { val df = runQueryAndCompare( "select l_orderkey from lineitem where l_orderkey > 2 " + From c3266a9ee7628dd627027687e0c0028cc62014e0 Mon Sep 17 00:00:00 2001 From: zjuwangg Date: Thu, 9 May 2024 13:32:00 +0800 Subject: [PATCH 2/6] try to reproduce data quality problem --- .../test/scala/org/apache/gluten/execution/TestOperator.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala index 3df12e5d0b2e..d69871349d76 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala @@ -113,8 +113,8 @@ class TestOperator extends VeloxWholeStageTransformerSuite { .createDataFrame(JavaConverters.seqAsJavaList(data), schema) .createOrReplaceTempView("temp_test_is_null") val df = runQueryAndCompare( - "select * from temp_test_is_null where col1 is null and col1 is not null") { _ => } - checkLengthAndPlan(df, 2) + "select * from temp_test_is_null where col1 is not null and col1 is null") { _ => } + checkLengthAndPlan(df, 0) } test("and pushdown") { From bd8ebf0175d781197a23c4a5320b3fc12299b1f6 Mon Sep 17 00:00:00 2001 From: zjuwangg Date: Thu, 9 May 2024 14:40:56 +0800 Subject: [PATCH 3/6] try to reproduce data quality problem --- .../scala/org/apache/gluten/execution/TestOperator.scala | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala index d69871349d76..c166b8b0acfb 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala @@ -107,13 +107,8 @@ class TestOperator extends VeloxWholeStageTransformerSuite { } test("is_null and is_not_null coexist") { - val data = Seq(Row(null), Row("data"), Row(null)) - val schema = StructType(Array(StructField("col1", StringType, nullable = true))) - spark - .createDataFrame(JavaConverters.seqAsJavaList(data), schema) - .createOrReplaceTempView("temp_test_is_null") val df = runQueryAndCompare( - "select * from temp_test_is_null where col1 is not null and col1 is null") { _ => } + "select l_orderkey from lineitem where l_comment is not null and l_comment is null") { _ => } checkLengthAndPlan(df, 0) } From c2b6a098b3d05d4142f0b7c9b86cf4b8e6d27c23 Mon Sep 17 00:00:00 2001 From: zjuwangg Date: Thu, 9 May 2024 19:16:40 +0800 Subject: [PATCH 4/6] try to reproduce data quality problem --- .../test/scala/org/apache/gluten/execution/TestOperator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala index c166b8b0acfb..abc9c8811063 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala @@ -108,7 +108,7 @@ class TestOperator extends VeloxWholeStageTransformerSuite { test("is_null and is_not_null coexist") { val df = runQueryAndCompare( - "select l_orderkey from lineitem where l_comment is not null and l_comment is null") { _ => } + "select l_orderkey from lineitem where l_comment is null and l_comment is not null") { _ => } checkLengthAndPlan(df, 0) } From fdb5fba2a2f5962d58192e34a1af522d40727d50 Mon Sep 17 00:00:00 2001 From: zjuwangg Date: Thu, 9 May 2024 11:44:59 +0800 Subject: [PATCH 5/6] fix in substrait plan --- cpp/velox/substrait/SubstraitToVeloxPlan.cc | 6 +++++- cpp/velox/substrait/SubstraitToVeloxPlan.h | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index 4b1a2543e7c3..26b0b8c57ed3 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -2036,6 +2036,7 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters( bool nullAllowed = filterInfo.nullAllowed_; bool isNull = filterInfo.isNull_; + bool existIsNullAndIsNotNull = filterInfo.existIsNullAndIsNotNull_; uint32_t rangeSize = std::max(filterInfo.lowerBounds_.size(), filterInfo.upperBounds_.size()); if constexpr (KIND == facebook::velox::TypeKind::HUGEINT) { @@ -2122,7 +2123,10 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters( // Handle null filtering. if (rangeSize == 0) { - if (!nullAllowed) { + // handle is not null and is null exists at same time + if (existIsNullAndIsNotNull) { + filters[common::Subfield(inputName)] = std::move(std::make_unique()); + } else if (!nullAllowed) { filters[common::Subfield(inputName)] = std::make_unique(); } else if (isNull) { filters[common::Subfield(inputName)] = std::make_unique(); diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h b/cpp/velox/substrait/SubstraitToVeloxPlan.h index dc97d2a4cf60..e758e320a5ed 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.h +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h @@ -316,6 +316,10 @@ class SubstraitToVeloxPlanConverter { if (!initialized_) { initialized_ = true; } + forbidsNullSet_ = true; + if (isNullSet_) { + existIsNullAndIsNotNull_ = true; + } } // Only null is allowed. @@ -325,6 +329,10 @@ class SubstraitToVeloxPlanConverter { if (!initialized_) { initialized_ = true; } + isNullSet_ = true; + if (forbidsNullSet_) { + existIsNullAndIsNotNull_ = true; + } } // Return the initialization status. @@ -375,6 +383,9 @@ class SubstraitToVeloxPlanConverter { bool nullAllowed_ = false; bool isNull_ = false; + bool forbidsNullSet_ = false; + bool isNullSet_ = false; + bool existIsNullAndIsNotNull_ = false; // If true, left bound will be exclusive. std::vector lowerExclusives_; From ef2b9805909e738036ace2c5ab740c64cf2612ff Mon Sep 17 00:00:00 2001 From: zjuwangg Date: Sat, 11 May 2024 10:36:35 +0800 Subject: [PATCH 6/6] address comments --- cpp/velox/substrait/SubstraitToVeloxPlan.cc | 2 +- cpp/velox/substrait/SubstraitToVeloxPlan.h | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index 26b0b8c57ed3..24abc5a6580d 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -2036,7 +2036,7 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters( bool nullAllowed = filterInfo.nullAllowed_; bool isNull = filterInfo.isNull_; - bool existIsNullAndIsNotNull = filterInfo.existIsNullAndIsNotNull_; + bool existIsNullAndIsNotNull = filterInfo.forbidsNullSet_ && filterInfo.isNullSet_; uint32_t rangeSize = std::max(filterInfo.lowerBounds_.size(), filterInfo.upperBounds_.size()); if constexpr (KIND == facebook::velox::TypeKind::HUGEINT) { diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h b/cpp/velox/substrait/SubstraitToVeloxPlan.h index e758e320a5ed..1bda6435eaee 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.h +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h @@ -317,9 +317,6 @@ class SubstraitToVeloxPlanConverter { initialized_ = true; } forbidsNullSet_ = true; - if (isNullSet_) { - existIsNullAndIsNotNull_ = true; - } } // Only null is allowed. @@ -330,9 +327,6 @@ class SubstraitToVeloxPlanConverter { initialized_ = true; } isNullSet_ = true; - if (forbidsNullSet_) { - existIsNullAndIsNotNull_ = true; - } } // Return the initialization status. @@ -385,7 +379,6 @@ class SubstraitToVeloxPlanConverter { bool isNull_ = false; bool forbidsNullSet_ = false; bool isNullSet_ = false; - bool existIsNullAndIsNotNull_ = false; // If true, left bound will be exclusive. std::vector lowerExclusives_;