From 7b8a621f888603c53b23c6599828495051342598 Mon Sep 17 00:00:00 2001 From: yikaifei Date: Tue, 29 Oct 2024 14:22:14 +0800 Subject: [PATCH] Velox backend support merge two aggregate to one complete mode aggregate --- .../backendsapi/velox/VeloxRuleApi.scala | 1 + .../HashAggregateExecTransformer.scala | 14 +- .../tpch-approved-plan/v1-bhj/spark33/15.txt | 74 ++-- .../tpch-approved-plan/v1-bhj/spark34/15.txt | 74 ++-- .../tpch-approved-plan/v1/spark32/13.txt | 178 ++++---- .../tpch-approved-plan/v1/spark32/14.txt | 110 +++-- .../tpch-approved-plan/v1/spark32/17.txt | 170 ++++---- .../tpch-approved-plan/v1/spark32/18.txt | 284 +++++++------ .../tpch-approved-plan/v1/spark32/19.txt | 106 +++-- .../tpch-approved-plan/v1/spark32/20.txt | 390 +++++++++--------- .../tpch-approved-plan/v1/spark32/3.txt | 180 ++++---- .../tpch-approved-plan/v1/spark33/11.txt | 152 ++++--- .../tpch-approved-plan/v1/spark33/13.txt | 178 ++++---- .../tpch-approved-plan/v1/spark33/14.txt | 110 +++-- .../tpch-approved-plan/v1/spark33/15.txt | 74 ++-- .../tpch-approved-plan/v1/spark33/17.txt | 170 ++++---- .../tpch-approved-plan/v1/spark33/18.txt | 284 +++++++------ .../tpch-approved-plan/v1/spark33/19.txt | 106 +++-- .../tpch-approved-plan/v1/spark33/20.txt | 382 +++++++++-------- .../tpch-approved-plan/v1/spark33/3.txt | 180 ++++---- .../tpch-approved-plan/v1/spark34/11.txt | 152 ++++--- .../tpch-approved-plan/v1/spark34/13.txt | 178 ++++---- .../tpch-approved-plan/v1/spark34/14.txt | 110 +++-- .../tpch-approved-plan/v1/spark34/15.txt | 74 ++-- .../tpch-approved-plan/v1/spark34/17.txt | 170 ++++---- .../tpch-approved-plan/v1/spark34/18.txt | 284 +++++++------ .../tpch-approved-plan/v1/spark34/19.txt | 106 +++-- .../tpch-approved-plan/v1/spark34/20.txt | 382 +++++++++-------- .../tpch-approved-plan/v1/spark34/3.txt | 180 ++++---- .../VeloxAggregateFunctionsSuite.scala | 1 + ...MergeTwoPhasesHashBaseAggregateSuite.scala | 198 +++++++++ docs/Configuration.md | 1 + 
.../MergeTwoPhasesHashBaseAggregate.scala | 5 +- .../utils/velox/VeloxTestSettings.scala | 1 + .../utils/velox/VeloxTestSettings.scala | 1 + .../utils/velox/VeloxTestSettings.scala | 1 + .../org/apache/gluten/GlutenConfig.scala | 10 + 37 files changed, 2535 insertions(+), 2536 deletions(-) create mode 100644 backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMergeTwoPhasesHashBaseAggregateSuite.scala rename {backends-clickhouse/src/main/scala/org/apache/gluten/extension => gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar}/MergeTwoPhasesHashBaseAggregate.scala (96%) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala index 3554bc5c9c01..ea20e0fa0094 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxRuleApi.scala @@ -54,6 +54,7 @@ private object VeloxRuleApi { injector.injectTransform(_ => PushDownInputFileExpression.PreOffload) injector.injectTransform(c => FallbackOnANSIMode.apply(c.session)) injector.injectTransform(c => FallbackMultiCodegens.apply(c.session)) + injector.injectTransform(c => MergeTwoPhasesHashBaseAggregate(c.session)) injector.injectTransform(_ => RewriteSubqueryBroadcast()) injector.injectTransform(c => BloomFilterMightContainJointRewriteRule.apply(c.session)) injector.injectTransform(c => ArrowScanReplaceRule.apply(c.session)) diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala index fe5e0d92d6d5..25e0aecfd5e0 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala @@ 
-214,7 +214,7 @@ abstract class HashAggregateExecTransformer( VeloxIntermediateData.getIntermediateTypeNode(aggregateFunction) ) aggregateNodeList.add(aggFunctionNode) - case Final => + case Final | Complete => val aggFunctionNode = ExpressionBuilder.makeAggregateFunction( VeloxAggregateFunctionsBuilder.create(args, aggregateFunction, aggregateMode), childrenNodeList, @@ -242,7 +242,7 @@ abstract class HashAggregateExecTransformer( aggregateFunction.inputAggBufferAttributes.head.nullable) ) aggregateNodeList.add(partialNode) - case Final => + case Final | Complete => val aggFunctionNode = ExpressionBuilder.makeAggregateFunction( VeloxAggregateFunctionsBuilder.create(args, aggregateFunction, aggregateMode), childrenNodeList, @@ -275,7 +275,7 @@ abstract class HashAggregateExecTransformer( expression.mode match { case Partial | PartialMerge => typeNodeList.add(VeloxIntermediateData.getIntermediateTypeNode(aggregateFunction)) - case Final => + case Final | Complete => typeNodeList.add( ConverterUtils .getTypeNode(aggregateFunction.dataType, aggregateFunction.nullable)) @@ -356,7 +356,7 @@ abstract class HashAggregateExecTransformer( // The process of handling the inconsistency in column types and order between // Spark and Velox is exactly the opposite of applyExtractStruct. aggregateExpression.mode match { - case PartialMerge | Final => + case PartialMerge | Final | Complete => val newInputAttributes = new ArrayBuffer[Attribute]() val childNodes = new JArrayList[ExpressionNode]() val (sparkOrders, sparkTypes) = @@ -467,7 +467,7 @@ abstract class HashAggregateExecTransformer( // by previous projection. 
childrenNodes.add(ExpressionBuilder.makeSelection(colIdx)) colIdx += 1 - case Partial => + case Partial | Complete => aggFunc.children.foreach { _ => childrenNodes.add(ExpressionBuilder.makeSelection(colIdx)) @@ -600,7 +600,7 @@ abstract class HashAggregateExecTransformer( } val aggregateFunc = aggExpr.aggregateFunction val childrenNodes = aggExpr.mode match { - case Partial => + case Partial | Complete => aggregateFunc.children.toList.map( expr => { ExpressionConverter @@ -784,7 +784,7 @@ case class HashAggregateExecPullOutHelper( expr.mode match { case Partial | PartialMerge => expr.aggregateFunction.aggBufferAttributes - case Final => + case Final | Complete => Seq(aggregateAttributes(index)) case other => throw new GlutenNotSupportException(s"Unsupported aggregate mode: $other.") diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt index d7603e49ffb6..f69dfcda1ea6 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/15.txt @@ -237,31 +237,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (68) +- == Final Plan == - VeloxColumnarToRow (60) - +- ^ RegularHashAggregateExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ ProjectExecTransformer (56) - +- ^ RegularHashAggregateExecTransformer (55) - +- ^ InputIteratorTransformer (54) - +- ShuffleQueryStage (52), Statistics(X) - +- ColumnarExchange (51) - +- VeloxResizeBatches (50) - +- ^ ProjectExecTransformer (48) - +- ^ FlushableHashAggregateExecTransformer (47) - +- ^ ProjectExecTransformer (46) - +- ^ FilterExecTransformer (45) - +- ^ Scan parquet (44) + VeloxColumnarToRow (59) + +- ^ RegularHashAggregateExecTransformer (57) + +- ^ 
ProjectExecTransformer (56) + +- ^ RegularHashAggregateExecTransformer (55) + +- ^ InputIteratorTransformer (54) + +- ShuffleQueryStage (52), Statistics(X) + +- ColumnarExchange (51) + +- VeloxResizeBatches (50) + +- ^ ProjectExecTransformer (48) + +- ^ FlushableHashAggregateExecTransformer (47) + +- ^ ProjectExecTransformer (46) + +- ^ FilterExecTransformer (45) + +- ^ Scan parquet (44) +- == Initial Plan == - HashAggregate (68) - +- HashAggregate (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- Project (63) - +- Filter (62) - +- Scan parquet (61) + HashAggregate (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- Filter (61) + +- Scan parquet (60) (44) Scan parquet @@ -326,71 +325,64 @@ Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedpri (57) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(58) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(59) WholeStageCodegenTransformer (X) +(58) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(60) VeloxColumnarToRow +(59) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(61) Scan parquet +(60) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(62) Filter +(61) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(63) 
Project +(62) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(64) HashAggregate +(63) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(65) Exchange +(64) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(65) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(67) HashAggregate +(66) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(68) HashAggregate +(67) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS 
max(total_revenue)#X] -(69) AdaptiveSparkPlan +(68) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt index 1ec03a2f9edd..14411a3054e3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark34/15.txt @@ -239,31 +239,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (68) +- == Final Plan == - VeloxColumnarToRow (60) - +- ^ RegularHashAggregateExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ ProjectExecTransformer (56) - +- ^ RegularHashAggregateExecTransformer (55) - +- ^ InputIteratorTransformer (54) - +- ShuffleQueryStage (52), Statistics(X) - +- ColumnarExchange (51) - +- VeloxResizeBatches (50) - +- ^ ProjectExecTransformer (48) - +- ^ FlushableHashAggregateExecTransformer (47) - +- ^ ProjectExecTransformer (46) - +- ^ FilterExecTransformer (45) - +- ^ Scan parquet (44) + VeloxColumnarToRow (59) + +- ^ RegularHashAggregateExecTransformer (57) + +- ^ ProjectExecTransformer (56) + +- ^ RegularHashAggregateExecTransformer (55) + +- ^ InputIteratorTransformer (54) + +- ShuffleQueryStage (52), Statistics(X) + +- ColumnarExchange (51) + +- VeloxResizeBatches (50) + +- ^ ProjectExecTransformer (48) + +- ^ FlushableHashAggregateExecTransformer (47) + +- ^ ProjectExecTransformer (46) + +- ^ FilterExecTransformer (45) + +- ^ Scan parquet (44) +- == Initial Plan == - HashAggregate (68) - +- HashAggregate (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- Project (63) - +- Filter (62) - +- Scan parquet (61) + HashAggregate (67) + +- HashAggregate (66) + +- 
HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- Filter (61) + +- Scan parquet (60) (44) Scan parquet @@ -328,71 +327,64 @@ Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] (57) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(58) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(59) WholeStageCodegenTransformer (X) +(58) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(60) VeloxColumnarToRow +(59) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(61) Scan parquet +(60) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(62) Filter +(61) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(63) Project +(62) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(64) HashAggregate +(63) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(65) Exchange +(64) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(65) HashAggregate Input [3]: 
[l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(67) HashAggregate +(66) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(68) HashAggregate +(67) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(69) AdaptiveSparkPlan +(68) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt index edbcb50206df..41522ce3cbde 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt @@ -1,55 +1,54 @@ == Physical Plan == -AdaptiveSparkPlan (58) +AdaptiveSparkPlan (57) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ SortExecTransformer (38) - +- ^ InputIteratorTransformer (37) - +- ShuffleQueryStage (35) - +- ColumnarExchange (34) - +- VeloxResizeBatches (33) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ ProjectExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ RegularHashAggregateExecTransformer (20) - +- ^ ProjectExecTransformer (19) - +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) - :- ^ InputIteratorTransformer (8) - : +- ShuffleQueryStage (6) 
- : +- ColumnarExchange (5) - : +- VeloxResizeBatches (4) - : +- ^ ProjectExecTransformer (2) - : +- ^ Scan parquet (1) - +- ^ InputIteratorTransformer (17) - +- ShuffleQueryStage (15) - +- ColumnarExchange (14) - +- VeloxResizeBatches (13) - +- ^ ProjectExecTransformer (11) - +- ^ FilterExecTransformer (10) - +- ^ Scan parquet (9) + VeloxColumnarToRow (39) + +- ^ SortExecTransformer (37) + +- ^ InputIteratorTransformer (36) + +- ShuffleQueryStage (34) + +- ColumnarExchange (33) + +- VeloxResizeBatches (32) + +- ^ RegularHashAggregateExecTransformer (30) + +- ^ InputIteratorTransformer (29) + +- ShuffleQueryStage (27) + +- ColumnarExchange (26) + +- VeloxResizeBatches (25) + +- ^ ProjectExecTransformer (23) + +- ^ FlushableHashAggregateExecTransformer (22) + +- ^ ProjectExecTransformer (21) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ ProjectExecTransformer (19) + +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) + :- ^ InputIteratorTransformer (8) + : +- ShuffleQueryStage (6) + : +- ColumnarExchange (5) + : +- VeloxResizeBatches (4) + : +- ^ ProjectExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (17) + +- ShuffleQueryStage (15) + +- ColumnarExchange (14) + +- VeloxResizeBatches (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ Scan parquet (9) +- == Initial Plan == - Sort (57) - +- Exchange (56) - +- HashAggregate (55) - +- Exchange (54) - +- HashAggregate (53) - +- HashAggregate (52) - +- HashAggregate (51) - +- Project (50) - +- SortMergeJoin LeftOuter (49) - :- Sort (43) - : +- Exchange (42) - : +- Scan parquet (41) - +- Sort (48) - +- Exchange (47) - +- Project (46) - +- Filter (45) - +- Scan parquet (44) + Sort (56) + +- Exchange (55) + +- HashAggregate (54) + +- Exchange (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin LeftOuter (48) + :- Sort (42) + : +- Exchange (41) + : +- Scan parquet (40) + +- Sort (47) + +- 
Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -133,180 +132,173 @@ Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] (20) RegularHashAggregateExecTransformer Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] -Functions [1]: [partial_count(o_orderkey#X)] -Aggregate Attributes [1]: [count#X] -Results [2]: [c_custkey#X, count#X] - -(21) RegularHashAggregateExecTransformer -Input [2]: [c_custkey#X, count#X] -Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [2]: [c_custkey#X, count(o_orderkey#X)#X] -(22) ProjectExecTransformer +(21) ProjectExecTransformer Output [1]: [count(o_orderkey#X)#X AS c_count#X] Input [2]: [c_custkey#X, count(o_orderkey#X)#X] -(23) FlushableHashAggregateExecTransformer +(22) FlushableHashAggregateExecTransformer Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(24) ProjectExecTransformer +(23) ProjectExecTransformer Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] Input [2]: [c_count#X, count#X] -(25) WholeStageCodegenTransformer (X) +(24) WholeStageCodegenTransformer (X) Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: false -(26) VeloxResizeBatches +(25) VeloxResizeBatches Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: X, X -(27) ColumnarExchange +(26) ColumnarExchange Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [shuffle_writer_type=hash] -(28) ShuffleQueryStage +(27) ShuffleQueryStage Output [2]: [c_count#X, count#X] Arguments: X -(29) InputAdapter +(28) InputAdapter Input [2]: [c_count#X, count#X] -(30) InputIteratorTransformer +(29) InputIteratorTransformer Input [2]: [c_count#X, count#X] -(31) RegularHashAggregateExecTransformer +(30) 
RegularHashAggregateExecTransformer Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(32) WholeStageCodegenTransformer (X) +(31) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(33) VeloxResizeBatches +(32) VeloxResizeBatches Input [2]: [c_count#X, custdist#X] Arguments: X, X -(34) ColumnarExchange +(33) ColumnarExchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(35) ShuffleQueryStage +(34) ShuffleQueryStage Output [2]: [c_count#X, custdist#X] Arguments: X -(36) InputAdapter +(35) InputAdapter Input [2]: [c_count#X, custdist#X] -(37) InputIteratorTransformer +(36) InputIteratorTransformer Input [2]: [c_count#X, custdist#X] -(38) SortExecTransformer +(37) SortExecTransformer Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [2]: [c_count#X, custdist#X] -(41) Scan parquet +(40) Scan parquet Output [1]: [c_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(41) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Sort +(42) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(44) Scan parquet +(43) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : 
((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(46) Project +(45) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(47) Exchange +(46) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [2]: [o_orderkey#X, o_custkey#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(49) SortMergeJoin +(48) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(50) Project +(49) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(51) HashAggregate +(50) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(52) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(53) HashAggregate +(52) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(54) Exchange +(53) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) HashAggregate +(54) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(56) Exchange +(55) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) Sort +(56) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, 
c_count#X DESC NULLS LAST], true, 0 -(58) AdaptiveSparkPlan +(57) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt index 91df642595a3..1d39e91be644 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt @@ -1,40 +1,39 @@ == Physical Plan == -AdaptiveSparkPlan (39) +AdaptiveSparkPlan (38) +- == Final Plan == - VeloxColumnarToRow (25) - +- ^ ProjectExecTransformer (23) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ Scan parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ Scan parquet (10) + VeloxColumnarToRow (24) + +- ^ ProjectExecTransformer (22) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate 
(38) - +- HashAggregate (37) - +- Project (36) - +- SortMergeJoin Inner (35) - :- Sort (30) - : +- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Sort (34) - +- Exchange (33) - +- Filter (32) - +- Scan parquet (31) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -123,93 +122,86 @@ Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] (21) RegularHashAggregateExecTransformer Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] Keys: [] -Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] -Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Keys: [] -Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] +Functions [2]: [sum(_pre_X#X), sum(_pre_X#X)] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, 
sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(23) ProjectExecTransformer +(22) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - 
promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(24) WholeStageCodegenTransformer (X) +(23) WholeStageCodegenTransformer (X) Input [1]: [promo_revenue#X] Arguments: false -(25) VeloxColumnarToRow +(24) VeloxColumnarToRow Input [1]: [promo_revenue#X] -(26) Scan parquet +(25) Scan parquet Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] ReadSchema: struct -(27) Filter +(26) Filter Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) -(28) Project +(27) Project Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(29) Exchange +(28) Exchange Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Sort +(29) Sort Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(31) Scan parquet +(30) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(32) Filter +(31) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(33) Exchange +(32) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Sort +(33) Sort Input [2]: [p_partkey#X, p_type#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(35) SortMergeJoin +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(36) Project +(35) Project Output 
[3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(37) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(38) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - 
promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] -(39) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt index fe915989f9e6..e8d00f46129e 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt @@ -1,62 +1,61 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (62) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ ProjectExecTransformer (38) - +- ^ RegularHashAggregateExecTransformer (37) - +- ^ RegularHashAggregateExecTransformer (36) - +- ^ ProjectExecTransformer (35) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) - :- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) 
- : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ Scan parquet (10) - +- ^ FilterExecTransformer (33) - +- ^ ProjectExecTransformer (32) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ FilterExecTransformer (22) - +- ^ Scan parquet (21) + VeloxColumnarToRow (39) + +- ^ ProjectExecTransformer (37) + +- ^ RegularHashAggregateExecTransformer (36) + +- ^ ProjectExecTransformer (35) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) + :- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ Scan parquet (10) + +- ^ FilterExecTransformer (33) + +- ^ ProjectExecTransformer (32) + +- ^ RegularHashAggregateExecTransformer (31) + +- ^ InputIteratorTransformer (30) + +- ShuffleQueryStage (28) + +- ColumnarExchange (27) + +- VeloxResizeBatches (26) + +- ^ ProjectExecTransformer (24) + +- ^ FlushableHashAggregateExecTransformer (23) + +- ^ FilterExecTransformer (22) + +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (62) - +- HashAggregate (61) - +- Project (60) - +- SortMergeJoin Inner (59) - :- Project (51) - : +- SortMergeJoin Inner (50) - : :- Sort (44) - : : +- Exchange (43) - : : 
+- Filter (42) - : : +- Scan parquet (41) - : +- Sort (49) - : +- Exchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Sort (58) - +- Filter (57) - +- HashAggregate (56) - +- Exchange (55) - +- HashAggregate (54) - +- Filter (53) - +- Scan parquet (52) + HashAggregate (61) + +- HashAggregate (60) + +- Project (59) + +- SortMergeJoin Inner (58) + :- Project (50) + : +- SortMergeJoin Inner (49) + : :- Sort (43) + : : +- Exchange (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Project (46) + : +- Filter (45) + : +- Scan parquet (44) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) Scan parquet @@ -213,139 +212,132 @@ Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity) (36) RegularHashAggregateExecTransformer Input [1]: [l_extendedprice#X] Keys: [] -Functions [1]: [partial_sum(l_extendedprice#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(37) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [sum(l_extendedprice#X)#X] -(38) ProjectExecTransformer +(37) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] Input [1]: [sum(l_extendedprice#X)#X] -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [1]: [avg_yearly#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [1]: [avg_yearly#X] -(41) Scan parquet +(40) Scan parquet Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] ReadSchema: struct -(42) Filter +(41) Filter Input [3]: [l_partkey#X, 
l_quantity#X, l_extendedprice#X] Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) -(43) Exchange +(42) Exchange Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(43) Sort Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(45) Scan parquet +(44) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(46) Filter +(45) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(47) Project +(46) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(48) Exchange +(47) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(48) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(50) SortMergeJoin +(49) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(51) Project +(50) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(52) Scan parquet +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(53) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(54) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, 
count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(55) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7), true) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(57) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(58) Sort +(57) Sort Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(59) SortMergeJoin +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(60) Project +(59) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(61) HashAggregate +(60) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(62) HashAggregate +(61) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] -(63) AdaptiveSparkPlan +(62) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt index a2345ca9ed35..d3397d005bda 100644 --- 
a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt @@ -1,102 +1,101 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (109) +- == Final Plan == - VeloxColumnarToRow (70) - +- TakeOrderedAndProjectExecTransformer (69) - +- ^ RegularHashAggregateExecTransformer (67) - +- ^ RegularHashAggregateExecTransformer (66) - +- ^ ProjectExecTransformer (65) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) - :- ^ InputIteratorTransformer (46) - : +- ShuffleQueryStage (44) - : +- ColumnarExchange (43) - : +- VeloxResizeBatches (42) - : +- ^ ProjectExecTransformer (40) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (38) - : +- ShuffleQueryStage (36) - : +- ColumnarExchange (35) - : +- VeloxResizeBatches (34) - : +- ^ ProjectExecTransformer (32) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) - : :- ^ InputIteratorTransformer (18) - : : +- ShuffleQueryStage (16) - : : +- ColumnarExchange (15) - : : +- VeloxResizeBatches (14) - : : +- ^ ProjectExecTransformer (12) - : : +- ^ FilterExecTransformer (11) - : : +- ^ Scan parquet (10) - : +- ^ ProjectExecTransformer (30) - : +- ^ FilterExecTransformer (29) - : +- ^ RegularHashAggregateExecTransformer (28) - : +- ^ InputIteratorTransformer (27) - : +- ShuffleQueryStage (25) - : +- ColumnarExchange (24) - : +- VeloxResizeBatches (23) - : +- ^ ProjectExecTransformer (21) - : +- ^ FlushableHashAggregateExecTransformer (20) - : +- ^ Scan parquet (19) - +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) - :- ^ InputIteratorTransformer (55) - : +- ShuffleQueryStage (53) - : +- ColumnarExchange (52) - : +- 
VeloxResizeBatches (51) - : +- ^ ProjectExecTransformer (49) - : +- ^ FilterExecTransformer (48) - : +- ^ Scan parquet (47) - +- ^ ProjectExecTransformer (62) - +- ^ FilterExecTransformer (61) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ InputIteratorTransformer (59) - +- ShuffleQueryStage (57) - +- ReusedExchange (56) + VeloxColumnarToRow (69) + +- TakeOrderedAndProjectExecTransformer (68) + +- ^ RegularHashAggregateExecTransformer (66) + +- ^ ProjectExecTransformer (65) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) + :- ^ InputIteratorTransformer (46) + : +- ShuffleQueryStage (44) + : +- ColumnarExchange (43) + : +- VeloxResizeBatches (42) + : +- ^ ProjectExecTransformer (40) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (38) + : +- ShuffleQueryStage (36) + : +- ColumnarExchange (35) + : +- VeloxResizeBatches (34) + : +- ^ ProjectExecTransformer (32) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) + : :- ^ InputIteratorTransformer (18) + : : +- ShuffleQueryStage (16) + : : +- ColumnarExchange (15) + : : +- VeloxResizeBatches (14) + : : +- ^ ProjectExecTransformer (12) + : : +- ^ FilterExecTransformer (11) + : : +- ^ Scan parquet (10) + : +- ^ ProjectExecTransformer (30) + : +- ^ FilterExecTransformer (29) + : +- ^ RegularHashAggregateExecTransformer (28) + : +- ^ InputIteratorTransformer (27) + : +- ShuffleQueryStage (25) + : +- ColumnarExchange (24) + : +- VeloxResizeBatches (23) + : +- ^ ProjectExecTransformer (21) + : +- ^ FlushableHashAggregateExecTransformer (20) + : +- ^ Scan parquet (19) + +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) + :- ^ InputIteratorTransformer (55) + : +- ShuffleQueryStage (53) + : +- 
ColumnarExchange (52) + : +- VeloxResizeBatches (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ Scan parquet (47) + +- ^ ProjectExecTransformer (62) + +- ^ FilterExecTransformer (61) + +- ^ RegularHashAggregateExecTransformer (60) + +- ^ InputIteratorTransformer (59) + +- ShuffleQueryStage (57) + +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (109) - +- HashAggregate (108) - +- HashAggregate (107) - +- Project (106) - +- SortMergeJoin Inner (105) - :- Sort (92) - : +- Exchange (91) - : +- Project (90) - : +- SortMergeJoin Inner (89) - : :- Sort (74) - : : +- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Sort (88) - : +- Exchange (87) - : +- SortMergeJoin LeftSemi (86) - : :- Sort (78) - : : +- Exchange (77) - : : +- Filter (76) - : : +- Scan parquet (75) - : +- Sort (85) - : +- Project (84) - : +- Filter (83) - : +- HashAggregate (82) - : +- Exchange (81) - : +- HashAggregate (80) - : +- Scan parquet (79) - +- SortMergeJoin LeftSemi (104) - :- Sort (96) - : +- Exchange (95) - : +- Filter (94) - : +- Scan parquet (93) - +- Sort (103) - +- Project (102) - +- Filter (101) - +- HashAggregate (100) - +- Exchange (99) - +- HashAggregate (98) - +- Scan parquet (97) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- SortMergeJoin Inner (104) + :- Sort (91) + : +- Exchange (90) + : +- Project (89) + : +- SortMergeJoin Inner (88) + : :- Sort (73) + : : +- Exchange (72) + : : +- Filter (71) + : : +- Scan parquet (70) + : +- Sort (87) + : +- Exchange (86) + : +- SortMergeJoin LeftSemi (85) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (84) + : +- Project (83) + : +- Filter (82) + : +- HashAggregate (81) + : +- Exchange (80) + : +- HashAggregate (79) + : +- Scan parquet (78) + +- SortMergeJoin LeftSemi (103) + :- Sort (95) + : +- Exchange (94) + : +- Filter (93) + : +- Scan parquet (92) + 
+- Sort (102) + +- Project (101) + +- Filter (100) + +- HashAggregate (99) + +- Exchange (98) + +- HashAggregate (97) + +- Scan parquet (96) (1) Scan parquet @@ -371,219 +370,212 @@ Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, (66) RegularHashAggregateExecTransformer Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] - -(67) RegularHashAggregateExecTransformer -Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(68) WholeStageCodegenTransformer (X) +(67) WholeStageCodegenTransformer (X) Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: false -(69) TakeOrderedAndProjectExecTransformer +(68) TakeOrderedAndProjectExecTransformer Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 -(70) VeloxColumnarToRow +(69) VeloxColumnarToRow Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(71) Scan parquet +(70) Scan parquet Output [2]: [c_custkey#X, c_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey)] ReadSchema: struct -(72) Filter +(71) Filter 
Input [2]: [c_custkey#X, c_name#X] Condition : isnotnull(c_custkey#X) -(73) Exchange +(72) Exchange Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Sort +(73) Sort Input [2]: [c_custkey#X, c_name#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(75) Scan parquet +(74) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(76) Filter +(75) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(77) Exchange +(76) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Sort +(77) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(79) Scan parquet +(78) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(80) HashAggregate +(79) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(81) Exchange +(80) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) HashAggregate +(81) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(83) Filter +(82) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(84) 
Project +(83) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(85) Sort +(84) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(86) SortMergeJoin +(85) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(87) Exchange +(86) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Sort +(87) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(89) SortMergeJoin +(88) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(90) Project +(89) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(91) Exchange +(90) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) Sort +(91) Sort Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(93) Scan parquet +(92) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(94) Filter +(93) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(95) Exchange +(94) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Sort +(95) Sort Input [2]: [l_orderkey#X, l_quantity#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(97) Scan parquet +(96) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct 
-(98) HashAggregate +(97) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(99) Exchange +(98) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) HashAggregate +(99) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(101) Filter +(100) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(102) Project +(101) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(103) Sort +(102) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(104) SortMergeJoin +(103) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(105) SortMergeJoin +(104) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(106) Project +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(107) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(108) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, 
o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(109) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(110) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt index d06cdb0fecff..1126dd6ad82e 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt @@ -1,39 +1,38 @@ == Physical Plan == -AdaptiveSparkPlan (38) +AdaptiveSparkPlan (37) +- == Final Plan == - VeloxColumnarToRow (24) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ Scan parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ Scan parquet (10) + VeloxColumnarToRow 
(23) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (37) - +- HashAggregate (36) - +- Project (35) - +- SortMergeJoin Inner (34) - :- Sort (29) - : +- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Sort (33) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) Scan parquet @@ -122,89 +121,82 @@ Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partke (21) RegularHashAggregateExecTransformer Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - 
promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(23) WholeStageCodegenTransformer (X) +(22) WholeStageCodegenTransformer (X) Input [1]: [revenue#X] Arguments: false -(24) VeloxColumnarToRow +(23) VeloxColumnarToRow Input [1]: [revenue#X] -(25) Scan parquet +(24) Scan parquet Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] ReadSchema: struct -(26) Filter +(25) Filter Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) -(27) Project +(26) Project Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] -(28) Exchange +(27) Exchange Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) 
Sort +(28) Sort Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(30) Scan parquet +(29) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(31) Filter +(30) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(32) Exchange +(31) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) Sort +(32) Sort Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(34) SortMergeJoin +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) 
AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(35) Project +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(36) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(37) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(38) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt index 6fa1271a5a39..b0acefbbb5ad 100644 --- 
a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt @@ -1,20 +1,20 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (145) +- == Final Plan == - VeloxColumnarToRow (96) - +- ^ SortExecTransformer (94) - +- ^ InputIteratorTransformer (93) - +- ShuffleQueryStage (91) - +- ColumnarExchange (90) - +- VeloxResizeBatches (89) - +- ^ ProjectExecTransformer (87) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (86) - :- ^ InputIteratorTransformer (76) - : +- ShuffleQueryStage (74) - : +- ColumnarExchange (73) - : +- VeloxResizeBatches (72) - : +- ^ ProjectExecTransformer (70) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (69) + VeloxColumnarToRow (95) + +- ^ SortExecTransformer (93) + +- ^ InputIteratorTransformer (92) + +- ShuffleQueryStage (90) + +- ColumnarExchange (89) + +- VeloxResizeBatches (88) + +- ^ ProjectExecTransformer (86) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) + :- ^ InputIteratorTransformer (75) + : +- ShuffleQueryStage (73) + : +- ColumnarExchange (72) + : +- VeloxResizeBatches (71) + : +- ^ ProjectExecTransformer (69) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (68) : :- ^ InputIteratorTransformer (9) : : +- ShuffleQueryStage (7) : : +- ColumnarExchange (6) @@ -22,12 +22,12 @@ AdaptiveSparkPlan (146) : : +- ^ ProjectExecTransformer (3) : : +- ^ FilterExecTransformer (2) : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (68) - : +- ShuffleQueryStage (66) - : +- ColumnarExchange (65) - : +- VeloxResizeBatches (64) - : +- ^ ProjectExecTransformer (62) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (61) + : +- ^ InputIteratorTransformer (67) + : +- ShuffleQueryStage (65) + : +- ColumnarExchange (64) + : +- VeloxResizeBatches (63) + : +- ^ ProjectExecTransformer (61) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (60) : :- ^ InputIteratorTransformer (35) : : 
+- ShuffleQueryStage (33) : : +- ColumnarExchange (32) @@ -48,83 +48,82 @@ AdaptiveSparkPlan (146) : : +- ^ ProjectExecTransformer (21) : : +- ^ FilterExecTransformer (20) : : +- ^ Scan parquet (19) - : +- ^ InputIteratorTransformer (60) - : +- ShuffleQueryStage (58) - : +- ColumnarExchange (57) - : +- VeloxResizeBatches (56) - : +- ^ ProjectExecTransformer (54) - : +- ^ FilterExecTransformer (53) - : +- ^ ProjectExecTransformer (52) - : +- ^ RegularHashAggregateExecTransformer (51) - : +- ^ RegularHashAggregateExecTransformer (50) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) - : :- ^ InputIteratorTransformer (44) - : : +- ShuffleQueryStage (42) - : : +- ColumnarExchange (41) - : : +- VeloxResizeBatches (40) - : : +- ^ ProjectExecTransformer (38) - : : +- ^ FilterExecTransformer (37) - : : +- ^ Scan parquet (36) - : +- ^ InputIteratorTransformer (48) - : +- ShuffleQueryStage (46) - : +- ReusedExchange (45) - +- ^ InputIteratorTransformer (85) - +- ShuffleQueryStage (83) - +- ColumnarExchange (82) - +- VeloxResizeBatches (81) - +- ^ ProjectExecTransformer (79) - +- ^ FilterExecTransformer (78) - +- ^ Scan parquet (77) + : +- ^ InputIteratorTransformer (59) + : +- ShuffleQueryStage (57) + : +- ColumnarExchange (56) + : +- VeloxResizeBatches (55) + : +- ^ ProjectExecTransformer (53) + : +- ^ FilterExecTransformer (52) + : +- ^ ProjectExecTransformer (51) + : +- ^ RegularHashAggregateExecTransformer (50) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) + : :- ^ InputIteratorTransformer (44) + : : +- ShuffleQueryStage (42) + : : +- ColumnarExchange (41) + : : +- VeloxResizeBatches (40) + : : +- ^ ProjectExecTransformer (38) + : : +- ^ FilterExecTransformer (37) + : : +- ^ Scan parquet (36) + : +- ^ InputIteratorTransformer (48) + : +- ShuffleQueryStage (46) + : +- ReusedExchange (45) + +- ^ InputIteratorTransformer (84) + +- ShuffleQueryStage (82) + +- ColumnarExchange (81) + +- VeloxResizeBatches (80) + +- ^ ProjectExecTransformer 
(78) + +- ^ FilterExecTransformer (77) + +- ^ Scan parquet (76) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- Project (143) - +- SortMergeJoin Inner (142) - :- Sort (136) - : +- Exchange (135) - : +- Project (134) - : +- SortMergeJoin LeftSemi (133) - : :- Sort (100) - : : +- Exchange (99) - : : +- Filter (98) - : : +- Scan parquet (97) - : +- Sort (132) - : +- Exchange (131) - : +- Project (130) - : +- SortMergeJoin Inner (129) - : :- Sort (112) - : : +- Exchange (111) - : : +- SortMergeJoin LeftSemi (110) - : : :- Sort (104) - : : : +- Exchange (103) - : : : +- Filter (102) - : : : +- Scan parquet (101) - : : +- Sort (109) - : : +- Exchange (108) - : : +- Project (107) - : : +- Filter (106) - : : +- Scan parquet (105) - : +- Sort (128) - : +- Exchange (127) - : +- Filter (126) - : +- HashAggregate (125) - : +- HashAggregate (124) - : +- SortMergeJoin LeftSemi (123) - : :- Sort (117) - : : +- Exchange (116) - : : +- Project (115) - : : +- Filter (114) - : : +- Scan parquet (113) - : +- Sort (122) - : +- Exchange (121) - : +- Project (120) - : +- Filter (119) - : +- Scan parquet (118) - +- Sort (141) - +- Exchange (140) - +- Project (139) - +- Filter (138) - +- Scan parquet (137) + Sort (144) + +- Exchange (143) + +- Project (142) + +- SortMergeJoin Inner (141) + :- Sort (135) + : +- Exchange (134) + : +- Project (133) + : +- SortMergeJoin LeftSemi (132) + : :- Sort (99) + : : +- Exchange (98) + : : +- Filter (97) + : : +- Scan parquet (96) + : +- Sort (131) + : +- Exchange (130) + : +- Project (129) + : +- SortMergeJoin Inner (128) + : :- Sort (111) + : : +- Exchange (110) + : : +- SortMergeJoin LeftSemi (109) + : : :- Sort (103) + : : : +- Exchange (102) + : : : +- Filter (101) + : : : +- Scan parquet (100) + : : +- Sort (108) + : : +- Exchange (107) + : : +- Project (106) + : : +- Filter (105) + : : +- Scan parquet (104) + : +- Sort (127) + : +- Exchange (126) + : +- Filter (125) + : +- HashAggregate (124) + : +- HashAggregate (123) + : +- 
SortMergeJoin LeftSemi (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- Filter (113) + : : +- Scan parquet (112) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (140) + +- Exchange (139) + +- Project (138) + +- Filter (137) + +- Scan parquet (136) (1) Scan parquet @@ -327,417 +326,410 @@ Join condition: None (50) RegularHashAggregateExecTransformer Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] - -(51) RegularHashAggregateExecTransformer -Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(52) ProjectExecTransformer +(51) ProjectExecTransformer Output [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(53) FilterExecTransformer +(52) FilterExecTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: isnotnull((0.5 * sum(l_quantity))#X) -(54) ProjectExecTransformer +(53) ProjectExecTransformer Output [4]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(55) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: false -(56) VeloxResizeBatches +(55) VeloxResizeBatches Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X, X 
-(57) ColumnarExchange +(56) ColumnarExchange Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(58) ShuffleQueryStage +(57) ShuffleQueryStage Output [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X -(59) InputAdapter +(58) InputAdapter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(60) InputIteratorTransformer +(59) InputIteratorTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(61) ShuffledHashJoinExecTransformer +(60) ShuffledHashJoinExecTransformer Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(62) ProjectExecTransformer +(61) ProjectExecTransformer Output [2]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: false -(64) VeloxResizeBatches +(63) VeloxResizeBatches Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: X, X -(65) ColumnarExchange +(64) ColumnarExchange Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(66) ShuffleQueryStage +(65) ShuffleQueryStage Output [1]: [ps_suppkey#X] Arguments: X -(67) InputAdapter +(66) InputAdapter Input [1]: [ps_suppkey#X] -(68) InputIteratorTransformer +(67) InputIteratorTransformer Input [1]: [ps_suppkey#X] -(69) ShuffledHashJoinExecTransformer +(68) ShuffledHashJoinExecTransformer Left keys [1]: [s_suppkey#X] Right 
keys [1]: [ps_suppkey#X] Join condition: None -(70) ProjectExecTransformer +(69) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(71) WholeStageCodegenTransformer (X) +(70) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: false -(72) VeloxResizeBatches +(71) VeloxResizeBatches Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: X, X -(73) ColumnarExchange +(72) ColumnarExchange Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(74) ShuffleQueryStage +(73) ShuffleQueryStage Output [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: X -(75) InputAdapter +(74) InputAdapter Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(76) InputIteratorTransformer +(75) InputIteratorTransformer Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(77) Scan parquet +(76) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(78) FilterExecTransformer +(77) FilterExecTransformer Input [2]: [n_nationkey#X, n_name#X] Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(79) ProjectExecTransformer +(78) ProjectExecTransformer Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(80) WholeStageCodegenTransformer (X) +(79) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: false -(81) VeloxResizeBatches +(80) VeloxResizeBatches Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: X, X -(82) 
ColumnarExchange +(81) ColumnarExchange Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(83) ShuffleQueryStage +(82) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(84) InputAdapter +(83) InputAdapter Input [1]: [n_nationkey#X] -(85) InputIteratorTransformer +(84) InputIteratorTransformer Input [1]: [n_nationkey#X] -(86) ShuffledHashJoinExecTransformer +(85) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(87) ProjectExecTransformer +(86) ProjectExecTransformer Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(88) WholeStageCodegenTransformer (X) +(87) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(89) VeloxResizeBatches +(88) VeloxResizeBatches Input [2]: [s_name#X, s_address#X] Arguments: X, X -(90) ColumnarExchange +(89) ColumnarExchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(91) ShuffleQueryStage +(90) ShuffleQueryStage Output [2]: [s_name#X, s_address#X] Arguments: X -(92) InputAdapter +(91) InputAdapter Input [2]: [s_name#X, s_address#X] -(93) InputIteratorTransformer +(92) InputIteratorTransformer Input [2]: [s_name#X, s_address#X] -(94) SortExecTransformer +(93) SortExecTransformer Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(95) WholeStageCodegenTransformer (X) +(94) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(96) VeloxColumnarToRow +(95) VeloxColumnarToRow Input [2]: [s_name#X, s_address#X] -(97) Scan parquet +(96) Scan parquet Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: 
[IsNotNull(s_nationkey)] ReadSchema: struct -(98) Filter +(97) Filter Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Condition : isnotnull(s_nationkey#X) -(99) Exchange +(98) Exchange Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Sort +(99) Sort Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(101) Scan parquet +(100) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(102) Filter +(101) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(103) Exchange +(102) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) Sort +(103) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 -(105) Scan parquet +(104) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(106) Filter +(105) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(107) Project +(106) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(108) Exchange +(107) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(108) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(110) SortMergeJoin +(109) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(111) 
Exchange +(110) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Sort +(111) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 -(113) Scan parquet +(112) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(114) Filter +(113) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(115) Project +(114) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(116) Exchange +(115) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Sort +(116) Sort Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(118) Scan parquet +(117) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(119) Filter +(118) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(120) Project +(119) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(121) Exchange +(120) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(121) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X 
ASC NULLS FIRST], false, 0 -(123) SortMergeJoin +(122) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(124) HashAggregate +(123) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(125) HashAggregate +(124) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(126) Filter +(125) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(127) Exchange +(126) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(128) Sort +(127) Sort Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 -(129) SortMergeJoin +(128) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(130) Project +(129) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(131) Exchange +(130) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(131) Sort Input [1]: [ps_suppkey#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(133) SortMergeJoin +(132) SortMergeJoin Left keys [1]: 
[s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(134) Project +(133) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(135) Exchange +(134) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) Sort +(135) Sort Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(137) Scan parquet +(136) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(138) Filter +(137) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(139) Project +(138) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(140) Exchange +(139) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(141) Sort +(140) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(142) SortMergeJoin +(141) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(143) Project +(142) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(144) Exchange +(143) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(144) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(146) AdaptiveSparkPlan +(145) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt 
b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt index 3b9b7fd93b96..23ecfb9fe5b9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt @@ -1,64 +1,63 @@ == Physical Plan == -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (66) +- == Final Plan == - VeloxColumnarToRow (43) - +- TakeOrderedAndProjectExecTransformer (42) - +- ^ ProjectExecTransformer (40) - +- ^ RegularHashAggregateExecTransformer (39) - +- ^ RegularHashAggregateExecTransformer (38) - +- ^ ProjectExecTransformer (37) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) - :- ^ InputIteratorTransformer (26) - : +- ShuffleQueryStage (24) - : +- ColumnarExchange (23) - : +- VeloxResizeBatches (22) - : +- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ Scan parquet (10) - +- ^ InputIteratorTransformer (35) - +- ShuffleQueryStage (33) - +- ColumnarExchange (32) - +- VeloxResizeBatches (31) - +- ^ ProjectExecTransformer (29) - +- ^ FilterExecTransformer (28) - +- ^ Scan parquet (27) + VeloxColumnarToRow (42) + +- TakeOrderedAndProjectExecTransformer (41) + +- ^ ProjectExecTransformer (39) + +- ^ RegularHashAggregateExecTransformer (38) + +- ^ ProjectExecTransformer (37) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) + :- ^ InputIteratorTransformer (26) + : +- ShuffleQueryStage (24) + : +- ColumnarExchange (23) + : +- VeloxResizeBatches (22) + : +- ^ ProjectExecTransformer (20) + : +- ^ 
ShuffledHashJoinExecTransformer Inner BuildLeft (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ Scan parquet (10) + +- ^ InputIteratorTransformer (35) + +- ShuffleQueryStage (33) + +- ColumnarExchange (32) + +- VeloxResizeBatches (31) + +- ^ ProjectExecTransformer (29) + +- ^ FilterExecTransformer (28) + +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Project (63) - +- SortMergeJoin Inner (62) - :- Sort (56) - : +- Exchange (55) - : +- Project (54) - : +- SortMergeJoin Inner (53) - : :- Sort (48) - : : +- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Sort (52) - : +- Exchange (51) - : +- Filter (50) - : +- Scan parquet (49) - +- Sort (61) - +- Exchange (60) - +- Project (59) - +- Filter (58) - +- Scan parquet (57) + TakeOrderedAndProject (65) + +- HashAggregate (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (55) + : +- Exchange (54) + : +- Project (53) + : +- SortMergeJoin Inner (52) + : :- Sort (47) + : : +- Exchange (46) + : : +- Project (45) + : : +- Filter (44) + : : +- Scan parquet (43) + : +- Sort (51) + : +- Exchange (50) + : +- Filter (49) + : +- Scan parquet (48) + +- Sort (60) + +- Exchange (59) + +- Project (58) + +- Filter (57) + +- Scan parquet (56) (1) Scan parquet @@ -215,141 +214,134 @@ Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_exten (38) RegularHashAggregateExecTransformer Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, 
l_discount#X, _pre_X#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] - -(39) RegularHashAggregateExecTransformer -Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(40) ProjectExecTransformer +(39) ProjectExecTransformer Output [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] -(41) WholeStageCodegenTransformer (X) +(40) 
WholeStageCodegenTransformer (X) Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: false -(42) TakeOrderedAndProjectExecTransformer +(41) TakeOrderedAndProjectExecTransformer Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 -(43) VeloxColumnarToRow +(42) VeloxColumnarToRow Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(44) Scan parquet +(43) Scan parquet Output [2]: [c_custkey#X, c_mktsegment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [2]: [c_custkey#X, c_mktsegment#X] Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) -(46) Project +(45) Project Output [1]: [c_custkey#X] Input [2]: [c_custkey#X, c_mktsegment#X] -(47) Exchange +(46) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(49) Scan parquet +(48) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(50) Filter +(49) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(51) Exchange +(50) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(51) Sort Input 
[4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(53) SortMergeJoin +(52) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(54) Project +(53) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(55) Exchange +(54) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Sort +(55) Sort Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(57) Scan parquet +(56) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(58) Filter +(57) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(59) Project +(58) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(60) Exchange +(59) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) Sort +(60) Sort Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(62) SortMergeJoin +(61) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(63) Project +(62) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] 
-(64) HashAggregate +(63) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(65) HashAggregate +(64) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(66) TakeOrderedAndProject +(65) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(67) AdaptiveSparkPlan +(66) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file 
diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt index 6ca992b70988..2cdc632f0673 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt @@ -424,55 +424,54 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (136) +AdaptiveSparkPlan (135) +- == Final Plan == - VeloxColumnarToRow (114) - +- ^ ProjectExecTransformer (112) - +- ^ RegularHashAggregateExecTransformer (111) - +- ^ RegularHashAggregateExecTransformer (110) - +- ^ ProjectExecTransformer (109) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) - :- ^ InputIteratorTransformer (103) - : +- ShuffleQueryStage (101), Statistics(X) - : +- ColumnarExchange (100) - : +- VeloxResizeBatches (99) - : +- ^ ProjectExecTransformer (97) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) - : :- ^ InputIteratorTransformer (91) - : : +- ShuffleQueryStage (89), Statistics(X) - : : +- ColumnarExchange (88) - : : +- VeloxResizeBatches (87) - : : +- ^ ProjectExecTransformer (85) - : : +- ^ FilterExecTransformer (84) - : : +- ^ Scan parquet (83) - : +- ^ InputIteratorTransformer (95) - : +- ShuffleQueryStage (93), Statistics(X) - : +- ReusedExchange (92) - +- ^ InputIteratorTransformer (107) - +- ShuffleQueryStage (105), Statistics(X) - +- ReusedExchange (104) + VeloxColumnarToRow (113) + +- ^ ProjectExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxResizeBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner 
BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxResizeBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ FilterExecTransformer (84) + : : +- ^ Scan parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (135) - +- HashAggregate (134) - +- Project (133) - +- SortMergeJoin Inner (132) - :- Sort (126) - : +- Exchange (125) - : +- Project (124) - : +- SortMergeJoin Inner (123) - : :- Sort (118) - : : +- Exchange (117) - : : +- Filter (116) - : : +- Scan parquet (115) - : +- Sort (122) - : +- Exchange (121) - : +- Filter (120) - : +- Scan parquet (119) - +- Sort (131) - +- Exchange (130) - +- Project (129) - +- Filter (128) - +- Scan parquet (127) + HashAggregate (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (117) + : : +- Exchange (116) + : : +- Filter (115) + : : +- Scan parquet (114) + : +- Sort (121) + : +- Exchange (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (83) Scan parquet @@ -581,129 +580,122 @@ Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] (110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(111) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), 
DecimalType(23,2)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(112) ProjectExecTransformer +(111) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(113) WholeStageCodegenTransformer (X) +(112) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(114) VeloxColumnarToRow +(113) VeloxColumnarToRow Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(115) Scan parquet +(114) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(116) Filter +(115) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(117) Exchange +(116) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Sort +(117) Sort Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(119) Scan parquet +(118) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct 
-(120) Filter +(119) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(121) Exchange +(120) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(121) Sort Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(123) SortMergeJoin +(122) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(124) Project +(123) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(125) Exchange +(124) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) Sort +(125) Sort Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(127) Scan parquet +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(128) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(129) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(130) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(131) Sort +(130) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(132) SortMergeJoin +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(132) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] 
-(134) HashAggregate +(133) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(135) HashAggregate +(134) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(136) AdaptiveSparkPlan +(135) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt index 9e927782f6c3..087309c0271c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt @@ -1,55 +1,54 @@ == Physical Plan == -AdaptiveSparkPlan (58) +AdaptiveSparkPlan (57) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ SortExecTransformer (38) - +- ^ InputIteratorTransformer (37) - +- ShuffleQueryStage (35), Statistics(X) - +- ColumnarExchange (34) - +- VeloxResizeBatches (33) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- 
ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ ProjectExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ RegularHashAggregateExecTransformer (20) - +- ^ ProjectExecTransformer (19) - +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) - :- ^ InputIteratorTransformer (8) - : +- ShuffleQueryStage (6), Statistics(X) - : +- ColumnarExchange (5) - : +- VeloxResizeBatches (4) - : +- ^ ProjectExecTransformer (2) - : +- ^ Scan parquet (1) - +- ^ InputIteratorTransformer (17) - +- ShuffleQueryStage (15), Statistics(X) - +- ColumnarExchange (14) - +- VeloxResizeBatches (13) - +- ^ ProjectExecTransformer (11) - +- ^ FilterExecTransformer (10) - +- ^ Scan parquet (9) + VeloxColumnarToRow (39) + +- ^ SortExecTransformer (37) + +- ^ InputIteratorTransformer (36) + +- ShuffleQueryStage (34), Statistics(X) + +- ColumnarExchange (33) + +- VeloxResizeBatches (32) + +- ^ RegularHashAggregateExecTransformer (30) + +- ^ InputIteratorTransformer (29) + +- ShuffleQueryStage (27), Statistics(X) + +- ColumnarExchange (26) + +- VeloxResizeBatches (25) + +- ^ ProjectExecTransformer (23) + +- ^ FlushableHashAggregateExecTransformer (22) + +- ^ ProjectExecTransformer (21) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ ProjectExecTransformer (19) + +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) + :- ^ InputIteratorTransformer (8) + : +- ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- VeloxResizeBatches (4) + : +- ^ ProjectExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (17) + +- ShuffleQueryStage (15), Statistics(X) + +- ColumnarExchange (14) + +- VeloxResizeBatches (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ Scan parquet (9) +- == Initial Plan == - Sort (57) - +- Exchange (56) - +- HashAggregate (55) - +- Exchange (54) - +- HashAggregate (53) - +- 
HashAggregate (52) - +- HashAggregate (51) - +- Project (50) - +- SortMergeJoin LeftOuter (49) - :- Sort (43) - : +- Exchange (42) - : +- Scan parquet (41) - +- Sort (48) - +- Exchange (47) - +- Project (46) - +- Filter (45) - +- Scan parquet (44) + Sort (56) + +- Exchange (55) + +- HashAggregate (54) + +- Exchange (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin LeftOuter (48) + :- Sort (42) + : +- Exchange (41) + : +- Scan parquet (40) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -133,180 +132,173 @@ Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] (20) RegularHashAggregateExecTransformer Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] -Functions [1]: [partial_count(o_orderkey#X)] -Aggregate Attributes [1]: [count#X] -Results [2]: [c_custkey#X, count#X] - -(21) RegularHashAggregateExecTransformer -Input [2]: [c_custkey#X, count#X] -Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [2]: [c_custkey#X, count(o_orderkey#X)#X] -(22) ProjectExecTransformer +(21) ProjectExecTransformer Output [1]: [count(o_orderkey#X)#X AS c_count#X] Input [2]: [c_custkey#X, count(o_orderkey#X)#X] -(23) FlushableHashAggregateExecTransformer +(22) FlushableHashAggregateExecTransformer Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(24) ProjectExecTransformer +(23) ProjectExecTransformer Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] Input [2]: [c_count#X, count#X] -(25) WholeStageCodegenTransformer (X) +(24) WholeStageCodegenTransformer (X) Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: false -(26) VeloxResizeBatches +(25) VeloxResizeBatches Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: X, X -(27) 
ColumnarExchange +(26) ColumnarExchange Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [shuffle_writer_type=hash] -(28) ShuffleQueryStage +(27) ShuffleQueryStage Output [2]: [c_count#X, count#X] Arguments: X -(29) InputAdapter +(28) InputAdapter Input [2]: [c_count#X, count#X] -(30) InputIteratorTransformer +(29) InputIteratorTransformer Input [2]: [c_count#X, count#X] -(31) RegularHashAggregateExecTransformer +(30) RegularHashAggregateExecTransformer Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(32) WholeStageCodegenTransformer (X) +(31) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(33) VeloxResizeBatches +(32) VeloxResizeBatches Input [2]: [c_count#X, custdist#X] Arguments: X, X -(34) ColumnarExchange +(33) ColumnarExchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(35) ShuffleQueryStage +(34) ShuffleQueryStage Output [2]: [c_count#X, custdist#X] Arguments: X -(36) InputAdapter +(35) InputAdapter Input [2]: [c_count#X, custdist#X] -(37) InputIteratorTransformer +(36) InputIteratorTransformer Input [2]: [c_count#X, custdist#X] -(38) SortExecTransformer +(37) SortExecTransformer Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [2]: [c_count#X, custdist#X] -(41) Scan parquet +(40) Scan parquet Output [1]: [c_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(41) Exchange 
Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Sort +(42) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(44) Scan parquet +(43) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(46) Project +(45) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(47) Exchange +(46) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [2]: [o_orderkey#X, o_custkey#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(49) SortMergeJoin +(48) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(50) Project +(49) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(51) HashAggregate +(50) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(52) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(53) HashAggregate +(52) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(54) Exchange +(53) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, 
[plan_id=X] -(55) HashAggregate +(54) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(56) Exchange +(55) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) Sort +(56) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(58) AdaptiveSparkPlan +(57) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt index d82e4f2b21db..a328bd57010a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt @@ -1,40 +1,39 @@ == Physical Plan == -AdaptiveSparkPlan (39) +AdaptiveSparkPlan (38) +- == Final Plan == - VeloxColumnarToRow (25) - +- ^ ProjectExecTransformer (23) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ Scan parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ Scan parquet (10) + VeloxColumnarToRow (24) + +- ^ ProjectExecTransformer (22) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) 
+ +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (38) - +- HashAggregate (37) - +- Project (36) - +- SortMergeJoin Inner (35) - :- Sort (30) - : +- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Sort (34) - +- Exchange (33) - +- Filter (32) - +- Scan parquet (31) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -123,93 +122,86 @@ Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] (21) RegularHashAggregateExecTransformer Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] Keys: [] -Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] -Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Keys: [] -Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * 
promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] +Functions [2]: [sum(_pre_X#X), sum(_pre_X#X)] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(23) ProjectExecTransformer +(22) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] 
Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(24) WholeStageCodegenTransformer (X) +(23) WholeStageCodegenTransformer (X) Input [1]: [promo_revenue#X] Arguments: false -(25) VeloxColumnarToRow +(24) VeloxColumnarToRow Input [1]: [promo_revenue#X] -(26) Scan parquet +(25) Scan parquet Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] ReadSchema: struct -(27) Filter +(26) Filter Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) -(28) Project +(27) Project Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(29) Exchange +(28) Exchange Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Sort +(29) Sort Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(31) Scan parquet +(30) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(32) Filter +(31) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(33) 
Exchange +(32) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Sort +(33) Sort Input [2]: [p_partkey#X, p_type#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(35) SortMergeJoin +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(36) Project +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(37) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(38) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN 
CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] -(39) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt index 950441af70e9..44a18a8f01c6 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt @@ -257,31 +257,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (73) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRow (64) - +- ^ RegularHashAggregateExecTransformer (62) - +- ^ RegularHashAggregateExecTransformer (61) - +- 
^ ProjectExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ InputIteratorTransformer (58) - +- ShuffleQueryStage (56), Statistics(X) - +- ColumnarExchange (55) - +- VeloxResizeBatches (54) - +- ^ ProjectExecTransformer (52) - +- ^ FlushableHashAggregateExecTransformer (51) - +- ^ ProjectExecTransformer (50) - +- ^ FilterExecTransformer (49) - +- ^ Scan parquet (48) + VeloxColumnarToRow (63) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxResizeBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ FilterExecTransformer (49) + +- ^ Scan parquet (48) +- == Initial Plan == - HashAggregate (72) - +- HashAggregate (71) - +- HashAggregate (70) - +- Exchange (69) - +- HashAggregate (68) - +- Project (67) - +- Filter (66) - +- Scan parquet (65) + HashAggregate (71) + +- HashAggregate (70) + +- HashAggregate (69) + +- Exchange (68) + +- HashAggregate (67) + +- Project (66) + +- Filter (65) + +- Scan parquet (64) (48) Scan parquet @@ -346,71 +345,64 @@ Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedpri (61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(62) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(64) VeloxColumnarToRow +(63) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(65) Scan parquet +(64) Scan 
parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(66) Filter +(65) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(67) Project +(66) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(68) HashAggregate +(67) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(69) Exchange +(68) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) HashAggregate +(69) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as 
decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(71) HashAggregate +(70) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(72) HashAggregate +(71) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(73) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt index e48379edc9fa..88a65a0924ae 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt @@ -1,62 +1,61 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (62) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ ProjectExecTransformer (38) - +- ^ RegularHashAggregateExecTransformer (37) - +- ^ RegularHashAggregateExecTransformer (36) - +- ^ ProjectExecTransformer (35) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) - :- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ Scan parquet (10) - +- ^ FilterExecTransformer (33) - +- ^ ProjectExecTransformer (32) - +- ^ 
RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ FilterExecTransformer (22) - +- ^ Scan parquet (21) + VeloxColumnarToRow (39) + +- ^ ProjectExecTransformer (37) + +- ^ RegularHashAggregateExecTransformer (36) + +- ^ ProjectExecTransformer (35) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) + :- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ Scan parquet (10) + +- ^ FilterExecTransformer (33) + +- ^ ProjectExecTransformer (32) + +- ^ RegularHashAggregateExecTransformer (31) + +- ^ InputIteratorTransformer (30) + +- ShuffleQueryStage (28), Statistics(X) + +- ColumnarExchange (27) + +- VeloxResizeBatches (26) + +- ^ ProjectExecTransformer (24) + +- ^ FlushableHashAggregateExecTransformer (23) + +- ^ FilterExecTransformer (22) + +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (62) - +- HashAggregate (61) - +- Project (60) - +- SortMergeJoin Inner (59) - :- Project (51) - : +- SortMergeJoin Inner (50) - : :- Sort (44) - : : +- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Sort (49) - : +- Exchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Sort (58) - +- Filter (57) - +- HashAggregate (56) - +- Exchange (55) - +- HashAggregate (54) - +- Filter (53) - +- Scan 
parquet (52) + HashAggregate (61) + +- HashAggregate (60) + +- Project (59) + +- SortMergeJoin Inner (58) + :- Project (50) + : +- SortMergeJoin Inner (49) + : :- Sort (43) + : : +- Exchange (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Project (46) + : +- Filter (45) + : +- Scan parquet (44) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) Scan parquet @@ -213,139 +212,132 @@ Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity) (36) RegularHashAggregateExecTransformer Input [1]: [l_extendedprice#X] Keys: [] -Functions [1]: [partial_sum(l_extendedprice#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(37) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [sum(l_extendedprice#X)#X] -(38) ProjectExecTransformer +(37) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] Input [1]: [sum(l_extendedprice#X)#X] -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [1]: [avg_yearly#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [1]: [avg_yearly#X] -(41) Scan parquet +(40) Scan parquet Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] ReadSchema: struct -(42) Filter +(41) Filter Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) -(43) Exchange +(42) Exchange Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(43) 
Sort Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(45) Scan parquet +(44) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(46) Filter +(45) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(47) Project +(46) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(48) Exchange +(47) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(48) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(50) SortMergeJoin +(49) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(51) Project +(50) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(52) Scan parquet +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(53) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(54) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(55) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys 
[1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7)) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(57) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(58) Sort +(57) Sort Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(59) SortMergeJoin +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(60) Project +(59) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(61) HashAggregate +(60) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(62) HashAggregate +(61) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] -(63) AdaptiveSparkPlan +(62) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt index 8dbf9429961a..b01d8c24a3a8 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt @@ -1,102 +1,101 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (109) +- == Final Plan == - VeloxColumnarToRow (70) - +- TakeOrderedAndProjectExecTransformer (69) - 
+- ^ RegularHashAggregateExecTransformer (67) - +- ^ RegularHashAggregateExecTransformer (66) - +- ^ ProjectExecTransformer (65) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) - :- ^ InputIteratorTransformer (46) - : +- ShuffleQueryStage (44), Statistics(X) - : +- ColumnarExchange (43) - : +- VeloxResizeBatches (42) - : +- ^ ProjectExecTransformer (40) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (38) - : +- ShuffleQueryStage (36), Statistics(X) - : +- ColumnarExchange (35) - : +- VeloxResizeBatches (34) - : +- ^ ProjectExecTransformer (32) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) - : :- ^ InputIteratorTransformer (18) - : : +- ShuffleQueryStage (16), Statistics(X) - : : +- ColumnarExchange (15) - : : +- VeloxResizeBatches (14) - : : +- ^ ProjectExecTransformer (12) - : : +- ^ FilterExecTransformer (11) - : : +- ^ Scan parquet (10) - : +- ^ ProjectExecTransformer (30) - : +- ^ FilterExecTransformer (29) - : +- ^ RegularHashAggregateExecTransformer (28) - : +- ^ InputIteratorTransformer (27) - : +- ShuffleQueryStage (25), Statistics(X) - : +- ColumnarExchange (24) - : +- VeloxResizeBatches (23) - : +- ^ ProjectExecTransformer (21) - : +- ^ FlushableHashAggregateExecTransformer (20) - : +- ^ Scan parquet (19) - +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) - :- ^ InputIteratorTransformer (55) - : +- ShuffleQueryStage (53), Statistics(X) - : +- ColumnarExchange (52) - : +- VeloxResizeBatches (51) - : +- ^ ProjectExecTransformer (49) - : +- ^ FilterExecTransformer (48) - : +- ^ Scan parquet (47) - +- ^ ProjectExecTransformer (62) - +- ^ FilterExecTransformer (61) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ 
InputIteratorTransformer (59) - +- ShuffleQueryStage (57), Statistics(X) - +- ReusedExchange (56) + VeloxColumnarToRow (69) + +- TakeOrderedAndProjectExecTransformer (68) + +- ^ RegularHashAggregateExecTransformer (66) + +- ^ ProjectExecTransformer (65) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) + :- ^ InputIteratorTransformer (46) + : +- ShuffleQueryStage (44), Statistics(X) + : +- ColumnarExchange (43) + : +- VeloxResizeBatches (42) + : +- ^ ProjectExecTransformer (40) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (38) + : +- ShuffleQueryStage (36), Statistics(X) + : +- ColumnarExchange (35) + : +- VeloxResizeBatches (34) + : +- ^ ProjectExecTransformer (32) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) + : :- ^ InputIteratorTransformer (18) + : : +- ShuffleQueryStage (16), Statistics(X) + : : +- ColumnarExchange (15) + : : +- VeloxResizeBatches (14) + : : +- ^ ProjectExecTransformer (12) + : : +- ^ FilterExecTransformer (11) + : : +- ^ Scan parquet (10) + : +- ^ ProjectExecTransformer (30) + : +- ^ FilterExecTransformer (29) + : +- ^ RegularHashAggregateExecTransformer (28) + : +- ^ InputIteratorTransformer (27) + : +- ShuffleQueryStage (25), Statistics(X) + : +- ColumnarExchange (24) + : +- VeloxResizeBatches (23) + : +- ^ ProjectExecTransformer (21) + : +- ^ FlushableHashAggregateExecTransformer (20) + : +- ^ Scan parquet (19) + +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) + :- ^ InputIteratorTransformer (55) + : +- ShuffleQueryStage (53), Statistics(X) + : +- ColumnarExchange (52) + : +- VeloxResizeBatches (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ Scan parquet (47) + 
+- ^ ProjectExecTransformer (62) + +- ^ FilterExecTransformer (61) + +- ^ RegularHashAggregateExecTransformer (60) + +- ^ InputIteratorTransformer (59) + +- ShuffleQueryStage (57), Statistics(X) + +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (109) - +- HashAggregate (108) - +- HashAggregate (107) - +- Project (106) - +- SortMergeJoin Inner (105) - :- Sort (92) - : +- Exchange (91) - : +- Project (90) - : +- SortMergeJoin Inner (89) - : :- Sort (74) - : : +- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Sort (88) - : +- Exchange (87) - : +- SortMergeJoin LeftSemi (86) - : :- Sort (78) - : : +- Exchange (77) - : : +- Filter (76) - : : +- Scan parquet (75) - : +- Sort (85) - : +- Project (84) - : +- Filter (83) - : +- HashAggregate (82) - : +- Exchange (81) - : +- HashAggregate (80) - : +- Scan parquet (79) - +- SortMergeJoin LeftSemi (104) - :- Sort (96) - : +- Exchange (95) - : +- Filter (94) - : +- Scan parquet (93) - +- Sort (103) - +- Project (102) - +- Filter (101) - +- HashAggregate (100) - +- Exchange (99) - +- HashAggregate (98) - +- Scan parquet (97) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- SortMergeJoin Inner (104) + :- Sort (91) + : +- Exchange (90) + : +- Project (89) + : +- SortMergeJoin Inner (88) + : :- Sort (73) + : : +- Exchange (72) + : : +- Filter (71) + : : +- Scan parquet (70) + : +- Sort (87) + : +- Exchange (86) + : +- SortMergeJoin LeftSemi (85) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (84) + : +- Project (83) + : +- Filter (82) + : +- HashAggregate (81) + : +- Exchange (80) + : +- HashAggregate (79) + : +- Scan parquet (78) + +- SortMergeJoin LeftSemi (103) + :- Sort (95) + : +- Exchange (94) + : +- Filter (93) + : +- Scan parquet (92) + +- Sort (102) + +- Project (101) + +- Filter (100) + +- HashAggregate (99) + +- Exchange (98) + +- HashAggregate (97) + +- Scan parquet (96) 
(1) Scan parquet @@ -371,219 +370,212 @@ Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, (66) RegularHashAggregateExecTransformer Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] - -(67) RegularHashAggregateExecTransformer -Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(68) WholeStageCodegenTransformer (X) +(67) WholeStageCodegenTransformer (X) Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: false -(69) TakeOrderedAndProjectExecTransformer +(68) TakeOrderedAndProjectExecTransformer Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 -(70) VeloxColumnarToRow +(69) VeloxColumnarToRow Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(71) Scan parquet +(70) Scan parquet Output [2]: [c_custkey#X, c_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey)] ReadSchema: struct -(72) Filter +(71) Filter Input [2]: [c_custkey#X, c_name#X] Condition : isnotnull(c_custkey#X) -(73) Exchange +(72) Exchange Input [2]: [c_custkey#X, c_name#X] 
Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Sort +(73) Sort Input [2]: [c_custkey#X, c_name#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(75) Scan parquet +(74) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(76) Filter +(75) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(77) Exchange +(76) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Sort +(77) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(79) Scan parquet +(78) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(80) HashAggregate +(79) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(81) Exchange +(80) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) HashAggregate +(81) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(83) Filter +(82) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(84) Project +(83) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(85) Sort +(84) Sort Input [1]: 
[l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(86) SortMergeJoin +(85) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(87) Exchange +(86) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Sort +(87) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(89) SortMergeJoin +(88) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(90) Project +(89) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(91) Exchange +(90) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) Sort +(91) Sort Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(93) Scan parquet +(92) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(94) Filter +(93) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(95) Exchange +(94) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Sort +(95) Sort Input [2]: [l_orderkey#X, l_quantity#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(97) Scan parquet +(96) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(98) HashAggregate +(97) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: 
[partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(99) Exchange +(98) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) HashAggregate +(99) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(101) Filter +(100) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(102) Project +(101) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(103) Sort +(102) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(104) SortMergeJoin +(103) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(105) SortMergeJoin +(104) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(106) Project +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(107) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(108) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, 
o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(109) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(110) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt index 41da1d28e987..4ef0a5c17181 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt @@ -1,39 +1,38 @@ == Physical Plan == -AdaptiveSparkPlan (38) +AdaptiveSparkPlan (37) +- == Final Plan == - VeloxColumnarToRow (24) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ Scan parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ Scan parquet (10) + VeloxColumnarToRow (23) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer 
(20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (37) - +- HashAggregate (36) - +- Project (35) - +- SortMergeJoin Inner (34) - :- Sort (29) - : +- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Sort (33) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) Scan parquet @@ -122,89 +121,82 @@ Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partke (21) RegularHashAggregateExecTransformer Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), 
DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(23) WholeStageCodegenTransformer (X) +(22) WholeStageCodegenTransformer (X) Input [1]: [revenue#X] Arguments: false -(24) VeloxColumnarToRow +(23) VeloxColumnarToRow Input [1]: [revenue#X] -(25) Scan parquet +(24) Scan parquet Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] ReadSchema: struct -(26) Filter +(25) Filter Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) -(27) Project +(26) Project Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] -(28) Exchange +(27) Exchange Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Sort +(28) Sort Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, 
l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(30) Scan parquet +(29) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(31) Filter +(30) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(32) Exchange +(31) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) Sort +(32) Sort Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(34) SortMergeJoin +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 
20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(35) Project +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(36) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(37) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(38) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt index 1fad7160bf64..065c86d08eb8 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt @@ -1,19 +1,19 @@ == Physical 
Plan == -AdaptiveSparkPlan (143) +AdaptiveSparkPlan (142) +- == Final Plan == - VeloxColumnarToRow (93) - +- AQEShuffleRead (92) - +- ShuffleQueryStage (91), Statistics(X) - +- ColumnarExchange (90) - +- VeloxResizeBatches (89) - +- ^ ProjectExecTransformer (87) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (86) - :- ^ InputIteratorTransformer (76) - : +- ShuffleQueryStage (74), Statistics(X) - : +- ColumnarExchange (73) - : +- VeloxResizeBatches (72) - : +- ^ ProjectExecTransformer (70) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (69) + VeloxColumnarToRow (92) + +- AQEShuffleRead (91) + +- ShuffleQueryStage (90), Statistics(X) + +- ColumnarExchange (89) + +- VeloxResizeBatches (88) + +- ^ ProjectExecTransformer (86) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) + :- ^ InputIteratorTransformer (75) + : +- ShuffleQueryStage (73), Statistics(X) + : +- ColumnarExchange (72) + : +- VeloxResizeBatches (71) + : +- ^ ProjectExecTransformer (69) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (68) : :- ^ InputIteratorTransformer (9) : : +- ShuffleQueryStage (7), Statistics(X) : : +- ColumnarExchange (6) @@ -21,12 +21,12 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (3) : : +- ^ FilterExecTransformer (2) : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (68) - : +- ShuffleQueryStage (66), Statistics(X) - : +- ColumnarExchange (65) - : +- VeloxResizeBatches (64) - : +- ^ ProjectExecTransformer (62) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (61) + : +- ^ InputIteratorTransformer (67) + : +- ShuffleQueryStage (65), Statistics(X) + : +- ColumnarExchange (64) + : +- VeloxResizeBatches (63) + : +- ^ ProjectExecTransformer (61) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (60) : :- ^ InputIteratorTransformer (35) : : +- ShuffleQueryStage (33), Statistics(X) : : +- ColumnarExchange (32) @@ -47,83 +47,82 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (21) : : +- ^ 
FilterExecTransformer (20) : : +- ^ Scan parquet (19) - : +- ^ InputIteratorTransformer (60) - : +- ShuffleQueryStage (58), Statistics(X) - : +- ColumnarExchange (57) - : +- VeloxResizeBatches (56) - : +- ^ ProjectExecTransformer (54) - : +- ^ FilterExecTransformer (53) - : +- ^ ProjectExecTransformer (52) - : +- ^ RegularHashAggregateExecTransformer (51) - : +- ^ RegularHashAggregateExecTransformer (50) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) - : :- ^ InputIteratorTransformer (44) - : : +- ShuffleQueryStage (42), Statistics(X) - : : +- ColumnarExchange (41) - : : +- VeloxResizeBatches (40) - : : +- ^ ProjectExecTransformer (38) - : : +- ^ FilterExecTransformer (37) - : : +- ^ Scan parquet (36) - : +- ^ InputIteratorTransformer (48) - : +- ShuffleQueryStage (46), Statistics(X) - : +- ReusedExchange (45) - +- ^ InputIteratorTransformer (85) - +- ShuffleQueryStage (83), Statistics(X) - +- ColumnarExchange (82) - +- VeloxResizeBatches (81) - +- ^ ProjectExecTransformer (79) - +- ^ FilterExecTransformer (78) - +- ^ Scan parquet (77) + : +- ^ InputIteratorTransformer (59) + : +- ShuffleQueryStage (57), Statistics(X) + : +- ColumnarExchange (56) + : +- VeloxResizeBatches (55) + : +- ^ ProjectExecTransformer (53) + : +- ^ FilterExecTransformer (52) + : +- ^ ProjectExecTransformer (51) + : +- ^ RegularHashAggregateExecTransformer (50) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) + : :- ^ InputIteratorTransformer (44) + : : +- ShuffleQueryStage (42), Statistics(X) + : : +- ColumnarExchange (41) + : : +- VeloxResizeBatches (40) + : : +- ^ ProjectExecTransformer (38) + : : +- ^ FilterExecTransformer (37) + : : +- ^ Scan parquet (36) + : +- ^ InputIteratorTransformer (48) + : +- ShuffleQueryStage (46), Statistics(X) + : +- ReusedExchange (45) + +- ^ InputIteratorTransformer (84) + +- ShuffleQueryStage (82), Statistics(X) + +- ColumnarExchange (81) + +- VeloxResizeBatches (80) + +- ^ ProjectExecTransformer (78) + +- ^ 
FilterExecTransformer (77) + +- ^ Scan parquet (76) +- == Initial Plan == - Sort (142) - +- Exchange (141) - +- Project (140) - +- SortMergeJoin Inner (139) - :- Sort (133) - : +- Exchange (132) - : +- Project (131) - : +- SortMergeJoin LeftSemi (130) - : :- Sort (97) - : : +- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Sort (129) - : +- Exchange (128) - : +- Project (127) - : +- SortMergeJoin Inner (126) - : :- Sort (109) - : : +- Exchange (108) - : : +- SortMergeJoin LeftSemi (107) - : : :- Sort (101) - : : : +- Exchange (100) - : : : +- Filter (99) - : : : +- Scan parquet (98) - : : +- Sort (106) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Sort (125) - : +- Exchange (124) - : +- Filter (123) - : +- HashAggregate (122) - : +- HashAggregate (121) - : +- SortMergeJoin LeftSemi (120) - : :- Sort (114) - : : +- Exchange (113) - : : +- Project (112) - : : +- Filter (111) - : : +- Scan parquet (110) - : +- Sort (119) - : +- Exchange (118) - : +- Project (117) - : +- Filter (116) - : +- Scan parquet (115) - +- Sort (138) - +- Exchange (137) - +- Project (136) - +- Filter (135) - +- Scan parquet (134) + Sort (141) + +- Exchange (140) + +- Project (139) + +- SortMergeJoin Inner (138) + :- Sort (132) + : +- Exchange (131) + : +- Project (130) + : +- SortMergeJoin LeftSemi (129) + : :- Sort (96) + : : +- Exchange (95) + : : +- Filter (94) + : : +- Scan parquet (93) + : +- Sort (128) + : +- Exchange (127) + : +- Project (126) + : +- SortMergeJoin Inner (125) + : :- Sort (108) + : : +- Exchange (107) + : : +- SortMergeJoin LeftSemi (106) + : : :- Sort (100) + : : : +- Exchange (99) + : : : +- Filter (98) + : : : +- Scan parquet (97) + : : +- Sort (105) + : : +- Exchange (104) + : : +- Project (103) + : : +- Filter (102) + : : +- Scan parquet (101) + : +- Sort (124) + : +- Exchange (123) + : +- Filter (122) + : +- HashAggregate (121) + : +- HashAggregate (120) + : +- SortMergeJoin LeftSemi 
(119) + : :- Sort (113) + : : +- Exchange (112) + : : +- Project (111) + : : +- Filter (110) + : : +- Scan parquet (109) + : +- Sort (118) + : +- Exchange (117) + : +- Project (116) + : +- Filter (115) + : +- Scan parquet (114) + +- Sort (137) + +- Exchange (136) + +- Project (135) + +- Filter (134) + +- Scan parquet (133) (1) Scan parquet @@ -326,407 +325,400 @@ Join condition: None (50) RegularHashAggregateExecTransformer Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] - -(51) RegularHashAggregateExecTransformer -Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(52) ProjectExecTransformer +(51) ProjectExecTransformer Output [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(53) FilterExecTransformer +(52) FilterExecTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: isnotnull((0.5 * sum(l_quantity))#X) -(54) ProjectExecTransformer +(53) ProjectExecTransformer Output [4]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(55) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: false -(56) VeloxResizeBatches +(55) VeloxResizeBatches Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X, X -(57) ColumnarExchange +(56) 
ColumnarExchange Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(58) ShuffleQueryStage +(57) ShuffleQueryStage Output [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X -(59) InputAdapter +(58) InputAdapter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(60) InputIteratorTransformer +(59) InputIteratorTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(61) ShuffledHashJoinExecTransformer +(60) ShuffledHashJoinExecTransformer Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(62) ProjectExecTransformer +(61) ProjectExecTransformer Output [2]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: false -(64) VeloxResizeBatches +(63) VeloxResizeBatches Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: X, X -(65) ColumnarExchange +(64) ColumnarExchange Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(66) ShuffleQueryStage +(65) ShuffleQueryStage Output [1]: [ps_suppkey#X] Arguments: X -(67) InputAdapter +(66) InputAdapter Input [1]: [ps_suppkey#X] -(68) InputIteratorTransformer +(67) InputIteratorTransformer Input [1]: [ps_suppkey#X] -(69) ShuffledHashJoinExecTransformer +(68) ShuffledHashJoinExecTransformer Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(70) 
ProjectExecTransformer +(69) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(71) WholeStageCodegenTransformer (X) +(70) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: false -(72) VeloxResizeBatches +(71) VeloxResizeBatches Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: X, X -(73) ColumnarExchange +(72) ColumnarExchange Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(74) ShuffleQueryStage +(73) ShuffleQueryStage Output [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: X -(75) InputAdapter +(74) InputAdapter Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(76) InputIteratorTransformer +(75) InputIteratorTransformer Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(77) Scan parquet +(76) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(78) FilterExecTransformer +(77) FilterExecTransformer Input [2]: [n_nationkey#X, n_name#X] Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(79) ProjectExecTransformer +(78) ProjectExecTransformer Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(80) WholeStageCodegenTransformer (X) +(79) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: false -(81) VeloxResizeBatches +(80) VeloxResizeBatches Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: X, X -(82) ColumnarExchange +(81) ColumnarExchange Input [2]: 
[hash_partition_key#X, n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(83) ShuffleQueryStage +(82) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(84) InputAdapter +(83) InputAdapter Input [1]: [n_nationkey#X] -(85) InputIteratorTransformer +(84) InputIteratorTransformer Input [1]: [n_nationkey#X] -(86) ShuffledHashJoinExecTransformer +(85) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(87) ProjectExecTransformer +(86) ProjectExecTransformer Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(88) WholeStageCodegenTransformer (X) +(87) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(89) VeloxResizeBatches +(88) VeloxResizeBatches Input [2]: [s_name#X, s_address#X] Arguments: X, X -(90) ColumnarExchange +(89) ColumnarExchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(91) ShuffleQueryStage +(90) ShuffleQueryStage Output [2]: [s_name#X, s_address#X] Arguments: X -(92) AQEShuffleRead +(91) AQEShuffleRead Input [2]: [s_name#X, s_address#X] Arguments: local -(93) VeloxColumnarToRow +(92) VeloxColumnarToRow Input [2]: [s_name#X, s_address#X] -(94) Scan parquet +(93) Scan parquet Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_nationkey)] ReadSchema: struct -(95) Filter +(94) Filter Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Condition : isnotnull(s_nationkey#X) -(96) Exchange +(95) Exchange Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Sort +(96) Sort Input [4]: [s_suppkey#X, s_name#X, 
s_address#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(98) Scan parquet +(97) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(99) Filter +(98) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(100) Exchange +(99) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) Sort +(100) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 -(102) Scan parquet +(101) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(103) Filter +(102) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(104) Project +(103) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(105) Exchange +(104) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(105) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(107) SortMergeJoin +(106) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(108) Exchange +(107) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(108) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 -(110) Scan parquet +(109) Scan parquet Output [4]: [l_partkey#X, 
l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(111) Filter +(110) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(112) Project +(111) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(113) Exchange +(112) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Sort +(113) Sort Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(115) Scan parquet +(114) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(116) Filter +(115) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(117) Project +(116) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(118) Exchange +(117) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) Sort +(118) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(120) SortMergeJoin +(119) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(121) HashAggregate +(120) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, 
l_suppkey#X, sum#X, isEmpty#X] -(122) HashAggregate +(121) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(123) Filter +(122) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(124) Exchange +(123) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Sort +(124) Sort Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 -(126) SortMergeJoin +(125) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(127) Project +(126) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(128) Exchange +(127) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Sort +(128) Sort Input [1]: [ps_suppkey#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(130) SortMergeJoin +(129) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(131) Project +(130) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(132) Exchange +(131) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) Sort +(132) Sort Input [3]: [s_name#X, 
s_address#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(134) Scan parquet +(133) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(135) Filter +(134) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(136) Project +(135) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(137) Exchange +(136) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(137) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(139) SortMergeJoin +(138) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(140) Project +(139) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(141) Exchange +(140) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) Sort +(141) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(143) AdaptiveSparkPlan +(142) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt index a698975568fb..2ca3296f02ed 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt @@ -1,64 +1,63 @@ == Physical Plan == -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (66) +- == Final Plan == - VeloxColumnarToRow (43) - +- TakeOrderedAndProjectExecTransformer (42) - +- ^ 
ProjectExecTransformer (40) - +- ^ RegularHashAggregateExecTransformer (39) - +- ^ RegularHashAggregateExecTransformer (38) - +- ^ ProjectExecTransformer (37) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) - :- ^ InputIteratorTransformer (26) - : +- ShuffleQueryStage (24), Statistics(X) - : +- ColumnarExchange (23) - : +- VeloxResizeBatches (22) - : +- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ Scan parquet (10) - +- ^ InputIteratorTransformer (35) - +- ShuffleQueryStage (33), Statistics(X) - +- ColumnarExchange (32) - +- VeloxResizeBatches (31) - +- ^ ProjectExecTransformer (29) - +- ^ FilterExecTransformer (28) - +- ^ Scan parquet (27) + VeloxColumnarToRow (42) + +- TakeOrderedAndProjectExecTransformer (41) + +- ^ ProjectExecTransformer (39) + +- ^ RegularHashAggregateExecTransformer (38) + +- ^ ProjectExecTransformer (37) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) + :- ^ InputIteratorTransformer (26) + : +- ShuffleQueryStage (24), Statistics(X) + : +- ColumnarExchange (23) + : +- VeloxResizeBatches (22) + : +- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), 
Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ Scan parquet (10) + +- ^ InputIteratorTransformer (35) + +- ShuffleQueryStage (33), Statistics(X) + +- ColumnarExchange (32) + +- VeloxResizeBatches (31) + +- ^ ProjectExecTransformer (29) + +- ^ FilterExecTransformer (28) + +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Project (63) - +- SortMergeJoin Inner (62) - :- Sort (56) - : +- Exchange (55) - : +- Project (54) - : +- SortMergeJoin Inner (53) - : :- Sort (48) - : : +- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Sort (52) - : +- Exchange (51) - : +- Filter (50) - : +- Scan parquet (49) - +- Sort (61) - +- Exchange (60) - +- Project (59) - +- Filter (58) - +- Scan parquet (57) + TakeOrderedAndProject (65) + +- HashAggregate (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (55) + : +- Exchange (54) + : +- Project (53) + : +- SortMergeJoin Inner (52) + : :- Sort (47) + : : +- Exchange (46) + : : +- Project (45) + : : +- Filter (44) + : : +- Scan parquet (43) + : +- Sort (51) + : +- Exchange (50) + : +- Filter (49) + : +- Scan parquet (48) + +- Sort (60) + +- Exchange (59) + +- Project (58) + +- Filter (57) + +- Scan parquet (56) (1) Scan parquet @@ -215,141 +214,134 @@ Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_exten (38) RegularHashAggregateExecTransformer Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] - -(39) RegularHashAggregateExecTransformer -Input [5]: [l_orderkey#X, o_orderdate#X, 
o_shippriority#X, sum#X, isEmpty#X] -Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(40) ProjectExecTransformer +(39) ProjectExecTransformer Output [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(41) WholeStageCodegenTransformer (X) +(40) WholeStageCodegenTransformer (X) Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: false -(42) TakeOrderedAndProjectExecTransformer +(41) TakeOrderedAndProjectExecTransformer Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 -(43) 
VeloxColumnarToRow +(42) VeloxColumnarToRow Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(44) Scan parquet +(43) Scan parquet Output [2]: [c_custkey#X, c_mktsegment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [2]: [c_custkey#X, c_mktsegment#X] Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) -(46) Project +(45) Project Output [1]: [c_custkey#X] Input [2]: [c_custkey#X, c_mktsegment#X] -(47) Exchange +(46) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(49) Scan parquet +(48) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(50) Filter +(49) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(51) Exchange +(50) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(51) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(53) SortMergeJoin +(52) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(54) Project +(53) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(55) 
Exchange +(54) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Sort +(55) Sort Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(57) Scan parquet +(56) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(58) Filter +(57) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(59) Project +(58) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(60) Exchange +(59) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) Sort +(60) Sort Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(62) SortMergeJoin +(61) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(63) Project +(62) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(64) HashAggregate +(63) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), 
DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(65) HashAggregate +(64) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(66) TakeOrderedAndProject +(65) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(67) AdaptiveSparkPlan +(66) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt index 68e87c39a078..2a3499737815 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt @@ -428,55 +428,54 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 
Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (136) +AdaptiveSparkPlan (135) +- == Final Plan == - VeloxColumnarToRow (114) - +- ^ ProjectExecTransformer (112) - +- ^ RegularHashAggregateExecTransformer (111) - +- ^ RegularHashAggregateExecTransformer (110) - +- ^ ProjectExecTransformer (109) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) - :- ^ InputIteratorTransformer (103) - : +- ShuffleQueryStage (101), Statistics(X) - : +- ColumnarExchange (100) - : +- VeloxResizeBatches (99) - : +- ^ ProjectExecTransformer (97) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) - : :- ^ InputIteratorTransformer (91) - : : +- ShuffleQueryStage (89), Statistics(X) - : : +- ColumnarExchange (88) - : : +- VeloxResizeBatches (87) - : : +- ^ ProjectExecTransformer (85) - : : +- ^ FilterExecTransformer (84) - : : +- ^ Scan parquet (83) - : +- ^ InputIteratorTransformer (95) - : +- ShuffleQueryStage (93), Statistics(X) - : +- ReusedExchange (92) - +- ^ InputIteratorTransformer (107) - +- ShuffleQueryStage (105), Statistics(X) - +- ReusedExchange (104) + VeloxColumnarToRow (113) + +- ^ ProjectExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxResizeBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxResizeBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ FilterExecTransformer (84) + : : +- ^ Scan parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), 
Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (135) - +- HashAggregate (134) - +- Project (133) - +- SortMergeJoin Inner (132) - :- Sort (126) - : +- Exchange (125) - : +- Project (124) - : +- SortMergeJoin Inner (123) - : :- Sort (118) - : : +- Exchange (117) - : : +- Filter (116) - : : +- Scan parquet (115) - : +- Sort (122) - : +- Exchange (121) - : +- Filter (120) - : +- Scan parquet (119) - +- Sort (131) - +- Exchange (130) - +- Project (129) - +- Filter (128) - +- Scan parquet (127) + HashAggregate (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (117) + : : +- Exchange (116) + : : +- Filter (115) + : : +- Scan parquet (114) + : +- Sort (121) + : +- Exchange (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (83) Scan parquet @@ -587,131 +586,124 @@ Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] (110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(111) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(112) ProjectExecTransformer +(111) ProjectExecTransformer Output [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(113) 
WholeStageCodegenTransformer (X) +(112) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(114) VeloxColumnarToRow +(113) VeloxColumnarToRow Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(115) Scan parquet +(114) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(116) Filter +(115) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(117) Exchange +(116) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Sort +(117) Sort Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(119) Scan parquet +(118) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(120) Filter +(119) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(121) Exchange +(120) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(121) Sort Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(123) SortMergeJoin +(122) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(124) Project +(123) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(125) Exchange +(124) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] 
-(126) Sort +(125) Sort Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(127) Scan parquet +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(128) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(129) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(130) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(131) Sort +(130) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(132) SortMergeJoin +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(133) Project +(132) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(134) HashAggregate +(133) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(135) HashAggregate +(134) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(136) AdaptiveSparkPlan +(135) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git 
a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt index b527c2bc3bc3..fbd7281569ad 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt @@ -1,55 +1,54 @@ == Physical Plan == -AdaptiveSparkPlan (58) +AdaptiveSparkPlan (57) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ SortExecTransformer (38) - +- ^ InputIteratorTransformer (37) - +- ShuffleQueryStage (35), Statistics(X) - +- ColumnarExchange (34) - +- VeloxResizeBatches (33) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ ProjectExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ RegularHashAggregateExecTransformer (20) - +- ^ ProjectExecTransformer (19) - +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) - :- ^ InputIteratorTransformer (8) - : +- ShuffleQueryStage (6), Statistics(X) - : +- ColumnarExchange (5) - : +- VeloxResizeBatches (4) - : +- ^ ProjectExecTransformer (2) - : +- ^ Scan parquet (1) - +- ^ InputIteratorTransformer (17) - +- ShuffleQueryStage (15), Statistics(X) - +- ColumnarExchange (14) - +- VeloxResizeBatches (13) - +- ^ ProjectExecTransformer (11) - +- ^ FilterExecTransformer (10) - +- ^ Scan parquet (9) + VeloxColumnarToRow (39) + +- ^ SortExecTransformer (37) + +- ^ InputIteratorTransformer (36) + +- ShuffleQueryStage (34), Statistics(X) + +- ColumnarExchange (33) + +- VeloxResizeBatches (32) + +- ^ RegularHashAggregateExecTransformer (30) + +- ^ InputIteratorTransformer (29) + +- ShuffleQueryStage (27), Statistics(X) + +- ColumnarExchange (26) + +- VeloxResizeBatches (25) + +- ^ ProjectExecTransformer (23) + +- ^ 
FlushableHashAggregateExecTransformer (22) + +- ^ ProjectExecTransformer (21) + +- ^ RegularHashAggregateExecTransformer (20) + +- ^ ProjectExecTransformer (19) + +- ^ ShuffledHashJoinExecTransformer LeftOuter BuildLeft (18) + :- ^ InputIteratorTransformer (8) + : +- ShuffleQueryStage (6), Statistics(X) + : +- ColumnarExchange (5) + : +- VeloxResizeBatches (4) + : +- ^ ProjectExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (17) + +- ShuffleQueryStage (15), Statistics(X) + +- ColumnarExchange (14) + +- VeloxResizeBatches (13) + +- ^ ProjectExecTransformer (11) + +- ^ FilterExecTransformer (10) + +- ^ Scan parquet (9) +- == Initial Plan == - Sort (57) - +- Exchange (56) - +- HashAggregate (55) - +- Exchange (54) - +- HashAggregate (53) - +- HashAggregate (52) - +- HashAggregate (51) - +- Project (50) - +- SortMergeJoin LeftOuter (49) - :- Sort (43) - : +- Exchange (42) - : +- Scan parquet (41) - +- Sort (48) - +- Exchange (47) - +- Project (46) - +- Filter (45) - +- Scan parquet (44) + Sort (56) + +- Exchange (55) + +- HashAggregate (54) + +- Exchange (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin LeftOuter (48) + :- Sort (42) + : +- Exchange (41) + : +- Scan parquet (40) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -134,181 +133,174 @@ Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] (20) RegularHashAggregateExecTransformer Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] -Functions [1]: [partial_count(o_orderkey#X)] -Aggregate Attributes [1]: [count#X] -Results [2]: [c_custkey#X, count#X] - -(21) RegularHashAggregateExecTransformer -Input [2]: [c_custkey#X, count#X] -Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [2]: [c_custkey#X, count(o_orderkey#X)#X] -(22) ProjectExecTransformer +(21) ProjectExecTransformer 
Output [1]: [count(o_orderkey#X)#X AS c_count#X] Input [2]: [c_custkey#X, count(o_orderkey#X)#X] -(23) FlushableHashAggregateExecTransformer +(22) FlushableHashAggregateExecTransformer Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(24) ProjectExecTransformer +(23) ProjectExecTransformer Output [3]: [hash(c_count#X, 42) AS hash_partition_key#X, c_count#X, count#X] Input [2]: [c_count#X, count#X] -(25) WholeStageCodegenTransformer (X) +(24) WholeStageCodegenTransformer (X) Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: false -(26) VeloxResizeBatches +(25) VeloxResizeBatches Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: X, X -(27) ColumnarExchange +(26) ColumnarExchange Input [3]: [hash_partition_key#X, c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [c_count#X, count#X], [plan_id=X], [shuffle_writer_type=hash] -(28) ShuffleQueryStage +(27) ShuffleQueryStage Output [2]: [c_count#X, count#X] Arguments: X -(29) InputAdapter +(28) InputAdapter Input [2]: [c_count#X, count#X] -(30) InputIteratorTransformer +(29) InputIteratorTransformer Input [2]: [c_count#X, count#X] -(31) RegularHashAggregateExecTransformer +(30) RegularHashAggregateExecTransformer Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(32) WholeStageCodegenTransformer (X) +(31) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(33) VeloxResizeBatches +(32) VeloxResizeBatches Input [2]: [c_count#X, custdist#X] Arguments: X, X -(34) ColumnarExchange +(33) ColumnarExchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(35) ShuffleQueryStage +(34) 
ShuffleQueryStage Output [2]: [c_count#X, custdist#X] Arguments: X -(36) InputAdapter +(35) InputAdapter Input [2]: [c_count#X, custdist#X] -(37) InputIteratorTransformer +(36) InputIteratorTransformer Input [2]: [c_count#X, custdist#X] -(38) SortExecTransformer +(37) SortExecTransformer Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [2]: [c_count#X, custdist#X] Arguments: false -(40) VeloxColumnarToRow +(39) VeloxColumnarToRow Input [2]: [c_count#X, custdist#X] -(41) Scan parquet +(40) Scan parquet Output [1]: [c_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(41) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Sort +(42) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(44) Scan parquet +(43) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(46) Project +(45) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(47) Exchange +(46) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [2]: [o_orderkey#X, o_custkey#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(49) SortMergeJoin +(48) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftOuter Join condition: None -(50) Project +(49) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, 
o_orderkey#X, o_custkey#X] -(51) HashAggregate +(50) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(52) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(53) HashAggregate +(52) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(54) Exchange +(53) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) HashAggregate +(54) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(56) Exchange +(55) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) Sort +(56) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(58) AdaptiveSparkPlan +(57) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt index cc6d364dea22..775a1e59edb1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt @@ -1,40 +1,39 @@ == Physical Plan == -AdaptiveSparkPlan (39) +AdaptiveSparkPlan (38) +- == Final Plan == - VeloxColumnarToRow (25) - +- ^ ProjectExecTransformer (23) - 
+- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ Scan parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ Scan parquet (10) + VeloxColumnarToRow (24) + +- ^ ProjectExecTransformer (22) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (38) - +- HashAggregate (37) - +- Project (36) - +- SortMergeJoin Inner (35) - :- Sort (30) - : +- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Sort (34) - +- Exchange (33) - +- Filter (32) - +- Scan parquet (31) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -124,94 +123,87 @@ Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] 
(21) RegularHashAggregateExecTransformer Input [5]: [l_extendedprice#X, l_discount#X, p_type#X, _pre_X#X, _pre_X#X] Keys: [] -Functions [2]: [partial_sum(_pre_X#X), partial_sum(_pre_X#X)] -Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -Keys: [] -Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] +Functions [2]: [sum(_pre_X#X), sum(_pre_X#X)] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(23) ProjectExecTransformer +(22) ProjectExecTransformer Output [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] Input [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(24) WholeStageCodegenTransformer (X) +(23) WholeStageCodegenTransformer (X) Input [1]: [promo_revenue#X] Arguments: false -(25) VeloxColumnarToRow +(24) VeloxColumnarToRow Input [1]: [promo_revenue#X] -(26) Scan parquet +(25) Scan parquet Output [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-09-01), LessThan(l_shipdate,1995-10-01), IsNotNull(l_partkey)] ReadSchema: struct -(27) Filter +(26) Filter Input [4]: [l_partkey#X, l_extendedprice#X, 
l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-09-01)) AND (l_shipdate#X < 1995-10-01)) AND isnotnull(l_partkey#X)) -(28) Project +(27) Project Output [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(29) Exchange +(28) Exchange Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Sort +(29) Sort Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(31) Scan parquet +(30) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(32) Filter +(31) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(33) Exchange +(32) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Sort +(33) Sort Input [2]: [p_partkey#X, p_type#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(35) SortMergeJoin +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(36) Project +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(37) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(38) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, 
PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] -(39) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt index 8f1a1b4415db..8eb6b088430d 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt @@ -259,31 +259,30 @@ Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (73) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRow (64) - +- ^ RegularHashAggregateExecTransformer (62) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ ProjectExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ InputIteratorTransformer (58) - +- ShuffleQueryStage (56), Statistics(X) - +- ColumnarExchange (55) - +- VeloxResizeBatches (54) - +- ^ ProjectExecTransformer (52) - +- ^ FlushableHashAggregateExecTransformer (51) - +- ^ ProjectExecTransformer (50) - +- ^ FilterExecTransformer (49) - +- ^ Scan parquet (48) + VeloxColumnarToRow (63) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange 
(55) + +- VeloxResizeBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ FilterExecTransformer (49) + +- ^ Scan parquet (48) +- == Initial Plan == - HashAggregate (72) - +- HashAggregate (71) - +- HashAggregate (70) - +- Exchange (69) - +- HashAggregate (68) - +- Project (67) - +- Filter (66) - +- Scan parquet (65) + HashAggregate (71) + +- HashAggregate (70) + +- HashAggregate (69) + +- Exchange (68) + +- HashAggregate (67) + +- Project (66) + +- Filter (65) + +- Scan parquet (64) (48) Scan parquet @@ -348,71 +347,64 @@ Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] (61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] -Functions [1]: [partial_max(total_revenue#X)] -Aggregate Attributes [1]: [max#X] -Results [1]: [max#X] - -(62) RegularHashAggregateExecTransformer -Input [1]: [max#X] -Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(64) VeloxColumnarToRow +(63) VeloxColumnarToRow Input [1]: [max(total_revenue)#X] -(65) Scan parquet +(64) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(66) Filter +(65) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(67) Project +(66) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(68) HashAggregate +(67) 
HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(69) Exchange +(68) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) HashAggregate +(69) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(71) HashAggregate +(70) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(72) HashAggregate +(71) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(73) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt index eaad0429ab40..3fa5fb0ee294 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt @@ -1,62 +1,61 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (62) +- == Final Plan == - VeloxColumnarToRow (40) - +- ^ ProjectExecTransformer (38) - +- ^ RegularHashAggregateExecTransformer (37) - +- ^ RegularHashAggregateExecTransformer (36) - +- ^ ProjectExecTransformer (35) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) - :- ^ 
ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ Scan parquet (10) - +- ^ FilterExecTransformer (33) - +- ^ ProjectExecTransformer (32) - +- ^ RegularHashAggregateExecTransformer (31) - +- ^ InputIteratorTransformer (30) - +- ShuffleQueryStage (28), Statistics(X) - +- ColumnarExchange (27) - +- VeloxResizeBatches (26) - +- ^ ProjectExecTransformer (24) - +- ^ FlushableHashAggregateExecTransformer (23) - +- ^ FilterExecTransformer (22) - +- ^ Scan parquet (21) + VeloxColumnarToRow (39) + +- ^ ProjectExecTransformer (37) + +- ^ RegularHashAggregateExecTransformer (36) + +- ^ ProjectExecTransformer (35) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (34) + :- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ Scan parquet (10) + +- ^ FilterExecTransformer (33) + +- ^ ProjectExecTransformer (32) + +- ^ RegularHashAggregateExecTransformer (31) + +- ^ InputIteratorTransformer (30) + +- ShuffleQueryStage (28), Statistics(X) + +- ColumnarExchange 
(27) + +- VeloxResizeBatches (26) + +- ^ ProjectExecTransformer (24) + +- ^ FlushableHashAggregateExecTransformer (23) + +- ^ FilterExecTransformer (22) + +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (62) - +- HashAggregate (61) - +- Project (60) - +- SortMergeJoin Inner (59) - :- Project (51) - : +- SortMergeJoin Inner (50) - : :- Sort (44) - : : +- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Sort (49) - : +- Exchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Sort (58) - +- Filter (57) - +- HashAggregate (56) - +- Exchange (55) - +- HashAggregate (54) - +- Filter (53) - +- Scan parquet (52) + HashAggregate (61) + +- HashAggregate (60) + +- Project (59) + +- SortMergeJoin Inner (58) + :- Project (50) + : +- SortMergeJoin Inner (49) + : :- Sort (43) + : : +- Exchange (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Project (46) + : +- Filter (45) + : +- Scan parquet (44) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) Scan parquet @@ -215,141 +214,134 @@ Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity) (36) RegularHashAggregateExecTransformer Input [1]: [l_extendedprice#X] Keys: [] -Functions [1]: [partial_sum(l_extendedprice#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(37) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [sum(l_extendedprice#X)#X] -(38) ProjectExecTransformer +(37) ProjectExecTransformer Output [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] Input [1]: [sum(l_extendedprice#X)#X] -(39) WholeStageCodegenTransformer (X) +(38) WholeStageCodegenTransformer (X) Input [1]: [avg_yearly#X] Arguments: false -(40) VeloxColumnarToRow 
+(39) VeloxColumnarToRow Input [1]: [avg_yearly#X] -(41) Scan parquet +(40) Scan parquet Output [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_quantity)] ReadSchema: struct -(42) Filter +(41) Filter Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) -(43) Exchange +(42) Exchange Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(43) Sort Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(45) Scan parquet +(44) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(46) Filter +(45) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(47) Project +(46) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(48) Exchange +(47) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(48) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(50) SortMergeJoin +(49) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(51) Project +(50) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(52) Scan parquet +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] 
PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(53) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(54) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(55) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(57) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(58) Sort +(57) Sort Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(59) SortMergeJoin +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(60) Project +(59) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(61) HashAggregate +(60) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(62) HashAggregate +(61) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] -(63) AdaptiveSparkPlan +(62) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff 
--git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt index 85b11fa8ca18..7a4a6641b8c8 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt @@ -1,102 +1,101 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (109) +- == Final Plan == - VeloxColumnarToRow (70) - +- TakeOrderedAndProjectExecTransformer (69) - +- ^ RegularHashAggregateExecTransformer (67) - +- ^ RegularHashAggregateExecTransformer (66) - +- ^ ProjectExecTransformer (65) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) - :- ^ InputIteratorTransformer (46) - : +- ShuffleQueryStage (44), Statistics(X) - : +- ColumnarExchange (43) - : +- VeloxResizeBatches (42) - : +- ^ ProjectExecTransformer (40) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : +- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (38) - : +- ShuffleQueryStage (36), Statistics(X) - : +- ColumnarExchange (35) - : +- VeloxResizeBatches (34) - : +- ^ ProjectExecTransformer (32) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) - : :- ^ InputIteratorTransformer (18) - : : +- ShuffleQueryStage (16), Statistics(X) - : : +- ColumnarExchange (15) - : : +- VeloxResizeBatches (14) - : : +- ^ ProjectExecTransformer (12) - : : +- ^ FilterExecTransformer (11) - : : +- ^ Scan parquet (10) - : +- ^ ProjectExecTransformer (30) - : +- ^ FilterExecTransformer (29) - : +- ^ RegularHashAggregateExecTransformer (28) - : +- ^ InputIteratorTransformer (27) - : +- ShuffleQueryStage (25), Statistics(X) - : +- ColumnarExchange (24) - : +- VeloxResizeBatches (23) - : +- ^ 
ProjectExecTransformer (21) - : +- ^ FlushableHashAggregateExecTransformer (20) - : +- ^ Scan parquet (19) - +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) - :- ^ InputIteratorTransformer (55) - : +- ShuffleQueryStage (53), Statistics(X) - : +- ColumnarExchange (52) - : +- VeloxResizeBatches (51) - : +- ^ ProjectExecTransformer (49) - : +- ^ FilterExecTransformer (48) - : +- ^ Scan parquet (47) - +- ^ ProjectExecTransformer (62) - +- ^ FilterExecTransformer (61) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ InputIteratorTransformer (59) - +- ShuffleQueryStage (57), Statistics(X) - +- ReusedExchange (56) + VeloxColumnarToRow (69) + +- TakeOrderedAndProjectExecTransformer (68) + +- ^ RegularHashAggregateExecTransformer (66) + +- ^ ProjectExecTransformer (65) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (64) + :- ^ InputIteratorTransformer (46) + : +- ShuffleQueryStage (44), Statistics(X) + : +- ColumnarExchange (43) + : +- VeloxResizeBatches (42) + : +- ^ ProjectExecTransformer (40) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (39) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (38) + : +- ShuffleQueryStage (36), Statistics(X) + : +- ColumnarExchange (35) + : +- VeloxResizeBatches (34) + : +- ^ ProjectExecTransformer (32) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (31) + : :- ^ InputIteratorTransformer (18) + : : +- ShuffleQueryStage (16), Statistics(X) + : : +- ColumnarExchange (15) + : : +- VeloxResizeBatches (14) + : : +- ^ ProjectExecTransformer (12) + : : +- ^ FilterExecTransformer (11) + : : +- ^ Scan parquet (10) + : +- ^ ProjectExecTransformer (30) + : +- ^ FilterExecTransformer (29) + : +- ^ RegularHashAggregateExecTransformer (28) + : +- ^ 
InputIteratorTransformer (27) + : +- ShuffleQueryStage (25), Statistics(X) + : +- ColumnarExchange (24) + : +- VeloxResizeBatches (23) + : +- ^ ProjectExecTransformer (21) + : +- ^ FlushableHashAggregateExecTransformer (20) + : +- ^ Scan parquet (19) + +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (63) + :- ^ InputIteratorTransformer (55) + : +- ShuffleQueryStage (53), Statistics(X) + : +- ColumnarExchange (52) + : +- VeloxResizeBatches (51) + : +- ^ ProjectExecTransformer (49) + : +- ^ FilterExecTransformer (48) + : +- ^ Scan parquet (47) + +- ^ ProjectExecTransformer (62) + +- ^ FilterExecTransformer (61) + +- ^ RegularHashAggregateExecTransformer (60) + +- ^ InputIteratorTransformer (59) + +- ShuffleQueryStage (57), Statistics(X) + +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (109) - +- HashAggregate (108) - +- HashAggregate (107) - +- Project (106) - +- SortMergeJoin Inner (105) - :- Sort (92) - : +- Exchange (91) - : +- Project (90) - : +- SortMergeJoin Inner (89) - : :- Sort (74) - : : +- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Sort (88) - : +- Exchange (87) - : +- SortMergeJoin LeftSemi (86) - : :- Sort (78) - : : +- Exchange (77) - : : +- Filter (76) - : : +- Scan parquet (75) - : +- Sort (85) - : +- Project (84) - : +- Filter (83) - : +- HashAggregate (82) - : +- Exchange (81) - : +- HashAggregate (80) - : +- Scan parquet (79) - +- SortMergeJoin LeftSemi (104) - :- Sort (96) - : +- Exchange (95) - : +- Filter (94) - : +- Scan parquet (93) - +- Sort (103) - +- Project (102) - +- Filter (101) - +- HashAggregate (100) - +- Exchange (99) - +- HashAggregate (98) - +- Scan parquet (97) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- SortMergeJoin Inner (104) + :- Sort (91) + : +- Exchange (90) + : +- Project (89) + : +- SortMergeJoin Inner (88) + : :- Sort (73) + : : +- Exchange (72) + : : +- Filter (71) + : : +- Scan parquet (70) + : +- Sort 
(87) + : +- Exchange (86) + : +- SortMergeJoin LeftSemi (85) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (84) + : +- Project (83) + : +- Filter (82) + : +- HashAggregate (81) + : +- Exchange (80) + : +- HashAggregate (79) + : +- Scan parquet (78) + +- SortMergeJoin LeftSemi (103) + :- Sort (95) + : +- Exchange (94) + : +- Filter (93) + : +- Scan parquet (92) + +- Sort (102) + +- Project (101) + +- Filter (100) + +- HashAggregate (99) + +- Exchange (98) + +- HashAggregate (97) + +- Scan parquet (96) (1) Scan parquet @@ -375,223 +374,216 @@ Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, (66) RegularHashAggregateExecTransformer Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] - -(67) RegularHashAggregateExecTransformer -Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(68) WholeStageCodegenTransformer (X) +(67) WholeStageCodegenTransformer (X) Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: false -(69) TakeOrderedAndProjectExecTransformer +(68) TakeOrderedAndProjectExecTransformer Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, 
o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X], 0 -(70) VeloxColumnarToRow +(69) VeloxColumnarToRow Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(71) Scan parquet +(70) Scan parquet Output [2]: [c_custkey#X, c_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey)] ReadSchema: struct -(72) Filter +(71) Filter Input [2]: [c_custkey#X, c_name#X] Condition : isnotnull(c_custkey#X) -(73) Exchange +(72) Exchange Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Sort +(73) Sort Input [2]: [c_custkey#X, c_name#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(75) Scan parquet +(74) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(76) Filter +(75) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(77) Exchange +(76) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Sort +(77) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(79) Scan parquet +(78) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(80) HashAggregate +(79) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(81) Exchange +(80) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] 
-(82) HashAggregate +(81) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(83) Filter +(82) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(84) Project +(83) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(85) Sort +(84) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(86) SortMergeJoin +(85) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(87) Exchange +(86) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Sort +(87) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(89) SortMergeJoin +(88) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(90) Project +(89) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(91) Exchange +(90) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) Sort +(91) Sort Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(93) Scan parquet +(92) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(94) Filter +(93) Filter Input [2]: [l_orderkey#X, 
l_quantity#X] Condition : isnotnull(l_orderkey#X) -(95) Exchange +(94) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Sort +(95) Sort Input [2]: [l_orderkey#X, l_quantity#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(97) Scan parquet +(96) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(98) HashAggregate +(97) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(99) Exchange +(98) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) HashAggregate +(99) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(101) Filter +(100) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(102) Project +(101) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(103) Sort +(102) Sort Input [1]: [l_orderkey#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(104) SortMergeJoin +(103) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(105) SortMergeJoin +(104) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(106) Project +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(107) 
HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(108) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(109) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(110) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt index c72ca859ce37..dfa8bbb74a15 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt @@ -1,39 +1,38 @@ == Physical Plan == -AdaptiveSparkPlan (38) +AdaptiveSparkPlan (37) +- == Final Plan == - VeloxColumnarToRow (24) - +- ^ RegularHashAggregateExecTransformer (22) - +- ^ RegularHashAggregateExecTransformer (21) - +- ^ ProjectExecTransformer (20) - +- ^ 
ShuffledHashJoinExecTransformer Inner BuildRight (19) - :- ^ InputIteratorTransformer (9) - : +- ShuffleQueryStage (7), Statistics(X) - : +- ColumnarExchange (6) - : +- VeloxResizeBatches (5) - : +- ^ ProjectExecTransformer (3) - : +- ^ FilterExecTransformer (2) - : +- ^ Scan parquet (1) - +- ^ InputIteratorTransformer (18) - +- ShuffleQueryStage (16), Statistics(X) - +- ColumnarExchange (15) - +- VeloxResizeBatches (14) - +- ^ ProjectExecTransformer (12) - +- ^ FilterExecTransformer (11) - +- ^ Scan parquet (10) + VeloxColumnarToRow (23) + +- ^ RegularHashAggregateExecTransformer (21) + +- ^ ProjectExecTransformer (20) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (19) + :- ^ InputIteratorTransformer (9) + : +- ShuffleQueryStage (7), Statistics(X) + : +- ColumnarExchange (6) + : +- VeloxResizeBatches (5) + : +- ^ ProjectExecTransformer (3) + : +- ^ FilterExecTransformer (2) + : +- ^ Scan parquet (1) + +- ^ InputIteratorTransformer (18) + +- ShuffleQueryStage (16), Statistics(X) + +- ColumnarExchange (15) + +- VeloxResizeBatches (14) + +- ^ ProjectExecTransformer (12) + +- ^ FilterExecTransformer (11) + +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (37) - +- HashAggregate (36) - +- Project (35) - +- SortMergeJoin Inner (34) - :- Sort (29) - : +- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Sort (33) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) Scan parquet @@ -123,90 +122,83 @@ Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partke (21) RegularHashAggregateExecTransformer Input [3]: [l_extendedprice#X, l_discount#X, _pre_X#X] Keys: [] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes 
[2]: [sum#X, isEmpty#X] -Results [2]: [sum#X, isEmpty#X] - -(22) RegularHashAggregateExecTransformer -Input [2]: [sum#X, isEmpty#X] -Keys: [] -Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(23) WholeStageCodegenTransformer (X) +(22) WholeStageCodegenTransformer (X) Input [1]: [revenue#X] Arguments: false -(24) VeloxColumnarToRow +(23) VeloxColumnarToRow Input [1]: [revenue#X] -(25) Scan parquet +(24) Scan parquet Output [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipinstruct), In(l_shipmode, [AIR,AIR REG]), EqualTo(l_shipinstruct,DELIVER IN PERSON), IsNotNull(l_partkey), Or(Or(And(GreaterThanOrEqual(l_quantity,1.00),LessThanOrEqual(l_quantity,11.00)),And(GreaterThanOrEqual(l_quantity,10.00),LessThanOrEqual(l_quantity,20.00))),And(GreaterThanOrEqual(l_quantity,20.00),LessThanOrEqual(l_quantity,30.00)))] ReadSchema: struct -(26) Filter +(25) Filter Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] Condition : ((((isnotnull(l_shipinstruct#X) AND l_shipmode#X IN (AIR,AIR REG)) AND (l_shipinstruct#X = DELIVER IN PERSON)) AND isnotnull(l_partkey#X)) AND ((((l_quantity#X >= 1.00) AND (l_quantity#X <= 11.00)) OR ((l_quantity#X >= 10.00) AND (l_quantity#X <= 20.00))) OR ((l_quantity#X >= 20.00) AND (l_quantity#X <= 30.00)))) -(27) Project +(26) Project Output [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipinstruct#X, l_shipmode#X] -(28) Exchange +(27) Exchange Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, 
[plan_id=X] -(29) Sort +(28) Sort Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(30) Scan parquet +(29) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(31) Filter +(30) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(32) Exchange +(31) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) Sort +(32) Sort Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(34) SortMergeJoin +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 
10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(35) Project +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(36) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(37) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(38) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt index ed09215178a4..87270108a2d8 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt @@ -1,19 +1,19 @@ == Physical Plan == -AdaptiveSparkPlan (143) +AdaptiveSparkPlan (142) +- == Final Plan == - VeloxColumnarToRow (93) - +- AQEShuffleRead (92) - +- ShuffleQueryStage (91), Statistics(X) - +- ColumnarExchange (90) - +- VeloxResizeBatches (89) - +- ^ ProjectExecTransformer (87) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (86) - :- ^ InputIteratorTransformer (76) - : +- ShuffleQueryStage (74), Statistics(X) - : +- ColumnarExchange (73) - : +- VeloxResizeBatches (72) - : +- ^ ProjectExecTransformer (70) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (69) + VeloxColumnarToRow 
(92) + +- AQEShuffleRead (91) + +- ShuffleQueryStage (90), Statistics(X) + +- ColumnarExchange (89) + +- VeloxResizeBatches (88) + +- ^ ProjectExecTransformer (86) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) + :- ^ InputIteratorTransformer (75) + : +- ShuffleQueryStage (73), Statistics(X) + : +- ColumnarExchange (72) + : +- VeloxResizeBatches (71) + : +- ^ ProjectExecTransformer (69) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (68) : :- ^ InputIteratorTransformer (9) : : +- ShuffleQueryStage (7), Statistics(X) : : +- ColumnarExchange (6) @@ -21,12 +21,12 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (3) : : +- ^ FilterExecTransformer (2) : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (68) - : +- ShuffleQueryStage (66), Statistics(X) - : +- ColumnarExchange (65) - : +- VeloxResizeBatches (64) - : +- ^ ProjectExecTransformer (62) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (61) + : +- ^ InputIteratorTransformer (67) + : +- ShuffleQueryStage (65), Statistics(X) + : +- ColumnarExchange (64) + : +- VeloxResizeBatches (63) + : +- ^ ProjectExecTransformer (61) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (60) : :- ^ InputIteratorTransformer (35) : : +- ShuffleQueryStage (33), Statistics(X) : : +- ColumnarExchange (32) @@ -47,83 +47,82 @@ AdaptiveSparkPlan (143) : : +- ^ ProjectExecTransformer (21) : : +- ^ FilterExecTransformer (20) : : +- ^ Scan parquet (19) - : +- ^ InputIteratorTransformer (60) - : +- ShuffleQueryStage (58), Statistics(X) - : +- ColumnarExchange (57) - : +- VeloxResizeBatches (56) - : +- ^ ProjectExecTransformer (54) - : +- ^ FilterExecTransformer (53) - : +- ^ ProjectExecTransformer (52) - : +- ^ RegularHashAggregateExecTransformer (51) - : +- ^ RegularHashAggregateExecTransformer (50) - : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) - : :- ^ InputIteratorTransformer (44) - : : +- ShuffleQueryStage (42), Statistics(X) - : : +- ColumnarExchange (41) - 
: : +- VeloxResizeBatches (40) - : : +- ^ ProjectExecTransformer (38) - : : +- ^ FilterExecTransformer (37) - : : +- ^ Scan parquet (36) - : +- ^ InputIteratorTransformer (48) - : +- ShuffleQueryStage (46), Statistics(X) - : +- ReusedExchange (45) - +- ^ InputIteratorTransformer (85) - +- ShuffleQueryStage (83), Statistics(X) - +- ColumnarExchange (82) - +- VeloxResizeBatches (81) - +- ^ ProjectExecTransformer (79) - +- ^ FilterExecTransformer (78) - +- ^ Scan parquet (77) + : +- ^ InputIteratorTransformer (59) + : +- ShuffleQueryStage (57), Statistics(X) + : +- ColumnarExchange (56) + : +- VeloxResizeBatches (55) + : +- ^ ProjectExecTransformer (53) + : +- ^ FilterExecTransformer (52) + : +- ^ ProjectExecTransformer (51) + : +- ^ RegularHashAggregateExecTransformer (50) + : +- ^ ShuffledHashJoinExecTransformer LeftSemi BuildRight (49) + : :- ^ InputIteratorTransformer (44) + : : +- ShuffleQueryStage (42), Statistics(X) + : : +- ColumnarExchange (41) + : : +- VeloxResizeBatches (40) + : : +- ^ ProjectExecTransformer (38) + : : +- ^ FilterExecTransformer (37) + : : +- ^ Scan parquet (36) + : +- ^ InputIteratorTransformer (48) + : +- ShuffleQueryStage (46), Statistics(X) + : +- ReusedExchange (45) + +- ^ InputIteratorTransformer (84) + +- ShuffleQueryStage (82), Statistics(X) + +- ColumnarExchange (81) + +- VeloxResizeBatches (80) + +- ^ ProjectExecTransformer (78) + +- ^ FilterExecTransformer (77) + +- ^ Scan parquet (76) +- == Initial Plan == - Sort (142) - +- Exchange (141) - +- Project (140) - +- SortMergeJoin Inner (139) - :- Sort (133) - : +- Exchange (132) - : +- Project (131) - : +- SortMergeJoin LeftSemi (130) - : :- Sort (97) - : : +- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Sort (129) - : +- Exchange (128) - : +- Project (127) - : +- SortMergeJoin Inner (126) - : :- Sort (109) - : : +- Exchange (108) - : : +- SortMergeJoin LeftSemi (107) - : : :- Sort (101) - : : : +- Exchange (100) - : : : +- Filter (99) - : : : +- Scan parquet 
(98) - : : +- Sort (106) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Sort (125) - : +- Exchange (124) - : +- Filter (123) - : +- HashAggregate (122) - : +- HashAggregate (121) - : +- SortMergeJoin LeftSemi (120) - : :- Sort (114) - : : +- Exchange (113) - : : +- Project (112) - : : +- Filter (111) - : : +- Scan parquet (110) - : +- Sort (119) - : +- Exchange (118) - : +- Project (117) - : +- Filter (116) - : +- Scan parquet (115) - +- Sort (138) - +- Exchange (137) - +- Project (136) - +- Filter (135) - +- Scan parquet (134) + Sort (141) + +- Exchange (140) + +- Project (139) + +- SortMergeJoin Inner (138) + :- Sort (132) + : +- Exchange (131) + : +- Project (130) + : +- SortMergeJoin LeftSemi (129) + : :- Sort (96) + : : +- Exchange (95) + : : +- Filter (94) + : : +- Scan parquet (93) + : +- Sort (128) + : +- Exchange (127) + : +- Project (126) + : +- SortMergeJoin Inner (125) + : :- Sort (108) + : : +- Exchange (107) + : : +- SortMergeJoin LeftSemi (106) + : : :- Sort (100) + : : : +- Exchange (99) + : : : +- Filter (98) + : : : +- Scan parquet (97) + : : +- Sort (105) + : : +- Exchange (104) + : : +- Project (103) + : : +- Filter (102) + : : +- Scan parquet (101) + : +- Sort (124) + : +- Exchange (123) + : +- Filter (122) + : +- HashAggregate (121) + : +- HashAggregate (120) + : +- SortMergeJoin LeftSemi (119) + : :- Sort (113) + : : +- Exchange (112) + : : +- Project (111) + : : +- Filter (110) + : : +- Scan parquet (109) + : +- Sort (118) + : +- Exchange (117) + : +- Project (116) + : +- Filter (115) + : +- Scan parquet (114) + +- Sort (137) + +- Exchange (136) + +- Project (135) + +- Filter (134) + +- Scan parquet (133) (1) Scan parquet @@ -328,415 +327,408 @@ Join condition: None (50) RegularHashAggregateExecTransformer Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] -Functions [1]: [partial_sum(l_quantity#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] 
-Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] - -(51) RegularHashAggregateExecTransformer -Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(52) ProjectExecTransformer +(51) ProjectExecTransformer Output [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [l_partkey#X, l_suppkey#X, sum(l_quantity#X)#X] -(53) FilterExecTransformer +(52) FilterExecTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: isnotnull((0.5 * sum(l_quantity))#X) -(54) ProjectExecTransformer +(53) ProjectExecTransformer Output [4]: [hash(l_partkey#X, l_suppkey#X, 42) AS hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(55) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: false -(56) VeloxResizeBatches +(55) VeloxResizeBatches Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X, X -(57) ColumnarExchange +(56) ColumnarExchange Input [4]: [hash_partition_key#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(58) ShuffleQueryStage +(57) ShuffleQueryStage Output [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: X -(59) InputAdapter +(58) InputAdapter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(60) InputIteratorTransformer +(59) InputIteratorTransformer Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(61) 
ShuffledHashJoinExecTransformer +(60) ShuffledHashJoinExecTransformer Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(62) ProjectExecTransformer +(61) ProjectExecTransformer Output [2]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(63) WholeStageCodegenTransformer (X) +(62) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: false -(64) VeloxResizeBatches +(63) VeloxResizeBatches Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: X, X -(65) ColumnarExchange +(64) ColumnarExchange Input [2]: [hash_partition_key#X, ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X], [plan_id=X], [shuffle_writer_type=hash] -(66) ShuffleQueryStage +(65) ShuffleQueryStage Output [1]: [ps_suppkey#X] Arguments: X -(67) InputAdapter +(66) InputAdapter Input [1]: [ps_suppkey#X] -(68) InputIteratorTransformer +(67) InputIteratorTransformer Input [1]: [ps_suppkey#X] -(69) ShuffledHashJoinExecTransformer +(68) ShuffledHashJoinExecTransformer Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(70) ProjectExecTransformer +(69) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(71) WholeStageCodegenTransformer (X) +(70) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: false -(72) VeloxResizeBatches +(71) VeloxResizeBatches Input [4]: [hash_partition_key#X, s_name#X, s_address#X, s_nationkey#X] Arguments: X, X -(73) ColumnarExchange +(72) ColumnarExchange Input [4]: [hash_partition_key#X, 
s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [s_name#X, s_address#X, s_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(74) ShuffleQueryStage +(73) ShuffleQueryStage Output [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: X -(75) InputAdapter +(74) InputAdapter Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(76) InputIteratorTransformer +(75) InputIteratorTransformer Input [3]: [s_name#X, s_address#X, s_nationkey#X] -(77) Scan parquet +(76) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(78) FilterExecTransformer +(77) FilterExecTransformer Input [2]: [n_nationkey#X, n_name#X] Arguments: ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(79) ProjectExecTransformer +(78) ProjectExecTransformer Output [2]: [hash(n_nationkey#X, 42) AS hash_partition_key#X, n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(80) WholeStageCodegenTransformer (X) +(79) WholeStageCodegenTransformer (X) Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: false -(81) VeloxResizeBatches +(80) VeloxResizeBatches Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: X, X -(82) ColumnarExchange +(81) ColumnarExchange Input [2]: [hash_partition_key#X, n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [n_nationkey#X], [plan_id=X], [shuffle_writer_type=hash] -(83) ShuffleQueryStage +(82) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(84) InputAdapter +(83) InputAdapter Input [1]: [n_nationkey#X] -(85) InputIteratorTransformer +(84) InputIteratorTransformer Input [1]: [n_nationkey#X] -(86) ShuffledHashJoinExecTransformer +(85) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(87) 
ProjectExecTransformer +(86) ProjectExecTransformer Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(88) WholeStageCodegenTransformer (X) +(87) WholeStageCodegenTransformer (X) Input [2]: [s_name#X, s_address#X] Arguments: false -(89) VeloxResizeBatches +(88) VeloxResizeBatches Input [2]: [s_name#X, s_address#X] Arguments: X, X -(90) ColumnarExchange +(89) ColumnarExchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X], [shuffle_writer_type=hash] -(91) ShuffleQueryStage +(90) ShuffleQueryStage Output [2]: [s_name#X, s_address#X] Arguments: X -(92) AQEShuffleRead +(91) AQEShuffleRead Input [2]: [s_name#X, s_address#X] Arguments: local -(93) VeloxColumnarToRow +(92) VeloxColumnarToRow Input [2]: [s_name#X, s_address#X] -(94) Scan parquet +(93) Scan parquet Output [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_nationkey)] ReadSchema: struct -(95) Filter +(94) Filter Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Condition : isnotnull(s_nationkey#X) -(96) Exchange +(95) Exchange Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Sort +(96) Sort Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 -(98) Scan parquet +(97) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(99) Filter +(98) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(100) Exchange +(99) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, 
ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) Sort +(100) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 -(102) Scan parquet +(101) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(103) Filter +(102) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(104) Project +(103) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(105) Exchange +(104) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(105) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(107) SortMergeJoin +(106) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(108) Exchange +(107) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(108) Sort Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 -(110) Scan parquet +(109) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(111) Filter +(110) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(112) Project +(111) Project Output 
[3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(113) Exchange +(112) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Sort +(113) Sort Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 -(115) Scan parquet +(114) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(116) Filter +(115) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(117) Project +(116) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(118) Exchange +(117) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) Sort +(118) Sort Input [1]: [p_partkey#X] Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 -(120) SortMergeJoin +(119) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(121) HashAggregate +(120) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(122) HashAggregate +(121) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(123) Filter +(122) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(124) Exchange +(123) Exchange Input [3]: [(0.5 * 
sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Sort +(124) Sort Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 -(126) SortMergeJoin +(125) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(127) Project +(126) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(128) Exchange +(127) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Sort +(128) Sort Input [1]: [ps_suppkey#X] Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 -(130) SortMergeJoin +(129) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(131) Project +(130) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(132) Exchange +(131) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) Sort +(132) Sort Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 -(134) Scan parquet +(133) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(135) Filter +(134) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(136) Project +(135) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(137) 
Exchange +(136) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(137) Sort Input [1]: [n_nationkey#X] Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 -(139) SortMergeJoin +(138) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(140) Project +(139) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(141) Exchange +(140) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) Sort +(141) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(143) AdaptiveSparkPlan +(142) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt index 8eea7581a417..49d47c2b6f14 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt @@ -1,64 +1,63 @@ == Physical Plan == -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (66) +- == Final Plan == - VeloxColumnarToRow (43) - +- TakeOrderedAndProjectExecTransformer (42) - +- ^ ProjectExecTransformer (40) - +- ^ RegularHashAggregateExecTransformer (39) - +- ^ RegularHashAggregateExecTransformer (38) - +- ^ ProjectExecTransformer (37) - +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) - :- ^ InputIteratorTransformer (26) - : +- ShuffleQueryStage (24), Statistics(X) - : +- ColumnarExchange (23) - : +- VeloxResizeBatches (22) - : +- ^ ProjectExecTransformer (20) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) - : :- ^ InputIteratorTransformer (9) - : : +- ShuffleQueryStage (7), Statistics(X) - : : 
+- ColumnarExchange (6) - : : +- VeloxResizeBatches (5) - : : +- ^ ProjectExecTransformer (3) - : : +- ^ FilterExecTransformer (2) - : : +- ^ Scan parquet (1) - : +- ^ InputIteratorTransformer (18) - : +- ShuffleQueryStage (16), Statistics(X) - : +- ColumnarExchange (15) - : +- VeloxResizeBatches (14) - : +- ^ ProjectExecTransformer (12) - : +- ^ FilterExecTransformer (11) - : +- ^ Scan parquet (10) - +- ^ InputIteratorTransformer (35) - +- ShuffleQueryStage (33), Statistics(X) - +- ColumnarExchange (32) - +- VeloxResizeBatches (31) - +- ^ ProjectExecTransformer (29) - +- ^ FilterExecTransformer (28) - +- ^ Scan parquet (27) + VeloxColumnarToRow (42) + +- TakeOrderedAndProjectExecTransformer (41) + +- ^ ProjectExecTransformer (39) + +- ^ RegularHashAggregateExecTransformer (38) + +- ^ ProjectExecTransformer (37) + +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (36) + :- ^ InputIteratorTransformer (26) + : +- ShuffleQueryStage (24), Statistics(X) + : +- ColumnarExchange (23) + : +- VeloxResizeBatches (22) + : +- ^ ProjectExecTransformer (20) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildLeft (19) + : :- ^ InputIteratorTransformer (9) + : : +- ShuffleQueryStage (7), Statistics(X) + : : +- ColumnarExchange (6) + : : +- VeloxResizeBatches (5) + : : +- ^ ProjectExecTransformer (3) + : : +- ^ FilterExecTransformer (2) + : : +- ^ Scan parquet (1) + : +- ^ InputIteratorTransformer (18) + : +- ShuffleQueryStage (16), Statistics(X) + : +- ColumnarExchange (15) + : +- VeloxResizeBatches (14) + : +- ^ ProjectExecTransformer (12) + : +- ^ FilterExecTransformer (11) + : +- ^ Scan parquet (10) + +- ^ InputIteratorTransformer (35) + +- ShuffleQueryStage (33), Statistics(X) + +- ColumnarExchange (32) + +- VeloxResizeBatches (31) + +- ^ ProjectExecTransformer (29) + +- ^ FilterExecTransformer (28) + +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Project (63) - +- SortMergeJoin Inner (62) - :- 
Sort (56) - : +- Exchange (55) - : +- Project (54) - : +- SortMergeJoin Inner (53) - : :- Sort (48) - : : +- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Sort (52) - : +- Exchange (51) - : +- Filter (50) - : +- Scan parquet (49) - +- Sort (61) - +- Exchange (60) - +- Project (59) - +- Filter (58) - +- Scan parquet (57) + TakeOrderedAndProject (65) + +- HashAggregate (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (55) + : +- Exchange (54) + : +- Project (53) + : +- SortMergeJoin Inner (52) + : :- Sort (47) + : : +- Exchange (46) + : : +- Project (45) + : : +- Filter (44) + : : +- Scan parquet (43) + : +- Sort (51) + : +- Exchange (50) + : +- Filter (49) + : +- Scan parquet (48) + +- Sort (60) + +- Exchange (59) + +- Project (58) + +- Filter (57) + +- Scan parquet (56) (1) Scan parquet @@ -217,143 +216,136 @@ Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_exten (38) RegularHashAggregateExecTransformer Input [6]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [partial_sum(_pre_X#X)] -Aggregate Attributes [2]: [sum#X, isEmpty#X] -Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] - -(39) RegularHashAggregateExecTransformer -Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] -Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] +Functions [1]: [sum(_pre_X#X)] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(40) ProjectExecTransformer +(39) ProjectExecTransformer Output [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] 
Input [4]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(41) WholeStageCodegenTransformer (X) +(40) WholeStageCodegenTransformer (X) Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: false -(42) TakeOrderedAndProjectExecTransformer +(41) TakeOrderedAndProjectExecTransformer Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X], 0 -(43) VeloxColumnarToRow +(42) VeloxColumnarToRow Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(44) Scan parquet +(43) Scan parquet Output [2]: [c_custkey#X, c_mktsegment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_mktsegment), EqualTo(c_mktsegment,BUILDING), IsNotNull(c_custkey)] ReadSchema: struct -(45) Filter +(44) Filter Input [2]: [c_custkey#X, c_mktsegment#X] Condition : ((isnotnull(c_mktsegment#X) AND (c_mktsegment#X = BUILDING)) AND isnotnull(c_custkey#X)) -(46) Project +(45) Project Output [1]: [c_custkey#X] Input [2]: [c_custkey#X, c_mktsegment#X] -(47) Exchange +(46) Exchange Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(47) Sort Input [1]: [c_custkey#X] Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 -(49) Scan parquet +(48) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(50) Filter +(49) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(51) Exchange +(50) Exchange Input [4]: [o_orderkey#X, 
o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(51) Sort Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 -(53) SortMergeJoin +(52) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(54) Project +(53) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(55) Exchange +(54) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Sort +(55) Sort Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 -(57) Scan parquet +(56) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(58) Filter +(57) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(59) Project +(58) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(60) Exchange +(59) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) Sort +(60) Sort Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 -(62) SortMergeJoin +(61) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(63) Project +(62) Project Output [5]: 
[o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(64) HashAggregate +(63) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(65) HashAggregate +(64) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(66) TakeOrderedAndProject +(65) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(67) AdaptiveSparkPlan +(66) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala index 84d1fe8db919..cf0bf272bc5b 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala @@ -49,6 +49,7 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu 
.set("spark.unsafe.exceptionOnMemoryLeak", "true") .set("spark.sql.autoBroadcastJoinThreshold", "-1") .set("spark.sql.sources.useV1SourceList", "avro") + .set("spark.gluten.sql.mergeTwoPhasesAggregate.enabled", "false") } test("count") { diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMergeTwoPhasesHashBaseAggregateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMergeTwoPhasesHashBaseAggregateSuite.scala new file mode 100644 index 000000000000..846995a9c333 --- /dev/null +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMergeTwoPhasesHashBaseAggregateSuite.scala @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.gluten.execution + +import org.apache.spark.SparkConf +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.internal.SQLConf + +abstract class BaseVeloxMergeTwoPhasesHashBaseAggregateSuite extends WholeStageTransformerSuite { + val fileFormat: String = "parquet" + override protected val resourcePath: String = "/tpch-data-parquet" + + val adaptiveExecutionOn: Boolean + + override def beforeAll(): Unit = { + super.beforeAll() + spark.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, adaptiveExecutionOn) + + spark + .sql(s""" + |CREATE TABLE t1 (id int, age int, phone int, date string) + |USING $fileFormat + |PARTITIONED BY (date) + |""".stripMargin) + .show() + + spark + .sql(s""" + |INSERT INTO t1 PARTITION(date = '2020-01-01') + |SELECT id, id % 10 as age, id % 10 as phone + |FROM range(100) + |""".stripMargin) + .show() + } + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager") + .set("spark.sql.files.maxPartitionBytes", "1g") + .set("spark.sql.shuffle.partitions", "1") + .set("spark.memory.offHeap.size", "2g") + .set("spark.gluten.ras.enabled", "false") + .set("spark.gluten.sql.mergeTwoPhasesAggregate.enabled", "true") + .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, adaptiveExecutionOn.toString) + } + + override def afterAll(): Unit = { + try { + sql("DROP TABLE IF EXISTS t1") + } finally { + super.afterAll() + } + } + + test("Merge two phase hash-based aggregate into one aggregate") { + def checkHashAggregateCount(df: DataFrame, expectedCount: Int): Unit = { + df.collect() + val plans = collect(df.queryExecution.executedPlan) { + case agg: HashAggregateExecBaseTransformer => agg + } + assert(plans.size == expectedCount) + } + + withTempView("v1") { + spark.range(100).selectExpr("id as key").createOrReplaceTempView("v1") + // no exchange hash aggregate, merge to one hash aggregate + checkHashAggregateCount( + 
spark.sql(""" + |SELECT key, count(key) + |FROM v1 + |GROUP BY key + |""".stripMargin), + 1 + ) + + // with filter hash aggregate + checkHashAggregateCount( + spark.sql(""" + |SELECT key, count(key) FILTER (WHERE key LIKE '%1%') AS pc2 + |FROM v1 + |GROUP BY key + |""".stripMargin), + 2 + ) + } + + // with exchange hash aggregate + checkHashAggregateCount( + spark.sql(""" + |SELECT count(1) FROM t1 + |""".stripMargin), + 2) + } + + test("Merge two phase object-based aggregate into one aggregate") { + def checkObjectAggregateCount(df: DataFrame, expectedCount: Int): Unit = { + df.collect() + val plans = collect(df.queryExecution.executedPlan) { + case agg: HashAggregateExecBaseTransformer => agg + } + assert(plans.size == expectedCount) + } + + withTempView("v1") { + spark.range(100).selectExpr("id as key").createOrReplaceTempView("v1") + // no exchange object aggregate, merge to one hash aggregate + checkObjectAggregateCount( + spark.sql(""" + |SELECT key, collect_list(key) + |FROM v1 + |GROUP BY key + |""".stripMargin), + 1 + ) + + // with filter object aggregate + checkObjectAggregateCount( + spark.sql(""" + |SELECT key, collect_list(key) FILTER (WHERE key LIKE '%1%') AS pc2 + |FROM v1 + |GROUP BY key + |""".stripMargin), + 2 + ) + } + + // with exchange object aggregate + checkObjectAggregateCount( + spark.sql(""" + |SELECT collect_list(id) FROM t1 + |""".stripMargin), + 2) + } + + test("Merge two phase sort-based aggregate into one aggregate") { + def checkSortAggregateCount(df: DataFrame, expectedCount: Int): Unit = { + df.collect() + val plans = collect(df.queryExecution.executedPlan) { + case agg: HashAggregateExecBaseTransformer => agg + } + assert(plans.size == expectedCount) + } + + withSQLConf("spark.sql.test.forceApplySortAggregate" -> "true") { + withTempView("v1") { + spark.range(100).selectExpr("id as key").createOrReplaceTempView("v1") + // no exchange sort aggregate, merge to one hash aggregate + checkSortAggregateCount( + spark.sql(""" + |SELECT 
sum(if(key<0,0,key)) + |FROM v1 + |GROUP BY key + |""".stripMargin), + 1 + ) + + // with filter sort aggregate + checkSortAggregateCount( + spark.sql(""" + |SELECT key, sum(if(key<0,0,key)) FILTER (WHERE key LIKE '%1%') AS pc2 + |FROM v1 + |GROUP BY key + |""".stripMargin), + 2 + ) + } + + // with exchange sort aggregate + checkSortAggregateCount( + spark.sql(""" + |SELECT sum(if(id<0,0,id)) FROM t1 + |""".stripMargin), + 2) + } + } +} + +class VeloxMergeTwoPhasesAggregateSuiteOn extends BaseVeloxMergeTwoPhasesHashBaseAggregateSuite { + override val adaptiveExecutionOn: Boolean = true +} + +class VeloxMergeTwoPhasesAggregateSuiteOff extends BaseVeloxMergeTwoPhasesHashBaseAggregateSuite { + override val adaptiveExecutionOn: Boolean = false +} diff --git a/docs/Configuration.md b/docs/Configuration.md index 76549dd4fe7c..a1afd88bae12 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -58,6 +58,7 @@ You can add these configurations into spark-defaults.conf to enable or disable t | spark.gluten.sql.columnar.fallback.ignoreRowToColumnar | When true, the fallback policy ignores the RowToColumnar when counting fallback number. | true | | spark.gluten.sql.columnar.fallback.preferColumnar | When true, the fallback policy prefers to use Gluten plan rather than vanilla Spark plan if the both of them contains ColumnarToRow and the vanilla Spark plan ColumnarToRow number is not smaller than Gluten plan. | true | | spark.gluten.sql.columnar.force.hashagg | Force to use hash agg to replace sort agg. | true | +| spark.gluten.sql.mergeTwoPhasesAggregate.enabled | Whether to merge two phases aggregate if there are no other operators between two aggregates. | true | | spark.gluten.sql.columnar.vanillaReaders | Enable vanilla spark's vectorized reader. Please note it may bring perf. overhead due to extra data transition. We recommend to disable it if most queries can be fully offloaded to gluten.
| false | | spark.gluten.sql.native.bloomFilter | Enable or Disable native runtime bloom filter. | true | | spark.gluten.sql.native.arrow.reader.enabled | Enable or Disable native arrow read CSV file format | false | diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/extension/MergeTwoPhasesHashBaseAggregate.scala b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/MergeTwoPhasesHashBaseAggregate.scala similarity index 96% rename from backends-clickhouse/src/main/scala/org/apache/gluten/extension/MergeTwoPhasesHashBaseAggregate.scala rename to gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/MergeTwoPhasesHashBaseAggregate.scala index a10659b6d5e7..a034a3229a88 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/extension/MergeTwoPhasesHashBaseAggregate.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/MergeTwoPhasesHashBaseAggregate.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.gluten.extension +package org.apache.gluten.extension.columnar import org.apache.gluten.GlutenConfig @@ -42,6 +42,7 @@ case class MergeTwoPhasesHashBaseAggregate(session: SparkSession) val scanOnly: Boolean = glutenConf.enableScanOnly val enableColumnarHashAgg: Boolean = !scanOnly && glutenConf.enableColumnarHashAgg val replaceSortAggWithHashAgg: Boolean = GlutenConfig.getConf.forceToUseHashAgg + val mergeTwoPhasesAggEnabled: Boolean = GlutenConfig.getConf.mergeTwoPhasesAggEnabled private def isPartialAgg(partialAgg: BaseAggregateExec, finalAgg: BaseAggregateExec): Boolean = { // TODO: now it can not support to merge agg which there are the filters in the aggregate exprs. 
@@ -59,7 +60,7 @@ case class MergeTwoPhasesHashBaseAggregate(session: SparkSession) } override def apply(plan: SparkPlan): SparkPlan = { - if (!enableColumnarHashAgg) { + if (!mergeTwoPhasesAggEnabled || !enableColumnarHashAgg) { plan } else { plan.transformDown { diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 4e8f13ee4414..7101ff242416 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -886,6 +886,7 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("replace partial and final hash aggregate together with sort aggregate") .exclude("do not replace hash aggregate if child does not have sort order") .exclude("do not replace hash aggregate if there is no group-by column") + .excludeGlutenTest("replace partial and final hash aggregate together with sort aggregate") enableSuite[GlutenReuseExchangeAndSubquerySuite] enableSuite[GlutenSameResultSuite] enableSuite[GlutenSortSuite] diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 0f3c43dfdf99..7f4c7ddac756 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -886,6 +886,7 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("replace partial and final hash aggregate together with sort aggregate") .exclude("do not replace hash aggregate if child does not have sort order") .exclude("do not replace hash aggregate if there is no group-by column") + .excludeGlutenTest("replace partial and final hash aggregate together with sort 
aggregate") enableSuite[GlutenReuseExchangeAndSubquerySuite] enableSuite[GlutenSameResultSuite] enableSuite[GlutenSortSuite] diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index a9525b1b0e4a..b09dc22f46da 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -900,6 +900,7 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("replace partial and final hash aggregate together with sort aggregate") .exclude("do not replace hash aggregate if child does not have sort order") .exclude("do not replace hash aggregate if there is no group-by column") + .excludeGlutenTest("replace partial and final hash aggregate together with sort aggregate") enableSuite[GlutenReuseExchangeAndSubquerySuite] enableSuite[GlutenSameResultSuite] enableSuite[GlutenSortSuite] diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala index a28a7d26b386..f5ecfaa59cbe 100644 --- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala @@ -56,6 +56,8 @@ class GlutenConfig(conf: SQLConf) extends Logging { def forceToUseHashAgg: Boolean = conf.getConf(COLUMNAR_FORCE_HASHAGG_ENABLED) + def mergeTwoPhasesAggEnabled: Boolean = conf.getConf(MERGE_TWO_PHASES_ENABLED) + def enableColumnarProject: Boolean = conf.getConf(COLUMNAR_PROJECT_ENABLED) def enableColumnarFilter: Boolean = conf.getConf(COLUMNAR_FILTER_ENABLED) @@ -876,6 +878,14 @@ object GlutenConfig { .booleanConf .createWithDefault(true) + val MERGE_TWO_PHASES_ENABLED = + buildConf("spark.gluten.sql.mergeTwoPhasesAggregate.enabled") + .internal() + .doc("Whether to merge two phases aggregate 
if there are no other operators " + + "between two aggregates.") + .booleanConf + .createWithDefault(true) + val COLUMNAR_PROJECT_ENABLED = buildConf("spark.gluten.sql.columnar.project") .internal()