Skip to content

Commit

Permalink
[enhancement](Nereids) make stats unchanged (apache#23737)
Browse files Browse the repository at this point in the history
Keep a group's statistics unchanged while exploring plans, so that the costs of alternative plans remain comparable.
  • Loading branch information
keanji-x authored Sep 14, 2023
1 parent 66bd2a4 commit 5ba1f62
Show file tree
Hide file tree
Showing 29 changed files with 312 additions and 313 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -222,22 +222,13 @@ public static void estimate(GroupExpression groupExpression, CascadesContext con
private void estimate() {
Plan plan = groupExpression.getPlan();
Statistics newStats = plan.accept(this, null);
Statistics oldStats = groupExpression.getOwnerGroup().getStatistics();
/*
In an ideal cost model, all group expressions in a group are equivalent, but in practice their costs differ.
We record the lowest expression cost as the group cost to avoid missing this group.
*/
if (oldStats == null) {
// We ensure that the rowCount remains unchanged in order to make the cost of each plan comparable.
if (groupExpression.getOwnerGroup().getStatistics() == null) {
groupExpression.getOwnerGroup().setStatistics(newStats);
groupExpression.setEstOutputRowCount(newStats.getRowCount());
} else {
Statistics discardStats = newStats;
if (oldStats.getRowCount() > newStats.getRowCount()) {
groupExpression.getOwnerGroup().setStatistics(newStats);
discardStats = oldStats;
}
groupExpression.getOwnerGroup().getStatistics().updateNdv(discardStats);
groupExpression.getOwnerGroup().getStatistics().updateNdv(newStats);
}
groupExpression.setEstOutputRowCount(newStats.getRowCount());
groupExpression.setStatDerived(true);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ PhysicalResultSink
----PhysicalDistribute
------hashAgg[LOCAL]
--------PhysicalProject
----------hashJoin[INNER_JOIN](store.s_store_sk = store_sales.ss_store_sk)
----------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
------------PhysicalProject
--------------PhysicalOlapScan[store]
--------------filter((date_dim.d_year = 2001))
----------------PhysicalOlapScan[date_dim]
------------PhysicalDistribute
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](store_sales.ss_sold_date_sk = date_dim.d_date_sk)
----------------hashJoin[INNER_JOIN](store.s_store_sk = store_sales.ss_store_sk)
------------------PhysicalProject
--------------------filter((date_dim.d_year = 2001))
----------------------PhysicalOlapScan[date_dim]
--------------------PhysicalOlapScan[store]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------hashJoin[INNER_JOIN](customer_demographics.cd_demo_sk = store_sales.ss_cdemo_sk)((((((cast(cd_marital_status as VARCHAR(*)) = 'D') AND (cast(cd_education_status as VARCHAR(*)) = 'Unknown')) AND ((store_sales.ss_sales_price >= 100.00) AND (store_sales.ss_sales_price <= 150.00))) AND (household_demographics.hd_dep_count = 3)) OR ((((cast(cd_marital_status as VARCHAR(*)) = 'S') AND (cast(cd_education_status as VARCHAR(*)) = 'College')) AND ((store_sales.ss_sales_price >= 50.00) AND (store_sales.ss_sales_price <= 100.00))) AND (household_demographics.hd_dep_count = 1))) OR ((((cast(cd_marital_status as VARCHAR(*)) = 'M') AND (cast(cd_education_status as VARCHAR(*)) = '4 yr Degree')) AND ((store_sales.ss_sales_price >= 150.00) AND (store_sales.ss_sales_price <= 200.00))) AND (household_demographics.hd_dep_count = 1)))
Expand Down
55 changes: 28 additions & 27 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query17.out
Original file line number Diff line number Diff line change
Expand Up @@ -14,35 +14,36 @@ PhysicalResultSink
----------------------hashJoin[INNER_JOIN](store_returns.sr_item_sk = catalog_sales.cs_item_sk)(store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk)
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[catalog_sales]
------------------------PhysicalDistribute
--------------------------hashJoin[INNER_JOIN](store.s_store_sk = store_sales.ss_store_sk)
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk)
--------------------------------PhysicalDistribute
----------------------------hashJoin[INNER_JOIN](store_returns.sr_returned_date_sk = d2.d_date_sk)
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)(store_sales.ss_customer_sk = store_returns.sr_customer_sk)
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN](store_returns.sr_returned_date_sk = d2.d_date_sk)
--------------------------------------PhysicalProject
----------------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)(store_sales.ss_customer_sk = store_returns.sr_customer_sk)
------------------------------------------PhysicalProject
--------------------------------------------PhysicalOlapScan[store_returns]
------------------------------------------hashJoin[INNER_JOIN](d1.d_date_sk = store_sales.ss_sold_date_sk)
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[store_sales]
--------------------------------------------PhysicalDistribute
------------------------------------PhysicalOlapScan[store_returns]
----------------------------------PhysicalDistribute
------------------------------------PhysicalProject
--------------------------------------hashJoin[INNER_JOIN](store.s_store_sk = store_sales.ss_store_sk)
----------------------------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk)
------------------------------------------PhysicalDistribute
--------------------------------------------hashJoin[INNER_JOIN](d1.d_date_sk = store_sales.ss_sold_date_sk)
----------------------------------------------PhysicalProject
------------------------------------------------filter((cast(d_quarter_name as VARCHAR(*)) = '2001Q1'))
--------------------------------------------------PhysicalOlapScan[date_dim]
--------------------------------------PhysicalDistribute
----------------------------------------PhysicalProject
------------------------------------------filter(d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3'))
--------------------------------------------PhysicalOlapScan[date_dim]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[item]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------PhysicalOlapScan[store]
------------------------------------------------PhysicalOlapScan[store_sales]
----------------------------------------------PhysicalDistribute
------------------------------------------------PhysicalProject
--------------------------------------------------filter((cast(d_quarter_name as VARCHAR(*)) = '2001Q1'))
----------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------PhysicalDistribute
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[item]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------PhysicalOlapScan[store]
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
----------------------------------filter(d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3'))
------------------------------------PhysicalOlapScan[date_dim]
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[catalog_sales]
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------filter(d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3'))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,22 @@ PhysicalCteAnchor ( cteId=CTEId#1 )
------------hashJoin[INNER_JOIN](expr_cast(d_week_seq1 as BIGINT) = expr_(d_week_seq2 - 53))
--------------PhysicalDistribute
----------------PhysicalProject
------------------hashJoin[INNER_JOIN](date_dim.d_week_seq = d_week_seq2)
------------------hashJoin[INNER_JOIN](date_dim.d_week_seq = d_week_seq1)
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------filter((date_dim.d_year = 1999))
------------------------filter((date_dim.d_year = 1998))
--------------------------PhysicalOlapScan[date_dim]
--------------PhysicalDistribute
----------------PhysicalProject
------------------hashJoin[INNER_JOIN](date_dim.d_week_seq = d_week_seq1)
------------------hashJoin[INNER_JOIN](date_dim.d_week_seq = d_week_seq2)
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------PhysicalCteConsumer ( cteId=CTEId#1 )
--------------------PhysicalDistribute
----------------------PhysicalProject
------------------------filter((date_dim.d_year = 1998))
------------------------filter((date_dim.d_year = 1999))
--------------------------PhysicalOlapScan[date_dim]

56 changes: 29 additions & 27 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query25.out
Original file line number Diff line number Diff line change
Expand Up @@ -11,36 +11,38 @@ PhysicalResultSink
----------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = d3.d_date_sk)
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN](store_returns.sr_item_sk = catalog_sales.cs_item_sk)(store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk)
----------------------PhysicalProject
------------------------PhysicalOlapScan[catalog_sales]
----------------------PhysicalDistribute
------------------------hashJoin[INNER_JOIN](store.s_store_sk = store_sales.ss_store_sk)
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk)
------------------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------PhysicalOlapScan[catalog_sales]
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN](store_returns.sr_returned_date_sk = d2.d_date_sk)
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)(store_sales.ss_customer_sk = store_returns.sr_customer_sk)
--------------------------------PhysicalProject
----------------------------------hashJoin[INNER_JOIN](store_returns.sr_returned_date_sk = d2.d_date_sk)
------------------------------------PhysicalProject
--------------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)(store_sales.ss_customer_sk = store_returns.sr_customer_sk)
----------------------------------------PhysicalProject
------------------------------------------PhysicalOlapScan[store_returns]
----------------------------------------hashJoin[INNER_JOIN](d1.d_date_sk = store_sales.ss_sold_date_sk)
------------------------------------------PhysicalProject
--------------------------------------------PhysicalOlapScan[store_sales]
------------------------------------------PhysicalDistribute
----------------------------------PhysicalOlapScan[store_returns]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN](store.s_store_sk = store_sales.ss_store_sk)
--------------------------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk)
----------------------------------------PhysicalDistribute
------------------------------------------hashJoin[INNER_JOIN](d1.d_date_sk = store_sales.ss_sold_date_sk)
--------------------------------------------PhysicalProject
----------------------------------------------filter((d1.d_year = 2000)(d1.d_moy = 4))
------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------PhysicalDistribute
--------------------------------------PhysicalProject
----------------------------------------filter((d2.d_moy <= 10)(d2.d_moy >= 4)(d2.d_year = 2000))
------------------------------------------PhysicalOlapScan[date_dim]
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[item]
--------------------------PhysicalDistribute
----------------------------PhysicalProject
------------------------------PhysicalOlapScan[store]
----------------------------------------------PhysicalOlapScan[store_sales]
--------------------------------------------PhysicalDistribute
----------------------------------------------PhysicalProject
------------------------------------------------filter((d1.d_year = 2000)(d1.d_moy = 4))
--------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------PhysicalOlapScan[item]
--------------------------------------PhysicalDistribute
----------------------------------------PhysicalProject
------------------------------------------PhysicalOlapScan[store]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------filter((d2.d_moy <= 10)(d2.d_moy >= 4)(d2.d_year = 2000))
----------------------------------PhysicalOlapScan[date_dim]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------filter((d3.d_year = 2000)(d3.d_moy <= 10)(d3.d_moy >= 4))
Expand Down
58 changes: 30 additions & 28 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query29.out
Original file line number Diff line number Diff line change
Expand Up @@ -8,41 +8,43 @@ PhysicalResultSink
----------PhysicalDistribute
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](store.s_store_sk = store_sales.ss_store_sk)
----------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = d3.d_date_sk)
------------------PhysicalProject
--------------------PhysicalOlapScan[store]
------------------PhysicalDistribute
--------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk)
----------------------PhysicalProject
------------------------PhysicalOlapScan[item]
--------------------hashJoin[INNER_JOIN](store_returns.sr_item_sk = catalog_sales.cs_item_sk)(store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk)
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk = d3.d_date_sk)
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------filter(d_year IN (1999, 2000, 2001))
----------------------------------PhysicalOlapScan[date_dim]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN](store_returns.sr_item_sk = catalog_sales.cs_item_sk)(store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk)
--------------------------PhysicalOlapScan[catalog_sales]
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------hashJoin[INNER_JOIN](store_returns.sr_returned_date_sk = d2.d_date_sk)
----------------------------PhysicalProject
------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)(store_sales.ss_customer_sk = store_returns.sr_customer_sk)
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[store_returns]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[catalog_sales]
----------------------------------PhysicalDistribute
------------------------------------PhysicalProject
--------------------------------------hashJoin[INNER_JOIN](d1.d_date_sk = store_sales.ss_sold_date_sk)
----------------------------------------PhysicalProject
------------------------------------------hashJoin[INNER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)(store_sales.ss_customer_sk = store_returns.sr_customer_sk)
------------------------------------hashJoin[INNER_JOIN](store.s_store_sk = store_sales.ss_store_sk)
--------------------------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk)
----------------------------------------PhysicalDistribute
------------------------------------------hashJoin[INNER_JOIN](d1.d_date_sk = store_sales.ss_sold_date_sk)
--------------------------------------------PhysicalProject
----------------------------------------------PhysicalOlapScan[store_sales]
--------------------------------------------hashJoin[INNER_JOIN](store_returns.sr_returned_date_sk = d2.d_date_sk)
--------------------------------------------PhysicalDistribute
----------------------------------------------PhysicalProject
------------------------------------------------PhysicalOlapScan[store_returns]
----------------------------------------------PhysicalDistribute
------------------------------------------------PhysicalProject
--------------------------------------------------filter((d2.d_moy <= 7)(d2.d_moy >= 4)(d2.d_year = 1999))
----------------------------------------------------PhysicalOlapScan[date_dim]
------------------------------------------------filter((d1.d_year = 1999)(d1.d_moy = 4))
--------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------------PhysicalDistribute
------------------------------------------PhysicalProject
--------------------------------------------filter((d1.d_year = 1999)(d1.d_moy = 4))
----------------------------------------------PhysicalOlapScan[date_dim]
--------------------------------------------PhysicalOlapScan[item]
--------------------------------------PhysicalDistribute
----------------------------------------PhysicalProject
------------------------------------------PhysicalOlapScan[store]
----------------------------PhysicalDistribute
------------------------------PhysicalProject
--------------------------------filter((d2.d_moy <= 7)(d2.d_moy >= 4)(d2.d_year = 1999))
----------------------------------PhysicalOlapScan[date_dim]
------------------PhysicalDistribute
--------------------PhysicalProject
----------------------filter(d_year IN (1999, 2000, 2001))
------------------------PhysicalOlapScan[date_dim]

Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------PhysicalTopN
----------PhysicalProject
------------hashJoin[INNER_JOIN](ctr1.ctr_state = ctr2.ctr_state)(cast(ctr_total_return as DOUBLE) > cast((avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2) as DOUBLE))
--------------hashJoin[INNER_JOIN](ctr1.ctr_customer_sk = customer.c_customer_sk)
----------------PhysicalDistribute
------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------PhysicalDistribute
------------------PhysicalProject
--------------PhysicalProject
----------------hashJoin[INNER_JOIN](ctr1.ctr_customer_sk = customer.c_customer_sk)
------------------PhysicalDistribute
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalDistribute
--------------------hashJoin[INNER_JOIN](customer_address.ca_address_sk = customer.c_current_addr_sk)
----------------------PhysicalProject
------------------------PhysicalOlapScan[customer]
Expand Down
Loading

0 comments on commit 5ba1f62

Please sign in to comment.