Skip to content

Commit

Permalink
planner: fix cost adjustment for high risk tablescan (#57086)
Browse files Browse the repository at this point in the history
close #57085
  • Loading branch information
terry1purcell authored Nov 6, 2024
1 parent 01e2ff0 commit 34ef14a
Show file tree
Hide file tree
Showing 13 changed files with 85 additions and 81 deletions.
2 changes: 1 addition & 1 deletion pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1205,7 +1205,7 @@ func TestIgnoreRealtimeStats(t *testing.T) {
testKit := testkit.NewTestKit(t, store)
testKit.MustExec("use test")
testKit.MustExec("drop table if exists t")
testKit.MustExec("create table t(a int, b int, index ib(b))")
testKit.MustExec("create table t(a int, b int)")
h := dom.StatsHandle()
require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,15 @@
"children": [
{
"name": "TableFullScan_4",
"cost": 2500387.7115728618,
"cost": 4546159.475587022,
"est_rows": 10000,
"act_rows": 2,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 251172.51410485746,
"cost": 387557.2983724681,
"est_rows": 10000,
"act_rows": 2,
"task_type": 1,
Expand Down Expand Up @@ -104,7 +104,7 @@
"children": [
{
"name": "TableFullScan_16",
"cost": 2500387.7115728618,
"cost": 4546159.475587022,
"est_rows": 10000,
"act_rows": 4,
"task_type": 2,
Expand All @@ -115,31 +115,31 @@
"labels": [
1
],
"cost": 208932.51410485746,
"cost": 345317.2983724681,
"est_rows": 10000,
"act_rows": 4,
"task_type": 1,
"store_type": 1,
"operator_info": "data:TableFullScan_16"
}
],
"cost": 992002.8474381908,
"cost": 1128387.6317058015,
"est_rows": 100000000,
"act_rows": 8,
"task_type": 1,
"store_type": 1,
"operator_info": "CARTESIAN inner join"
}
],
"cost": 998992002.8474382,
"cost": 999128387.6317058,
"est_rows": 100000000,
"act_rows": 8,
"task_type": 1,
"store_type": 1,
"operator_info": "cast(test.t.a, decimal(10,0) BINARY)->Column#8"
}
],
"cost": 1996993511.427438,
"cost": 1997129896.2117057,
"est_rows": 1,
"act_rows": 1,
"task_type": 1,
Expand Down Expand Up @@ -184,22 +184,22 @@
"children": [
{
"name": "TableFullScan_5",
"cost": 2500387.7115728618,
"cost": 4546159.475587022,
"est_rows": 10000,
"act_rows": 2,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 2999387.7115728618,
"cost": 5045159.475587022,
"est_rows": 3333.3333333333335,
"task_type": 2,
"store_type": 2,
"operator_info": "gt(test.t.a, 100)"
}
],
"cost": 214039.18077152412,
"cost": 350423.9650391348,
"est_rows": 3333.3333333333335,
"task_type": 1,
"store_type": 1,
Expand All @@ -222,15 +222,15 @@
"children": [
{
"name": "TableFullScan_35",
"cost": 2500387.7115728618,
"cost": 4546159.475587022,
"est_rows": 10000,
"act_rows": 2,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 2999387.7115728618,
"cost": 5045159.475587022,
"est_rows": 9990,
"act_rows": 2,
"task_type": 2,
Expand All @@ -241,7 +241,7 @@
"labels": [
2
],
"cost": 284354.70077152416,
"cost": 420739.48503913474,
"est_rows": 9990,
"act_rows": 2,
"task_type": 1,
Expand All @@ -256,15 +256,15 @@
"children": [
{
"name": "TableFullScan_32",
"cost": 2500387.7115728618,
"cost": 4546159.475587022,
"est_rows": 10000,
"act_rows": 4,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 2999387.7115728618,
"cost": 5045159.475587022,
"est_rows": 9990,
"act_rows": 4,
"task_type": 2,
Expand All @@ -275,15 +275,15 @@
"labels": [
1
],
"cost": 284354.70077152416,
"cost": 420739.48503913474,
"est_rows": 9990,
"act_rows": 4,
"task_type": 1,
"store_type": 1,
"operator_info": "data:Selection_33"
}
],
"cost": 1830544.8015430481,
"cost": 2103314.3700782694,
"est_rows": 12487.5,
"task_type": 1,
"store_type": 1,
Expand Down Expand Up @@ -389,22 +389,22 @@
"children": [
{
"name": "TableFullScan_6",
"cost": 2500387.7115728618,
"cost": 4546159.475587022,
"est_rows": 10000,
"act_rows": 4,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 2999387.7115728618,
"cost": 5045159.475587022,
"est_rows": 3333.3333333333335,
"task_type": 2,
"store_type": 2,
"operator_info": "gt(test.t2.b, 10)"
}
],
"cost": 242199.18077152412,
"cost": 378583.9650391348,
"est_rows": 3333.3333333333335,
"task_type": 1,
"store_type": 1,
Expand Down Expand Up @@ -441,15 +441,15 @@
"children": [
{
"name": "TableFullScan_6",
"cost": 2500387.7115728618,
"cost": 4546159.475587022,
"est_rows": 10000,
"act_rows": 2,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 251172.51410485746,
"cost": 387557.2983724681,
"est_rows": 10000,
"act_rows": 2,
"task_type": 1,
Expand All @@ -475,15 +475,15 @@
"children": [
{
"name": "TableFullScan_5",
"cost": 2500387.7115728618,
"cost": 4546159.475587022,
"est_rows": 10000,
"act_rows": 8,
"task_type": 2,
"store_type": 2,
"operator_info": "keep order:false, stats:pseudo"
}
],
"cost": 208932.51410485746,
"cost": 345317.2983724681,
"est_rows": 10000,
"act_rows": 8,
"task_type": 1,
Expand Down
4 changes: 2 additions & 2 deletions pkg/planner/core/casetest/dag/testdata/plan_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
},
{
"SQL": "select c from t order by t.a limit 1",
"Best": "TableReader(Table(t)->Limit)->Limit->Projection"
"Best": "IndexReader(Index(t.c_d_e)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->Projection"
},
{
"SQL": "select c from t order by t.a + t.b limit 1",
Expand Down Expand Up @@ -508,7 +508,7 @@
},
{
"SQL": "select a from t union all (select c from t) order by a limit 1",
"Best": "UnionAll{TableReader(Table(t)->Limit)->Limit->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit}->TopN([Column#25],0,1)"
"Best": "UnionAll{IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit}->TopN([Column#25],0,1)"
}
]
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2021,9 +2021,9 @@
},
{
"SQL": "select /*+ USE_INDEX_MERGE(t1, c_d_e, f_g) */ * from t where c < 1 or f > 2",
"Best": "TableReader(Table(t)->Sel([or(lt(test.t.c, 1), gt(test.t.f, 2))]))",
"Best": "IndexMergeReader(PartialPlans->[Index(t.c_d_e)[[-inf,1)], Index(t.f)[(2,+inf]]], TablePlan->Table(t))",
"HasWarn": true,
"Hints": "use_index(@`sel_1` `test`.`t` ), no_order_index(@`sel_1` `test`.`t` `primary`)"
"Hints": "use_index_merge(@`sel_1` `t` `c_d_e`, `f`)"
},
{
"SQL": "select /*+ NO_INDEX_MERGE(), USE_INDEX_MERGE(t, primary, f_g, c_d_e) */ * from t where a < 1 or f > 2",
Expand All @@ -2039,15 +2039,15 @@
},
{
"SQL": "select /*+ USE_INDEX_MERGE(db2.t) */ * from t where c < 1 or f > 2",
"Best": "TableReader(Table(t)->Sel([or(lt(test.t.c, 1), gt(test.t.f, 2))]))",
"Best": "IndexMergeReader(PartialPlans->[Index(t.c_d_e)[[-inf,1)], Index(t.f)[(2,+inf]]], TablePlan->Table(t))",
"HasWarn": true,
"Hints": "use_index(@`sel_1` `test`.`t` ), no_order_index(@`sel_1` `test`.`t` `primary`)"
"Hints": "use_index_merge(@`sel_1` `t` `c_d_e`, `f`)"
},
{
"SQL": "select /*+ USE_INDEX_MERGE(db2.t, c_d_e, f_g) */ * from t where c < 1 or f > 2",
"Best": "TableReader(Table(t)->Sel([or(lt(test.t.c, 1), gt(test.t.f, 2))]))",
"Best": "IndexMergeReader(PartialPlans->[Index(t.c_d_e)[[-inf,1)], Index(t.f)[(2,+inf]]], TablePlan->Table(t))",
"HasWarn": true,
"Hints": "use_index(@`sel_1` `test`.`t` ), no_order_index(@`sel_1` `test`.`t` `primary`)"
"Hints": "use_index_merge(@`sel_1` `t` `c_d_e`, `f`)"
}
]
},
Expand Down Expand Up @@ -2257,11 +2257,11 @@
"Cases": [
{
"SQL": "select max(a) from t;",
"Best": "TableReader(Table(t)->Limit)->Limit->StreamAgg"
"Best": "IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a true],0,1))->TopN([test.t.a true],0,1)->StreamAgg"
},
{
"SQL": "select min(a) from t;",
"Best": "TableReader(Table(t)->Limit)->Limit->StreamAgg"
"Best": "IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->StreamAgg"
},
{
"SQL": "select min(c_str) from t;",
Expand All @@ -2277,7 +2277,7 @@
},
{
"SQL": "select max(a), min(a) from t;",
"Best": "LeftHashJoin{TableReader(Table(t)->Limit)->Limit->StreamAgg->TableReader(Table(t)->Limit)->Limit->StreamAgg}"
"Best": "LeftHashJoin{IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a true],0,1))->TopN([test.t.a true],0,1)->StreamAgg->IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->StreamAgg}"
},
{
"SQL": "select max(a), min(a) from t where a > 10",
Expand All @@ -2289,7 +2289,7 @@
},
{
"SQL": "select max(a), max(c), min(f) from t",
"Best": "LeftHashJoin{LeftHashJoin{TableReader(Table(t)->Limit)->Limit->StreamAgg->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit->StreamAgg}->IndexReader(Index(t.f)[[NULL,+inf]]->Limit)->Limit->StreamAgg}"
"Best": "LeftHashJoin{LeftHashJoin{IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a true],0,1))->TopN([test.t.a true],0,1)->StreamAgg->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit->StreamAgg}->IndexReader(Index(t.f)[[NULL,+inf]]->Limit)->Limit->StreamAgg}"
},
{
"SQL": "select max(a), max(b) from t",
Expand Down
16 changes: 9 additions & 7 deletions pkg/planner/core/plan_cost_ver2.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,29 +162,31 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
// Apply TiFlash startup cost to prefer TiKV for small table scans
if p.StoreType == kv.TiFlash {
p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, TiFlashStartupRowPenalty, rowSize, scanFactor))
} else {
} else if !p.isChildOfIndexLookUp {
// Apply cost penalty for full scans that carry high risk of underestimation
sessionVars := p.SCtx().GetSessionVars()
allowPreferRangeScan := sessionVars.GetAllowPreferRangeScan()
tblColHists := p.tblColHists

// preferRangeScan check here is same as in skylinePruning
preferRangeScanCondition := allowPreferRangeScan && (tblColHists.Pseudo || tblColHists.RealtimeCount < 1)
// hasUnreliableStats is a check for pseudo or zero stats
hasUnreliableStats := tblColHists.Pseudo || tblColHists.RealtimeCount < 1
// hasHighModifyCount tracks the high risk of a tablescan where auto-analyze had not yet updated the table row count
hasHighModifyCount := tblColHists.ModifyCount > tblColHists.RealtimeCount
// hasLowEstimate is a check to capture a unique customer case where modifyCount is used for tablescan estimate (but it is not adequately understood why)
hasLowEstimate := rows > 1 && int64(rows) < tblColHists.RealtimeCount && int64(rows) <= tblColHists.ModifyCount
hasLowEstimate := rows > 1 && tblColHists.ModifyCount < tblColHists.RealtimeCount && int64(rows) <= tblColHists.ModifyCount
// preferRangeScan check here is the same as in skylinePruning
preferRangeScanCondition := allowPreferRangeScan && (hasUnreliableStats || hasHighModifyCount || hasLowEstimate)
var unsignedIntHandle bool
if p.Table.PKIsHandle {
if pkColInfo := p.Table.GetPkColInfo(); pkColInfo != nil {
unsignedIntHandle = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
}
}
hasFullRangeScan := !p.isChildOfIndexLookUp && ranger.HasFullRange(p.Ranges, unsignedIntHandle)
hasFullRangeScan := ranger.HasFullRange(p.Ranges, unsignedIntHandle)

shouldApplyPenalty := hasFullRangeScan && (preferRangeScanCondition || hasHighModifyCount || hasLowEstimate)
shouldApplyPenalty := hasFullRangeScan && preferRangeScanCondition
if shouldApplyPenalty {
newRowCount := math.Min(MaxPenaltyRowCount, max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
newRowCount := max(MaxPenaltyRowCount, max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, newRowCount, rowSize, scanFactor))
}
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/planner/core/plan_cost_ver2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ func TestCostModelVer2ScanRowSize(t *testing.T) {
{"select a, b from t use index(abc) where a=1 and b=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
{"select a, b, c from t use index(abc) where a=1 and b=1 and c=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
// table scan row-size is always equal to row-size(*)
{"select a from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(80)*tikv_scan_factor(40.7)))"},
{"select a, d from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(80)*tikv_scan_factor(40.7)))"},
{"select * from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(80)*tikv_scan_factor(40.7)))"},
{"select a from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(10000*logrowsize(80)*tikv_scan_factor(40.7)))"},
{"select a, d from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(10000*logrowsize(80)*tikv_scan_factor(40.7)))"},
{"select * from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(10000*logrowsize(80)*tikv_scan_factor(40.7)))"},
}
for _, c := range cases {
rs := tk.MustQuery("explain analyze format=true_card_cost " + c.query).Rows()
Expand Down
18 changes: 10 additions & 8 deletions tests/integrationtest/r/executor/chunk_reuse.result
Original file line number Diff line number Diff line change
Expand Up @@ -223,14 +223,16 @@ explain format='brief' select id1 from t3 where id2 > '3' or id8 < 10 union (sel
id estRows task access object operator info
HashAgg 8878.22 root group by:Column#17, funcs:firstrow(Column#17)->Column#17
└─Union 11097.78 root
├─TableReader 5548.89 root data:Projection
│ └─Projection 5548.89 cop[tikv] executor__chunk_reuse.t3.id1->Column#17
│ └─Selection 5548.89 cop[tikv] or(gt(executor__chunk_reuse.t3.id2, "3"), lt(executor__chunk_reuse.t3.id8, 10))
│ └─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo
└─TableReader 5548.89 root data:Projection
└─Projection 5548.89 cop[tikv] executor__chunk_reuse.t3.id1->Column#17
└─Selection 5548.89 cop[tikv] or(gt(executor__chunk_reuse.t3.id2, "4"), lt(executor__chunk_reuse.t3.id8, 7))
└─TableFullScan 10000.00 cop[tikv] table:t3 keep order:false, stats:pseudo
├─Projection 5548.89 root executor__chunk_reuse.t3.id1->Column#17
│ └─IndexMerge 5548.89 root type: union
│ ├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t3, index:id2(id2) range:("3",+inf], keep order:false, stats:pseudo
│ ├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t3, index:id8(id8) range:[-inf,10), keep order:false, stats:pseudo
│ └─TableRowIDScan(Probe) 5548.89 cop[tikv] table:t3 keep order:false, stats:pseudo
└─Projection 5548.89 root executor__chunk_reuse.t3.id1->Column#17
└─IndexMerge 5548.89 root type: union
├─IndexRangeScan(Build) 3333.33 cop[tikv] table:t3, index:id2(id2) range:("4",+inf], keep order:false, stats:pseudo
├─IndexRangeScan(Build) 3323.33 cop[tikv] table:t3, index:id8(id8) range:[-inf,7), keep order:false, stats:pseudo
└─TableRowIDScan(Probe) 5548.89 cop[tikv] table:t3 keep order:false, stats:pseudo
select id1 from t3 where id2 > '3' or id8 < 10 union (select id1 from t3 where id2 > '4' or id8 < 7);
id1
1
Expand Down
Loading

0 comments on commit 34ef14a

Please sign in to comment.