planner: fix cost adjustment for high risk tablescan (#57086)

close #57085
pingcap · Nov 6, 2024 · 34ef14a
1 parent 01e2ff0
commit 34ef14a
Show file tree

Hide file tree

Showing 13 changed files with 85 additions and 81 deletions.
diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go
@@ -1205,7 +1205,7 @@ func TestIgnoreRealtimeStats(t *testing.T) {
 	testKit := testkit.NewTestKit(t, store)
 	testKit.MustExec("use test")
 	testKit.MustExec("drop table if exists t")
-	testKit.MustExec("create table t(a int, b int, index ib(b))")
+	testKit.MustExec("create table t(a int, b int)")
 	h := dom.StatsHandle()
 	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
 

diff --git a/pkg/planner/core/casetest/binaryplan/testdata/binary_plan_suite_out.json b/pkg/planner/core/casetest/binaryplan/testdata/binary_plan_suite_out.json
@@ -46,15 +46,15 @@
             "children": [
               {
                 "name": "TableFullScan_4",
-                "cost": 2500387.7115728618,
+                "cost": 4546159.475587022,
                 "est_rows": 10000,
                 "act_rows": 2,
                 "task_type": 2,
                 "store_type": 2,
                 "operator_info": "keep order:false, stats:pseudo"
               }
             ],
-            "cost": 251172.51410485746,
+            "cost": 387557.2983724681,
             "est_rows": 10000,
             "act_rows": 2,
             "task_type": 1,
@@ -104,7 +104,7 @@
                         "children": [
                           {
                             "name": "TableFullScan_16",
-                            "cost": 2500387.7115728618,
+                            "cost": 4546159.475587022,
                             "est_rows": 10000,
                             "act_rows": 4,
                             "task_type": 2,
@@ -115,31 +115,31 @@
                         "labels": [
                           1
                         ],
-                        "cost": 208932.51410485746,
+                        "cost": 345317.2983724681,
                         "est_rows": 10000,
                         "act_rows": 4,
                         "task_type": 1,
                         "store_type": 1,
                         "operator_info": "data:TableFullScan_16"
                       }
                     ],
-                    "cost": 992002.8474381908,
+                    "cost": 1128387.6317058015,
                     "est_rows": 100000000,
                     "act_rows": 8,
                     "task_type": 1,
                     "store_type": 1,
                     "operator_info": "CARTESIAN inner join"
                   }
                 ],
-                "cost": 998992002.8474382,
+                "cost": 999128387.6317058,
                 "est_rows": 100000000,
                 "act_rows": 8,
                 "task_type": 1,
                 "store_type": 1,
                 "operator_info": "cast(test.t.a, decimal(10,0) BINARY)->Column#8"
               }
             ],
-            "cost": 1996993511.427438,
+            "cost": 1997129896.2117057,
             "est_rows": 1,
             "act_rows": 1,
             "task_type": 1,
@@ -184,22 +184,22 @@
                 "children": [
                   {
                     "name": "TableFullScan_5",
-                    "cost": 2500387.7115728618,
+                    "cost": 4546159.475587022,
                     "est_rows": 10000,
                     "act_rows": 2,
                     "task_type": 2,
                     "store_type": 2,
                     "operator_info": "keep order:false, stats:pseudo"
                   }
                 ],
-                "cost": 2999387.7115728618,
+                "cost": 5045159.475587022,
                 "est_rows": 3333.3333333333335,
                 "task_type": 2,
                 "store_type": 2,
                 "operator_info": "gt(test.t.a, 100)"
               }
             ],
-            "cost": 214039.18077152412,
+            "cost": 350423.9650391348,
             "est_rows": 3333.3333333333335,
             "task_type": 1,
             "store_type": 1,
@@ -222,15 +222,15 @@
                     "children": [
                       {
                         "name": "TableFullScan_35",
-                        "cost": 2500387.7115728618,
+                        "cost": 4546159.475587022,
                         "est_rows": 10000,
                         "act_rows": 2,
                         "task_type": 2,
                         "store_type": 2,
                         "operator_info": "keep order:false, stats:pseudo"
                       }
                     ],
-                    "cost": 2999387.7115728618,
+                    "cost": 5045159.475587022,
                     "est_rows": 9990,
                     "act_rows": 2,
                     "task_type": 2,
@@ -241,7 +241,7 @@
                 "labels": [
                   2
                 ],
-                "cost": 284354.70077152416,
+                "cost": 420739.48503913474,
                 "est_rows": 9990,
                 "act_rows": 2,
                 "task_type": 1,
@@ -256,15 +256,15 @@
                     "children": [
                       {
                         "name": "TableFullScan_32",
-                        "cost": 2500387.7115728618,
+                        "cost": 4546159.475587022,
                         "est_rows": 10000,
                         "act_rows": 4,
                         "task_type": 2,
                         "store_type": 2,
                         "operator_info": "keep order:false, stats:pseudo"
                       }
                     ],
-                    "cost": 2999387.7115728618,
+                    "cost": 5045159.475587022,
                     "est_rows": 9990,
                     "act_rows": 4,
                     "task_type": 2,
@@ -275,15 +275,15 @@
                 "labels": [
                   1
                 ],
-                "cost": 284354.70077152416,
+                "cost": 420739.48503913474,
                 "est_rows": 9990,
                 "act_rows": 4,
                 "task_type": 1,
                 "store_type": 1,
                 "operator_info": "data:Selection_33"
               }
             ],
-            "cost": 1830544.8015430481,
+            "cost": 2103314.3700782694,
             "est_rows": 12487.5,
             "task_type": 1,
             "store_type": 1,
@@ -389,22 +389,22 @@
                     "children": [
                       {
                         "name": "TableFullScan_6",
-                        "cost": 2500387.7115728618,
+                        "cost": 4546159.475587022,
                         "est_rows": 10000,
                         "act_rows": 4,
                         "task_type": 2,
                         "store_type": 2,
                         "operator_info": "keep order:false, stats:pseudo"
                       }
                     ],
-                    "cost": 2999387.7115728618,
+                    "cost": 5045159.475587022,
                     "est_rows": 3333.3333333333335,
                     "task_type": 2,
                     "store_type": 2,
                     "operator_info": "gt(test.t2.b, 10)"
                   }
                 ],
-                "cost": 242199.18077152412,
+                "cost": 378583.9650391348,
                 "est_rows": 3333.3333333333335,
                 "task_type": 1,
                 "store_type": 1,
@@ -441,15 +441,15 @@
                 "children": [
                   {
                     "name": "TableFullScan_6",
-                    "cost": 2500387.7115728618,
+                    "cost": 4546159.475587022,
                     "est_rows": 10000,
                     "act_rows": 2,
                     "task_type": 2,
                     "store_type": 2,
                     "operator_info": "keep order:false, stats:pseudo"
                   }
                 ],
-                "cost": 251172.51410485746,
+                "cost": 387557.2983724681,
                 "est_rows": 10000,
                 "act_rows": 2,
                 "task_type": 1,
@@ -475,15 +475,15 @@
                 "children": [
                   {
                     "name": "TableFullScan_5",
-                    "cost": 2500387.7115728618,
+                    "cost": 4546159.475587022,
                     "est_rows": 10000,
                     "act_rows": 8,
                     "task_type": 2,
                     "store_type": 2,
                     "operator_info": "keep order:false, stats:pseudo"
                   }
                 ],
-                "cost": 208932.51410485746,
+                "cost": 345317.2983724681,
                 "est_rows": 10000,
                 "act_rows": 8,
                 "task_type": 1,

diff --git a/pkg/planner/core/casetest/dag/testdata/plan_suite_out.json b/pkg/planner/core/casetest/dag/testdata/plan_suite_out.json
@@ -48,7 +48,7 @@
       },
       {
         "SQL": "select c from t order by t.a limit 1",
-        "Best": "TableReader(Table(t)->Limit)->Limit->Projection"
+        "Best": "IndexReader(Index(t.c_d_e)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->Projection"
       },
       {
         "SQL": "select c from t order by t.a + t.b limit 1",
@@ -508,7 +508,7 @@
       },
       {
         "SQL": "select a from t union all (select c from t) order by a limit 1",
-        "Best": "UnionAll{TableReader(Table(t)->Limit)->Limit->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit}->TopN([Column#25],0,1)"
+        "Best": "UnionAll{IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit}->TopN([Column#25],0,1)"
       }
     ]
   },

diff --git a/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json b/pkg/planner/core/casetest/physicalplantest/testdata/plan_suite_out.json
@@ -2021,9 +2021,9 @@
       },
       {
         "SQL": "select /*+ USE_INDEX_MERGE(t1, c_d_e, f_g) */ * from t where c < 1 or f > 2",
-        "Best": "TableReader(Table(t)->Sel([or(lt(test.t.c, 1), gt(test.t.f, 2))]))",
+        "Best": "IndexMergeReader(PartialPlans->[Index(t.c_d_e)[[-inf,1)], Index(t.f)[(2,+inf]]], TablePlan->Table(t))",
         "HasWarn": true,
-        "Hints": "use_index(@`sel_1` `test`.`t` ), no_order_index(@`sel_1` `test`.`t` `primary`)"
+        "Hints": "use_index_merge(@`sel_1` `t` `c_d_e`, `f`)"
       },
       {
         "SQL": "select /*+ NO_INDEX_MERGE(), USE_INDEX_MERGE(t, primary, f_g, c_d_e) */ * from t where a < 1 or f > 2",
@@ -2039,15 +2039,15 @@
       },
       {
         "SQL": "select /*+ USE_INDEX_MERGE(db2.t) */ * from t where c < 1 or f > 2",
-        "Best": "TableReader(Table(t)->Sel([or(lt(test.t.c, 1), gt(test.t.f, 2))]))",
+        "Best": "IndexMergeReader(PartialPlans->[Index(t.c_d_e)[[-inf,1)], Index(t.f)[(2,+inf]]], TablePlan->Table(t))",
         "HasWarn": true,
-        "Hints": "use_index(@`sel_1` `test`.`t` ), no_order_index(@`sel_1` `test`.`t` `primary`)"
+        "Hints": "use_index_merge(@`sel_1` `t` `c_d_e`, `f`)"
       },
       {
         "SQL": "select /*+ USE_INDEX_MERGE(db2.t, c_d_e, f_g) */ * from t where c < 1 or f > 2",
-        "Best": "TableReader(Table(t)->Sel([or(lt(test.t.c, 1), gt(test.t.f, 2))]))",
+        "Best": "IndexMergeReader(PartialPlans->[Index(t.c_d_e)[[-inf,1)], Index(t.f)[(2,+inf]]], TablePlan->Table(t))",
         "HasWarn": true,
-        "Hints": "use_index(@`sel_1` `test`.`t` ), no_order_index(@`sel_1` `test`.`t` `primary`)"
+        "Hints": "use_index_merge(@`sel_1` `t` `c_d_e`, `f`)"
       }
     ]
   },
@@ -2257,11 +2257,11 @@
     "Cases": [
       {
         "SQL": "select max(a) from t;",
-        "Best": "TableReader(Table(t)->Limit)->Limit->StreamAgg"
+        "Best": "IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a true],0,1))->TopN([test.t.a true],0,1)->StreamAgg"
       },
       {
         "SQL": "select min(a) from t;",
-        "Best": "TableReader(Table(t)->Limit)->Limit->StreamAgg"
+        "Best": "IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->StreamAgg"
       },
       {
         "SQL": "select min(c_str) from t;",
@@ -2277,7 +2277,7 @@
       },
       {
         "SQL": "select max(a), min(a) from t;",
-        "Best": "LeftHashJoin{TableReader(Table(t)->Limit)->Limit->StreamAgg->TableReader(Table(t)->Limit)->Limit->StreamAgg}"
+        "Best": "LeftHashJoin{IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a true],0,1))->TopN([test.t.a true],0,1)->StreamAgg->IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a],0,1))->TopN([test.t.a],0,1)->StreamAgg}"
       },
       {
         "SQL": "select max(a), min(a) from t where a > 10",
@@ -2289,7 +2289,7 @@
       },
       {
         "SQL": "select max(a), max(c), min(f) from t",
-        "Best": "LeftHashJoin{LeftHashJoin{TableReader(Table(t)->Limit)->Limit->StreamAgg->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit->StreamAgg}->IndexReader(Index(t.f)[[NULL,+inf]]->Limit)->Limit->StreamAgg}"
+        "Best": "LeftHashJoin{LeftHashJoin{IndexReader(Index(t.f)[[NULL,+inf]]->TopN([test.t.a true],0,1))->TopN([test.t.a true],0,1)->StreamAgg->IndexReader(Index(t.c_d_e)[[NULL,+inf]]->Limit)->Limit->StreamAgg}->IndexReader(Index(t.f)[[NULL,+inf]]->Limit)->Limit->StreamAgg}"
       },
       {
         "SQL": "select max(a), max(b) from t",

diff --git a/pkg/planner/core/plan_cost_ver2.go b/pkg/planner/core/plan_cost_ver2.go
@@ -162,29 +162,31 @@ func (p *PhysicalTableScan) GetPlanCostVer2(taskType property.TaskType, option *
 	// Apply TiFlash startup cost to prefer TiKV for small table scans
 	if p.StoreType == kv.TiFlash {
 		p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, TiFlashStartupRowPenalty, rowSize, scanFactor))
-	} else {
+	} else if !p.isChildOfIndexLookUp {
 		// Apply cost penalty for full scans that carry high risk of underestimation
 		sessionVars := p.SCtx().GetSessionVars()
 		allowPreferRangeScan := sessionVars.GetAllowPreferRangeScan()
 		tblColHists := p.tblColHists
 
-		// preferRangeScan check here is same as in skylinePruning
-		preferRangeScanCondition := allowPreferRangeScan && (tblColHists.Pseudo || tblColHists.RealtimeCount < 1)
+		// hasUnreliableStats is a check for pseudo or zero stats
+		hasUnreliableStats := tblColHists.Pseudo || tblColHists.RealtimeCount < 1
 		// hasHighModifyCount tracks the high risk of a tablescan where auto-analyze had not yet updated the table row count
 		hasHighModifyCount := tblColHists.ModifyCount > tblColHists.RealtimeCount
 		// hasLowEstimate is a check to capture a unique customer case where modifyCount is used for the tablescan estimate (but it is not adequately understood why)
-		hasLowEstimate := rows > 1 && int64(rows) < tblColHists.RealtimeCount && int64(rows) <= tblColHists.ModifyCount
+		hasLowEstimate := rows > 1 && tblColHists.ModifyCount < tblColHists.RealtimeCount && int64(rows) <= tblColHists.ModifyCount
+		// preferRangeScan check here is same as in skylinePruning
+		preferRangeScanCondition := allowPreferRangeScan && (hasUnreliableStats || hasHighModifyCount || hasLowEstimate)
 		var unsignedIntHandle bool
 		if p.Table.PKIsHandle {
 			if pkColInfo := p.Table.GetPkColInfo(); pkColInfo != nil {
 				unsignedIntHandle = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
 			}
 		}
-		hasFullRangeScan := !p.isChildOfIndexLookUp && ranger.HasFullRange(p.Ranges, unsignedIntHandle)
+		hasFullRangeScan := ranger.HasFullRange(p.Ranges, unsignedIntHandle)
 
-		shouldApplyPenalty := hasFullRangeScan && (preferRangeScanCondition || hasHighModifyCount || hasLowEstimate)
+		shouldApplyPenalty := hasFullRangeScan && preferRangeScanCondition
 		if shouldApplyPenalty {
-			newRowCount := math.Min(MaxPenaltyRowCount, max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
+			newRowCount := max(MaxPenaltyRowCount, max(float64(tblColHists.ModifyCount), float64(tblColHists.RealtimeCount)))
 			p.PlanCostVer2 = costusage.SumCostVer2(p.PlanCostVer2, scanCostVer2(option, newRowCount, rowSize, scanFactor))
 		}
 	}

diff --git a/pkg/planner/core/plan_cost_ver2_test.go b/pkg/planner/core/plan_cost_ver2_test.go
@@ -59,9 +59,9 @@ func TestCostModelVer2ScanRowSize(t *testing.T) {
 		{"select a, b from t use index(abc) where a=1 and b=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
 		{"select a, b, c from t use index(abc) where a=1 and b=1 and c=1", "scan(1*logrowsize(48)*tikv_scan_factor(40.7))"},
 		// table scan row-size is always equal to row-size(*)
-		{"select a from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(80)*tikv_scan_factor(40.7)))"},
-		{"select a, d from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(80)*tikv_scan_factor(40.7)))"},
-		{"select * from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(1000*logrowsize(80)*tikv_scan_factor(40.7)))"},
+		{"select a from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(10000*logrowsize(80)*tikv_scan_factor(40.7)))"},
+		{"select a, d from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(10000*logrowsize(80)*tikv_scan_factor(40.7)))"},
+		{"select * from t use index(primary) where a=1", "(scan(1*logrowsize(80)*tikv_scan_factor(40.7))) + (scan(10000*logrowsize(80)*tikv_scan_factor(40.7)))"},
 	}
 	for _, c := range cases {
 		rs := tk.MustQuery("explain analyze format=true_card_cost " + c.query).Rows()

diff --git a/tests/integrationtest/r/executor/chunk_reuse.result b/tests/integrationtest/r/executor/chunk_reuse.result
@@ -223,14 +223,16 @@ explain format='brief' select id1 from t3 where id2 > '3' or id8 < 10 union (sel
 id	estRows	task	access object	operator info
 HashAgg	8878.22	root		group by:Column#17, funcs:firstrow(Column#17)->Column#17
 └─Union	11097.78	root		
-  ├─TableReader	5548.89	root		data:Projection
-  │ └─Projection	5548.89	cop[tikv]		executor__chunk_reuse.t3.id1->Column#17
-  │   └─Selection	5548.89	cop[tikv]		or(gt(executor__chunk_reuse.t3.id2, "3"), lt(executor__chunk_reuse.t3.id8, 10))
-  │     └─TableFullScan	10000.00	cop[tikv]	table:t3	keep order:false, stats:pseudo
-  └─TableReader	5548.89	root		data:Projection
-    └─Projection	5548.89	cop[tikv]		executor__chunk_reuse.t3.id1->Column#17
-      └─Selection	5548.89	cop[tikv]		or(gt(executor__chunk_reuse.t3.id2, "4"), lt(executor__chunk_reuse.t3.id8, 7))
-        └─TableFullScan	10000.00	cop[tikv]	table:t3	keep order:false, stats:pseudo
+  ├─Projection	5548.89	root		executor__chunk_reuse.t3.id1->Column#17
+  │ └─IndexMerge	5548.89	root		type: union
+  │   ├─IndexRangeScan(Build)	3333.33	cop[tikv]	table:t3, index:id2(id2)	range:("3",+inf], keep order:false, stats:pseudo
+  │   ├─IndexRangeScan(Build)	3323.33	cop[tikv]	table:t3, index:id8(id8)	range:[-inf,10), keep order:false, stats:pseudo
+  │   └─TableRowIDScan(Probe)	5548.89	cop[tikv]	table:t3	keep order:false, stats:pseudo
+  └─Projection	5548.89	root		executor__chunk_reuse.t3.id1->Column#17
+    └─IndexMerge	5548.89	root		type: union
+      ├─IndexRangeScan(Build)	3333.33	cop[tikv]	table:t3, index:id2(id2)	range:("4",+inf], keep order:false, stats:pseudo
+      ├─IndexRangeScan(Build)	3323.33	cop[tikv]	table:t3, index:id8(id8)	range:[-inf,7), keep order:false, stats:pseudo
+      └─TableRowIDScan(Probe)	5548.89	cop[tikv]	table:t3	keep order:false, stats:pseudo
 select id1 from t3 where id2 > '3' or id8 < 10 union (select id1 from t3 where id2 > '4' or id8 < 7);
 id1
 1