Merge branch 'master' into snapshot-fail-test

pingcap · Feb 24, 2021 · e05ef4d
2 parents 9bef1b8 + 806097c
commit e05ef4d
Show file tree

Hide file tree

Showing 5 changed files with 69 additions and 29 deletions.
diff --git a/cmd/explaintest/r/explain_indexmerge.result b/cmd/explaintest/r/explain_indexmerge.result
@@ -133,3 +133,20 @@ IndexMerge	4999999.00	root
 ├─TableRangeScan(Build)	49.00	cop[tikv]	table:t	range:[-inf,50), keep order:false
 ├─IndexRangeScan(Build)	4999999.00	cop[tikv]	table:t, index:tb(b)	range:[-inf,5000000), keep order:false
 └─TableRowIDScan(Probe)	4999999.00	cop[tikv]	table:t	keep order:false
+set session tidb_enable_index_merge = on;
+drop table if exists t;
+CREATE TABLE t (
+`id` int(11) NOT NULL,
+`aid` bigint,
+`c1` varchar(255) DEFAULT NULL,
+`c2` varchar(255) DEFAULT NULL,
+PRIMARY KEY (`id`),
+KEY `aid_c1` (`aid`,`c1`),
+KEY `aid_c2` (`aid`,`c2`)
+);
+desc select /*+ USE_INDEX_MERGE(t, aid_c1, aid_c2) */ * from t where (aid = 1 and c1='aaa') or (aid = 2 and c2='bbb');
+id	estRows	task	access object	operator info
+IndexMerge_8	269.49	root		
+├─IndexRangeScan_5(Build)	0.10	cop[tikv]	table:t, index:aid_c1(aid, c1)	range:[1 "aaa",1 "aaa"], keep order:false, stats:pseudo
+├─IndexRangeScan_6(Build)	0.10	cop[tikv]	table:t, index:aid_c2(aid, c2)	range:[2 "bbb",2 "bbb"], keep order:false, stats:pseudo
+└─TableRowIDScan_7(Probe)	269.49	cop[tikv]	table:t	keep order:false, stats:pseudo
diff --git a/cmd/explaintest/t/explain_indexmerge.test b/cmd/explaintest/t/explain_indexmerge.test
@@ -28,3 +28,16 @@ explain format = 'brief' select /*+ use_index_merge(t, tb) */ * from t where b <
 explain format = 'brief' select /*+ no_index_merge(), use_index_merge(t, tb, tc) */ * from t where b < 50 or c < 5000000;
 # tableScan can be a partial path to fetch handle
 explain format = 'brief' select /*+ use_index_merge(t, primary, tb) */ * from t where a < 50 or b < 5000000;
+# composed index
+set session tidb_enable_index_merge = on;
+drop table if exists t;
+CREATE TABLE t (
+  `id` int(11) NOT NULL,
+  `aid` bigint,
+  `c1` varchar(255) DEFAULT NULL,
+  `c2` varchar(255) DEFAULT NULL,
+  PRIMARY KEY (`id`),
+  KEY `aid_c1` (`aid`,`c1`),
+  KEY `aid_c2` (`aid`,`c2`)
+);
+desc select /*+ USE_INDEX_MERGE(t, aid_c1, aid_c2) */ * from t where (aid = 1 and c1='aaa') or (aid = 2 and c2='bbb');
diff --git a/planner/core/stats.go b/planner/core/stats.go
@@ -321,31 +321,38 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *
 		}
 	}
 	if isPossibleIdxMerge && sessionAndStmtPermission && needConsiderIndexMerge && isReadOnlyTxn {
-		ds.generateAndPruneIndexMergePath(ds.indexMergeHints != nil)
+		err := ds.generateAndPruneIndexMergePath(ds.indexMergeHints != nil)
+		if err != nil {
+			return nil, err
+		}
 	} else if len(ds.indexMergeHints) > 0 {
 		ds.indexMergeHints = nil
 		ds.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.Errorf("IndexMerge is inapplicable or disabled"))
 	}
 	return ds.stats, nil
 }
 
-func (ds *DataSource) generateAndPruneIndexMergePath(needPrune bool) {
+func (ds *DataSource) generateAndPruneIndexMergePath(needPrune bool) error {
 	regularPathCount := len(ds.possibleAccessPaths)
-	ds.generateIndexMergeOrPaths()
+	err := ds.generateIndexMergeOrPaths()
+	if err != nil {
+		return err
+	}
 	// If without hints, it means that `enableIndexMerge` is true
 	if len(ds.indexMergeHints) == 0 {
-		return
+		return nil
 	}
 	// With hints and without generated IndexMerge paths
 	if regularPathCount == len(ds.possibleAccessPaths) {
 		ds.indexMergeHints = nil
 		ds.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.Errorf("IndexMerge is inapplicable or disabled"))
-		return
+		return nil
 	}
 	// Do not need to consider the regular paths in find_best_task().
 	if needPrune {
 		ds.possibleAccessPaths = ds.possibleAccessPaths[regularPathCount:]
 	}
+	return nil
 }
 
 // DeriveStats implements LogicalPlan DeriveStats interface.
@@ -401,7 +408,7 @@ func (is *LogicalIndexScan) DeriveStats(childStats []*property.StatsInfo, selfSc
 }
 
 // getIndexMergeOrPath generates all possible IndexMergeOrPaths.
-func (ds *DataSource) generateIndexMergeOrPaths() {
+func (ds *DataSource) generateIndexMergeOrPaths() error {
 	usedIndexCount := len(ds.possibleAccessPaths)
 	for i, cond := range ds.pushedDownConds {
 		sf, ok := cond.(*expression.ScalarFunction)
@@ -417,7 +424,10 @@ func (ds *DataSource) generateIndexMergeOrPaths() {
 				partialPaths = nil
 				break
 			}
-			partialPath := ds.buildIndexMergePartialPath(itemPaths)
+			partialPath, err := ds.buildIndexMergePartialPath(itemPaths)
+			if err != nil {
+				return err
+			}
 			if partialPath == nil {
 				partialPaths = nil
 				break
@@ -438,7 +448,7 @@ func (ds *DataSource) generateIndexMergeOrPaths() {
 		if len(partialPaths) > 1 {
 			possiblePath := ds.buildIndexMergeOrPath(partialPaths, i)
 			if possiblePath == nil {
-				return
+				return nil
 			}
 
 			accessConds := make([]expression.Expression, 0, len(partialPaths))
@@ -455,6 +465,7 @@ func (ds *DataSource) generateIndexMergeOrPaths() {
 			ds.possibleAccessPaths = append(ds.possibleAccessPaths, possiblePath)
 		}
 	}
+	return nil
 }
 
 // isInIndexMergeHints checks whether current index or primary key is in IndexMerge hints.
@@ -541,26 +552,25 @@ func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, us
 }
 
 // buildIndexMergePartialPath chooses the best index path from all possible paths.
-// Now we just choose the index with most columns.
-// We should improve this strategy, because it is not always better to choose index
-// with most columns, e.g, filter is c > 1 and the input indexes are c and c_d_e,
-// the former one is enough, and it is less expensive in execution compared with the latter one.
-// TODO: improve strategy of the partial path selection
-func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*util.AccessPath) *util.AccessPath {
+// Now we choose the index with minimal estimate row count.
+func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*util.AccessPath) (*util.AccessPath, error) {
 	if len(indexAccessPaths) == 1 {
-		return indexAccessPaths[0]
+		return indexAccessPaths[0], nil
 	}
 
-	maxColsIndex := 0
-	maxCols := len(indexAccessPaths[0].IdxCols)
-	for i := 1; i < len(indexAccessPaths); i++ {
-		current := len(indexAccessPaths[i].IdxCols)
-		if current > maxCols {
-			maxColsIndex = i
-			maxCols = current
+	minEstRowIndex := 0
+	minEstRow := math.MaxFloat64
+	for i := 0; i < len(indexAccessPaths); i++ {
+		rc, err := ds.stats.HistColl.GetRowCountByIndexRanges(ds.ctx.GetSessionVars().StmtCtx, indexAccessPaths[i].Index.ID, indexAccessPaths[i].Ranges)
+		if err != nil {
+			return nil, err
+		}
+		if rc < minEstRow {
+			minEstRowIndex = i
+			minEstRow = rc
 		}
 	}
-	return indexAccessPaths[maxColsIndex]
+	return indexAccessPaths[minEstRowIndex], nil
 }
 
 // buildIndexMergeOrPath generates one possible IndexMergePath.

diff --git a/planner/core/testdata/index_merge_suite_out.json b/planner/core/testdata/index_merge_suite_out.json
@@ -4,10 +4,10 @@
     "Cases": [
       "[]",
       "[]",
-      "[{Idxs:[c_d_e,f_g],TbFilters:[]}]",
-      "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7))]},{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2))]}]",
-      "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7)),or(lt(test.t.c, 1), gt(test.t.g, 2))]},{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(lt(test.t.c, 1), gt(test.t.g, 2))]},{Idxs:[c_d_e,g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(gt(test.t.c, 5), lt(test.t.f, 7))]}]",
-      "[{Idxs:[c_d_e,f_g],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7)),or(lt(test.t.e, 1), gt(test.t.f, 2))]},{Idxs:[c_d_e,f_g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(lt(test.t.e, 1), gt(test.t.f, 2))]}]"
+      "[{Idxs:[c_d_e,f],TbFilters:[]}]",
+      "[{Idxs:[c_d_e,f],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7))]},{Idxs:[c_d_e,f],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2))]}]",
+      "[{Idxs:[c_d_e,f],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7)),or(lt(test.t.c, 1), gt(test.t.g, 2))]},{Idxs:[c_d_e,f],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(lt(test.t.c, 1), gt(test.t.g, 2))]},{Idxs:[c_d_e,g],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(gt(test.t.c, 5), lt(test.t.f, 7))]}]",
+      "[{Idxs:[c_d_e,f],TbFilters:[or(gt(test.t.c, 5), lt(test.t.f, 7)),or(lt(test.t.e, 1), gt(test.t.f, 2))]},{Idxs:[c_d_e,f],TbFilters:[or(lt(test.t.c, 1), gt(test.t.f, 2)),or(lt(test.t.e, 1), gt(test.t.f, 2))]}]"
     ]
   }
 ]
diff --git a/planner/core/testdata/plan_suite_out.json b/planner/core/testdata/plan_suite_out.json
@@ -226,9 +226,9 @@
       },
       {
         "SQL": "select /*+ USE_INDEX_MERGE(t) USE_INDEX_MERGE(t) */ * from t where c < 1 or f > 2",
-        "Best": "IndexMergeReader(PartialPlans->[Index(t.c_d_e)[[-inf,1)], Index(t.f_g)[(2,+inf]]], TablePlan->Table(t))",
+        "Best": "IndexMergeReader(PartialPlans->[Index(t.c_d_e)[[-inf,1)], Index(t.f)[(2,+inf]]], TablePlan->Table(t))",
         "HasWarn": false,
-        "Hints": "use_index_merge(@`sel_1` `t` `c_d_e`, `f_g`)"
+        "Hints": "use_index_merge(@`sel_1` `t` `c_d_e`, `f`)"
       },
       {
         "SQL": "select /*+ USE_INDEX_MERGE(db2.t) */ * from t where c < 1 or f > 2",