From d16e6ac4ca1f971c05e503a25c5ec8cabf1be24a Mon Sep 17 00:00:00 2001
From: qiyanghe1998
Date: Mon, 10 Jul 2023 10:47:55 -0400
Subject: [PATCH] sql: add trie-based workload index recommendations

This commit adds the trie and the logic for getting the workload index
recommendations. In addition, it fills the gap between the built-in
functions and the backend implementation for workload index
recommendations.

The whole process consists of collecting candidates and finding
representative indexes. All the index recommendations in the table
"system.statement_statistics" (satisfying some time requirement) are
collected as candidates and then inserted into a trie. Each trie holds
all the indexes of one table: the indexed columns, in their original
order, form the key used for insertion, and the storing columns are
attached to the node reached after inserting the indexed columns. For
example, "CREATE INDEX ON t1 (k, i) STORING (f)" is inserted as the
path k -> i, with "f" recorded in the storing set of the node for i.

The general idea of finding representative indexes is to use all the
indexes represented by the leaf nodes. One optimization is to remove
the storings that are covered by some leaf node. Next, we push down
all the storings attached to internal nodes to the shallowest leaf
nodes (see the RFC for the reasoning). Finally, all the indexes
represented by the leaf nodes are returned.

As for "DROP INDEX", since we collect all the indexes represented by
the leaf nodes (a superset of the dropped indexes), we can directly
drop all of them.

Release note: None
---
 pkg/BUILD.bazel                                |   2 +
 .../testdata/logic_test/workload_indexrecs     | 378 +++++++++++++++++-
 pkg/sql/opt/workloadindexrec/BUILD.bazel       |  20 +
 pkg/sql/opt/workloadindexrec/index_trie.go     | 211 ++++++++++
 .../workloadindexrec/workload_indexrecs.go     | 154 +++++++
 pkg/sql/sem/builtins/BUILD.bazel               |   1 +
 pkg/sql/sem/builtins/generator_builtins.go     |  46 ++-
 7 files changed, 789 insertions(+), 23 deletions(-)
 create mode 100644 pkg/sql/opt/workloadindexrec/BUILD.bazel
 create mode 100644 pkg/sql/opt/workloadindexrec/index_trie.go
 create mode 100644 pkg/sql/opt/workloadindexrec/workload_indexrecs.go

diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel
index f52d08390090..994f404c1f64 100644
--- a/pkg/BUILD.bazel
+++ b/pkg/BUILD.bazel
@@ -1831,6 +1831,7 @@ GO_TARGETS = [
     "//pkg/sql/opt/testutils/testexpr:testexpr",
     "//pkg/sql/opt/testutils:testutils",
     "//pkg/sql/opt/testutils:testutils_test",
+    "//pkg/sql/opt/workloadindexrec:workloadindexrec",
     "//pkg/sql/opt/xform:xform",
     "//pkg/sql/opt/xform:xform_test",
     "//pkg/sql/opt:opt",
@@ -3107,6 +3108,7 @@ GET_X_DATA_TARGETS = [
     "//pkg/sql/opt/testutils/opttester:get_x_data",
     "//pkg/sql/opt/testutils/testcat:get_x_data",
     "//pkg/sql/opt/testutils/testexpr:get_x_data",
+    "//pkg/sql/opt/workloadindexrec:get_x_data",
     "//pkg/sql/opt/xform:get_x_data",
     "//pkg/sql/optionalnodeliveness:get_x_data",
     "//pkg/sql/paramparse:get_x_data",
diff --git a/pkg/sql/logictest/testdata/logic_test/workload_indexrecs b/pkg/sql/logictest/testdata/logic_test/workload_indexrecs
index 8e2462afb7dd..9f99088e8cc0 100644
--- a/pkg/sql/logictest/testdata/logic_test/workload_indexrecs
+++ b/pkg/sql/logictest/testdata/logic_test/workload_indexrecs
@@ -1,31 +1,383 @@
+# Give root role permission to insert into system tables.
+# DO NOT DO THIS IN PRODUCTION.
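+# Granting the node role to root lets this test INSERT synthetic rows
+# directly into system.statement_statistics below, simulating collected
+# statement statistics.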
+statement ok +INSERT INTO system.users VALUES ('node', NULL, true, 3); +GRANT NODE TO root; + +statement ok +CREATE TABLE t1 (k INT, i INT, f FLOAT, s STRING) + +# Basic tests for creation, replacement, alteration +statement ok +INSERT INTO system.statement_statistics ( + index_recommendations, + aggregated_ts, + fingerprint_id, + transaction_fingerprint_id, + plan_hash, + app_name, + node_id, + agg_interval, + metadata, + statistics, + plan +) +VALUES ( + ARRAY['creation : CREATE INDEX t1_k ON t1(k)'], + '2023-07-05 15:10:11+00:00', + 'fp_1', + 'tfp_1', + 'ph_1', + 'app_1', + 1, + '1 hr', + 'null', + '{"statistics": {"lastExecAt" : "2023-07-05 15:10:10+00:00"}}'::JSONB, + 'null' +); + +# get workload index-recs +query T nosort +SELECT workload_index_recs(); +---- +CREATE INDEX ON t1 (k); + + +# get workload index-recs with time filter +query T nosort +SELECT workload_index_recs('2023-07-05 15:10:10+00:00'::TIMESTAMPTZ - '2 weeks'::interval); +---- +CREATE INDEX ON t1 (k); + + +# get workload index-recs with budget limit +query T nosort +SELECT workload_index_recs('42MB'); +---- +CREATE INDEX ON t1 (k); + + +# get workload index-recs with time filter and budget limit +query T nosort +SELECT workload_index_recs('2023-06-13 10:10:10-05:00', '58GiB'); +---- +CREATE INDEX ON t1 (k); + + +statement ok +CREATE INDEX t1_i ON t1(i); + +statement ok +INSERT INTO system.statement_statistics ( + index_recommendations, + aggregated_ts, + fingerprint_id, + transaction_fingerprint_id, + plan_hash, + app_name, + node_id, + agg_interval, + metadata, + statistics, + plan +) +VALUES ( + ARRAY['replacement : CREATE INDEX t1_i2 ON t1(i) storing (k); DROP INDEX t1_i;'], + '2023-07-05 15:10:12+00:00', + 'fp_2', + 'tfp_2', + 'ph_2', + 'app_2', + 2, + '1 hr', + 'null', + '{"statistics": {"lastExecAt" : "2023-06-15 15:10:10+00:00"}}'::JSONB, + 'null' +); + +# get workload index-recs +query T nosort +SELECT workload_index_recs(); +---- +CREATE INDEX ON t1 (k); +CREATE INDEX ON t1 (i) STORING (k); +DROP INDEX t1_i; + + +# get workload index-recs with time filter +query T nosort +SELECT workload_index_recs('2023-07-05 15:10:10+00:00'::TIMESTAMPTZ - '2 weeks'::interval); +---- +CREATE INDEX ON t1 (k); + + +# get workload index-recs with budget limit +query T nosort +SELECT workload_index_recs('42MB'); +---- +CREATE INDEX ON t1 (k); +CREATE INDEX ON t1 (i) STORING (k); +DROP INDEX t1_i; + + +# get workload index-recs with time filter and budget limit +query T nosort +SELECT workload_index_recs('2023-06-13 10:10:10-05:00', '58GiB'); +---- +CREATE INDEX ON t1 (k); +CREATE INDEX ON t1 (i) STORING (k); +DROP INDEX t1_i; + + +statement ok +INSERT INTO system.statement_statistics ( + index_recommendations, + aggregated_ts, + fingerprint_id, + transaction_fingerprint_id, + plan_hash, + app_name, + node_id, + agg_interval, + metadata, + statistics, + plan +) +VALUES ( + ARRAY['alteration : ALTER INDEX t1_i NOT VISIBLE'], + '2023-07-05 15:10:13+00:00', + 'fp_3', + 'tfp_3', + 'ph_3', + 'app_3', + 3, + '1 hr', + 'null', + '{"statistics": {"lastExecAt" : "2023-06-29 15:10:10+00:00"}}'::JSONB, + 'null' +); + # get workload index-recs query T nosort SELECT workload_index_recs(); ---- -1 -2 -3 +CREATE INDEX ON t1 (k); +CREATE INDEX ON t1 (i) STORING (k); +DROP INDEX t1_i; + # get workload index-recs with time filter query T nosort -SELECT workload_index_recs(now() - '2 weeks'::interval); +SELECT workload_index_recs('2023-07-05 15:10:10+00:00'::TIMESTAMPTZ - '2 weeks'::interval); ---- -1 -2 -3 +CREATE INDEX ON t1 (k); + # get 
workload index-recs with budget limit query T nosort SELECT workload_index_recs('42MB'); ---- -1 -2 -3 +CREATE INDEX ON t1 (k); +CREATE INDEX ON t1 (i) STORING (k); +DROP INDEX t1_i; + + +# get workload index-recs with time filter and budget limit +query T nosort +SELECT workload_index_recs('2023-06-13 10:10:10-05:00', '58GiB'); +---- +CREATE INDEX ON t1 (k); +CREATE INDEX ON t1 (i) STORING (k); +DROP INDEX t1_i; + + +# Test for the new index "t1(k, i)" covering the previous one "t1(k)" +statement ok +INSERT INTO system.statement_statistics ( + index_recommendations, + aggregated_ts, + fingerprint_id, + transaction_fingerprint_id, + plan_hash, + app_name, + node_id, + agg_interval, + metadata, + statistics, + plan +) +VALUES ( + ARRAY['creation : CREATE INDEX t1_k_i ON t1(k, i)'], + '2023-07-05 15:10:14+00:00', + 'fp_4', + 'tfp_4', + 'ph_4', + 'app_4', + 4, + '1 hr', + 'null', + '{"statistics": {"lastExecAt" : "2023-07-05 15:10:10+00:00"}}'::JSONB, + 'null' +); + +# get workload index-recs +query T nosort +SELECT workload_index_recs(); +---- +CREATE INDEX ON t1 (i) STORING (k); +CREATE INDEX ON t1 (k, i); +DROP INDEX t1_i; + + +# get workload index-recs with time filter +query T nosort +SELECT workload_index_recs('2023-07-05 15:10:10+00:00'::TIMESTAMPTZ - '2 weeks'::interval); +---- +CREATE INDEX ON t1 (k, i); + + +# get workload index-recs with budget limit +query T nosort +SELECT workload_index_recs('42MB'); +---- +CREATE INDEX ON t1 (k, i); +CREATE INDEX ON t1 (i) STORING (k); +DROP INDEX t1_i; + + +# get workload index-recs with time filter and budget limit +query T nosort +SELECT workload_index_recs('2023-06-13 10:10:10-05:00', '58GiB'); +---- +CREATE INDEX ON t1 (i) STORING (k); +CREATE INDEX ON t1 (k, i); +DROP INDEX t1_i; + + +# Test for the storing part "t1(i) storing (k)" covered by one index "t1(i, k)" +statement ok +INSERT INTO system.statement_statistics ( + index_recommendations, + aggregated_ts, + fingerprint_id, + transaction_fingerprint_id, + plan_hash, + app_name, + node_id, + agg_interval, + metadata, + statistics, + plan +) +VALUES ( + ARRAY['creation : CREATE INDEX t1_k_i ON t1(i, k)'], + '2023-07-05 15:10:15+00:00', + 'fp_5', + 'tfp_5', + 'ph_5', + 'app_5', + 5, + '1 hr', + 'null', + '{"statistics": {"lastExecAt" : "2023-07-05 15:10:10+00:00"}}'::JSONB, + 'null' +); + +# get workload index-recs +query T nosort +SELECT workload_index_recs(); +---- +CREATE INDEX ON t1 (k, i); +CREATE INDEX ON t1 (i, k); +DROP INDEX t1_i; + + +# get workload index-recs with time filter +query T nosort +SELECT workload_index_recs('2023-07-05 15:10:10+00:00'::TIMESTAMPTZ - '2 weeks'::interval); +---- +CREATE INDEX ON t1 (k, i); +CREATE INDEX ON t1 (i, k); + + +# get workload index-recs with budget limit +query T nosort +SELECT workload_index_recs('42MB'); +---- +CREATE INDEX ON t1 (k, i); +CREATE INDEX ON t1 (i, k); +DROP INDEX t1_i; + + +# get workload index-recs with time filter and budget limit +query T nosort +SELECT workload_index_recs('2023-06-13 10:10:10-05:00', '58GiB'); +---- +CREATE INDEX ON t1 (k, i); +CREATE INDEX ON t1 (i, k); +DROP INDEX t1_i; + + +# Test for duplicate DROP INDEX t1_i +statement ok +INSERT INTO system.statement_statistics ( + index_recommendations, + aggregated_ts, + fingerprint_id, + transaction_fingerprint_id, + plan_hash, + app_name, + node_id, + agg_interval, + metadata, + statistics, + plan +) +VALUES ( + ARRAY['replacement : CREATE INDEX t1_i2 ON t1(i) storing (k); DROP INDEX t1_i;'], + '2023-07-05 15:10:16+00:00', + 'fp_6', + 'tfp_6', + 'ph_6', + 
'app_6',
+    6,
+    '1 hr',
+    'null',
+    '{"statistics": {"lastExecAt" : "2023-07-05 15:10:10+00:00"}}'::JSONB,
+    'null'
+);
+
+# get workload index-recs
+query T nosort
+SELECT workload_index_recs();
+----
+CREATE INDEX ON t1 (k, i);
+CREATE INDEX ON t1 (i, k);
+DROP INDEX t1_i;
+
+
+# get workload index-recs with time filter
+query T nosort
+SELECT workload_index_recs('2023-07-05 15:10:10+00:00'::TIMESTAMPTZ - '2 weeks'::interval);
+----
+CREATE INDEX ON t1 (k, i);
+CREATE INDEX ON t1 (i, k);
+DROP INDEX t1_i;
+
+
+# get workload index-recs with budget limit
+query T nosort
+SELECT workload_index_recs('42MB');
+----
+CREATE INDEX ON t1 (k, i);
+CREATE INDEX ON t1 (i, k);
+DROP INDEX t1_i;
+
 # get workload index-recs with time filter and budget limit
 query T nosort
 SELECT workload_index_recs('2023-06-13 10:10:10-05:00', '58GiB');
 ----
-1
-2
-3
+CREATE INDEX ON t1 (i, k);
+CREATE INDEX ON t1 (k, i);
+DROP INDEX t1_i;
diff --git a/pkg/sql/opt/workloadindexrec/BUILD.bazel b/pkg/sql/opt/workloadindexrec/BUILD.bazel
new file mode 100644
index 000000000000..7ee61834d115
--- /dev/null
+++ b/pkg/sql/opt/workloadindexrec/BUILD.bazel
@@ -0,0 +1,20 @@
+load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "workloadindexrec",
+    srcs = [
+        "index_trie.go",
+        "workload_indexrecs.go",
+    ],
+    importpath = "github.com/cockroachdb/cockroach/pkg/sql/opt/workloadindexrec",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//pkg/sql/parser",
+        "//pkg/sql/sem/eval",
+        "//pkg/sql/sem/tree",
+        "//pkg/sql/sessiondata",
+    ],
+)
+
+get_x_data(name = "get_x_data")
diff --git a/pkg/sql/opt/workloadindexrec/index_trie.go b/pkg/sql/opt/workloadindexrec/index_trie.go
new file mode 100644
index 000000000000..d7fd51f637cc
--- /dev/null
+++ b/pkg/sql/opt/workloadindexrec/index_trie.go
@@ -0,0 +1,211 @@
+// Copyright 2023 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package workloadindexrec
+
+import (
+    "container/list"
+
+    "github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
+)
+
+// TrieNode is a node of the trie.
+//
+// TrieNode stores the indexed columns (as the keys of Children), the storing
+// columns, the parent node, and the indexed column represented by the node
+// itself (used when assigning storings).
+type TrieNode struct {
+    Children map[tree.IndexElem]*TrieNode
+    Storing  map[string]bool
+    Fa       *TrieNode
+    Col      tree.IndexElem
+}
+
+// Trie is a trie specific to the indexes of one table.
+//
+// Trie stores all the indexed columns, one per node, with or without storing
+// columns, and supports Insert, removeStorings and assignStoring. See the
+// corresponding methods for details.
+type Trie struct {
+    Root *TrieNode
+}
+
+// NewTrie returns a new, empty trie.
+func NewTrie() *Trie {
+    return &Trie{
+        Root: &TrieNode{
+            Children: make(map[tree.IndexElem]*TrieNode),
+            Storing:  make(map[string]bool),
+            Fa:       nil,
+            Col:      tree.IndexElem{},
+        },
+    }
+}
+
+// Insert parses the columns in ci (CreateIndex) and updates the trie.
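+// For example, "CREATE INDEX ON t1 (k, i) STORING (f)" walks (creating nodes
+// as needed) the path root -> k -> i, and then records "f" in the storing set
+// of the node for i.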
+func (t *Trie) Insert(ci tree.CreateIndex) {
+    node := t.Root
+    for _, column := range ci.Columns {
+        if _, ok := node.Children[column]; !ok {
+            node.Children[column] = &TrieNode{
+                Children: make(map[tree.IndexElem]*TrieNode),
+                Storing:  make(map[string]bool),
+                Fa:       node,
+                Col:      column,
+            }
+        }
+        node = node.Children[column]
+    }
+    for _, column := range ci.Storing {
+        node.Storing[string(column)] = true
+    }
+}
+
+// removeStorings removes those storings that are covered by the leaf nodes.
+// It traverses the whole trie of each table by breadth-first search (BFS).
+// Whenever there is a node with a storing, it invokes
+// removeStoringCoveredByLeaf to check whether some leaf node covers that
+// storing.
+func removeStorings(tm map[tree.TableName]*Trie) {
+    queue := list.New()
+    for _, t := range tm {
+        queue.Init()
+        queue.PushBack(t.Root)
+        for queue.Len() > 0 {
+            node := queue.Front().Value.(*TrieNode)
+            queue.Remove(queue.Front())
+            if len(node.Storing) > 0 {
+                if removeStoringCoveredByLeaf(node, node.Storing) {
+                    node.Storing = make(map[string]bool)
+                }
+            }
+            for _, child := range node.Children {
+                queue.PushBack(child)
+            }
+        }
+    }
+}
+
+// removeStoringCoveredByLeaf checks whether the storing is covered by some
+// leaf node, by depth-first search (DFS).
+func removeStoringCoveredByLeaf(node *TrieNode, restStoring map[string]bool) bool {
+    // Nothing left to cover for the storing, even if we have not reached a
+    // leaf node yet.
+    if len(restStoring) == 0 {
+        return true
+    }
+
+    // Leaf node: the storing is not fully covered along this path.
+    if len(node.Children) == 0 {
+        return false
+    }
+
+    for indexCol, child := range node.Children {
+        // Delete the element covered by the child.
+        var found = false
+        if _, ok := restStoring[string(indexCol.Column)]; ok {
+            found = true
+            delete(restStoring, string(indexCol.Column))
+        }
+
+        if removeStoringCoveredByLeaf(child, restStoring) {
+            return true
+        }
+
+        // Restore the deleted element so that restStoring can be reused for
+        // the other children.
+        if found {
+            restStoring[string(indexCol.Column)] = true
+        }
+    }
+
+    return false
+}
+
+// assignStoring assigns the storings for all the tables in tm; see
+// assignStoringToShallowestLeaf for the details.
+func assignStoring(tm map[tree.TableName]*Trie) {
+    for _, t := range tm {
+        assignStoringToShallowestLeaf(t.Root, 0)
+    }
+}
+
+// assignStoringToShallowestLeaf assigns the storing of each internal node to
+// the shallowest leaf node inside its subtree. It returns that leaf and its
+// depth.
+func assignStoringToShallowestLeaf(node *TrieNode, curDep int16) (*TrieNode, int16) {
+    if len(node.Children) == 0 {
+        return node, curDep
+    }
+
+    var shallowLeaf *TrieNode
+    // The largest possible depth (math.MaxInt16).
+    var dep int16 = (1 << 15) - 1
+    for _, child := range node.Children {
+        tempLeaf, tempDep := assignStoringToShallowestLeaf(child, curDep+1)
+        if tempDep < dep {
+            dep = tempDep
+            shallowLeaf = tempLeaf
+        }
+    }
+
+    if len(node.Storing) > 0 {
+        // Assign the storing of node to shallowLeaf. The columns indexed
+        // along the path from node down to shallowLeaf are already covered,
+        // so drop them from the storing first.
+        var tempNode = shallowLeaf
+        for tempNode != node {
+            delete(node.Storing, string(tempNode.Col.Column))
+            tempNode = tempNode.Fa
+        }
+
+        for col := range node.Storing {
+            shallowLeaf.Storing[col] = true
+        }
+    }
+
+    return shallowLeaf, dep
+}
+
+// collectAllLeaves4Tables collects all the indexes represented by the leaf
+// nodes of all the tries in tm.
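+// For example, if the trie of t1 has the leaf paths (k, i) and (i) with
+// storing {k}, the collected indexes are CREATE INDEX ON t1 (k, i) and
+// CREATE INDEX ON t1 (i) STORING (k).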
+func collectAllLeaves4Tables(tm map[tree.TableName]*Trie) []tree.CreateIndex {
+    var cis []tree.CreateIndex
+    for table, trie := range tm {
+        var temp []tree.CreateIndex
+        collectAllLeaves(trie.Root, &temp, table, []tree.IndexElem{})
+        cis = append(cis, temp...)
+    }
+    return cis
+}
+
+// collectAllLeaves recursively collects all the indexes represented by the
+// leaf nodes.
+func collectAllLeaves(
+    node *TrieNode, cis *[]tree.CreateIndex, tableName tree.TableName, indexedCols []tree.IndexElem,
+) {
+    if len(node.Children) == 0 {
+        storingCols := make([]tree.Name, len(node.Storing))
+        var idx = 0
+        for storingCol := range node.Storing {
+            storingCols[idx] = tree.Name(storingCol)
+            idx++
+        }
+        *cis = append(*cis, tree.CreateIndex{
+            Table:    tableName,
+            Columns:  indexedCols,
+            Storing:  storingCols,
+            Unique:   false,
+            Inverted: false,
+        })
+        return
+    }
+
+    for indexCol, child := range node.Children {
+        // Copy indexedCols before appending so that sibling subtrees do not
+        // share (and overwrite) the backing array referenced by the
+        // CreateIndex stored at each leaf.
+        newCols := make([]tree.IndexElem, len(indexedCols), len(indexedCols)+1)
+        copy(newCols, indexedCols)
+        collectAllLeaves(child, cis, tableName, append(newCols, indexCol))
+    }
+}
diff --git a/pkg/sql/opt/workloadindexrec/workload_indexrecs.go b/pkg/sql/opt/workloadindexrec/workload_indexrecs.go
new file mode 100644
index 000000000000..6506e5071e25
--- /dev/null
+++ b/pkg/sql/opt/workloadindexrec/workload_indexrecs.go
@@ -0,0 +1,154 @@
+// Copyright 2023 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package workloadindexrec
+
+import (
+    "context"
+    "fmt"
+    "regexp"
+
+    "github.com/cockroachdb/cockroach/pkg/sql/parser"
+    "github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
+    "github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
+    "github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
+)
+
+// FindWorkloadRecs finds index recommendations for the whole workload after
+// the timestamp ts, within the space budget represented by budgetBytes.
+func FindWorkloadRecs(
+    ctx context.Context, evalCtx *eval.Context, ts *tree.DTimestampTZ, budgetBytes int64,
+) ([]string, error) {
+    cis, dis, err := collectIndexRecs(ctx, evalCtx, ts)
+    if err != nil {
+        return nil, err
+    }
+
+    trieMap := buildTrieForIndexRecs(cis)
+    newCis := extractIndexCovering(trieMap)
+
+    res := make([]string, len(newCis))
+    for i, ci := range newCis {
+        res[i] = ci.String() + ";"
+    }
+
+    // Since we collect all the indexes represented by the leaf nodes, all the
+    // indexes mentioned by "DROP INDEX" recommendations have been covered, so
+    // we can directly drop all of them, deduplicated via disMap.
+    var disMap = make(map[tree.TableIndexName]bool)
+    for _, di := range dis {
+        for _, index := range di.IndexList {
+            disMap[*index] = true
+        }
+    }
+
+    for index := range disMap {
+        // Copy the loop variable; its address is taken below.
+        index := index
+        dropCmd := tree.DropIndex{
+            IndexList: []*tree.TableIndexName{&index},
+        }
+        res = append(res, dropCmd.String()+";")
+    }
+
+    return res, nil
+}
+
+// collectIndexRecs collects all the index recommendations stored in
+// system.statement_statistics whose last execution time is later than ts.
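+// Each element of index_recommendations is expected to look like
+// "creation : CREATE INDEX t1_k ON t1(k)" or
+// "replacement : CREATE INDEX ... ; DROP INDEX ...", as in the logic test
+// above.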
+func collectIndexRecs(
+    ctx context.Context, evalCtx *eval.Context, ts *tree.DTimestampTZ,
+) ([]tree.CreateIndex, []tree.DropIndex, error) {
+    query := `SELECT index_recommendations FROM system.statement_statistics
+              WHERE (statistics -> 'statistics' ->> 'lastExecAt')::TIMESTAMPTZ > $1
+              AND array_length(index_recommendations, 1) > 0;`
+    indexRecs, err := evalCtx.Planner.QueryIteratorEx(ctx, "get-candidates-for-workload-indexrecs",
+        sessiondata.NoSessionDataOverride, query, ts.Time)
+    if err != nil {
+        return nil, nil, err
+    }
+
+    // Skip ALTER INDEX recommendations for now; they only toggle index
+    // visibility.
+    var p parser.Parser
+    var cis []tree.CreateIndex
+    var dis []tree.DropIndex
+    var ok bool
+
+    // An index recommendation starts with "creation", "replacement" or
+    // "alteration".
+    var r = regexp.MustCompile(`\s*(creation|replacement|alteration)\s*:\s*(.*)`)
+
+    for ok, err = indexRecs.Next(ctx); ok; ok, err = indexRecs.Next(ctx) {
+        indexes := tree.MustBeDArray(indexRecs.Cur()[0])
+        for _, index := range indexes.Array {
+            indexStr, ok := index.(*tree.DString)
+            if !ok {
+                fmt.Println(index.String() + " is not a string!")
+                continue
+            }
+
+            indexStrArr := r.FindStringSubmatch(string(*indexStr))
+            if indexStrArr == nil {
+                fmt.Println(string(*indexStr) + " is not a valid index recommendation!")
+                continue
+            }
+
+            // Ignore all the ALTER INDEX recommendations right now.
+            if indexStrArr[1] == "alteration" {
+                continue
+            }
+
+            stmts, err := p.Parse(indexStrArr[2])
+            if err != nil {
+                fmt.Println(indexStrArr[2] + " is not a valid index operation!")
+                continue
+            }
+
+            for _, stmt := range stmts {
+                switch stmt := stmt.AST.(type) {
+                case *tree.CreateIndex:
+                    // Ignore all the inverted, partial and sharded indexes
+                    // right now.
+                    if !stmt.Inverted && stmt.Predicate == nil && stmt.Sharded == nil {
+                        cis = append(cis, *stmt)
+                    }
+                case *tree.DropIndex:
+                    dis = append(dis, *stmt)
+                }
+            }
+        }
+    }
+
+    // Surface any error that terminated the iteration.
+    if err != nil {
+        return nil, nil, err
+    }
+
+    return cis, dis, nil
+}
+
+// buildTrieForIndexRecs builds one trie per table from the CREATE INDEX
+// recommendations in cis.
+func buildTrieForIndexRecs(cis []tree.CreateIndex) map[tree.TableName]*Trie {
+    trieMap := make(map[tree.TableName]*Trie)
+    for _, ci := range cis {
+        if _, ok := trieMap[ci.Table]; !ok {
+            trieMap[ci.Table] = NewTrie()
+        }
+
+        trieMap[ci.Table].Insert(ci)
+    }
+    return trieMap
+}
+
+// extractIndexCovering pushes down the storing part of the internal nodes: it
+// finds whether the storing is covered by some leaf node. If yes, the storing
+// is discarded; otherwise, it is assigned to the shallowest leaf node. Then
+// extractIndexCovering collects all the indexes represented by the leaf
+// nodes.
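+// For example, a storing {k} attached to the internal node reached by (i) is
+// discarded when some leaf below it indexes (i, ..., k); otherwise "k" is
+// pushed down to the shallowest leaf in that subtree.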
+func extractIndexCovering(tm map[tree.TableName]*Trie) []tree.CreateIndex { + removeStorings(tm) + assignStoring(tm) + return collectAllLeaves4Tables(tm) +} diff --git a/pkg/sql/sem/builtins/BUILD.bazel b/pkg/sql/sem/builtins/BUILD.bazel index 7685e2e0bb85..047084cc96c7 100644 --- a/pkg/sql/sem/builtins/BUILD.bazel +++ b/pkg/sql/sem/builtins/BUILD.bazel @@ -68,6 +68,7 @@ go_library( "//pkg/sql/lexbase", "//pkg/sql/memsize", "//pkg/sql/oidext", + "//pkg/sql/opt/workloadindexrec", "//pkg/sql/parser", "//pkg/sql/pgwire/pgcode", "//pkg/sql/pgwire/pgerror", diff --git a/pkg/sql/sem/builtins/generator_builtins.go b/pkg/sql/sem/builtins/generator_builtins.go index 29baf7d7f409..3eceae19e38c 100644 --- a/pkg/sql/sem/builtins/generator_builtins.go +++ b/pkg/sql/sem/builtins/generator_builtins.go @@ -23,6 +23,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/sql/lexbase" + "github.com/cockroachdb/cockroach/pkg/sql/opt/workloadindexrec" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/sql/protoreflect" @@ -34,6 +35,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/arith" "github.com/cockroachdb/cockroach/pkg/util/duration" + "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" "github.com/cockroachdb/cockroach/pkg/util/json" "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/cockroach/pkg/util/randident" @@ -282,21 +284,21 @@ var generators = map[string]builtinDefinition{ makeGeneratorOverload( tree.ParamTypes{}, types.String, - makeWorkloadIndexRecsGeneratorFactory(false, false), + makeWorkloadIndexRecsGeneratorFactory(-1, -1), "Returns set of index recommendations", volatility.Immutable, ), makeGeneratorOverload( tree.ParamTypes{{Name: "timestamptz", Typ: types.TimestampTZ}}, types.String, - makeWorkloadIndexRecsGeneratorFactory(true, false), + makeWorkloadIndexRecsGeneratorFactory(0, -1), "Returns set of index recommendations", volatility.Immutable, ), makeGeneratorOverload( tree.ParamTypes{{Name: "budget", Typ: types.String}}, types.String, - makeWorkloadIndexRecsGeneratorFactory(false, true), + makeWorkloadIndexRecsGeneratorFactory(-1, 0), "Returns set of index recommendations", volatility.Immutable, ), @@ -306,7 +308,7 @@ var generators = map[string]builtinDefinition{ {Name: "budget", Typ: types.String}, }, types.String, - makeWorkloadIndexRecsGeneratorFactory(true, true), + makeWorkloadIndexRecsGeneratorFactory(0, 1), "Returns set of index recommendations", volatility.Immutable, ), @@ -1117,14 +1119,38 @@ func (s *multipleArrayValueGenerator) Values() (tree.Datums, error) { } // makeWorkloadIndexRecsGeneratorFactory uses the arrayValueGenerator to -// return all the index recommendations as an array of strings -func makeWorkloadIndexRecsGeneratorFactory(hasTs bool, hasBgt bool) eval.GeneratorOverload { - return func(_ context.Context, _ *eval.Context, _ tree.Datums) (eval.ValueGenerator, error) { - // Invoke the workloadindexrec.FindWorkloadRecs() to get indexRecs, err once it is implemented. - indexRecs := []string{"1", "2", "3"} +// return all the index recommendations as an array of strings. 
+// tsId and bgtId are the positions of the timestamp and budget arguments in
+// args; -1 means the corresponding argument is absent.
+func makeWorkloadIndexRecsGeneratorFactory(tsId int, bgtId int) eval.GeneratorOverload {
+	return func(ctx context.Context, evalCtx *eval.Context, args tree.Datums) (eval.ValueGenerator, error) {
+		var ts tree.DTimestampTZ
+		var budget int64
+		var err error
+
+		if tsId != -1 {
+			ts = tree.MustBeDTimestampTZ(args[tsId])
+		} else {
+			// No timestamp given: the zero timestamp lets every
+			// recommendation qualify.
+			ts = tree.DTimestampTZ{}
+		}
+
+		if bgtId != -1 {
+			budget, err = humanizeutil.ParseBytes(string(tree.MustBeDString(args[bgtId])))
+			if err != nil {
+				return nil, err
+			}
+		} else {
+			// No budget given: use the largest int64 (math.MaxInt64).
+			budget = (1 << 63) - 1
+		}
+
+		var indexRecs []string
+		indexRecs, err = workloadindexrec.FindWorkloadRecs(ctx, evalCtx, &ts, budget)
+		if err != nil {
+			return nil, err
+		}
+
 		arr := tree.NewDArray(types.String)
 		for _, indexRec := range indexRecs {
-			if err := arr.Append(tree.NewDString(indexRec)); err != nil {
+			if err = arr.Append(tree.NewDString(indexRec)); err != nil {
 				return nil, err
 			}
 		}