Skip to content

Commit

Permalink
planner: simplify the bindinfo package (#58548)
Browse files Browse the repository at this point in the history
ref #51347
  • Loading branch information
qw4990 authored Dec 26, 2024
1 parent 30d8684 commit 77c6d1b
Show file tree
Hide file tree
Showing 22 changed files with 708 additions and 902 deletions.
16 changes: 4 additions & 12 deletions pkg/bindinfo/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,12 @@ go_library(
srcs = [
"binding.go",
"binding_cache.go",
"binding_match.go",
"global_handle.go",
"session_handle.go",
],
importpath = "github.com/pingcap/tidb/pkg/bindinfo",
visibility = ["//visibility:public"],
deps = [
"//pkg/bindinfo/internal/logutil",
"//pkg/bindinfo/norm",
"//pkg/kv",
"//pkg/metrics",
"//pkg/parser",
Expand All @@ -31,6 +28,7 @@ go_library(
"//pkg/util/chunk",
"//pkg/util/hack",
"//pkg/util/hint",
"//pkg/util/logutil",
"//pkg/util/parser",
"//pkg/util/sqlexec",
"@com_github_dgraph_io_ristretto//:ristretto",
Expand All @@ -46,32 +44,26 @@ go_test(
timeout = "moderate",
srcs = [
"binding_cache_test.go",
"binding_match_test.go",
"cross_db_binding_test.go",
"global_handle_test.go",
"main_test.go",
"optimize_test.go",
"session_handle_test.go",
],
embed = [":bindinfo"],
flaky = True,
race = "on",
shard_count = 34,
shard_count = 31,
deps = [
"//pkg/bindinfo/internal",
"//pkg/bindinfo/norm",
"//pkg/parser",
"//pkg/parser/ast",
"//pkg/parser/auth",
"//pkg/parser/format",
"//pkg/parser/mysql",
"//pkg/parser/terror",
"//pkg/server",
"//pkg/session/types",
"//pkg/sessionctx/variable",
"//pkg/testkit",
"//pkg/testkit/testsetup",
"//pkg/types",
"//pkg/util",
"//pkg/util/parser",
"//pkg/util/stmtsummary",
"@com_github_ngaut_pools//:pools",
"@com_github_pingcap_failpoint//:failpoint",
Expand Down
284 changes: 284 additions & 0 deletions pkg/bindinfo/binding.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@
package bindinfo

import (
"strings"
"sync"
"unsafe"

"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/parser"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/hint"
utilparser "github.com/pingcap/tidb/pkg/util/parser"
"github.com/pkg/errors"
)

Expand Down Expand Up @@ -82,6 +86,182 @@ func (b *Binding) size() float64 {
return float64(res)
}

// GetGlobalBindingHandle is a hook that returns the global binding handle for
// the given session context. It is declared as a function variable so that
// this package does not have to import the package providing the handle,
// which would create an import cycle.
// matchSQLBinding calls it unconditionally when no session binding matches.
var GetGlobalBindingHandle func(sctx sessionctx.Context) GlobalBindingHandle

// BindingMatchInfo records necessary information for cross-db binding matching.
// This is mainly for plan cache to avoid normalizing the same statement repeatedly:
// matchSQLBinding fills both fields on first use and reuses them afterwards.
type BindingMatchInfo struct {
// NoDBDigest is the digest of the statement normalized with schema names removed
// (the digest returned by NormalizeStmtForBinding with WithoutDB(true)).
NoDBDigest string
// TableNames holds the table names collected from the statement by CollectTableNames.
TableNames []*ast.TableName
}

// MatchSQLBindingForPlanCache matches a binding for stmtNode on behalf of the plan cache.
//
// info may carry the normalization result of a previous call (see BindingMatchInfo);
// it is passed straight through to matchSQLBinding.
//
// When a binding matches, bindingSQL is its BindSQL and ignoreBinding reports whether
// the binding's hints contain the ignore-plan-cache table hint. When nothing matches,
// the zero values ("", false) are returned.
func MatchSQLBindingForPlanCache(sctx sessionctx.Context, stmtNode ast.StmtNode, info *BindingMatchInfo) (bindingSQL string, ignoreBinding bool) {
	found, ok, _ := matchSQLBinding(sctx, stmtNode, info)
	if !ok {
		return "", false
	}
	return found.BindSQL, found.Hint.ContainTableHint(hint.HintIgnorePlanCache)
}

// MatchSQLBinding returns the matched binding for this statement.
// It delegates to matchSQLBinding without a BindingMatchInfo, so the statement is
// normalized on every call.
// matched reports whether a binding was found; scope is metrics.ScopeSession or
// metrics.ScopeGlobal depending on which handle produced the match, and is empty
// when nothing matched.
func MatchSQLBinding(sctx sessionctx.Context, stmtNode ast.StmtNode) (binding *Binding, matched bool, scope string) {
return matchSQLBinding(sctx, stmtNode, nil)
}

// matchSQLBinding looks for a binding that applies to stmtNode, consulting the
// session binding handle first and the global binding handle second.
//
// Parameters:
//   - sctx: the session context; its session variables decide whether bindings
//     are used at all (UsePlanBaselines).
//   - stmtNode: the statement to match; nil never matches.
//   - info: optional cache of the normalization result. When it is non-nil and
//     already populated, its digest and table names are reused; otherwise the
//     statement is normalized here and, if info is non-nil, the result is stored
//     in it for the next call.
//
// Returns the binding, whether one matched, and the scope of the match
// (metrics.ScopeSession or metrics.ScopeGlobal; empty when nothing matched).
func matchSQLBinding(sctx sessionctx.Context, stmtNode ast.StmtNode, info *BindingMatchInfo) (binding *Binding, matched bool, scope string) {
	if !sctx.GetSessionVars().UsePlanBaselines || stmtNode == nil {
		return nil, false, ""
	}
	// When the domain is initializing, the bind will be nil.
	// Look the value up once and reuse it for the type assertion below.
	sessionHandleValue := sctx.Value(SessionBindInfoKeyType)
	if sessionHandleValue == nil {
		return nil, false, ""
	}

	// Record the normalization result into info to avoid repeated normalization next time.
	var noDBDigest string
	var tableNames []*ast.TableName
	if info == nil || info.TableNames == nil || info.NoDBDigest == "" {
		_, noDBDigest = NormalizeStmtForBinding(stmtNode, WithoutDB(true))
		tableNames = CollectTableNames(stmtNode)
		if info != nil {
			info.NoDBDigest = noDBDigest
			info.TableNames = tableNames
		}
	} else {
		noDBDigest = info.NoDBDigest
		tableNames = info.TableNames
	}

	// Session-level bindings take precedence over global ones.
	sessionHandle := sessionHandleValue.(SessionBindingHandle)
	if sessionBinding, ok := sessionHandle.MatchSessionBinding(sctx, noDBDigest, tableNames); ok {
		return sessionBinding, true, metrics.ScopeSession
	}

	globalHandle := GetGlobalBindingHandle(sctx)
	if globalHandle == nil {
		return nil, false, ""
	}
	binding, matched = globalHandle.MatchGlobalBinding(sctx, noDBDigest, tableNames)
	if matched {
		return binding, true, metrics.ScopeGlobal
	}
	// No match: hand back whatever the global handle returned, with an empty scope.
	return binding, false, ""
}

// noDBDigestFromBinding parses the binding's BindSQL with the binding's charset
// and collation, and returns the digest of the statement normalized without
// schema names. A parse failure is returned as the error with an empty digest.
func noDBDigestFromBinding(binding *Binding) (string, error) {
	stmtNode, parseErr := parser.New().ParseOneStmt(binding.BindSQL, binding.Charset, binding.Collation)
	if parseErr != nil {
		return "", parseErr
	}
	_, digest := NormalizeStmtForBinding(stmtNode, WithoutDB(true))
	return digest, nil
}

// crossDBMatchBindings picks, among bindings, the one whose table names match
// tableNames with the fewest wildcard ("*") schemas.
//
// A binding that only matches through wildcards is skipped unless the session
// variable EnableFuzzyBinding is on. On ties the earliest binding in the slice
// wins, because a later candidate must have strictly fewer wildcards to replace it.
//
// sctx must be non-nil: its session variables are read before any binding is
// examined.
func crossDBMatchBindings(sctx sessionctx.Context, tableNames []*ast.TableName, bindings []*Binding) (matchedBinding *Binding, isMatched bool) {
	sessVars := sctx.GetSessionVars()
	enableCrossDBBinding := sessVars.EnableFuzzyBinding
	// Start above the maximum possible wildcard count so that any match is accepted.
	leastWildcards := len(tableNames) + 1
	for _, binding := range bindings {
		numWildcards, matched := crossDBMatchBindingTableName(sessVars.CurrentDB, tableNames, binding.TableNames)
		if !matched {
			continue
		}
		if numWildcards > 0 && !enableCrossDBBinding {
			continue // cross-db binding is disabled, skip this binding
		}
		if numWildcards < leastWildcards {
			matchedBinding = binding
			isMatched = true
			leastWildcards = numWildcards
		}
	}
	return matchedBinding, isMatched
}

// crossDBMatchBindingTableName reports whether the statement's table names match
// the binding's table names position by position, and how many of the binding's
// tables use the wildcard schema "*".
//
// Two lists match when they have the same length and, at every position, the
// lower-cased table names are equal and the binding's schema is one of:
//   - the wildcard "*" (counted in numWildcards),
//   - exactly the statement's schema,
//   - the lower-cased currentDB, when the statement leaves the schema empty.
//
// On a mismatch it returns (0, false).
func crossDBMatchBindingTableName(currentDB string, stmtTableNames, bindingTableNames []*ast.TableName) (numWildcards int, matched bool) {
	if len(stmtTableNames) != len(bindingTableNames) {
		return 0, false
	}
	// The current DB does not change while iterating; lower-case it once.
	lowerCurrentDB := strings.ToLower(currentDB)
	for i, stmtTable := range stmtTableNames {
		bindingTable := bindingTableNames[i]
		if stmtTable.Name.L != bindingTable.Name.L {
			return 0, false
		}
		bindingSchema, stmtSchema := bindingTable.Schema.L, stmtTable.Schema.L
		switch {
		case bindingSchema == "*":
			// cross-db match successfully
			numWildcards++
		case bindingSchema == stmtSchema:
			// exactly same
		case stmtSchema == "" && bindingSchema == lowerCurrentDB:
			// equal to the current DB
		default:
			return 0, false
		}
	}
	return numWildcards, true
}

// isCrossDBBinding reports whether stmt is a cross-db binding, i.e. whether any
// table name collected from it uses the wildcard schema "*".
func isCrossDBBinding(stmt ast.Node) bool {
	tables := CollectTableNames(stmt)
	for i := range tables {
		if tables[i].Schema.L == "*" {
			return true
		}
	}
	return false
}

// CollectTableNames gathers every table name found in the given ast.Node.
// It is mainly used for cross-db binding matching.
//
// The returned slice holds the *ast.TableName nodes of the AST itself, so it
// must be treated as read-only.
//
// For example:
//
//	`select * from t1 where a < 1` --> [t1]
//	`select * from db1.t1, t2 where a < 1` --> [db1.t1, t2]
//
// More examples can be found in TestExtractTableName.
func CollectTableNames(in ast.Node) []*ast.TableName {
	visitor := tableNameCollectorPool.Get().(*tableNameCollector)
	defer func() {
		// Detach the collected slice before the collector goes back to the pool,
		// so the slice handed to the caller is not shared with the pooled object.
		visitor.tableNames = nil
		tableNameCollectorPool.Put(visitor)
	}()
	in.Accept(visitor)
	names := visitor.tableNames
	return names
}

// tableNameCollectorPool recycles tableNameCollector visitors used by CollectTableNames.
// New collectors are created through newCollectTableName.
var tableNameCollectorPool = sync.Pool{
New: func() any {
return newCollectTableName()
},
}

// tableNameCollector is an AST visitor that accumulates the *ast.TableName nodes
// it encounters while walking a tree.
type tableNameCollector struct {
// tableNames lists the table-name nodes in the order Enter saw them.
tableNames []*ast.TableName
}

// newCollectTableName builds a tableNameCollector whose result slice is empty
// but pre-sized for four table names.
func newCollectTableName() *tableNameCollector {
	collector := new(tableNameCollector)
	collector.tableNames = make([]*ast.TableName, 0, 4)
	return collector
}

// Enter implements Visitor interface.
// A *ast.TableName node is recorded and its children are skipped; any other
// node is left untouched and the walk descends into it.
func (c *tableNameCollector) Enter(in ast.Node) (out ast.Node, skipChildren bool) {
	tableName, isTableName := in.(*ast.TableName)
	if !isTableName {
		return in, false
	}
	c.tableNames = append(c.tableNames, tableName)
	return in, true
}

// Leave implements Visitor interface.
// It returns the node unchanged and always reports ok as true.
func (*tableNameCollector) Leave(in ast.Node) (out ast.Node, ok bool) {
return in, true
}

// prepareHints builds ID and Hint for Bindings. If sctx is not nil, we check if
// the BindSQL is still valid.
func prepareHints(sctx sessionctx.Context, binding *Binding) (rerr error) {
Expand Down Expand Up @@ -186,3 +366,107 @@ func pickCachedBinding(cachedBinding *Binding, bindingsFromStorage ...*Binding)
// should only have one binding.
return bindings[0]
}

// option carries the settings that NormalizeStmtForBinding forwards to normalizeStmt.
type option struct {
// specifiedDB is the DB name passed to the restore step when schema names are kept
// (set through WithSpecifiedDB).
specifiedDB string
// noDB selects normalization without schema names (set through WithoutDB).
noDB bool
}

// optionFunc mutates an option; values are produced by WithoutDB and WithSpecifiedDB
// and applied in order by NormalizeStmtForBinding.
type optionFunc func(*option)

// WithoutDB returns an option that sets whether schema names are eliminated
// during normalization.
func WithoutDB(noDB bool) optionFunc {
	setNoDB := func(o *option) {
		o.noDB = noDB
	}
	return setNoDB
}

// WithSpecifiedDB returns an option that records the DB name to use when
// normalizing a statement with schema names kept.
func WithSpecifiedDB(specifiedDB string) optionFunc {
	setSpecifiedDB := func(o *option) {
		o.specifiedDB = specifiedDB
	}
	return setSpecifiedDB
}

// NormalizeStmtForBinding normalizes a statement for binding and returns the
// normalized SQL together with its digest.
// The options are applied in order onto a zero-valued option set:
// when noDB is false, schema names will be completed automatically: `select * from t` --> `select * from db . t`.
// when noDB is true, schema names will be eliminated automatically: `select * from db . t` --> `select * from t`.
func NormalizeStmtForBinding(stmtNode ast.StmtNode, options ...optionFunc) (normalizedStmt, exactSQLDigest string) {
	var settings option
	for _, apply := range options {
		apply(&settings)
	}
	return normalizeStmt(stmtNode, settings.specifiedDB, settings.noDB)
}

// normalizeStmt normalizes a statement for binding and returns the normalized
// SQL with its digest.
// This function skips Explain automatically, and literals in in-lists will be normalized as '...'.
// For normal bindings, DB name will be completed automatically:
//
//	e.g. `select * from t where a in (1, 2, 3)` --> `select * from test.t where a in (...)`
//
// Only SELECT, set-operation, DELETE, UPDATE and INSERT statements (optionally
// wrapped in EXPLAIN) are handled; everything else yields ("", "").
//
// This function is only used to find bind records. Some statements, such as
// `explain select * from t`, reach it several times, and not every pass wants a
// bind record. Those passes can only be told apart by an empty Text() (the
// restored statement is the same), so an empty Text() also yields ("", "").
func normalizeStmt(stmtNode ast.StmtNode, specifiedDB string, noDB bool) (normalizedStmt, sqlDigest string) {
	// restoreAndDigest strips the trailing semicolon, restores the statement with
	// or without schema names, and returns the normalized text with its digest.
	restoreAndDigest := func(node ast.StmtNode) (string, string) {
		eraseLastSemicolon(node)
		var restored string
		if noDB {
			restored = utilparser.RestoreWithoutDB(node)
		} else {
			restored = utilparser.RestoreWithDefaultDB(node, specifiedDB, node.Text())
		}
		normalized, digest := parser.NormalizeDigestForBinding(restored)
		return normalized, digest.String()
	}

	switch x := stmtNode.(type) {
	case *ast.ExplainStmt:
		if len(x.Text()) == 0 {
			return "", ""
		}
		switch x.Stmt.(type) {
		case *ast.SelectStmt, *ast.DeleteStmt, *ast.UpdateStmt, *ast.InsertStmt:
			return restoreAndDigest(x.Stmt)
		case *ast.SetOprStmt:
			explainSQL, _ := restoreAndDigest(x)

			// Cut the text at the first "select" or, if it comes earlier, the
			// first "(".
			start := strings.Index(explainSQL, "select")
			if paren := strings.Index(explainSQL, "("); paren != -1 && paren < start {
				start = paren
			}
			// If the SQL is `EXPLAIN ((VALUES ROW ()) ORDER BY 1);`, start will be -1
			// and the whole normalized text is digested as is.
			if start != -1 {
				explainSQL = explainSQL[start:]
			}
			return explainSQL, parser.DigestNormalized(explainSQL).String()
		}
	case *ast.SelectStmt, *ast.SetOprStmt, *ast.DeleteStmt, *ast.UpdateStmt, *ast.InsertStmt:
		if len(x.Text()) == 0 {
			return "", ""
		}
		return restoreAndDigest(x)
	}
	return "", ""
}

// eraseLastSemicolon removes a single trailing ';' from the statement's text,
// if there is one, by resetting the text through SetText. Statements with an
// empty text or without a trailing semicolon are left untouched.
func eraseLastSemicolon(stmt ast.StmtNode) {
	text := stmt.Text()
	if !strings.HasSuffix(text, ";") {
		return
	}
	stmt.SetText(nil, text[:len(text)-1])
}
3 changes: 1 addition & 2 deletions pkg/bindinfo/binding_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"sync"

"github.com/dgraph-io/ristretto"
"github.com/pingcap/tidb/pkg/bindinfo/norm"
"github.com/pingcap/tidb/pkg/parser"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/sessionctx"
Expand Down Expand Up @@ -215,7 +214,7 @@ func (c *bindingCache) SetBinding(sqlDigest string, binding *Binding) (err error
if err != nil {
return err
}
_, noDBDigest := norm.NormalizeStmtForBinding(stmt, norm.WithoutDB(true))
_, noDBDigest := NormalizeStmtForBinding(stmt, WithoutDB(true))
c.digestBiMap.Add(noDBDigest, sqlDigest)
// NOTE: due to LRU eviction, the underlying BindingCache state might be inconsistent with digestBiMap.
// This is acceptable: the optimizer will load the binding on a cache miss.
Expand Down
Loading

0 comments on commit 77c6d1b

Please sign in to comment.