Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: testing the ExtractFD just after the plan building phase #6

Open
wants to merge 3 commits into
base: functional-dependency
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions planner/core/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,8 @@ type LogicalProjection struct {
// ExtractFD implements the logical plan interface, extracting the FD from bottom up.
func (p *LogicalProjection) ExtractFD() *fd.FDSet {
// basically extract the children's fdSet.
fds := p.logicalSchemaProducer.ExtractFD()
fds := &fd.FDSet{}
fds.CopyFrom(p.logicalSchemaProducer.ExtractFD())
// collect the output columns' unique ID.
outputColsUniqueIDs := fd.NewFastIntSet()
notnullColsUniqueIDs := fd.NewFastIntSet()
Expand Down Expand Up @@ -458,7 +459,8 @@ func (la *LogicalAggregation) HasOrderBy() bool {
// depend on logicalAgg.ExtractFD() to finish the only_full_group_by checking problem rather than by 1 & 2.
func (la *LogicalAggregation) ExtractFD() *fd.FDSet {
// basically extract the children's fdSet.
fds := la.logicalSchemaProducer.ExtractFD()
fds := &fd.FDSet{}
fds.CopyFrom(la.logicalSchemaProducer.ExtractFD())
// collect the output columns' unique ID.
outputColsUniqueIDs := fd.NewFastIntSet()
notnullColsUniqueIDs := fd.NewFastIntSet()
Expand Down Expand Up @@ -728,7 +730,8 @@ func extractEquivalenceCols(Conditions []expression.Expression, p LogicalPlan, f

func (p *LogicalSelection) ExtractFD() *fd.FDSet {
// basically extract the children's fdSet.
fds := p.baseLogicalPlan.ExtractFD()
fds := &fd.FDSet{}
fds.CopyFrom(p.baseLogicalPlan.ExtractFD())
// collect the output columns' unique ID.
outputColsUniqueIDs := fd.NewFastIntSet()
notnullColsUniqueIDs := fd.NewFastIntSet()
Expand Down
2 changes: 2 additions & 0 deletions planner/core/stringer.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ func fdToString(in LogicalPlan, strs []string, idxs []int) ([]string, []int) {
}
case *DataSource:
strs = append(strs, "{"+x.fdSet.String()+"}")
case *LogicalSelection:
strs = append(strs, "{"+x.fdSet.String()+"}")
default:
}
return strs, idxs
Expand Down
102 changes: 57 additions & 45 deletions planner/functional_dependency/extract_fd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ func testGetIS(ass *assert.Assertions, ctx sessionctx.Context) infoschema.InfoSc
}

func TestFDSet_ExtractFD(t *testing.T) {
t.Parallel()
ass := assert.New(t)

store, clean := testkit.CreateMockStore(t)
Expand All @@ -34,10 +33,12 @@ func TestFDSet_ExtractFD(t *testing.T) {

tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("set sql_mode=''")
tk.MustExec("create table t1(a int key, b int, c int, unique(b,c))")
tk.MustExec("create table t2(m int key, n int, p int, unique(m,n))")
tk.MustExec("create table x1(a int not null primary key, b int not null, c int default null, d int not null, unique key I_b_c (b,c), unique key I_b_d (b,d))")
tk.MustExec("create table x2(a int not null primary key, b int not null, c int default null, d int not null, unique key I_b_c (b,c), unique key I_b_d (b,d))")
tk.MustExec("create table test_expr (a varchar(12));")

tests := []struct {
sql string
Expand Down Expand Up @@ -76,145 +77,156 @@ func TestFDSet_ExtractFD(t *testing.T) {
{
sql: "select b+1, sum(a) from t1 group by(b)",
// since b is projected out, b --> b+1 and b ~~> sum(a) is eliminated.
best: "DataScan(t1)->Aggr(sum(test.t1.a),firstrow(test.t1.b))->Projection",
fd: "{(1)-->(2,3), (2,3)~~>(1)} >>> {(2)~~>(4)} >>> {(2)~~>(4), (2)-->(5)}",
best: "DataScan(t1)->Aggr(sum(test.t1.a),firstrow(test.t1.a),firstrow(test.t1.b),firstrow(test.t1.c))->Projection",
fd: "{(1)-->(2,3), (2,3)~~>(1)} >>> {(1)-->(2,3), (2,3)~~>(1), (2)~~>(1,3,4)} >>> {(2)-->(5)}",
},
{
sql: "select b+1, b, sum(a) from t1 group by(b)",
best: "DataScan(t1)->Aggr(sum(test.t1.a),firstrow(test.t1.b))->Projection",
fd: "{(1)-->(2,3), (2,3)~~>(1)} >>> {(2)~~>(4)} >>> {(2)~~>(4), (2)-->(5)}",
best: "DataScan(t1)->Aggr(sum(test.t1.a),firstrow(test.t1.a),firstrow(test.t1.b),firstrow(test.t1.c))->Projection",
fd: "{(1)-->(2,3), (2,3)~~>(1)} >>> {(1)-->(2,3), (2,3)~~>(1), (2)~~>(1,3,4)} >>> {(2)-->(5)}",
},
// test for table x1 and x2
{
sql: "select a from x1 group by a,b,c",
best: "DataScan(x1)->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {}",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {}",
},
{
sql: "select b from x1 group by b",
best: "DataScan(x1)->Aggr(firstrow(test.x1.b))->Projection",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
// b --> b naturally exists, so it won't exist in fd.
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {} >>> {}",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3), (2)~~>(1,3,4)} >>> {}",
},
{
sql: "select b as e from x1 group by b",
best: "DataScan(x1)->Aggr(firstrow(test.x1.b))->Projection",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
// b --> b is naturally existed, so it won't exist in fd.
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {} >>> {}",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3), (2)~~>(1,3,4)} >>> {}",
},
{
sql: "select b+c from x1 group by b+c",
best: "DataScan(x1)->Aggr(firstrow(test.x1.b),firstrow(test.x1.c))->Projection",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
// avoid allocating unique ID 7 from fd temporarily, and substituted by unique ID 5
// attention:
// b+c is an expr assigned with new plan ID when building upper-layer projection.
// when extracting FD after the build phase is done, we should be able to recognize b+c in the lower-layer group-by item with the same unique ID.
// that's why we introduce the session variable MapHashCode2UniqueID4ExtendedCol.
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(2,3)-->(5), (5)~~>(2,3)} >>> {(2,3)-->(5), (5)~~>(2,3)}",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3), (2,3)-->(5), (5)~~>(1-4)} >>> {(2,3)-->(5), (5)~~>(2,3)}",
},
{
sql: "select b+c, min(a) from x1 group by b+c, b-c",
best: "DataScan(x1)->Aggr(min(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(2,3)-->(6,8), (6,8)~~>(2,3,5)} >>> {(2,3)-->(6)}",
best: "DataScan(x1)->Aggr(min(test.x1.a),firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3), (2,3)-->(6,7), (6,7)~~>(1-5)} >>> {(2,3)-->(6)}",
},
{
sql: "select b+c, min(a) from x1 group by b, c",
best: "DataScan(x1)->Aggr(min(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(2,3)~~>(5)} >>> {(2,3)~~>(5), (2,3)-->(6)}",
best: "DataScan(x1)->Aggr(min(test.x1.a),firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3), (2,3)~~>(1,4,5)} >>> {(2,3)~~>(5), (2,3)-->(6)}",
},
{
sql: "select b+c from x1 group by b,c",
best: "DataScan(x1)->Aggr(firstrow(test.x1.b),firstrow(test.x1.c))->Projection",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
// b --> b is naturally existed, so it won't exist in fd.
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {} >>> {(2,3)-->(5)}",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)-->(1,4), (2,4)-->(1,3)} >>> {(2,3)-->(5)}",
},
{
sql: "select case b when 1 then c when 2 then d else d end from x1 group by b,c,d",
best: "DataScan(x1)->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(2,3)~~>(4), (2,4)-->(3,5)}",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(2,3)~~>(4), (2,4)-->(3,5)}",
},
{
// scalar sub query will be substituted with constant datum.
sql: "select c > (select b from x1) from x1 group by c",
best: "DataScan(x1)->Aggr(firstrow(test.x1.c))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {} >>> {(3)-->(15)}",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3), (3)~~>(1,2,4)} >>> {(3)-->(15)}",
},
{
sql: "select exists (select * from x1) from x1 group by d",
best: "DataScan(x1)->Aggr(firstrow(1))->Projection",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
// 14 is added in the logicAgg pruning process because all the columns of the agg have been pruned.
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(4)~~>(14)} >>> {()-->(13)}",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3), (4)~~>(1-3)} >>> {()-->(13)}",
},
{
sql: "select c is null from x1 group by c",
best: "DataScan(x1)->Aggr(firstrow(test.x1.c))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {} >>> {(3)-->(5)}",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3), (3)~~>(1,2,4)} >>> {(3)-->(5)}",
},
{
sql: "select c is true from x1 group by c",
best: "DataScan(x1)->Aggr(firstrow(test.x1.c))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {} >>> {(3)-->(5)}",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3), (3)~~>(1,2,4)} >>> {(3)-->(5)}",
},
{
sql: "select (c+b)*d from x1 group by c,b,d",
// agg elimination.
best: "DataScan(x1)->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(2,3)~~>(4), (2,4)-->(3,5)}",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(2,3)~~>(4), (2,4)-->(3,5)}",
},
{
sql: "select b in (c,d) from x1 group by b,c,d",
best: "DataScan(x1)->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(2,3)~~>(4), (2,4)-->(3,5)}",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(2,3)~~>(4), (2,4)-->(3,5)}",
},
{
sql: "select b like '%a' from x1 group by b",
best: "DataScan(x1)->Aggr(firstrow(test.x1.b))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {} >>> {(2)-->(5)}",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3), (2)~~>(1,3,4)} >>> {(2)-->(5)}",
},
// test functional dependency on primary key
{
sql: "select * from x1 group by a",
// agg eliminated by primary key.
best: "DataScan(x1)->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)}",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)}",
},
// test functional dependency on unique key with not null
{
sql: "select * from x1 group by b,d",
best: "DataScan(x1)->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)}",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
fd: "{(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)} >>> {(1)-->(2-4), (2,3)~~>(1,4), (2,4)-->(1,3)}",
},
// test functional dependency derived from keys in where condition
{
sql: "select * from x1 where c = d group by b, c",
best: "DataScan(x1)->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
best: "DataScan(x1)->Sel([eq(test.x1.c, test.x1.d)])->Aggr(firstrow(test.x1.a),firstrow(test.x1.b),firstrow(test.x1.c),firstrow(test.x1.d))->Projection",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems we need to show 4 FD items here? A >>> B >>> C >>> D

// c = d derives:
// 1: c and d are not null, make lax FD (2,3)~~>(1,4) to be strict one.
// 2: c and d are equivalent.
fd: "{(1)-->(2-4), (2,3)-->(1,4), (2,4)-->(1,3), (3,4)==(3,4)} >>> {(1)-->(2-4), (2,3)-->(1,4), (2,4)-->(1,3), (3,4)==(3,4)} >>> {(1)-->(2-4), (2,3)-->(1,4), (2,4)-->(1,3), (3,4)==(3,4)}",
},
// Test https://github.com/pingcap/tidb/issues/27723
{
sql: "select substr(a, 12, 1) from test_expr group by SUBSTR(a, 12, 1)",
best: "DataScan(test_expr)->Aggr(firstrow(test.test_expr.a),firstrow(test.test_expr._tidb_rowid))->Projection",
fd: "{} >>> {(1)-->(3), (3)~~>(1,2)} >>> {(1)-->(3), (3)~~>(1)}",
},
// Test https://github.com/pingcap/tidb/issues/25199
{
sql: "SELECT c FROM(SELECT d c, d e FROM(SELECT 2 d FROM t1) f) g GROUP BY e;",
best: "DataScan(t1)->Projection->Projection->Aggr(firstrow(Column#4),firstrow(Column#4))->Projection",
fd: "{(1)-->(2,3), (2,3)~~>(1)} >>> {()-->(4)} >>> {()-->(4)} >>> {()-->(4)} >>> {()-->(4)}",
},
}

ctx := context.TODO()
is := testGetIS(ass, tk.Session())
for i, tt := range tests {
comment := fmt.Sprintf("case:%v sql:%s", i, tt.sql)
stmt, err := par.ParseOneStmt(tt.sql, "", "")
ass.Nil(err, comment)
ass.NoError(err, comment)
tk.Session().GetSessionVars().PlanID = 0
tk.Session().GetSessionVars().PlanColumnID = 0
err = plannercore.Preprocess(tk.Session(), stmt, plannercore.WithPreprocessorReturn(&plannercore.PreprocessorReturn{InfoSchema: is}))
ass.Nil(err)
ass.NoError(err)
tk.Session().PrepareTSFuture(ctx)
builder, _ := plannercore.NewPlanBuilder().Init(tk.Session(), is, &hint.BlockHintProcessor{})
// extract FD to every OP
p, err := builder.Build(ctx, stmt)
ass.Nil(err)
p, err = plannercore.LogicalOptimizeTest(ctx, builder.GetOptFlag(), p.(plannercore.LogicalPlan))
ass.Nil(err)
ass.NoError(err)
ass.Equal(tt.best, plannercore.ToString(p), comment)
// extract FD to every OP
p.(plannercore.LogicalPlan).ExtractFD()
comment = comment + fmt.Sprintf(" plan: %v", plannercore.ToString(p))
ass.Equal(tt.fd, plannercore.FDToString(p.(plannercore.LogicalPlan)), comment)
}
}
10 changes: 10 additions & 0 deletions planner/functional_dependency/fd_graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,16 @@ func (s *FDSet) MaxOneRow(cols FastIntSet) {
}
}

// CopyFrom overwrites s with a copy of srcSet.
//
// The edge slice and the HashCodeToUniqueID map are freshly allocated, so
// replacing slice elements or adding map entries on s afterwards does not
// touch srcSet. The copy is shallow with respect to the edges: the *fdEdge
// pointers themselves are shared between the two sets. NotNullCols is copied
// through its own CopyFrom method.
//
// Copying a set onto itself is a no-op.
func (s *FDSet) CopyFrom(srcSet *FDSet) {
	// Guard against self-copy: the assignments below replace s's containers
	// with fresh empty ones before reading from srcSet, which would wipe the
	// set's contents when both names refer to the same FDSet.
	if s == srcSet {
		return
	}
	s.fdEdges = make([]*fdEdge, len(srcSet.fdEdges))
	copy(s.fdEdges, srcSet.fdEdges)
	s.NotNullCols.CopyFrom(srcSet.NotNullCols)
	s.HashCodeToUniqueID = make(map[string]int, len(srcSet.HashCodeToUniqueID))
	for k, v := range srcSet.HashCodeToUniqueID {
		s.HashCodeToUniqueID[k] = v
	}
}

// ProjectCols projects FDSet to the target columns
// Formula:
// Strict decomposition FD4A: If X −→ Y Z then X −→ Y and X −→ Z.
Expand Down