From 3e9c07b81a4890522f4277f7343dfbd9ae3dfb47 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 15 Jul 2021 08:05:01 +0200 Subject: [PATCH 01/14] Renamed leJoin to joinTables Signed-off-by: Florent Poinsard --- go/vt/vtgate/planbuilder/jointree.go | 10 +++++----- go/vt/vtgate/planbuilder/jointree_transformers.go | 2 +- go/vt/vtgate/planbuilder/route_planning.go | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/go/vt/vtgate/planbuilder/jointree.go b/go/vt/vtgate/planbuilder/jointree.go index 91ff3c66751..8aedd07e00e 100644 --- a/go/vt/vtgate/planbuilder/jointree.go +++ b/go/vt/vtgate/planbuilder/jointree.go @@ -46,7 +46,7 @@ type ( tableNames() []string } - leJoin struct { + joinTables struct { lhs, rhs relation pred sqlparser.Expr } @@ -128,14 +128,14 @@ type ( var _ joinTree = (*routePlan)(nil) var _ joinTree = (*joinPlan)(nil) var _ relation = (*routeTable)(nil) -var _ relation = (*leJoin)(nil) +var _ relation = (*joinTables)(nil) var _ relation = (parenTables)(nil) func (rp *routeTable) tableID() semantics.TableSet { return rp.qtable.TableID } -func (rp *leJoin) tableID() semantics.TableSet { return rp.lhs.tableID().Merge(rp.rhs.tableID()) } +func (rp *joinTables) tableID() semantics.TableSet { return rp.lhs.tableID().Merge(rp.rhs.tableID()) } -func (rp *leJoin) tableNames() []string { +func (rp *joinTables) tableNames() []string { return append(rp.lhs.tableNames(), rp.rhs.tableNames()...) } @@ -181,7 +181,7 @@ func visitTables(r relation, f func(tbl *routeTable) error) error { } } return nil - case *leJoin: + case *joinTables: err := visitTables(r.lhs, f) if err != nil { return err diff --git a/go/vt/vtgate/planbuilder/jointree_transformers.go b/go/vt/vtgate/planbuilder/jointree_transformers.go index a99c0fb16be..dfa98d649b8 100644 --- a/go/vt/vtgate/planbuilder/jointree_transformers.go +++ b/go/vt/vtgate/planbuilder/jointree_transformers.go @@ -186,7 +186,7 @@ func relToTableExpr(t relation) (sqlparser.TableExpr, error) { tables = append(tables, tableExpr) } return &sqlparser.ParenTableExpr{Exprs: tables}, nil - case *leJoin: + case *joinTables: lExpr, err := relToTableExpr(t.lhs) if err != nil { return nil, err diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index 59f9dade24f..8e1abd396fd 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -721,7 +721,7 @@ func createRoutePlanForOuter(aRoute, bRoute *routePlan, semTable *semantics.SemT aTbl, bTbl, newTables := findTables(deps, tables) tables = newTables if aTbl != nil && bTbl != nil { - tables = append(tables, &leJoin{ + tables = append(tables, &joinTables{ lhs: aTbl, rhs: bTbl, pred: predicate, From 3154a94d1f112b5503effbf740682b7ccf91acab Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 15 Jul 2021 09:04:46 +0200 Subject: [PATCH 02/14] Initial support for distinct construct in gen4 Signed-off-by: Florent Poinsard --- .../vtgate/planbuilder/abstract/queryprojection.go | 13 ++++++++----- go/vt/vtgate/planbuilder/distinct.go | 6 +----- go/vt/vtgate/planbuilder/route_planning.go | 7 ++++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection.go b/go/vt/vtgate/planbuilder/abstract/queryprojection.go index 2e97aa9a735..fe6ecb635dd 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection.go @@ -38,6 +38,7 @@ type ( // If you change the contents here, please update the toString() method SelectExprs []SelectExpr HasAggr bool + Distinct bool GroupByExprs []GroupBy OrderExprs []OrderBy CanPushDownSorting bool @@ -58,7 +59,9 @@ type ( // CreateQPFromSelect created the QueryProjection for the input *sqlparser.Select func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { - qp := &QueryProjection{} + qp := &QueryProjection{ + Distinct: sel.Distinct, + } hasNonAggr := false for _, selExp := range sel.SelectExprs { @@ -71,9 +74,6 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { if len(fExpr.Exprs) != 1 { return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.SyntaxError, "aggregate functions take a single argument '%s'", sqlparser.String(fExpr)) } - if fExpr.Distinct { - return nil, semantics.Gen4NotSupportedF("distinct aggregation") - } qp.HasAggr = true qp.SelectExprs = append(qp.SelectExprs, SelectExpr{ Col: exp, @@ -120,7 +120,6 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { canPushDownSorting = canPushDownSorting && !sqlparser.ContainsAggregation(weightStrExpr) } qp.CanPushDownSorting = canPushDownSorting - return qp, nil } @@ -204,3 +203,7 @@ func (qp *QueryProjection) toString() string { func (qp *QueryProjection) NeedsAggregation() bool { return qp.HasAggr || len(qp.GroupByExprs) > 0 } + +func (qp *QueryProjection) NeedsDistinct() bool { + return qp.Distinct +} diff --git a/go/vt/vtgate/planbuilder/distinct.go b/go/vt/vtgate/planbuilder/distinct.go index b3642eaef09..666d39ac2ee 100644 --- a/go/vt/vtgate/planbuilder/distinct.go +++ b/go/vt/vtgate/planbuilder/distinct.go @@ -24,11 +24,7 @@ import ( var _ logicalPlan = (*distinct)(nil) -// limit is the logicalPlan for engine.Limit. -// This gets built if a limit needs to be applied -// after rows are returned from an underlying -// operation. Since a limit is the final operation -// of a SELECT, most pushes are not applicable. +// distinct is the logicalPlan for engine.Distinct. type distinct struct { logicalPlanCommon } diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index 8e1abd396fd..8b55fda7dba 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -219,6 +219,10 @@ func planHorizon(sel *sqlparser.Select, plan logicalPlan, semTable *semantics.Se } } + if qp.NeedsDistinct() { + plan = newDistinct(plan) + } + return plan, nil } @@ -237,9 +241,6 @@ func createSingleShardRoutePlan(sel *sqlparser.Select, rb *route) { } func checkUnsupportedConstructs(sel *sqlparser.Select) error { - if sel.Distinct { - return semantics.Gen4NotSupportedF("DISTINCT") - } if sel.Having != nil { return semantics.Gen4NotSupportedF("HAVING") } From 96085f013d56f831d203ee2371eb5cf4962a40ae Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 15 Jul 2021 10:04:51 +0200 Subject: [PATCH 03/14] Support more distinct cases in plan tests Signed-off-by: Florent Poinsard --- .../planbuilder/abstract/queryprojection.go | 4 +++ .../planbuilder/testdata/aggr_cases.txt | 30 +++++++++++++++++++ .../planbuilder/testdata/select_cases.txt | 1 + 3 files changed, 35 insertions(+) diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection.go b/go/vt/vtgate/planbuilder/abstract/queryprojection.go index fe6ecb635dd..a8e724f1188 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection.go @@ -74,6 +74,9 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { if len(fExpr.Exprs) != 1 { return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.SyntaxError, "aggregate functions take a single argument '%s'", sqlparser.String(fExpr)) } + if fExpr.Distinct { + return nil, semantics.Gen4NotSupportedF("distinct aggregation") + } qp.HasAggr = true qp.SelectExprs = append(qp.SelectExprs, SelectExpr{ Col: exp, @@ -204,6 +207,7 @@ func (qp *QueryProjection) NeedsAggregation() bool { return qp.HasAggr || len(qp.GroupByExprs) > 0 } +// NeedsDistinct returns true if the query needs explicit distinct func (qp *QueryProjection) NeedsDistinct() bool { return qp.Distinct } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index c8caa2a717a..f27f4743488 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -1669,6 +1669,36 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select distinct a, count(*) from user group by a", + "Instructions": { + "OperatorType": "Distinct", + "Inputs": [ + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(1)", + "GroupBy": "(0|2)", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select a, count(*), weight_string(a) from `user` where 1 != 1 group by a", + "OrderBy": "(0|2) ASC", + "Query": "select a, count(*), weight_string(a) from `user` group by a order by a asc", + "Table": "`user`" + } + ] + } + ] + } +} # Group by invalid column number (code is duplicated from symab). "select id from user group by 1.1" diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.txt b/go/vt/vtgate/planbuilder/testdata/select_cases.txt index 9da1cad5785..06aa843b48a 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.txt @@ -1772,6 +1772,7 @@ Gen4 plan same as above ] } } +Gen4 plan same as above # Select from information schema query with two tables that route should be merged "SELECT DELETE_RULE, UPDATE_RULE FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS KCU INNER JOIN INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS AS RC ON KCU.CONSTRAINT_NAME = RC.CONSTRAINT_NAME WHERE KCU.TABLE_SCHEMA = 'test' AND KCU.TABLE_NAME = 'data_type_table' AND KCU.COLUMN_NAME = 'id' AND KCU.REFERENCED_TABLE_SCHEMA = 'test' AND KCU.CONSTRAINT_NAME = 'data_type_table_id_fkey' ORDER BY KCU.CONSTRAINT_NAME, KCU.COLUMN_NAME" From b31c26addbcc064418f1ae1535cfc8881b4b7901 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 15 Jul 2021 10:24:06 +0200 Subject: [PATCH 04/14] Optimize group by when using distinct without aggregation exprs Co-authored-by: Andres Taylor Signed-off-by: Florent Poinsard --- .../planbuilder/abstract/queryprojection.go | 5 ++ .../abstract/queryprojection_test.go | 22 +++++++-- .../planbuilder/testdata/aggr_cases.txt | 47 +++++++++++++++++++ 3 files changed, 71 insertions(+), 3 deletions(-) diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection.go b/go/vt/vtgate/planbuilder/abstract/queryprojection.go index a8e724f1188..86ace46ac05 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection.go @@ -122,6 +122,9 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { }) canPushDownSorting = canPushDownSorting && !sqlparser.ContainsAggregation(weightStrExpr) } + if qp.Distinct && !qp.HasAggr { + qp.GroupByExprs = nil + } qp.CanPushDownSorting = canPushDownSorting return qp, nil } @@ -171,11 +174,13 @@ func (qp *QueryProjection) toString() string { Select []string Grouping []string OrderBy []string + Distinct bool } out := output{ Select: []string{}, Grouping: []string{}, OrderBy: []string{}, + Distinct: qp.NeedsDistinct(), } for _, expr := range qp.SelectExprs { diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go b/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go index e1d90c148ae..e734dc3138f 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go @@ -132,7 +132,8 @@ func TestQPSimplifiedExpr(t *testing.T) { "Grouping": [ "intcol" ], - "OrderBy": [] + "OrderBy": [], + "Distinct": false }`, }, { @@ -147,7 +148,8 @@ func TestQPSimplifiedExpr(t *testing.T) { "OrderBy": [ "intcol asc", "textcol asc" - ] + ], + "Distinct": false }`, }, { @@ -166,7 +168,21 @@ func TestQPSimplifiedExpr(t *testing.T) { ], "OrderBy": [ "textcol desc" - ] + ], + "Distinct": false +}`, + }, + { + query: "select distinct col1, col2 from user group by col1, col2", + expected: ` +{ + "Select": [ + "col1", + "col2" + ], + "Grouping": [], + "OrderBy": [], + "Distinct": true }`, }, } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index f27f4743488..2346e0ca37b 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -1713,3 +1713,50 @@ Gen4 plan same as above "select count(distinct *) from user" "syntax error: count(distinct *)" Gen4 plan same as above + +# optimize group by when using distinct with no aggregation +"select distinct col1, col2 from user group by col1, col2" +{ + "QueryType": "SELECT", + "Original": "select distinct col1, col2 from user group by col1, col2", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "GroupBy": "0, 1, 0, 1", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select col1, col2, weight_string(col1), weight_string(col2) from `user` where 1 != 1 group by col1, col2", + "OrderBy": "(0|2) ASC, (1|3) ASC, (0|2) ASC, (1|3) ASC", + "Query": "select distinct col1, col2, weight_string(col1), weight_string(col2) from `user` group by col1, col2 order by col1 asc, col2 asc, col1 asc, col2 asc", + "ResultColumns": 2, + "Table": "`user`" + } + ] + } +} +{ + "QueryType": "SELECT", + "Original": "select distinct col1, col2 from user group by col1, col2", + "Instructions": { + "OperatorType": "Distinct", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select col1, col2 from `user` where 1 != 1", + "Query": "select col1, col2 from `user`", + "Table": "`user`" + } + ] + } +} From f3cb4dd482daecd1ac69b11e9c22e091f12f232d Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 15 Jul 2021 10:36:29 +0200 Subject: [PATCH 05/14] Optimize distinct when using only aggregation exprs and no group by Co-authored-by: Andres Taylor Signed-off-by: Florent Poinsard --- .../planbuilder/abstract/queryprojection.go | 20 ++++++- .../abstract/queryprojection_test.go | 12 +++++ .../planbuilder/testdata/aggr_cases.txt | 52 +++++++++++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection.go b/go/vt/vtgate/planbuilder/abstract/queryprojection.go index 86ace46ac05..311af294f15 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection.go @@ -212,7 +212,25 @@ func (qp *QueryProjection) NeedsAggregation() bool { return qp.HasAggr || len(qp.GroupByExprs) > 0 } +func (qp QueryProjection) onlyAggr() bool { + if !qp.HasAggr { + return false + } + for _, expr := range qp.SelectExprs { + if !expr.Aggr { + return false + } + } + return true +} + // NeedsDistinct returns true if the query needs explicit distinct func (qp *QueryProjection) NeedsDistinct() bool { - return qp.Distinct + if !qp.Distinct { + return false + } + if qp.onlyAggr() && len(qp.GroupByExprs) == 0 { + return false + } + return true } diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go b/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go index e734dc3138f..0963dae3289 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go @@ -183,6 +183,18 @@ func TestQPSimplifiedExpr(t *testing.T) { "Grouping": [], "OrderBy": [], "Distinct": true +}`, + }, + { + query: "select distinct count(*) from user", + expected: ` +{ + "Select": [ + "aggr: count(*)" + ], + "Grouping": [], + "OrderBy": [], + "Distinct": false }`, }, } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 2346e0ca37b..96a88d67e46 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -1760,3 +1760,55 @@ Gen4 plan same as above ] } } + +# do not use distinct when using only aggregates and no group by +"select distinct count(*) from user" +{ + "QueryType": "SELECT", + "Original": "select distinct count(*) from user", + "Instructions": { + "OperatorType": "Distinct", + "Inputs": [ + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(0)", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(*) from `user` where 1 != 1", + "Query": "select count(*) from `user`", + "Table": "`user`" + } + ] + } + ] + } +} +{ + "QueryType": "SELECT", + "Original": "select distinct count(*) from user", + "Instructions": { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(0)", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select count(*) from `user` where 1 != 1", + "Query": "select count(*) from `user`", + "Table": "`user`" + } + ] + } +} From dfe1dc83daca7543328d444368bfc447da30acff Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 15 Jul 2021 11:01:18 +0200 Subject: [PATCH 06/14] Refactored unique vindex methods in route planning Co-authored-by: Andres Taylor Signed-off-by: Florent Poinsard --- go/vt/vtgate/planbuilder/route_planning.go | 49 ++++++++++++++++++- go/vt/vtgate/planbuilder/selectGen4.go | 26 +--------- .../planbuilder/testdata/aggr_cases.txt | 1 + 3 files changed, 51 insertions(+), 25 deletions(-) diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index 8b55fda7dba..7dcb60aa3c1 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -220,12 +220,59 @@ func planHorizon(sel *sqlparser.Select, plan logicalPlan, semTable *semantics.Se } if qp.NeedsDistinct() { - plan = newDistinct(plan) + switch p := plan.(type) { + case *route: + if rb.isSingleShard() || selectHasUniqueVindex(vschema, semTable, qp.SelectExprs) { + p.Select.MakeDistinct() + } else { + plan = newDistinct(plan) + } + default: + plan = newDistinct(plan) + } } return plan, nil } +func selectHasUniqueVindex(vschema ContextVSchema, semTable *semantics.SemTable, sel []abstract.SelectExpr) bool { + for _, expr := range sel { + if exprHasUniqueVindex(vschema, semTable, expr.Col.Expr) { + return true + } + } + return false +} + +func exprHasUniqueVindex(vschema ContextVSchema, semTable *semantics.SemTable, expr sqlparser.Expr) bool { + col, isCol := expr.(*sqlparser.ColName) + if !isCol { + return false + } + ts := semTable.Dependencies(expr) + tableInfo, err := semTable.TableInfoFor(ts) + if err != nil { + return false + } + tableName, err := tableInfo.Name() + if err != nil { + return false + } + vschemaTable, _, _, _, _, err := vschema.FindTableOrVindex(tableName) + if err != nil { + return false + } + for _, vindex := range vschemaTable.ColumnVindexes { + if len(vindex.Columns) > 1 || !vindex.Vindex.IsUnique() { + return false + } + if col.Name.Equal(vindex.Columns[0]) { + return true + } + } + return false +} + func createSingleShardRoutePlan(sel *sqlparser.Select, rb *route) { ast := rb.Select.(*sqlparser.Select) ast.Distinct = sel.Distinct diff --git a/go/vt/vtgate/planbuilder/selectGen4.go b/go/vt/vtgate/planbuilder/selectGen4.go index 583620301a0..171adc909c4 100644 --- a/go/vt/vtgate/planbuilder/selectGen4.go +++ b/go/vt/vtgate/planbuilder/selectGen4.go @@ -185,30 +185,8 @@ func planAggregations(qp *abstract.QueryProjection, plan logicalPlan, semTable * func hasUniqueVindex(vschema ContextVSchema, semTable *semantics.SemTable, groupByExprs []abstract.GroupBy) bool { for _, groupByExpr := range groupByExprs { - col, isCol := groupByExpr.WeightStrExpr.(*sqlparser.ColName) - if !isCol { - continue - } - ts := semTable.Dependencies(groupByExpr.WeightStrExpr) - tableInfo, err := semTable.TableInfoFor(ts) - if err != nil { - continue - } - tableName, err := tableInfo.Name() - if err != nil { - continue - } - vschemaTable, _, _, _, _, err := vschema.FindTableOrVindex(tableName) - if err != nil { - continue - } - for _, vindex := range vschemaTable.ColumnVindexes { - if len(vindex.Columns) > 1 || !vindex.Vindex.IsUnique() { - continue - } - if col.Name.Equal(vindex.Columns[0]) { - return true - } + if exprHasUniqueVindex(vschema, semTable, groupByExpr.WeightStrExpr) { + return true } } return false diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 96a88d67e46..2dc4e227bbe 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -78,6 +78,7 @@ Gen4 plan same as above "Table": "`user`" } } +Gen4 plan same as above # distinct and group by together for single route. "select distinct col1, id from user group by col1" From 31492c4f83296c23051fdd267fd1ffb049824aa9 Mon Sep 17 00:00:00 2001 From: Florent Poinsard Date: Thu, 15 Jul 2021 12:17:42 +0200 Subject: [PATCH 07/14] Better error handling for aggregations Co-authored-by: Andres Taylor Signed-off-by: Florent Poinsard --- go/mysql/sql_error.go | 1 + go/vt/sqlparser/ast_funcs.go | 20 ++- go/vt/vterrors/state.go | 1 + .../planbuilder/abstract/queryprojection.go | 39 +++-- go/vt/vtgate/planbuilder/route_planning.go | 14 +- .../planbuilder/testdata/aggr_cases.txt | 152 ++++++++++++------ 6 files changed, 156 insertions(+), 71 deletions(-) diff --git a/go/mysql/sql_error.go b/go/mysql/sql_error.go index fc436cd7788..509663ae78f 100644 --- a/go/mysql/sql_error.go +++ b/go/mysql/sql_error.go @@ -191,6 +191,7 @@ var stateToMysqlCode = map[vterrors.State]struct { vterrors.WrongNumberOfColumnsInSelect: {num: ERWrongNumberOfColumnsInSelect, state: SSWrongNumberOfColumns}, vterrors.WrongTypeForVar: {num: ERWrongTypeForVar, state: SSClientError}, vterrors.WrongValueForVar: {num: ERWrongValueForVar, state: SSClientError}, + vterrors.WrongFieldWithGroup: {num: ERWrongFieldWithGroup, state: SSClientError}, vterrors.ServerNotAvailable: {num: ERServerIsntAvailable, state: SSNetError}, vterrors.CantDoThisInTransaction: {num: ERCantDoThisDuringAnTransaction, state: SSCantDoThisDuringAnTransaction}, vterrors.RequiresPrimaryKey: {num: ERRequiresPrimaryKey, state: SSClientError}, diff --git a/go/vt/sqlparser/ast_funcs.go b/go/vt/sqlparser/ast_funcs.go index 624ce3a39e4..b7b8f2a51ab 100644 --- a/go/vt/sqlparser/ast_funcs.go +++ b/go/vt/sqlparser/ast_funcs.go @@ -1371,17 +1371,21 @@ func ToString(exprs []TableExpr) string { func ContainsAggregation(e Expr) bool { hasAggregates := false _ = Walk(func(node SQLNode) (kontinue bool, err error) { - switch node := node.(type) { - case *FuncExpr: - if node.IsAggregate() { - hasAggregates = true - return false, nil - } - case *GroupConcatExpr: + if IsAggregation(node) { hasAggregates = true - return false, nil } return true, nil }, e) return hasAggregates } + +// IsAggregation returns true if the node is an aggregation expression +func IsAggregation(node SQLNode) bool { + switch node := node.(type) { + case *FuncExpr: + return node.IsAggregate() + case *GroupConcatExpr: + return true + } + return false +} diff --git a/go/vt/vterrors/state.go b/go/vt/vterrors/state.go index 40ec8390db0..00c73231d51 100644 --- a/go/vt/vterrors/state.go +++ b/go/vt/vterrors/state.go @@ -35,6 +35,7 @@ const ( NonUniqTable NonUpdateableTable SyntaxError + WrongFieldWithGroup WrongGroupField WrongTypeForVar WrongValueForVar diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection.go b/go/vt/vtgate/planbuilder/abstract/queryprojection.go index 311af294f15..15f50021ce9 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection.go @@ -63,7 +63,6 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { Distinct: sel.Distinct, } - hasNonAggr := false for _, selExp := range sel.SelectExprs { exp, ok := selExp.(*sqlparser.AliasedExpr) if !ok { @@ -84,18 +83,9 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { }) continue } - - if sqlparser.ContainsAggregation(exp.Expr) { - return nil, vterrors.New(vtrpcpb.Code_UNIMPLEMENTED, "unsupported: in scatter query: complex aggregate expression") - } - hasNonAggr = true qp.SelectExprs = append(qp.SelectExprs, SelectExpr{Col: exp}) } - if hasNonAggr && qp.HasAggr && sel.GroupBy == nil { - return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.MixOfGroupFuncAndFields, "Mixing of aggregation and non-aggregation columns is not allowed if there is no GROUP BY clause") - } - for _, group := range sel.GroupBy { expr, weightStrExpr, err := qp.getSimplifiedExpr(group, "group statement") if err != nil { @@ -107,6 +97,16 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { qp.GroupByExprs = append(qp.GroupByExprs, GroupBy{Inner: expr, WeightStrExpr: weightStrExpr}) } + if qp.HasAggr { + expr := qp.getNonAggrExprNotMatchingGroupByExprs() + if expr != nil { + if len(qp.GroupByExprs) > 0 { + return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongFieldWithGroup, "Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column '%s' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by", sqlparser.String(expr)) + } + return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.MixOfGroupFuncAndFields, "In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column '%s'; this is incompatible with sql_mode=only_full_group_by", sqlparser.String(expr)) + } + } + canPushDownSorting := true for _, order := range sel.OrderBy { expr, weightStrExpr, err := qp.getSimplifiedExpr(order.Expr, "order clause") @@ -129,6 +129,25 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { return qp, nil } +func (qp *QueryProjection) getNonAggrExprNotMatchingGroupByExprs() sqlparser.Expr { + for _, expr := range qp.SelectExprs { + if expr.Aggr { + continue + } + isGroupByOk := false + for _, groupByExpr := range qp.GroupByExprs { + if sqlparser.EqualsExpr(groupByExpr.WeightStrExpr, expr.Col.Expr) { + isGroupByOk = true + break + } + } + if !isGroupByOk { + return expr.Col.Expr + } + } + return nil +} + // getSimplifiedExpr takes an expression used in ORDER BY or GROUP BY, which can reference both aliased columns and // column offsets, and returns an expression that is simpler to evaluate func (qp *QueryProjection) getSimplifiedExpr(e sqlparser.Expr, caller string) (expr sqlparser.Expr, weightStrExpr sqlparser.Expr, err error) { diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index 7dcb60aa3c1..ac144ec292d 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -165,12 +165,12 @@ func planHorizon(sel *sqlparser.Select, plan logicalPlan, semTable *semantics.Se return plan, nil } - if err := checkUnsupportedConstructs(sel); err != nil { + qp, err := abstract.CreateQPFromSelect(sel) + if err != nil { return nil, err } - qp, err := abstract.CreateQPFromSelect(sel) - if err != nil { + if err := checkUnsupportedConstructs(sel, qp); err != nil { return nil, err } @@ -287,7 +287,13 @@ func createSingleShardRoutePlan(sel *sqlparser.Select, rb *route) { } } -func checkUnsupportedConstructs(sel *sqlparser.Select) error { +func checkUnsupportedConstructs(sel *sqlparser.Select, qp *abstract.QueryProjection) error { + for _, expr := range qp.SelectExprs { + colExpr := expr.Col.Expr + if !sqlparser.IsAggregation(colExpr) && sqlparser.ContainsAggregation(colExpr) { + return vterrors.New(vtrpcpb.Code_UNIMPLEMENTED, "unsupported: in scatter query: complex aggregate expression") + } + } if sel.Having != nil { return semantics.Gen4NotSupportedF("HAVING") } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 2dc4e227bbe..187f69b42a3 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -741,6 +741,7 @@ Gen4 plan same as above ] } } +Gen4 error: In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column 'id'; this is incompatible with sql_mode=only_full_group_by # scatter aggregate using distinct "select distinct col from user" @@ -768,6 +769,26 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select distinct col from user", + "Instructions": { + "OperatorType": "Distinct", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select col from `user` where 1 != 1", + "Query": "select col from `user`", + "Table": "`user`" + } + ] + } +} # scatter aggregate group by select col "select col from user group by col" @@ -1178,16 +1199,15 @@ Gen4 plan same as above ] } } - -# scatter aggregate group by column number -"select col from user group by 1" { "QueryType": "SELECT", - "Original": "select col from user group by 1", + "Original": "select a, b, count(*) from user group by b, a", "Instructions": { "OperatorType": "Aggregate", "Variant": "Ordered", - "GroupBy": "0", + "Aggregates": "count(2)", + "GroupBy": "(1|3), (0|4)", + "ResultColumns": 3, "Inputs": [ { "OperatorType": "Route", @@ -1196,30 +1216,24 @@ Gen4 plan same as above "Name": "user", "Sharded": true }, - "FieldQuery": "select col, weight_string(col) from `user` where 1 != 1 group by 1", - "OrderBy": "(0|1) ASC", - "Query": "select col, weight_string(col) from `user` group by 1 order by col asc", - "ResultColumns": 1, + "FieldQuery": "select a, b, count(*), weight_string(b), weight_string(a) from `user` where 1 != 1 group by b, a", + "OrderBy": "(1|3) ASC, (0|4) ASC", + "Query": "select a, b, count(*), weight_string(b), weight_string(a) from `user` group by b, a order by b asc, a asc", "Table": "`user`" } ] } } -# scatter aggregate group by invalid column number -"select col from user group by 2" -"Unknown column '2' in 'group statement'" -Gen4 plan same as above - -# scatter aggregate order by null -"select count(*) from user order by null" +# scatter aggregate group by column number +"select col from user group by 1" { "QueryType": "SELECT", - "Original": "select count(*) from user order by null", + "Original": "select col from user group by 1", "Instructions": { "OperatorType": "Aggregate", "Variant": "Ordered", - "Aggregates": "count(0)", + "GroupBy": "0", "Inputs": [ { "OperatorType": "Route", @@ -1228,28 +1242,23 @@ Gen4 plan same as above "Name": "user", "Sharded": true }, - "FieldQuery": "select count(*) from `user` where 1 != 1", - "Query": "select count(*) from `user`", + "FieldQuery": "select col, weight_string(col) from `user` where 1 != 1 group by 1", + "OrderBy": "(0|1) ASC", + "Query": "select col, weight_string(col) from `user` group by 1 order by col asc", + "ResultColumns": 1, "Table": "`user`" } ] } } - -# scatter aggregate with complex select list (can't build order by) -"select distinct a+1 from user" -"generating order by clause: cannot reference a complex expression" - -# scatter aggregate with numbered order by columns -"select a, b, c, d, count(*) from user group by 1, 2, 3 order by 1, 2, 3" { "QueryType": "SELECT", - "Original": "select a, b, c, d, count(*) from user group by 1, 2, 3 order by 1, 2, 3", + "Original": "select col from user group by 1", "Instructions": { "OperatorType": "Aggregate", "Variant": "Ordered", - "Aggregates": "count(4)", - "GroupBy": "0, 1, 2", + "GroupBy": "(0|1)", + "ResultColumns": 1, "Inputs": [ { "OperatorType": "Route", @@ -1258,24 +1267,29 @@ Gen4 plan same as above "Name": "user", "Sharded": true }, - "FieldQuery": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` where 1 != 1 group by 1, 2, 3", - "OrderBy": "(0|5) ASC, (1|6) ASC, (2|7) ASC", - "Query": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` group by 1, 2, 3 order by 1 asc, 2 asc, 3 asc", - "ResultColumns": 5, + "FieldQuery": "select col, weight_string(col) from `user` where 1 != 1 group by col", + "OrderBy": "(0|1) ASC", + "Query": "select col, weight_string(col) from `user` group by col order by col asc", "Table": "`user`" } ] } } + +# scatter aggregate group by invalid column number +"select col from user group by 2" +"Unknown column '2' in 'group statement'" +Gen4 plan same as above + +# scatter aggregate order by null +"select count(*) from user order by null" { "QueryType": "SELECT", - "Original": "select a, b, c, d, count(*) from user group by 1, 2, 3 order by 1, 2, 3", + "Original": "select count(*) from user order by null", "Instructions": { "OperatorType": "Aggregate", "Variant": "Ordered", - "Aggregates": "count(4)", - "GroupBy": "(0|5), (1|6), (2|7)", - "ResultColumns": 5, + "Aggregates": "count(0)", "Inputs": [ { "OperatorType": "Route", @@ -1284,20 +1298,23 @@ Gen4 plan same as above "Name": "user", "Sharded": true }, - "FieldQuery": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` where 1 != 1 group by a, b, c", - "OrderBy": "(0|5) ASC, (1|6) ASC, (2|7) ASC", - "Query": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` group by a, b, c order by a asc, b asc, c asc", + "FieldQuery": "select count(*) from `user` where 1 != 1", + "Query": "select count(*) from `user`", "Table": "`user`" } ] } } -# scatter aggregate with named order by columns -"select a, b, c, d, count(*) from user group by 1, 2, 3 order by a, b, c" +# scatter aggregate with complex select list (can't build order by) +"select distinct a+1 from user" +"generating order by clause: cannot reference a complex expression" + +# scatter aggregate with numbered order by columns +"select a, b, c, d, count(*) from user group by 1, 2, 3 order by 1, 2, 3" { "QueryType": "SELECT", - "Original": "select a, b, c, d, count(*) from user group by 1, 2, 3 order by a, b, c", + "Original": "select a, b, c, d, count(*) from user group by 1, 2, 3 order by 1, 2, 3", "Instructions": { "OperatorType": "Aggregate", "Variant": "Ordered", @@ -1313,13 +1330,17 @@ Gen4 plan same as above }, "FieldQuery": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` where 1 != 1 group by 1, 2, 3", "OrderBy": "(0|5) ASC, (1|6) ASC, (2|7) ASC", - "Query": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` group by 1, 2, 3 order by a asc, b asc, c asc", + "Query": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` group by 1, 2, 3 order by 1 asc, 2 asc, 3 asc", "ResultColumns": 5, "Table": "`user`" } ] } } +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'd' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by + +# scatter aggregate with named order by columns +"select a, b, c, d, count(*) from user group by 1, 2, 3 order by a, b, c" { "QueryType": "SELECT", "Original": "select a, b, c, d, count(*) from user group by 1, 2, 3 order by a, b, c", @@ -1327,8 +1348,7 @@ Gen4 plan same as above "OperatorType": "Aggregate", "Variant": "Ordered", "Aggregates": "count(4)", - "GroupBy": "(0|5), (1|6), (2|7)", - "ResultColumns": 5, + "GroupBy": "0, 1, 2", "Inputs": [ { "OperatorType": "Route", @@ -1337,14 +1357,16 @@ Gen4 plan same as above "Name": "user", "Sharded": true }, - "FieldQuery": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` where 1 != 1 group by a, b, c", + "FieldQuery": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` where 1 != 1 group by 1, 2, 3", "OrderBy": "(0|5) ASC, (1|6) ASC, (2|7) ASC", - "Query": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` group by a, b, c order by a asc, b asc, c asc", + "Query": "select a, b, c, d, count(*), weight_string(a), weight_string(b), weight_string(c) from `user` group by 1, 2, 3 order by a asc, b asc, c asc", + "ResultColumns": 5, "Table": "`user`" } ] } } +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'd' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # scatter aggregate with jumbled order by columns "select a, b, c, d, count(*) from user group by 1, 2, 3, 4 order by d, b, a, c" @@ -1543,6 +1565,37 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select col, count(*) from user group by col limit 10", + "Instructions": { + "OperatorType": "Limit", + "Count": 10, + "Inputs": [ + { + "OperatorType": "Aggregate", + "Variant": "Ordered", + "Aggregates": "count(1)", + "GroupBy": "(0|2)", + "ResultColumns": 2, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select col, count(*), weight_string(col) from `user` where 1 != 1 group by col", + "OrderBy": "(0|2) ASC", + "Query": "select col, count(*), weight_string(col) from `user` group by col order by col asc limit :__upper_limit", + "Table": "`user`" + } + ] + } + ] + } +} # Group by with collate operator "select user.col1 as a from user where user.id = 5 group by a collate utf8_general_ci" @@ -1637,6 +1690,7 @@ Gen4 plan same as above ] } } +Gen4 error: In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column 'a'; this is incompatible with sql_mode=only_full_group_by # distinct and aggregate functions "select distinct a, count(*) from user group by a" From 2e6c2ae3fc3f2f2a5de01b0c81a8b361b6bcb435 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Thu, 15 Jul 2021 14:02:41 +0200 Subject: [PATCH 08/14] push down DISTINCT when possible Signed-off-by: Andres Taylor --- .../abstract/queryprojection_test.go | 8 ++--- go/vt/vtgate/planbuilder/route_planning.go | 30 +++++++++++++------ .../planbuilder/testdata/aggr_cases.txt | 4 +-- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go b/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go index 0963dae3289..57e5ab03ce2 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection_test.go @@ -46,7 +46,7 @@ func TestQP(t *testing.T) { }, { sql: "select 1, count(1) from user", - expErr: "Mixing of aggregation and non-aggregation columns is not allowed if there is no GROUP BY clause", + expErr: "In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column '1'; this is incompatible with sql_mode=only_full_group_by", }, { sql: "select max(id) from user", @@ -55,13 +55,9 @@ func TestQP(t *testing.T) { sql: "select max(a, b) from user", expErr: "aggregate functions take a single argument 'max(a, b)'", }, - { - sql: "select func(max(id)) from user", - expErr: "unsupported: in scatter query: complex aggregate expression", - }, { sql: "select 1, count(1) from user order by 1", - expErr: "Mixing of aggregation and non-aggregation columns is not allowed if there is no GROUP BY clause", + expErr: "In aggregated query without GROUP BY, expression of SELECT list contains nonaggregated column '1'; this is incompatible with sql_mode=only_full_group_by", }, { sql: "select id from user order by col, id, 1", diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index ac144ec292d..be6ea7222dc 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -220,21 +220,33 @@ func planHorizon(sel *sqlparser.Select, plan logicalPlan, semTable *semantics.Se } if qp.NeedsDistinct() { - switch p := plan.(type) { - case *route: - if rb.isSingleShard() || selectHasUniqueVindex(vschema, semTable, qp.SelectExprs) { - p.Select.MakeDistinct() - } else { - plan = newDistinct(plan) - } - default: - plan = newDistinct(plan) + plan, err = pushDistinct(plan, semTable, vschema, qp) + if err != nil { + return nil, err } } return plan, nil } +func pushDistinct(plan logicalPlan, semTable *semantics.SemTable, vschema ContextVSchema, qp *abstract.QueryProjection) (logicalPlan, error) { + switch p := plan.(type) { + case *route: + // we always make the underlying query distinct, + // and then we might also add a distinct operator on top if it is needed + p.Select.MakeDistinct() + if !p.isSingleShard() && !selectHasUniqueVindex(vschema, semTable, qp.SelectExprs) { + plan = newDistinct(plan) + } + return plan, nil + case *orderedAggregate, *joinGen4: + return newDistinct(plan), nil + + default: + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no you didnt %T", plan) + } +} + func selectHasUniqueVindex(vschema ContextVSchema, semTable *semantics.SemTable, sel []abstract.SelectExpr) bool { for _, expr := range sel { if exprHasUniqueVindex(vschema, semTable, expr.Col.Expr) { diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 187f69b42a3..6e728f58025 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -783,7 +783,7 @@ Gen4 error: In aggregated query without GROUP BY, expression of SELECT list cont "Sharded": true }, "FieldQuery": "select col from `user` where 1 != 1", - "Query": "select col from `user`", + "Query": "select distinct col from `user`", "Table": "`user`" } ] @@ -1809,7 +1809,7 @@ Gen4 plan same as above "Sharded": true }, "FieldQuery": "select col1, col2 from `user` where 1 != 1", - "Query": "select col1, col2 from `user`", + "Query": "select distinct col1, col2 from `user`", "Table": "`user`" } ] From da61ee6cd7b551cee8bc098c9b4021aa506dc788 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Thu, 15 Jul 2021 14:20:55 +0200 Subject: [PATCH 09/14] let complex expressions through if we can send it all to a simple route Signed-off-by: Andres Taylor --- .../planbuilder/abstract/queryprojection.go | 2 ++ go/vt/vtgate/planbuilder/route_planning.go | 18 ++++++++++++------ .../vtgate/planbuilder/testdata/aggr_cases.txt | 3 +++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection.go b/go/vt/vtgate/planbuilder/abstract/queryprojection.go index 15f50021ce9..83db6311d8c 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection.go @@ -99,6 +99,8 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { if qp.HasAggr { expr := qp.getNonAggrExprNotMatchingGroupByExprs() + // if we have aggregation functions, non aggregating columns and GROUP BY, + // the non-aggregating expressions must all be listed in the GROUP BY list if expr != nil { if len(qp.GroupByExprs) > 0 { return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongFieldWithGroup, "Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column '%s' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by", sqlparser.String(expr)) diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index be6ea7222dc..230ef456a3f 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -180,6 +180,18 @@ func planHorizon(sel *sqlparser.Select, plan logicalPlan, semTable *semantics.Se if err != nil { return nil, err } + + if _, planIsRoute := plan.(*route); !planIsRoute { + // if we had to build up additional operators around the route, we have to fail this query + for _, expr := range qp.SelectExprs { + colExpr := expr.Col.Expr + if !sqlparser.IsAggregation(colExpr) && sqlparser.ContainsAggregation(colExpr) { + return nil, vterrors.New(vtrpcpb.Code_UNIMPLEMENTED, "unsupported: in scatter query: complex aggregate expression") + } + } + + } + } else { for _, e := range qp.SelectExprs { if _, _, err := pushProjection(e.Col, plan, semTable, true, false); err != nil { @@ -300,12 +312,6 @@ func createSingleShardRoutePlan(sel *sqlparser.Select, rb *route) { } func checkUnsupportedConstructs(sel *sqlparser.Select, qp *abstract.QueryProjection) error { - for _, expr := range qp.SelectExprs { - colExpr := expr.Col.Expr - if !sqlparser.IsAggregation(colExpr) && sqlparser.ContainsAggregation(colExpr) { - return vterrors.New(vtrpcpb.Code_UNIMPLEMENTED, "unsupported: in scatter query: complex aggregate expression") - } - } if sel.Having != nil { return semantics.Gen4NotSupportedF("HAVING") } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 6e728f58025..7bbfeba8dc7 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -607,6 +607,7 @@ Gen4 plan same as above "Table": "`user`" } } +Gen4 plan same as above # group by a unique vindex where alias from select list is used "select id as val, 1+count(*) from user group by val" @@ -625,6 +626,7 @@ Gen4 plan same as above "Table": "`user`" } } +Gen4 plan same as above # group by a unique vindex where expression is qualified (alias should be ignored) "select val as id, 1+count(*) from user group by user.id" @@ -643,6 +645,7 @@ Gen4 plan same as above "Table": "`user`" } } +Gen4 plan same as above # group by a unique vindex where it should skip non-aliased expressions. "select *, id, 1+count(*) from user group by id" From 22d832ce43a606b37437e750babc6fbf13cbcc3f Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Fri, 16 Jul 2021 06:53:44 +0200 Subject: [PATCH 10/14] added solved plans Signed-off-by: Andres Taylor --- .../planbuilder/testdata/aggr_cases.txt | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 7bbfeba8dc7..3814a7bc013 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -80,7 +80,7 @@ Gen4 plan same as above } Gen4 plan same as above -# distinct and group by together for single route. +# distinct and group by together for single route - group by is redundant "select distinct col1, id from user group by col1" { "QueryType": "SELECT", @@ -97,6 +97,21 @@ Gen4 plan same as above "Table": "`user`" } } +{ + "QueryType": "SELECT", + "Original": "select distinct col1, id from user group by col1", + "Instructions": { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select col1, id from `user` where 1 != 1", + "Query": "select distinct col1, id from `user`", + "Table": "`user`" + } +} # scatter group by a text column "select count(*), a, textcol1, b from user group by a, textcol1, b" @@ -379,6 +394,26 @@ Gen4 plan same as above ] } } +{ + "QueryType": "SELECT", + "Original": "select distinct col1, col2 from user group by col1", + "Instructions": { + "OperatorType": "Distinct", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select col1, col2 from `user` where 1 != 1", + "Query": "select distinct col1, col2 from `user`", + "Table": "`user`" + } + ] + } +} # aggregate on RHS subquery (tests symbol table merge) "select user.a, t.b from user join (select count(*) b from unsharded) as t" @@ -1313,6 +1348,7 @@ Gen4 plan same as above "select distinct a+1 from user" "generating order by clause: cannot reference a complex expression" + # scatter aggregate with numbered order by columns "select a, b, c, d, count(*) from user group by 1, 2, 3 order by 1, 2, 3" { From 4f8c5b182148a0edb47b0740fd459706c3763f25 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Fri, 16 Jul 2021 09:32:59 +0200 Subject: [PATCH 11/14] fail aggregations queries that are not supported Signed-off-by: Andres Taylor --- go/vt/sqlparser/ast_funcs.go | 1 + .../planbuilder/abstract/queryprojection.go | 40 ++++++++++++------- go/vt/vtgate/planbuilder/selectGen4.go | 5 ++- .../planbuilder/testdata/aggr_cases.txt | 2 +- go/vt/vtgate/planbuilder/testdata/onecase.txt | 2 +- 5 files changed, 33 insertions(+), 17 deletions(-) diff --git a/go/vt/sqlparser/ast_funcs.go b/go/vt/sqlparser/ast_funcs.go index b7b8f2a51ab..248cb35ec2c 100644 --- a/go/vt/sqlparser/ast_funcs.go +++ b/go/vt/sqlparser/ast_funcs.go @@ -1373,6 +1373,7 @@ func ContainsAggregation(e Expr) bool { _ = Walk(func(node SQLNode) (kontinue bool, err error) { if IsAggregation(node) { hasAggregates = true + return false, nil } return true, nil }, e) diff --git a/go/vt/vtgate/planbuilder/abstract/queryprojection.go b/go/vt/vtgate/planbuilder/abstract/queryprojection.go index 83db6311d8c..d8a0be7b875 100644 --- a/go/vt/vtgate/planbuilder/abstract/queryprojection.go +++ b/go/vt/vtgate/planbuilder/abstract/queryprojection.go @@ -68,22 +68,19 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { if !ok { return nil, semantics.Gen4NotSupportedF("%T in select list", selExp) } - fExpr, ok := exp.Expr.(*sqlparser.FuncExpr) - if ok && fExpr.IsAggregate() { - if len(fExpr.Exprs) != 1 { - return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.SyntaxError, "aggregate functions take a single argument '%s'", sqlparser.String(fExpr)) - } - if fExpr.Distinct { - return nil, semantics.Gen4NotSupportedF("distinct aggregation") - } + + if err := checkForInvalidAggregations(exp); err != nil { + return nil, err + } + col := SelectExpr{ + Col: exp, + } + if sqlparser.ContainsAggregation(exp.Expr) { + col.Aggr = true qp.HasAggr = true - qp.SelectExprs = append(qp.SelectExprs, SelectExpr{ - Col: exp, - Aggr: true, - }) - continue } - qp.SelectExprs = append(qp.SelectExprs, SelectExpr{Col: exp}) + + qp.SelectExprs = append(qp.SelectExprs, col) } for _, group := range sel.GroupBy { @@ -131,6 +128,21 @@ func CreateQPFromSelect(sel *sqlparser.Select) (*QueryProjection, error) { return qp, nil } +func checkForInvalidAggregations(exp *sqlparser.AliasedExpr) error { + return sqlparser.Walk(func(node sqlparser.SQLNode) (kontinue bool, err error) { + fExpr, ok := node.(*sqlparser.FuncExpr) + if ok && fExpr.IsAggregate() { + if len(fExpr.Exprs) != 1 { + return false, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.SyntaxError, "aggregate functions take a single argument '%s'", sqlparser.String(fExpr)) + } + if fExpr.Distinct { + return false, semantics.Gen4NotSupportedF("distinct aggregation") + } + } + return true, nil + }, exp.Expr) +} + func (qp *QueryProjection) getNonAggrExprNotMatchingGroupByExprs() sqlparser.Expr { for _, expr := range qp.SelectExprs { if expr.Aggr { diff --git a/go/vt/vtgate/planbuilder/selectGen4.go b/go/vt/vtgate/planbuilder/selectGen4.go index 171adc909c4..90131187845 100644 --- a/go/vt/vtgate/planbuilder/selectGen4.go +++ b/go/vt/vtgate/planbuilder/selectGen4.go @@ -143,7 +143,10 @@ func planAggregations(qp *abstract.QueryProjection, plan logicalPlan, semTable * return nil, false, false, err } if e.Aggr && oa != nil { - fExpr := e.Col.Expr.(*sqlparser.FuncExpr) + fExpr, isFunc := e.Col.Expr.(*sqlparser.FuncExpr) + if !isFunc { + return nil, false, false, vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, "unsupported: in scatter query: complex aggregate expression") + } opcode := engine.SupportedAggregates[fExpr.Name.Lowered()] oa.eaggr.Aggregates = append(oa.eaggr.Aggregates, engine.AggregateParams{ Opcode: opcode, diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt index 3814a7bc013..373cde13a9b 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.txt @@ -680,7 +680,7 @@ Gen4 plan same as above "Table": "`user`" } } -Gen4 plan same as above +Gen4 error: Expression of SELECT list is not in GROUP BY clause and contains nonaggregated column 'val' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by # group by a unique vindex where it should skip non-aliased expressions. "select *, id, 1+count(*) from user group by id" diff --git a/go/vt/vtgate/planbuilder/testdata/onecase.txt b/go/vt/vtgate/planbuilder/testdata/onecase.txt index 16a368ddafd..e819513f354 100644 --- a/go/vt/vtgate/planbuilder/testdata/onecase.txt +++ b/go/vt/vtgate/planbuilder/testdata/onecase.txt @@ -1 +1 @@ -# Add your test case here for debugging and run go test -run=One. \ No newline at end of file +# Add your test case here for debugging and run go test -run=One. From f2f3fb1fd48b925ab150add8e8124e077b83a385 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Fri, 16 Jul 2021 10:39:38 +0200 Subject: [PATCH 12/14] refactor code to make it more readable Signed-off-by: Andres Taylor --- go/vt/vtgate/planbuilder/route_planning.go | 113 +++++++++++--------- go/vt/vtgate/planbuilder/selectGen4.go | 115 ++++++++++++++------- 2 files changed, 138 insertions(+), 90 deletions(-) diff --git a/go/vt/vtgate/planbuilder/route_planning.go b/go/vt/vtgate/planbuilder/route_planning.go index 230ef456a3f..91903c2af39 100644 --- a/go/vt/vtgate/planbuilder/route_planning.go +++ b/go/vt/vtgate/planbuilder/route_planning.go @@ -80,7 +80,14 @@ func newBuildSelectPlan(sel *sqlparser.Select, vschema ContextVSchema) (engine.P return nil, err } - plan, err = planHorizon(sel, plan, semTable, vschema) + hp := horizonPlanning{ + sel: sel, + plan: plan, + semTable: semTable, + vschema: vschema, + } + + plan, err = hp.planHorizon() if err != nil { return nil, err } @@ -154,91 +161,97 @@ func planLimit(limit *sqlparser.Limit, plan logicalPlan) (logicalPlan, error) { return lPlan, nil } -func planHorizon(sel *sqlparser.Select, plan logicalPlan, semTable *semantics.SemTable, vschema ContextVSchema) (logicalPlan, error) { - rb, ok := plan.(*route) - if !ok && semTable.ProjectionErr != nil { - return nil, semTable.ProjectionErr +type horizonPlanning struct { + sel *sqlparser.Select + plan logicalPlan + semTable *semantics.SemTable + vschema ContextVSchema + qp *abstract.QueryProjection + needsTruncation bool + vtgateGrouping bool +} + +func (hp horizonPlanning) planHorizon() (logicalPlan, error) { + rb, ok := hp.plan.(*route) + if !ok && hp.semTable.ProjectionErr != nil { + return nil, hp.semTable.ProjectionErr } if ok && rb.isSingleShard() { - createSingleShardRoutePlan(sel, rb) - return plan, nil + createSingleShardRoutePlan(hp.sel, rb) + return hp.plan, nil } - qp, err := abstract.CreateQPFromSelect(sel) + qp2, err := abstract.CreateQPFromSelect(hp.sel) if err != nil { return nil, err } - if err := checkUnsupportedConstructs(sel, qp); err != nil { + hp.qp = qp2 + + if err := checkUnsupportedConstructs(hp.sel); err != nil { return nil, err } - var needsTruncation, vtgateGrouping bool - if qp.NeedsAggregation() { - plan, needsTruncation, vtgateGrouping, err = planAggregations(qp, plan, semTable, vschema) + if hp.qp.NeedsAggregation() { + err = hp.planAggregations() if err != nil { return nil, err } - - if _, planIsRoute := plan.(*route); !planIsRoute { - // if we had to build up additional operators around the route, we have to fail this query - for _, expr := range qp.SelectExprs { - colExpr := expr.Col.Expr - if !sqlparser.IsAggregation(colExpr) && sqlparser.ContainsAggregation(colExpr) { - return nil, vterrors.New(vtrpcpb.Code_UNIMPLEMENTED, "unsupported: in scatter query: complex aggregate expression") - } - } - - } - } else { - for _, e := range qp.SelectExprs { - if _, _, err := pushProjection(e.Col, plan, semTable, true, false); err != nil { + for _, e := range hp.qp.SelectExprs { + if _, _, err := pushProjection(e.Col, hp.plan, hp.semTable, true, false); err != nil { return nil, err } } } - if len(qp.OrderExprs) > 0 { - var colAdded bool - plan, colAdded, err = planOrderBy(qp, qp.OrderExprs, plan, semTable) + if len(hp.qp.OrderExprs) > 0 { + hp.plan, err = hp.planOrderBy(hp.qp.OrderExprs, hp.plan) if err != nil { return nil, err } - needsTruncation = needsTruncation || colAdded } - if qp.CanPushDownSorting && vtgateGrouping { - var colAdded bool - plan, colAdded, err = planOrderByUsingGroupBy(qp, plan, semTable) + if hp.qp.CanPushDownSorting && hp.vtgateGrouping { + hp.plan, err = hp.planOrderByUsingGroupBy() if err != nil { return nil, err } - needsTruncation = needsTruncation || colAdded } - if needsTruncation { - switch p := plan.(type) { - case *route: - p.eroute.SetTruncateColumnCount(sel.GetColumnCount()) - case *orderedAggregate: - p.eaggr.SetTruncateColumnCount(sel.GetColumnCount()) - case *memorySort: - p.truncater.SetTruncateColumnCount(sel.GetColumnCount()) - default: - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "plan type not known for column truncation: %T", plan) - } + err = hp.truncateColumnsIfNeeded() + if err != nil { + return nil, err } - if qp.NeedsDistinct() { - plan, err = pushDistinct(plan, semTable, vschema, qp) + if hp.qp.NeedsDistinct() { + hp.plan, err = pushDistinct(hp.plan, hp.semTable, hp.vschema, hp.qp) if err != nil { return nil, err } } - return plan, nil + return hp.plan, nil +} + +func (hp horizonPlanning) truncateColumnsIfNeeded() error { + if !hp.needsTruncation { + return nil + } + + switch p := hp.plan.(type) { + case *route: + p.eroute.SetTruncateColumnCount(hp.sel.GetColumnCount()) + case *orderedAggregate: + p.eaggr.SetTruncateColumnCount(hp.sel.GetColumnCount()) + case *memorySort: + p.truncater.SetTruncateColumnCount(hp.sel.GetColumnCount()) + default: + return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "plan type not known for column truncation: %T", hp.plan) + } + + return nil } func pushDistinct(plan logicalPlan, semTable *semantics.SemTable, vschema ContextVSchema, qp *abstract.QueryProjection) (logicalPlan, error) { @@ -255,7 +268,7 @@ func pushDistinct(plan logicalPlan, semTable *semantics.SemTable, vschema Contex return newDistinct(plan), nil default: - return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "no you didnt %T", plan) + return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unknown plan type for DISTINCT %T", plan) } } @@ -311,7 +324,7 @@ func createSingleShardRoutePlan(sel *sqlparser.Select, rb *route) { } } -func checkUnsupportedConstructs(sel *sqlparser.Select, qp *abstract.QueryProjection) error { +func checkUnsupportedConstructs(sel *sqlparser.Select) error { if sel.Having != nil { return semantics.Gen4NotSupportedF("HAVING") } diff --git a/go/vt/vtgate/planbuilder/selectGen4.go b/go/vt/vtgate/planbuilder/selectGen4.go index 90131187845..14d3c31900e 100644 --- a/go/vt/vtgate/planbuilder/selectGen4.go +++ b/go/vt/vtgate/planbuilder/selectGen4.go @@ -121,31 +121,36 @@ func checkIfAlreadyExists(expr *sqlparser.AliasedExpr, sel *sqlparser.Select) in return -1 } -func planAggregations(qp *abstract.QueryProjection, plan logicalPlan, semTable *semantics.SemTable, vschema ContextVSchema) (logicalPlan, bool, bool, error) { - newPlan := plan +func (hp *horizonPlanning) haveToTruncate(v bool) { + hp.needsTruncation = hp.needsTruncation || v +} + +func (hp *horizonPlanning) planAggregations() error { + newPlan := hp.plan var oa *orderedAggregate - if !hasUniqueVindex(vschema, semTable, qp.GroupByExprs) { + if !hasUniqueVindex(hp.vschema, hp.semTable, hp.qp.GroupByExprs) { eaggr := &engine.OrderedAggregate{} oa = &orderedAggregate{ resultsBuilder: resultsBuilder{ - logicalPlanCommon: newBuilderCommon(plan), + logicalPlanCommon: newBuilderCommon(hp.plan), weightStrings: make(map[*resultColumn]int), truncater: eaggr, }, eaggr: eaggr, } newPlan = oa + hp.vtgateGrouping = true } - for _, e := range qp.SelectExprs { - offset, _, err := pushProjection(e.Col, plan, semTable, true, false) + for _, e := range hp.qp.SelectExprs { + offset, _, err := pushProjection(e.Col, hp.plan, hp.semTable, true, false) if err != nil { - return nil, false, false, err + return err } if e.Aggr && oa != nil { fExpr, isFunc := e.Col.Expr.(*sqlparser.FuncExpr) if !isFunc { - return nil, false, false, vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, "unsupported: in scatter query: complex aggregate expression") + return vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, "unsupported: in scatter query: complex aggregate expression") } opcode := engine.SupportedAggregates[fExpr.Name.Lowered()] oa.eaggr.Aggregates = append(oa.eaggr.Aggregates, engine.AggregateParams{ @@ -156,34 +161,47 @@ func planAggregations(qp *abstract.QueryProjection, plan logicalPlan, semTable * } } - var colAdded bool - for _, groupExpr := range qp.GroupByExprs { - added, err := planGroupByGen4(groupExpr, newPlan, semTable) + for _, groupExpr := range hp.qp.GroupByExprs { + added, err := planGroupByGen4(groupExpr, newPlan, hp.semTable) if err != nil { - return nil, false, false, err + return err } - colAdded = colAdded || added + hp.haveToTruncate(added) } - if !qp.CanPushDownSorting && oa != nil { + if !hp.qp.CanPushDownSorting && oa != nil { var orderExprs []abstract.OrderBy // if we can't at a later stage push down the sorting to our inputs, we have to do ordering here - for _, groupExpr := range qp.GroupByExprs { + for _, groupExpr := range hp.qp.GroupByExprs { orderExprs = append(orderExprs, abstract.OrderBy{ Inner: &sqlparser.Order{Expr: groupExpr.Inner}, WeightStrExpr: groupExpr.WeightStrExpr}, ) } if len(orderExprs) > 0 { - newInput, added, err := planOrderBy(qp, orderExprs, plan, semTable) + newInput, err := hp.planOrderBy(orderExprs, hp.plan) if err != nil { - return nil, false, false, err + return err } oa.input = newInput - return oa, colAdded || added, true, nil + hp.plan = oa } + } else { + hp.plan = newPlan } - return newPlan, colAdded, oa != nil, nil + + // done with aggregation planning. let's check if we should fail the query + if _, planIsRoute := hp.plan.(*route); !planIsRoute { + // if we had to build up additional operators around the route, we have to fail this query + for _, expr := range hp.qp.SelectExprs { + colExpr := expr.Col.Expr + if !sqlparser.IsAggregation(colExpr) && sqlparser.ContainsAggregation(colExpr) { + return vterrors.New(vtrpcpb.Code_UNIMPLEMENTED, "unsupported: in scatter query: complex aggregate expression") + } + } + } + + return nil } func hasUniqueVindex(vschema ContextVSchema, semTable *semantics.SemTable, groupByExprs []abstract.GroupBy) bool { @@ -217,11 +235,11 @@ func planGroupByGen4(groupExpr abstract.GroupBy, plan logicalPlan, semTable *sem } } -func planOrderByUsingGroupBy(qp *abstract.QueryProjection, plan logicalPlan, semTable *semantics.SemTable) (logicalPlan, bool, error) { +func (hp *horizonPlanning) planOrderByUsingGroupBy() (logicalPlan, error) { var orderExprs []abstract.OrderBy - for _, groupExpr := range qp.GroupByExprs { + for _, groupExpr := range hp.qp.GroupByExprs { addExpr := true - for _, orderExpr := range qp.OrderExprs { + for _, orderExpr := range hp.qp.OrderExprs { if sqlparser.EqualsExpr(groupExpr.Inner, orderExpr.Inner.Expr) { addExpr = false break @@ -235,37 +253,47 @@ func planOrderByUsingGroupBy(qp *abstract.QueryProjection, plan logicalPlan, sem } } if len(orderExprs) > 0 { - return planOrderBy(qp, orderExprs, plan, semTable) + return hp.planOrderBy(orderExprs, hp.plan) } - return plan, false, nil + return hp.plan, nil } -func planOrderBy(qp *abstract.QueryProjection, orderExprs []abstract.OrderBy, plan logicalPlan, semTable *semantics.SemTable) (logicalPlan, bool, error) { +func (hp *horizonPlanning) planOrderBy(orderExprs []abstract.OrderBy, plan logicalPlan) (logicalPlan, error) { switch plan := plan.(type) { case *route: - return planOrderByForRoute(orderExprs, plan, semTable) + newPlan, truncate, err := planOrderByForRoute(orderExprs, plan, hp.semTable) + if err != nil { + return nil, err + } + hp.haveToTruncate(truncate) + return newPlan, nil case *joinGen4: - return planOrderByForJoin(qp, orderExprs, plan, semTable) + newPlan, err := hp.planOrderByForJoin(orderExprs, plan) + if err != nil { + return nil, err + } + + return newPlan, nil case *orderedAggregate: for _, order := range orderExprs { if sqlparser.ContainsAggregation(order.WeightStrExpr) { ms, err := createMemorySortPlanOnAggregation(plan, orderExprs) if err != nil { - return nil, false, err + return nil, err } - return ms, false, nil + return ms, nil } } - newInput, colAdded, err := planOrderBy(qp, orderExprs, plan.input, semTable) + newInput, err := hp.planOrderBy(orderExprs, plan.input) if err != nil { - return nil, false, err + return nil, err } plan.input = newInput - return plan, colAdded, nil + return plan, nil case *memorySort: - return plan, false, nil + return plan, nil default: - return nil, false, semantics.Gen4NotSupportedF("ordering on complex query %T", plan) + return nil, semantics.Gen4NotSupportedF("ordering on complex query %T", plan) } } @@ -335,16 +363,23 @@ func needsWeightString(tbl semantics.TableInfo, colName *sqlparser.ColName) bool return true // we didn't find the column. better to add just to be safe1 } -func planOrderByForJoin(qp *abstract.QueryProjection, orderExprs []abstract.OrderBy, plan *joinGen4, semTable *semantics.SemTable) (logicalPlan, bool, error) { - if allLeft(orderExprs, semTable, plan.Left.ContainsTables()) { - newLeft, _, err := planOrderBy(qp, orderExprs, plan.Left, semTable) +func (hp *horizonPlanning) planOrderByForJoin(orderExprs []abstract.OrderBy, plan *joinGen4) (logicalPlan, error) { + if allLeft(orderExprs, hp.semTable, plan.Left.ContainsTables()) { + newLeft, err := hp.planOrderBy(orderExprs, plan.Left) if err != nil { - return nil, false, err + return nil, err } plan.Left = newLeft - return plan, false, nil + hp.needsTruncation = false // since this is a join, we can safely + // add extra columns and not need to truncate them + return plan, nil + } + sortPlan, truncate, err := createMemorySortPlan(plan, orderExprs, hp.semTable) + if err != nil { + return nil, err } - return createMemorySortPlan(plan, orderExprs, semTable) + hp.haveToTruncate(truncate) + return sortPlan, nil } func createMemorySortPlanOnAggregation(plan *orderedAggregate, orderExprs []abstract.OrderBy) (logicalPlan, error) { From b8d193f3419828a7542cb2904f82d4bd1d167cd8 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Fri, 16 Jul 2021 10:44:56 +0200 Subject: [PATCH 13/14] move code around Signed-off-by: Andres Taylor --- .../{selectGen4.go => horizon_planning.go} | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) rename go/vt/vtgate/planbuilder/{selectGen4.go => horizon_planning.go} (97%) diff --git a/go/vt/vtgate/planbuilder/selectGen4.go b/go/vt/vtgate/planbuilder/horizon_planning.go similarity index 97% rename from go/vt/vtgate/planbuilder/selectGen4.go rename to go/vt/vtgate/planbuilder/horizon_planning.go index 14d3c31900e..dc27578b58b 100644 --- a/go/vt/vtgate/planbuilder/selectGen4.go +++ b/go/vt/vtgate/planbuilder/horizon_planning.go @@ -374,11 +374,10 @@ func (hp *horizonPlanning) planOrderByForJoin(orderExprs []abstract.OrderBy, pla // add extra columns and not need to truncate them return plan, nil } - sortPlan, truncate, err := createMemorySortPlan(plan, orderExprs, hp.semTable) + sortPlan, err := hp.createMemorySortPlan(plan, orderExprs) if err != nil { return nil, err } - hp.haveToTruncate(truncate) return sortPlan, nil } @@ -422,7 +421,7 @@ func findExprInOrderedAggr(plan *orderedAggregate, order abstract.OrderBy) (int, return 0, 0, false } -func createMemorySortPlan(plan logicalPlan, orderExprs []abstract.OrderBy, semTable *semantics.SemTable) (logicalPlan, bool, error) { +func (hp *horizonPlanning) createMemorySortPlan(plan logicalPlan, orderExprs []abstract.OrderBy) (logicalPlan, error) { primitive := &engine.MemorySort{} ms := &memorySort{ resultsBuilder: resultsBuilder{ @@ -433,13 +432,12 @@ func createMemorySortPlan(plan logicalPlan, orderExprs []abstract.OrderBy, semTa eMemorySort: primitive, } - var colAdded bool for _, order := range orderExprs { - offset, weightStringOffset, added, err := wrapAndPushExpr(order.Inner.Expr, order.WeightStrExpr, plan, semTable) + offset, weightStringOffset, added, err := wrapAndPushExpr(order.Inner.Expr, order.WeightStrExpr, plan, hp.semTable) if err != nil { - return nil, false, err + return nil, err } - colAdded = colAdded || added + hp.haveToTruncate(added) ms.eMemorySort.OrderBy = append(ms.eMemorySort.OrderBy, engine.OrderByParams{ Col: offset, WeightStringCol: weightStringOffset, @@ -447,7 +445,7 @@ func createMemorySortPlan(plan logicalPlan, orderExprs []abstract.OrderBy, semTa StarColFixedIndex: offset, }) } - return ms, colAdded, nil + return ms, nil } func allLeft(orderExprs []abstract.OrderBy, semTable *semantics.SemTable, lhsTables semantics.TableSet) bool { From 548a08afb1983734ef6bd8e87a2ffb63c20dbbcb Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Fri, 16 Jul 2021 15:05:29 +0200 Subject: [PATCH 14/14] clean ups Signed-off-by: Andres Taylor --- go/vt/sqlparser/ast_funcs.go | 2 +- go/vt/vtgate/planbuilder/ordered_aggregate.go | 24 +------- go/vt/vtgate/planbuilder/project.go | 2 +- .../testdata/postprocess_cases.txt | 60 +++++++++++++++++++ .../planbuilder/testdata/select_cases.txt | 4 +- .../testdata/unsupported_cases.txt | 60 ------------------- 6 files changed, 65 insertions(+), 87 deletions(-) diff --git a/go/vt/sqlparser/ast_funcs.go b/go/vt/sqlparser/ast_funcs.go index 248cb35ec2c..de6df303125 100644 --- a/go/vt/sqlparser/ast_funcs.go +++ b/go/vt/sqlparser/ast_funcs.go @@ -1368,7 +1368,7 @@ func ToString(exprs []TableExpr) string { } // ContainsAggregation returns true if the expression contains aggregation -func ContainsAggregation(e Expr) bool { +func ContainsAggregation(e SQLNode) bool { hasAggregates := false _ = Walk(func(node SQLNode) (kontinue bool, err error) { if IsAggregation(node) { diff --git a/go/vt/vtgate/planbuilder/ordered_aggregate.go b/go/vt/vtgate/planbuilder/ordered_aggregate.go index d7e9a0a29bc..cf4bbed2e42 100644 --- a/go/vt/vtgate/planbuilder/ordered_aggregate.go +++ b/go/vt/vtgate/planbuilder/ordered_aggregate.go @@ -74,8 +74,7 @@ func (pb *primitiveBuilder) checkAggregates(sel *sqlparser.Select) error { } // Check if we can allow aggregates. - hasAggregates := nodeHasAggregates(sel.SelectExprs) || len(sel.GroupBy) > 0 - + hasAggregates := sqlparser.ContainsAggregation(sel.SelectExprs) || len(sel.GroupBy) > 0 if !hasAggregates && !sel.Distinct { return nil } @@ -134,27 +133,6 @@ func (pb *primitiveBuilder) checkAggregates(sel *sqlparser.Select) error { return nil } -func nodeHasAggregates(node sqlparser.SQLNode) bool { - hasAggregates := false - _ = sqlparser.Walk(func(node sqlparser.SQLNode) (kontinue bool, err error) { - switch node := node.(type) { - case *sqlparser.FuncExpr: - if node.IsAggregate() { - hasAggregates = true - return false, errors.New("unused error") - } - case *sqlparser.GroupConcatExpr: - hasAggregates = true - return false, errors.New("unused error") - case *sqlparser.Subquery: - // Subqueries are analyzed by themselves. - return false, nil - } - return true, nil - }, node) - return hasAggregates -} - // groupbyHasUniqueVindex looks ahead at the group by expression to see if // it references a unique vindex. // diff --git a/go/vt/vtgate/planbuilder/project.go b/go/vt/vtgate/planbuilder/project.go index 3a1989cfee4..669118346e4 100644 --- a/go/vt/vtgate/planbuilder/project.go +++ b/go/vt/vtgate/planbuilder/project.go @@ -83,7 +83,7 @@ func planProjection(pb *primitiveBuilder, in logicalPlan, expr *sqlparser.Aliase } // Ensure that there are no aggregates in the expression. - if nodeHasAggregates(expr.Expr) { + if sqlparser.ContainsAggregation(expr.Expr) { return nil, nil, 0, errors.New("unsupported: in scatter query: complex aggregate expression") } diff --git a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt index a06bec71dc1..3d956547db4 100644 --- a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.txt @@ -1546,3 +1546,63 @@ Gen4 plan same as above ] } } + +# join order by with ambiguous column reference ; valid in MySQL +"select name, name from user, music order by name" +"ambiguous symbol reference: `name`" +{ + "QueryType": "SELECT", + "Original": "select name, name from user, music order by name", + "Instructions": { + "OperatorType": "Join", + "Variant": "Join", + "JoinColumnIndexes": "-1,-1", + "TableName": "`user`_music", + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select `name`, weight_string(`name`) from `user` where 1 != 1", + "OrderBy": "(0|1) ASC", + "Query": "select `name`, weight_string(`name`) from `user` order by `name` asc", + "Table": "`user`" + }, + { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select 1 from music where 1 != 1", + "Query": "select 1 from music", + "Table": "music" + } + ] + } +} + +# order by with ambiguous column reference ; valid in MySQL +"select id, id from user order by id" +"ambiguous symbol reference: id" +{ + "QueryType": "SELECT", + "Original": "select id, id from user order by id", + "Instructions": { + "OperatorType": "Route", + "Variant": "SelectScatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select id, id, weight_string(id) from `user` where 1 != 1", + "OrderBy": "(0|2) ASC", + "Query": "select id, id, weight_string(id) from `user` order by id asc", + "ResultColumns": 2, + "Table": "`user`" + } +} diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.txt b/go/vt/vtgate/planbuilder/testdata/select_cases.txt index 06aa843b48a..c4992c4b7ac 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.txt @@ -1687,10 +1687,10 @@ Gen4 plan same as above } # select from unsharded keyspace into outfile -"select * from main.unsharded into outfile 'x.txt' character set binary fields terminated by 'term' optionally enclosed by 'c' escaped by 'e' lines starting by 'a' terminated by '\n'" +"select * from main.unsharded into outfile 'x.txt' character set binary fields terminated by 'term' optionally enclosed by 'c' escaped by 'e' lines starting by 'a' terminated by '\\n'" { "QueryType": "SELECT", - "Original": "select * from main.unsharded into outfile 'x.txt' character set binary fields terminated by 'term' optionally enclosed by 'c' escaped by 'e' lines starting by 'a' terminated by '\n'", + "Original": "select * from main.unsharded into outfile 'x.txt' character set binary fields terminated by 'term' optionally enclosed by 'c' escaped by 'e' lines starting by 'a' terminated by '\\n'", "Instructions": { "OperatorType": "Route", "Variant": "SelectUnsharded", diff --git a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt index f0b32471c79..f8d35069956 100644 --- a/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt +++ b/go/vt/vtgate/planbuilder/testdata/unsupported_cases.txt @@ -118,66 +118,6 @@ Gen4 error: aggregate functions take a single argument 'count(a, b)' "select id, b as id, count(*) from user order by id" "ambiguous symbol reference: id" -# order by with ambiguous column reference ; valid in MySQL -"select id, id from user order by id" -"ambiguous symbol reference: id" -{ - "QueryType": "SELECT", - "Original": "select id, id from user order by id", - "Instructions": { - "OperatorType": "Route", - "Variant": "SelectScatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select id, id, weight_string(id) from `user` where 1 != 1", - "OrderBy": "(0|2) ASC", - "Query": "select id, id, weight_string(id) from `user` order by id asc", - "ResultColumns": 2, - "Table": "`user`" - } -} - -# join order by with ambiguous column reference ; valid in MySQL -"select name, name from user, music order by name" -"ambiguous symbol reference: `name`" -{ - "QueryType": "SELECT", - "Original": "select name, name from user, music order by name", - "Instructions": { - "OperatorType": "Join", - "Variant": "Join", - "JoinColumnIndexes": "-1,-1", - "TableName": "`user`_music", - "Inputs": [ - { - "OperatorType": "Route", - "Variant": "SelectScatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select `name`, weight_string(`name`) from `user` where 1 != 1", - "OrderBy": "(0|1) ASC", - "Query": "select `name`, weight_string(`name`) from `user` order by `name` asc", - "Table": "`user`" - }, - { - "OperatorType": "Route", - "Variant": "SelectScatter", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select 1 from music where 1 != 1", - "Query": "select 1 from music", - "Table": "music" - } - ] - } -} - # scatter aggregate with ambiguous aliases "select distinct a, b as a from user" "generating order by clause: ambiguous symbol reference: a"