Skip to content

Commit

Permalink
[release-20.0-rc] feat: add a LIMIT 1 on EXISTS subqueries to limit n…
Browse files Browse the repository at this point in the history
…etwork overhead (#16153) (#16192)

Signed-off-by: Andres Taylor <[email protected]>
Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com>
  • Loading branch information
vitess-bot[bot] authored Jun 15, 2024
1 parent 1f308de commit 6986d74
Show file tree
Hide file tree
Showing 9 changed files with 485 additions and 89 deletions.
165 changes: 164 additions & 1 deletion go/vt/vtgate/planbuilder/operators/query_planning.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package operators
import (
"fmt"
"io"
"strconv"

"vitess.io/vitess/go/vt/sqlparser"
"vitess.io/vitess/go/vt/vterrors"
Expand Down Expand Up @@ -244,8 +245,170 @@ func tryPushLimit(ctx *plancontext.PlanningContext, in *Limit) (Operator, *Apply
}

return src, Rewrote(fmt.Sprintf("push limit to %s of apply join", side))
case *Limit:
combinedLimit := mergeLimits(in.AST, src.AST)
if combinedLimit == nil {
break
}
// we can remove the other LIMIT
in.AST = combinedLimit
in.Source = src.Source
return in, Rewrote("merged two limits")

}
return setUpperLimit(in)
}

func mergeLimits(l1, l2 *sqlparser.Limit) *sqlparser.Limit {
// To merge two relational LIMIT operators with LIMIT and OFFSET, we need to combine their
// LIMIT and OFFSET values appropriately.
// Let's denote the first LIMIT operator as LIMIT_1 with LIMIT_1 and OFFSET_1,
// and the second LIMIT operator as LIMIT_2 with LIMIT_2 and OFFSET_2.
// The second LIMIT operator receives the output of the first LIMIT operator, meaning the first LIMIT and
// OFFSET are applied first, and then the second LIMIT and OFFSET are applied to the resulting subset.
//
// The goal is to determine the effective combined LIMIT and OFFSET values when applying these two operators sequentially.
//
// Combined Offset:
// The combined offset (OFFSET_combined) is the sum of the two offsets because you need to skip OFFSET_1 rows first,
// and then apply the second offset OFFSET_2 to the result.
// OFFSET_combined = OFFSET_1 + OFFSET_2

// Combined Limit:
// The combined limit (LIMIT_combined) needs to account for both limits. The effective limit should not exceed the rows returned by the first limit,
// so it is the minimum of the remaining rows after the first offset and the second limit.
// LIMIT_combined = min(LIMIT_2, LIMIT_1 - OFFSET_2)

// Note: If LIMIT_1 - OFFSET_2 is negative or zero, it means there are no rows left to limit, so LIMIT_combined should be zero.

// Example:
// First LIMIT operator: LIMIT 10 OFFSET 5 (LIMIT_1 = 10, OFFSET_1 = 5)
// Second LIMIT operator: LIMIT 7 OFFSET 3 (LIMIT_2 = 7, OFFSET_2 = 3)

// Calculations:
// Combined OFFSET:
// OFFSET_combined = 5 + 3 = 8

// Combined LIMIT:
// remaining rows after OFFSET_2 = 10 - 3 = 7
// LIMIT_combined = min(7, 7) = 7

// So, the combined result would be:
// LIMIT 7 OFFSET 8

// This method ensures that the final combined LIMIT and OFFSET correctly reflect the sequential application of the two original operators.
combinedLimit, failed := mergeLimitExpressions(l1.Rowcount, l2.Rowcount, l2.Offset)
if failed {
return nil
}
combinedOffset, failed := mergeOffsetExpressions(l1.Offset, l2.Offset)
if failed {
return nil
}

return &sqlparser.Limit{
Offset: combinedOffset,
Rowcount: combinedLimit,
}
}

func mergeOffsetExpressions(e1, e2 sqlparser.Expr) (expr sqlparser.Expr, failed bool) {
switch {
case e1 == nil && e2 == nil:
return nil, false
case e1 == nil:
return e2, false
case e2 == nil:
return e1, false
default:
return setUpperLimit(in)
v1str, ok := e1.(*sqlparser.Literal)
if !ok {
return nil, true
}
v2str, ok := e2.(*sqlparser.Literal)
if !ok {
return nil, true
}
v1, _ := strconv.Atoi(v1str.Val)
v2, _ := strconv.Atoi(v2str.Val)
return sqlparser.NewIntLiteral(strconv.Itoa(v1 + v2)), false
}
}

// mergeLimitExpressions merges two LIMIT expressions with an OFFSET expression.
// l1: First LIMIT expression.
// l2: Second LIMIT expression.
// off2: Second OFFSET expression.
// Returns the merged LIMIT expression and a boolean indicating if the merge failed.
func mergeLimitExpressions(l1, l2, off2 sqlparser.Expr) (expr sqlparser.Expr, failed bool) {
switch {
// If both limits are nil, there's nothing to merge, return nil without failure.
case l1 == nil && l2 == nil:
return nil, false

// If the first limit is nil, the second limit determines the final limit.
case l1 == nil:
return l2, false

// If the second limit is nil, calculate the remaining limit after applying the offset to the first limit.
case l2 == nil:
if off2 == nil {
// No offset, so the first limit is used directly.
return l1, false
}
off2, ok := off2.(*sqlparser.Literal)
if !ok {
// If the offset is not a literal, fail the merge.
return nil, true
}
lim1str, ok := l1.(*sqlparser.Literal)
if !ok {
// If the first limit is not a literal, return the first limit without failing.
return nil, false
}
// Calculate the remaining limit after the offset.
off2int, _ := strconv.Atoi(off2.Val)
l1int, _ := strconv.Atoi(lim1str.Val)
lim := l1int - off2int
if lim < 0 {
lim = 0
}
return sqlparser.NewIntLiteral(strconv.Itoa(lim)), false

default:
v1str, ok1 := l1.(*sqlparser.Literal)
if ok1 && v1str.Val == "1" {
// If the first limit is "1", it dominates, so return it.
return l1, false
}
v2str, ok2 := l2.(*sqlparser.Literal)
if ok2 && v2str.Val == "1" {
// If the second limit is "1", it dominates, so return it.
return l2, false
}
if !ok1 || !ok2 {
// If either limit is not a literal, fail the merge.
return nil, true
}

var off2int int
if off2 != nil {
off2, ok := off2.(*sqlparser.Literal)
if !ok {
// If the offset is not a literal, fail the merge.
return nil, true
}
off2int, _ = strconv.Atoi(off2.Val)
}

v1, _ := strconv.Atoi(v1str.Val)
v2, _ := strconv.Atoi(v2str.Val)
lim := min(v2, v1-off2int)
if lim < 0 {
// If the combined limit is negative, set it to zero.
lim = 0
}
return sqlparser.NewIntLiteral(strconv.Itoa(lim)), false
}
}

Expand Down
185 changes: 185 additions & 0 deletions go/vt/vtgate/planbuilder/operators/query_planning_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
Copyright 2024 The Vitess Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package operators

import (
"testing"

"github.com/stretchr/testify/assert"

"vitess.io/vitess/go/vt/sqlparser"
)

func TestMergeOffsetExpressions(t *testing.T) {
tests := []struct {
name string
offset1 sqlparser.Expr
offset2 sqlparser.Expr
expectedExpr sqlparser.Expr
expectedFailed bool
}{
{
name: "both offsets are integers",
offset1: sqlparser.NewIntLiteral("5"),
offset2: sqlparser.NewIntLiteral("3"),
expectedExpr: sqlparser.NewIntLiteral("8"),
expectedFailed: false,
},
{
name: "first offset is nil",
offset1: nil,
offset2: sqlparser.NewIntLiteral("3"),
expectedExpr: sqlparser.NewIntLiteral("3"),
expectedFailed: false,
},
{
name: "second offset is nil",
offset1: sqlparser.NewIntLiteral("5"),
offset2: nil,
expectedExpr: sqlparser.NewIntLiteral("5"),
expectedFailed: false,
},
{
name: "both offsets are nil",
offset1: nil,
offset2: nil,
expectedExpr: nil,
expectedFailed: false,
},
{
name: "first offset is argument",
offset1: sqlparser.NewArgument("offset1"),
offset2: sqlparser.NewIntLiteral("3"),
expectedExpr: nil,
expectedFailed: true,
},
{
name: "second offset is argument",
offset1: sqlparser.NewIntLiteral("5"),
offset2: sqlparser.NewArgument("offset2"),
expectedExpr: nil,
expectedFailed: true,
},
{
name: "both offsets are arguments",
offset1: sqlparser.NewArgument("offset1"),
offset2: sqlparser.NewArgument("offset2"),
expectedExpr: nil,
expectedFailed: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
expr, failed := mergeOffsetExpressions(tt.offset1, tt.offset2)
assert.Equal(t, tt.expectedExpr, expr)
assert.Equal(t, tt.expectedFailed, failed, "failed")
})
}
}

func TestMergeLimitExpressions(t *testing.T) {
tests := []struct {
name string
limit1 sqlparser.Expr
limit2 sqlparser.Expr
offset2 sqlparser.Expr
expectedExpr sqlparser.Expr
expectedFailed bool
}{
{
name: "valid limits and offset",
limit1: sqlparser.NewIntLiteral("10"),
limit2: sqlparser.NewIntLiteral("7"),
offset2: sqlparser.NewIntLiteral("3"),
expectedExpr: sqlparser.NewIntLiteral("7"),
expectedFailed: false,
},
{
name: "remaining rows after offset2 is zero",
limit1: sqlparser.NewIntLiteral("3"),
limit2: sqlparser.NewIntLiteral("7"),
offset2: sqlparser.NewIntLiteral("5"),
expectedExpr: sqlparser.NewIntLiteral("0"),
expectedFailed: false,
},
{
name: "first limit is nil",
limit1: nil,
limit2: sqlparser.NewIntLiteral("7"),
offset2: sqlparser.NewIntLiteral("3"),
expectedExpr: sqlparser.NewIntLiteral("7"),
expectedFailed: false,
},
{
name: "second limit is nil",
limit1: sqlparser.NewIntLiteral("10"),
limit2: nil,
offset2: sqlparser.NewIntLiteral("3"),
expectedExpr: sqlparser.NewIntLiteral("7"),
expectedFailed: false,
},
{
name: "offset2 is nil",
limit1: sqlparser.NewIntLiteral("10"),
limit2: sqlparser.NewIntLiteral("7"),
offset2: nil,
expectedExpr: sqlparser.NewIntLiteral("7"),
expectedFailed: false,
},
{
name: "first limit is argument",
limit1: sqlparser.NewArgument("limit1"),
limit2: sqlparser.NewIntLiteral("7"),
offset2: sqlparser.NewIntLiteral("3"),
expectedExpr: nil,
expectedFailed: true,
},
{
name: "second limit is argument",
limit1: sqlparser.NewIntLiteral("10"),
limit2: sqlparser.NewArgument("limit2"),
offset2: sqlparser.NewIntLiteral("3"),
expectedExpr: nil,
expectedFailed: true,
},
{
name: "offset2 is argument",
limit1: sqlparser.NewIntLiteral("10"),
limit2: sqlparser.NewIntLiteral("7"),
offset2: sqlparser.NewArgument("offset2"),
expectedExpr: nil,
expectedFailed: true,
},
{
name: "all are arguments",
limit1: sqlparser.NewArgument("limit1"),
limit2: sqlparser.NewArgument("limit2"),
offset2: sqlparser.NewArgument("offset2"),
expectedExpr: nil,
expectedFailed: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
expr, failed := mergeLimitExpressions(tt.limit1, tt.limit2, tt.offset2)
assert.Equal(t, tt.expectedExpr, expr)
assert.Equal(t, tt.expectedFailed, failed, "failed")
})
}
}
12 changes: 12 additions & 0 deletions go/vt/vtgate/planbuilder/operators/subquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,21 @@ func (sq *SubQuery) settle(ctx *plancontext.PlanningContext, outer Operator) Ope
var correlatedSubqueryErr = vterrors.VT12001("correlated subquery is only supported for EXISTS")
var subqueryNotAtTopErr = vterrors.VT12001("unmergable subquery can not be inside complex expression")

func (sq *SubQuery) addLimit() {
// for a correlated subquery, we can add a limit 1 to the subquery
sq.Subquery = &Limit{
Source: sq.Subquery,
AST: &sqlparser.Limit{Rowcount: sqlparser.NewIntLiteral("1")},
Top: true,
}
}

func (sq *SubQuery) settleFilter(ctx *plancontext.PlanningContext, outer Operator) Operator {
if len(sq.Predicates) > 0 {
if sq.FilterType != opcode.PulloutExists {
panic(correlatedSubqueryErr)
}
sq.addLimit()
return outer
}

Expand Down Expand Up @@ -260,8 +270,10 @@ func (sq *SubQuery) settleFilter(ctx *plancontext.PlanningContext, outer Operato
var predicates []sqlparser.Expr
switch sq.FilterType {
case opcode.PulloutExists:
sq.addLimit()
predicates = append(predicates, sqlparser.NewArgument(hasValuesArg()))
case opcode.PulloutNotExists:
sq.addLimit()
sq.FilterType = opcode.PulloutExists // it's the same pullout as EXISTS, just with a NOT in front of the predicate
predicates = append(predicates, sqlparser.NewNotExpr(sqlparser.NewArgument(hasValuesArg())))
case opcode.PulloutIn:
Expand Down
Loading

0 comments on commit 6986d74

Please sign in to comment.