Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

colexec: fix LIKE operators when patterns have escape characters #68289

Merged
merged 2 commits into from
Aug 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 24 additions & 16 deletions pkg/sql/colexec/colbuilder/execplan.go
Original file line number Diff line number Diff line change
Expand Up @@ -1889,11 +1889,7 @@ func planSelectionOperators(
case tree.In, tree.NotIn:
negate := cmpOp.Symbol == tree.NotIn
datumTuple, ok := tree.AsDTuple(constArg)
if !ok || tupleContainsTuples(datumTuple) {
// Optimized IN operator is supported only on constant
// expressions that don't contain tuples (because tuples
// require special null-handling logic), so we fall back to
// the default comparison operator.
if !ok || useDefaultCmpOpForIn(datumTuple) {
break
}
op, err = colexec.GetInOperator(lTyp, leftOp, leftIdx, datumTuple, negate)
Expand Down Expand Up @@ -2249,10 +2245,20 @@ func planProjectionExpr(
}
allocator := colmem.NewAllocator(ctx, acc, factory)
resultIdx = -1

cmpProjOp, isCmpProjOp := projOp.(tree.ComparisonOperator)
var hasOptimizedOp bool
if isCmpProjOp {
switch cmpProjOp.Symbol {
case tree.Like, tree.NotLike, tree.In, tree.NotIn, tree.IsDistinctFrom, tree.IsNotDistinctFrom:
hasOptimizedOp = true
}
}
// There are 3 cases. Either the left is constant, the right is constant,
// or neither is constant.
if lConstArg, lConst := left.(tree.Datum); lConst {
// Case one: The left is constant.
if lConstArg, lConst := left.(tree.Datum); lConst && !hasOptimizedOp {
// Case one: The left is constant (and we don't have an optimized
// operator for this expression).
// Normally, the optimizer normalizes binary exprs so that the constant
// argument is on the right side. This doesn't happen for
// non-commutative operators such as - and /, though, so we still need
Expand Down Expand Up @@ -2292,8 +2298,6 @@ func planProjectionExpr(
right = tupleDatum
}

cmpProjOp, isCmpProjOp := projOp.(tree.ComparisonOperator)

// We have a special case behavior for Is{Not}DistinctFrom before
// checking whether the right expression is constant below in order to
// extract NULL from the cast expression.
Expand Down Expand Up @@ -2325,11 +2329,7 @@ func planProjectionExpr(
case tree.In, tree.NotIn:
negate := cmpProjOp.Symbol == tree.NotIn
datumTuple, ok := tree.AsDTuple(rConstArg)
if !ok || tupleContainsTuples(datumTuple) {
// Optimized IN operator is supported only on constant
// expressions that don't contain tuples (because tuples
// require special null-handling logic), so we fall back to
// the default comparison operator.
if !ok || useDefaultCmpOpForIn(datumTuple) {
break
}
op, err = colexec.GetInProjectionOperator(
Expand Down Expand Up @@ -2491,8 +2491,16 @@ func appendOneType(typs []*types.T, t *types.T) []*types.T {
return newTyps
}

func tupleContainsTuples(tuple *tree.DTuple) bool {
for _, typ := range tuple.ResolvedType().TupleContents() {
// useDefaultCmpOpForIn returns whether IN and NOT IN projection/selection
// operators should be handled via the default operators. This is the case when
// we have an empty tuple or the tuple contains other tuples (these cases
// require special null-handling logic).
func useDefaultCmpOpForIn(tuple *tree.DTuple) bool {
tupleContents := tuple.ResolvedType().TupleContents()
if len(tupleContents) == 0 {
return true
}
for _, typ := range tupleContents {
if typ.Family() == types.TupleFamily {
return true
}
Expand Down
15 changes: 12 additions & 3 deletions pkg/sql/colexec/colexeccmp/like_ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,19 @@ func GetLikeOperatorType(pattern string, negate bool) (LikeOpType, string, error
}
return LikeAlwaysMatch, "", nil
}
if len(pattern) > 1 && !strings.ContainsAny(pattern[1:len(pattern)-1], "_%") {
// There are no wildcards in the middle of the string, so we only need to
// use a regular expression if both the first and last characters are
hasEscape := strings.Contains(pattern, `\`)
if len(pattern) > 1 && !strings.ContainsAny(pattern[1:len(pattern)-1], "_%") && !hasEscape {
// There are no wildcards in the middle of the string as well as no
// escape characters in the whole string, so we only need to use a
// regular expression if both the first and last characters are
// wildcards.
//
// The presence of escape characters breaks the assumptions of the
// optimized versions since we can no longer use the string as-is for a
// direct match - we'd need to do some preprocessing here to remove the
// escape characters.
// TODO(yuzefovich): add that preprocessing (for example, `\\` needs to
// be replaced with `\`).
firstChar := pattern[0]
lastChar := pattern[len(pattern)-1]
if !isWildcard(firstChar) && !isWildcard(lastChar) {
Expand Down
9 changes: 9 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/vectorize
Original file line number Diff line number Diff line change
Expand Up @@ -1271,3 +1271,12 @@ SELECT b FROM t66706@u WHERE NOT (b = 'foo')
----
bar
bar

# Regression test for ignoring the escaping in the LIKE pattern (#68040).
statement ok
CREATE TABLE t68040 (c) AS SELECT 'string with \ backslash'

query T
SELECT c FROM t68040 WHERE c LIKE '%\\%'
----
string with \ backslash