Skip to content

Commit

Permalink
Merge #68289
Browse files Browse the repository at this point in the history
68289: colexec: fix LIKE operators when patterns have escape characters r=yuzefovich a=yuzefovich

**colexec: fix LIKE operators when patterns have escape characters**

Fixes: #68040.

Release note (bug fix): Previously, CockroachDB could incorrectly
evaluate LIKE expressions when the pattern contained the escape
characters `\` if the expressions were executed via the vectorized
engine.

**colbuilder: force planning of optimized projection operators**

Whenever we're planning a projection expression, we have 3 cases: the
left is constant, the right is constants, and neither are constants. For
the second case we have some optimized operators. Previously, those
operators weren't exercised via the TestEval/vectorized because in the
eval tests the left side is constant. This commit switches the planning
to force planning of those optimized operators. This shouldn't really
have an effect on planning of actual queries.

This was prompted by the bug in LIKE operators that is fixed in the
previous commit. Had we forced the planning for our eval tests, we would
have caught it earlier.

This also revealed an incompatibility for our IN operator implementation when
the right side is an empty tuple which this commit also fixes. However,
I don't think this scenario can be hit in production because the
optimizer folds such an expression into correct `false`. Thus, there is
no release note.

Release note: None

Co-authored-by: Yahor Yuzefovich <[email protected]>
  • Loading branch information
craig[bot] and yuzefovich committed Aug 2, 2021
2 parents 0c94dba + 9b590d3 commit bab7f59
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 19 deletions.
40 changes: 24 additions & 16 deletions pkg/sql/colexec/colbuilder/execplan.go
Original file line number Diff line number Diff line change
Expand Up @@ -1914,11 +1914,7 @@ func planSelectionOperators(
case tree.In, tree.NotIn:
negate := cmpOp.Symbol == tree.NotIn
datumTuple, ok := tree.AsDTuple(constArg)
if !ok || tupleContainsTuples(datumTuple) {
// Optimized IN operator is supported only on constant
// expressions that don't contain tuples (because tuples
// require special null-handling logic), so we fallback to
// the default comparison operator.
if !ok || useDefaultCmpOpForIn(datumTuple) {
break
}
op, err = colexec.GetInOperator(lTyp, leftOp, leftIdx, datumTuple, negate)
Expand Down Expand Up @@ -2275,10 +2271,20 @@ func planProjectionExpr(
}
allocator := colmem.NewAllocator(ctx, acc, factory)
resultIdx = -1

cmpProjOp, isCmpProjOp := projOp.(tree.ComparisonOperator)
var hasOptimizedOp bool
if isCmpProjOp {
switch cmpProjOp.Symbol {
case tree.Like, tree.NotLike, tree.In, tree.NotIn, tree.IsDistinctFrom, tree.IsNotDistinctFrom:
hasOptimizedOp = true
}
}
// There are 3 cases. Either the left is constant, the right is constant,
// or neither are constant.
if lConstArg, lConst := left.(tree.Datum); lConst {
// Case one: The left is constant.
if lConstArg, lConst := left.(tree.Datum); lConst && !hasOptimizedOp {
// Case one: The left is constant (and we don't have an optimized
// operator for this expression).
// Normally, the optimizer normalizes binary exprs so that the constant
// argument is on the right side. This doesn't happen for
// non-commutative operators such as - and /, though, so we still need
Expand Down Expand Up @@ -2318,8 +2324,6 @@ func planProjectionExpr(
right = tupleDatum
}

cmpProjOp, isCmpProjOp := projOp.(tree.ComparisonOperator)

// We have a special case behavior for Is{Not}DistinctFrom before
// checking whether the right expression is constant below in order to
// extract NULL from the cast expression.
Expand Down Expand Up @@ -2351,11 +2355,7 @@ func planProjectionExpr(
case tree.In, tree.NotIn:
negate := cmpProjOp.Symbol == tree.NotIn
datumTuple, ok := tree.AsDTuple(rConstArg)
if !ok || tupleContainsTuples(datumTuple) {
// Optimized IN operator is supported only on constant
// expressions that don't contain tuples (because tuples
// require special null-handling logic), so we fallback to
// the default comparison operator.
if !ok || useDefaultCmpOpForIn(datumTuple) {
break
}
op, err = colexec.GetInProjectionOperator(
Expand Down Expand Up @@ -2517,8 +2517,16 @@ func appendOneType(typs []*types.T, t *types.T) []*types.T {
return newTyps
}

func tupleContainsTuples(tuple *tree.DTuple) bool {
for _, typ := range tuple.ResolvedType().TupleContents() {
// useDefaultCmpOpForIn returns whether IN and NOT IN projection/selection
// operators should be handled via the default operators. This is the case when
// we have an empty tuple or the tuple contains other tuples (these cases
// require special null-handling logic).
func useDefaultCmpOpForIn(tuple *tree.DTuple) bool {
tupleContents := tuple.ResolvedType().TupleContents()
if len(tupleContents) == 0 {
return true
}
for _, typ := range tupleContents {
if typ.Family() == types.TupleFamily {
return true
}
Expand Down
15 changes: 12 additions & 3 deletions pkg/sql/colexec/colexeccmp/like_ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,19 @@ func GetLikeOperatorType(pattern string, negate bool) (LikeOpType, string, error
}
return LikeAlwaysMatch, "", nil
}
if len(pattern) > 1 && !strings.ContainsAny(pattern[1:len(pattern)-1], "_%") {
// There are no wildcards in the middle of the string, so we only need to
// use a regular expression if both the first and last characters are
hasEscape := strings.Contains(pattern, `\`)
if len(pattern) > 1 && !strings.ContainsAny(pattern[1:len(pattern)-1], "_%") && !hasEscape {
// There are no wildcards in the middle of the string as well as no
// escape characters in the whole string, so we only need to use a
// regular expression if both the first and last characters are
// wildcards.
//
// The presence of the escape characters breaks the assumptions of the
// optimized versions since we no longer could just use the string for a
// direct match - we'd need to do some preprocessing here to remove the
// escape characters.
// TODO(yuzefovich): add that preprocessing (for example, `\\` needs to
// be replaced with `\`).
firstChar := pattern[0]
lastChar := pattern[len(pattern)-1]
if !isWildcard(firstChar) && !isWildcard(lastChar) {
Expand Down
9 changes: 9 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/vectorize
Original file line number Diff line number Diff line change
Expand Up @@ -1271,3 +1271,12 @@ SELECT b FROM t66706@u WHERE NOT (b = 'foo')
----
bar
bar

# Regression test for ignoring the escaping in the LIKE pattern (#68040).
statement ok
CREATE TABLE t68040 (c) AS SELECT 'string with \ backslash'

query T
SELECT c FROM t68040 WHERE c LIKE '%\\%'
----
string with \ backslash

0 comments on commit bab7f59

Please sign in to comment.