Skip to content

Commit

Permalink
opt: fix like escape processing for span constraints
Browse files Browse the repository at this point in the history
Fixes: #44123

Previously no attempt was made to properly handle escape ('\\') sequence
in like patterns being turned into constraints. Refactor code used to
process like at runtime to generate a regexp and use that to properly
handle index constraint generation.

Release note (sql change): Escape character processing was missing from
constraint span generation which resulted in incorrect results when
doing escaped like lookups.

Release justification: visible logical error
  • Loading branch information
cucaroach committed Jan 4, 2022
1 parent dc72c17 commit 53c860b
Show file tree
Hide file tree
Showing 6 changed files with 219 additions and 31 deletions.
32 changes: 31 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/select_index
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ statement ok
CREATE TABLE str (k INT PRIMARY KEY, v STRING, INDEX(v))

statement ok
INSERT INTO str VALUES (1, 'A'), (4, 'AB'), (2, 'ABC'), (5, 'ABCD'), (3, 'ABCDEZ'), (9, 'ABD')
INSERT INTO str VALUES (1, 'A'), (4, 'AB'), (2, 'ABC'), (5, 'ABCD'), (3, 'ABCDEZ'), (9, 'ABD'), (10, '\CBA'), (11, 'A%'), (12, 'CAB.*'), (13, 'CABD')

query IT rowsort
SELECT k, v FROM str WHERE v LIKE 'ABC%'
Expand All @@ -263,11 +263,41 @@ SELECT k, v FROM str WHERE v LIKE 'ABC%'
5 ABCD
3 ABCDEZ

query IT rowsort
SELECT k, v FROM str WHERE v LIKE '\ABC%'
----
2 ABC
5 ABCD
3 ABCDEZ

statement error LIKE regexp compilation failed: LIKE pattern must not end with escape character
SELECT k, v FROM str WHERE v LIKE 'ABC\'

query IT rowsort
SELECT k, v FROM str WHERE v LIKE '\\CBA%'
----
10 \CBA

query IT rowsort
SELECT k, v FROM str WHERE v LIKE 'A\%'
----
11 A%

query IT rowsort
SELECT k, v FROM str WHERE v LIKE 'CAB.*'
----
12 CAB.*

query IT rowsort
SELECT k, v FROM str WHERE v LIKE 'ABC%Z'
----
3 ABCDEZ

query IT rowsort
SELECT k, v FROM str WHERE v LIKE '\ABCDE_'
----
3 ABCDEZ

query IT rowsort
SELECT k, v FROM str WHERE v SIMILAR TO 'ABC_*'
----
Expand Down
33 changes: 20 additions & 13 deletions pkg/sql/opt/idxconstraint/index_constraints.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,21 +280,28 @@ func (c *indexConstraintCtx) makeSpansForSingleColumnDatum(

case opt.LikeOp:
if s, ok := tree.AsDString(datum); ok {
if i := strings.IndexAny(string(s), "_%"); i >= 0 {
if i == 0 {
// Mask starts with _ or %.
c.unconstrained(offset, out)
return false
// Normalize the like pattern to a regexp.
if pattern, err := tree.LikeEscape(string(s)); err == nil {
if re, err := regexp.Compile(pattern); err == nil {
prefix, complete := re.LiteralPrefix()
if complete {
c.eqSpan(offset, tree.NewDString(prefix), out)
return true
}
if prefix == "" {
// Mask starts with _ or %.
c.unconstrained(offset, out)
return false
}
c.makeStringPrefixSpan(offset, prefix, out)
// If pattern is simply prefix + .* the span is tight. Also pattern
// will have regexp special chars escaped and so prefix needs to be
// escaped too.
if prefixEscape, err := tree.LikeEscape(prefix); err == nil {
return strings.HasSuffix(pattern, ".*") && strings.TrimSuffix(pattern, ".*") == prefixEscape
}
}
c.makeStringPrefixSpan(offset, string(s[:i]), out)
// A mask like ABC% is equivalent to restricting the prefix to ABC.
// A mask like ABC%Z requires restricting the prefix, but is a stronger
// condition.
return (i == len(s)-1) && s[i] == '%'
}
// No wildcard characters, this is an equality.
c.eqSpan(offset, &s, out)
return true
}

case opt.SimilarToOp:
Expand Down
89 changes: 89 additions & 0 deletions pkg/sql/opt/idxconstraint/testdata/strings
Original file line number Diff line number Diff line change
@@ -1,3 +1,80 @@
index-constraints vars=(a string) index=a
a LIKE 'ABC'
----
[/'ABC' - /'ABC']

# A backslash that isn't escaping anything is just removed from pattern.
index-constraints vars=(a string) index=a
a LIKE '\aABC%'
----
[/'aABC' - /'aABD')

# A backslash that isn't escaping anything is just removed from pattern.
index-constraints vars=(a string) index=a
a LIKE 'A\BC%'
----
[/'ABC' - /'ABD')

# Currently we punt on custom ESCAPE clauses.
index-constraints vars=(a string) index=a
a LIKE '\aABC%' ESCAPE '|'
----
[ - ]
Remaining filter: like_escape(a, e'\\aABC%', '|')

# Single char wildcard requires remaining filter.
index-constraints vars=(a string) index=a
a LIKE '\aABC_'
----
[/'aABC' - /'aABD')
Remaining filter: a LIKE e'\\aABC_'

# Ending with wildcard with other wildcards present isn't tight.
index-constraints vars=(a string) index=a
a LIKE 'AB_C%'
----
[/'AB' - /'AC')
Remaining filter: a LIKE 'AB_C%'

# Ignore zero prefix (wildcard at beginning).
index-constraints vars=(a string) index=a
a LIKE '%ABC'
----
(/NULL - ]
Remaining filter: a LIKE '%ABC'

# Ignore zero prefix (wildcard at beginning).
index-constraints vars=(a string) index=a
a LIKE '_ABC'
----
(/NULL - ]
Remaining filter: a LIKE '_ABC'

# A backslash that is escaping a wildcard becomes equality.
index-constraints vars=(a string) index=a
a LIKE 'ABC\%'
----
[/'ABC%' - /'ABC%']

# A backslash that is escaping a wildcard becomes equality.
index-constraints vars=(a string) index=a
a LIKE 'ABC\_'
----
[/'ABC_' - /'ABC_']

# A backslash that is escaping a wildcard becomes equality.
index-constraints vars=(a string) index=a
a LIKE 'ABC\_Z'
----
[/'ABC_Z' - /'ABC_Z']

# Invalid pattern does not generate index constraints.
index-constraints vars=(a string) index=a
a LIKE 'ABC\'
----
(/NULL - ]
Remaining filter: a LIKE e'ABC\\'

index-constraints vars=(a string) index=a
a LIKE 'ABC%'
----
Expand Down Expand Up @@ -60,6 +137,18 @@ a SIMILAR TO 'ABC.*Z'
[/'ABC' - /'ABD')
Remaining filter: a SIMILAR TO 'ABC.*Z'

index-constraints vars=(a string) index=(a)
a SIMILAR TO 'ABC%Z'
----
[/'ABC' - /'ABD')
Remaining filter: a SIMILAR TO 'ABC%Z'

index-constraints vars=(a string) index=(a)
a SIMILAR TO 'ABC_Z'
----
[/'ABC' - /'ABD')
Remaining filter: a SIMILAR TO 'ABC_Z'

index-constraints vars=(a string) index=(a)
a SIMILAR TO 'ABC'
----
Expand Down
30 changes: 18 additions & 12 deletions pkg/sql/opt/memo/constraint_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,20 +176,26 @@ func (cb *constraintsBuilder) buildSingleColumnConstraintConst(

case opt.LikeOp:
if s, ok := tree.AsDString(datum); ok {
if i := strings.IndexAny(string(s), "_%"); i >= 0 {
if i == 0 {
// Mask starts with _ or %.
return unconstrained, false
// Normalize the like pattern to a RE
if pattern, err := tree.LikeEscape(string(s)); err == nil {
if re, err := regexp.Compile(pattern); err == nil {
prefix, complete := re.LiteralPrefix()
if complete {
return cb.eqSpan(col, tree.NewDString(prefix)), true
}
if prefix == "" {
// Mask starts with _ or %.
return unconstrained, false
}
c := cb.makeStringPrefixSpan(col, prefix)
// If pattern is simply prefix + .* the span is tight. Also pattern
// will have regexp special chars escaped and so prefix needs to be
// escaped too. The original string may have superfluous escape
if prefixEscape, err := tree.LikeEscape(prefix); err == nil {
return c, strings.HasSuffix(pattern, ".*") && strings.TrimSuffix(pattern, ".*") == prefixEscape
}
}
c := cb.makeStringPrefixSpan(col, string(s[:i]))
// A mask like ABC% is equivalent to restricting the prefix to ABC.
// A mask like ABC%Z requires restricting the prefix, but is a stronger
// condition.
tight := (i == len(s)-1) && s[i] == '%'
return c, tight
}
// No wildcard characters, this is an equality.
return cb.eqSpan(col, &s), true
}

case opt.SimilarToOp:
Expand Down
41 changes: 41 additions & 0 deletions pkg/sql/opt/memo/testdata/logprops/constraints
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,47 @@ select
├── variable: v:3 [type=string]
└── const: 'ABC%' [type=string]

opt
SELECT * FROM kuv WHERE v LIKE '\ABC%'
----
select
├── columns: k:1(int!null) u:2(float) v:3(string!null)
├── key: (1)
├── fd: (1)-->(2,3)
├── prune: (1,2)
├── interesting orderings: (+1)
├── scan kuv
│ ├── columns: k:1(int!null) u:2(float) v:3(string)
│ ├── key: (1)
│ ├── fd: (1)-->(2,3)
│ ├── prune: (1-3)
│ └── interesting orderings: (+1)
└── filters
└── like [type=bool, outer=(3), constraints=(/3: [/'ABC' - /'ABD'); tight)]
├── variable: v:3 [type=string]
└── const: e'\\ABC%' [type=string]

# Like doesn't support RE syntax.
opt
SELECT * FROM kuv WHERE v LIKE 'ABC.*'
----
select
├── columns: k:1(int!null) u:2(float) v:3(string!null)
├── key: (1)
├── fd: ()-->(3), (1)-->(2)
├── prune: (1,2)
├── interesting orderings: (+1 opt(3))
├── scan kuv
│ ├── columns: k:1(int!null) u:2(float) v:3(string)
│ ├── key: (1)
│ ├── fd: (1)-->(2,3)
│ ├── prune: (1-3)
│ └── interesting orderings: (+1)
└── filters
└── like [type=bool, outer=(3), constraints=(/3: [/'ABC.*' - /'ABC.*']; tight), fd=()-->(3)]
├── variable: v:3 [type=string]
└── const: 'ABC.*' [type=string]

opt
SELECT * FROM kuv WHERE v LIKE 'ABC_'
----
Expand Down
25 changes: 20 additions & 5 deletions pkg/sql/sem/tree/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -4928,6 +4928,13 @@ type likeKey struct {
escape rune
}

// LikeEscape converts a like pattern to a regexp pattern.
func LikeEscape(pattern string) (string, error) {
key := likeKey{s: pattern, caseInsensitive: false, escape: '\\'}
re, err := key.patternNoAnchor()
return re, err
}

// unescapePattern unescapes a pattern for a given escape token.
// It handles escaped escape tokens properly by maintaining them as the escape
// token in the return string.
Expand Down Expand Up @@ -5287,11 +5294,7 @@ func calculateLengthAfterReplacingCustomEscape(s string, escape rune) (bool, int
return changed, retLen, nil
}

// Pattern implements the RegexpCacheKey interface.
// The strategy for handling custom escape character
// is to convert all unescaped escape character into '\'.
// k.escape can either be empty or a single character.
func (k likeKey) Pattern() (string, error) {
func (k likeKey) patternNoAnchor() (string, error) {
// QuoteMeta escapes all regexp metacharacters (`\.+*?()|[]{}^$`) with a `\`.
pattern := regexp.QuoteMeta(k.s)
var err error
Expand Down Expand Up @@ -5368,6 +5371,18 @@ func (k likeKey) Pattern() (string, error) {
}
}

return pattern, nil
}

// Pattern implements the RegexpCacheKey interface.
// The strategy for handling custom escape character
// is to convert all unescaped escape character into '\'.
// k.escape can either be empty or a single character.
func (k likeKey) Pattern() (string, error) {
pattern, err := k.patternNoAnchor()
if err != nil {
return "", err
}
return anchorPattern(pattern, k.caseInsensitive), nil
}

Expand Down

0 comments on commit 53c860b

Please sign in to comment.