Skip to content

Commit

Permalink
opt: accelerate % with text inv indexes
Browse files Browse the repository at this point in the history
Teach the inverted filter planner to support the text % text operator on
inverted text indexes.

Release note: None
  • Loading branch information
jordanlewis committed May 6, 2022
1 parent f0e3619 commit a9404c6
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 5 deletions.
25 changes: 25 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/trigram_indexes
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,28 @@ SELECT * FROM a@a_t_idx WHERE t LIKE 'blahf'
query IT
SELECT * FROM a@a_t_idx WHERE t LIKE 'fblah'
----

# Test the acceleration of the % similarity operator.
# By default, the threshold for searching is .3.
query FIT
SELECT similarity(t, 'blar'), * FROM a@a_t_idx WHERE t % 'blar'
----
0.428571432828903 3 blah

query FIT
SELECT similarity(t, 'byar'), * FROM a@a_t_idx WHERE t % 'byar'
----

query FIT
SELECT similarity(t, 'fooz'), * FROM a@a_t_idx WHERE t % 'fooz' ORDER BY a
----
0.400000005960464 1 foozoopa
0.5 2 Foo

statement ok
SET pg_trgm.similarity_threshold=.45

query FIT
SELECT similarity(t, 'fooz'), * FROM a@a_t_idx WHERE t % 'fooz'
----
0.5 2 Foo
111 changes: 111 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/trigram_index
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,114 @@ vectorized: true
missing stats
table: a@a_pkey
spans: FULL SCAN

# Test that trigram indexes can't support searches with no constant args.
# columns.
query T
EXPLAIN SELECT * FROM a WHERE b LIKE b
----
distribution: local
vectorized: true
·
• filter
│ filter: b LIKE b
└── • scan
missing stats
table: a@a_pkey
spans: FULL SCAN

# Test that trigram indexes accelerate the % operator.
query T
EXPLAIN SELECT * FROM a WHERE b % 'foo'
----
distribution: local
vectorized: true
·
• filter
│ filter: b % 'foo'
└── • index join
│ table: a@a_pkey
└── • scan
missing stats
table: a@a_b_idx
spans: 1 span

# Test that trigram indexes accelerate the % operator with an OR if the
# constant has more than one trigram.
query T
EXPLAIN SELECT * FROM a WHERE b % 'foob'
----
distribution: local
vectorized: true
·
• filter
│ filter: b % 'foob'
└── • index join
│ table: a@a_pkey
└── • inverted filter
│ inverted column: b_inverted_key
│ num spans: 2
└── • scan
missing stats
table: a@a_b_idx
spans: 2 spans

# Test that trigram indexes can't accelerate the % operator if there are fewer
# than 3 characters in the constant.
query T
EXPLAIN SELECT * FROM a WHERE b % 'fo'
----
distribution: local
vectorized: true
·
• filter
│ filter: b % 'fo'
└── • scan
missing stats
table: a@a_pkey
spans: FULL SCAN

# Test that trigram indexes can accelerate the % operator in reverse order.
query T
EXPLAIN SELECT * FROM a WHERE 'blah' % b
----
distribution: local
vectorized: true
·
• filter
│ filter: 'blah' % b
└── • index join
│ table: a@a_pkey
└── • inverted filter
│ inverted column: b_inverted_key
│ num spans: 2
└── • scan
missing stats
table: a@a_b_idx
spans: 2 spans

# Test that trigram indexes can't accelerate the % operator with no constant
# columns.
query T
EXPLAIN SELECT * FROM a WHERE b % b
----
distribution: local
vectorized: true
·
• filter
│ filter: b % b
└── • scan
missing stats
table: a@a_pkey
spans: FULL SCAN
18 changes: 17 additions & 1 deletion pkg/sql/opt/invertedidx/trigram.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,26 @@ func (t *trigramFilterPlanner) extractInvertedFilterConditionFromLeaf(
) {
var constantVal opt.ScalarExpr
var left, right opt.ScalarExpr
var allTrigramsMustMatch bool
switch e := expr.(type) {
// Both ILIKE and LIKE are supported because the index entries are always
// downcased. We re-check the condition no matter what later.
case *memo.ILikeExpr:
left, right = e.Left, e.Right
// If we're doing a LIKE (or ILIKE) expression, we need to construct an AND out of all
// of the spans: we need to find results that match every single one of the
// trigrams in the constant datum.
allTrigramsMustMatch = true
case *memo.LikeExpr:
left, right = e.Left, e.Right
allTrigramsMustMatch = true
case *memo.ModExpr:
// If we're doing a % expression (similarity threshold), we need to construct
// an OR out of the spans: we need to find results that match any of the
// trigrams in the constant datum, and we'll filter the results further
// afterwards.
left, right = e.Left, e.Right
allTrigramsMustMatch = false
default:
// Only the above types are supported.
return inverted.NonInvertedColExpression{}, expr, nil
Expand All @@ -53,6 +66,9 @@ func (t *trigramFilterPlanner) extractInvertedFilterConditionFromLeaf(
constantVal = right
} else if isIndexColumn(t.tabID, t.index, right, t.computedColumns) && memo.CanExtractConstDatum(left) {
constantVal = left
} else {
// Can only accelerate with a single constant value.
return inverted.NonInvertedColExpression{}, expr, nil
}
d := memo.ExtractConstDatum(constantVal)
if d.ResolvedType() != types.String {
Expand All @@ -61,7 +77,7 @@ func (t *trigramFilterPlanner) extractInvertedFilterConditionFromLeaf(
))
}
var err error
invertedExpr, err = rowenc.EncodeLikeTrigramSpans(d.(*tree.DString))
invertedExpr, err = rowenc.EncodeTrigramSpans(d.(*tree.DString), allTrigramsMustMatch)
if err != nil {
// An inverted expression could not be extracted.
return inverted.NonInvertedColExpression{}, expr, nil
Expand Down
14 changes: 10 additions & 4 deletions pkg/sql/rowenc/index_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -840,9 +840,11 @@ func encodeOverlapsArrayInvertedIndexSpans(
return invertedExpr, nil
}

// EncodeLikeTrigramSpans returns the spans that must be scanned to look up all
// trigrams present in the input string.
func EncodeLikeTrigramSpans(val *tree.DString) (inverted.Expression, error) {
// EncodeTrigramSpans returns the spans that must be scanned to look up trigrams
// present in the input string. If allMustMatch is true, the resultant inverted
// expression must match every trigram in the input. Otherwise, it will match
// any trigram in the input.
func EncodeTrigramSpans(val *tree.DString, allMustMatch bool) (inverted.Expression, error) {
chunks := strings.Split(string(*val), "%")
// Each chunk will have at minimum 2 trigrams, so start at that default size.
keys := make([][]byte, 0, len(chunks)*2)
Expand Down Expand Up @@ -876,7 +878,11 @@ func EncodeLikeTrigramSpans(val *tree.DString) (inverted.Expression, error) {
if ret == nil {
ret = spanExpr
} else {
ret = inverted.And(ret, spanExpr)
if allMustMatch {
ret = inverted.And(ret, spanExpr)
} else {
ret = inverted.Or(ret, spanExpr)
}
}
}
// The result is never tight. We always need to re-check the condition once
Expand Down

0 comments on commit a9404c6

Please sign in to comment.