-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sql: inv idx accelerate tsvector@@tsquery queries
This commit adds inverted index acceleration for expressions that evaluate a tsquery against a tsvector using the `@@` operator. Release note (sql change): it's now possible to run efficient tsvector @@ tsquery searches when there is an inverted index on the tsvector column being searched.
- Loading branch information
1 parent
90de576
commit 50871a3
Showing
11 changed files
with
325 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
# LogicTest: local | ||
|
||
statement ok | ||
CREATE TABLE a ( | ||
a INT PRIMARY KEY, | ||
b TSVECTOR, | ||
c TSQUERY, | ||
FAMILY (a,b,c), | ||
INVERTED INDEX(b) | ||
) | ||
|
||
query T | ||
EXPLAIN SELECT * FROM a@a_b_idx WHERE b @@ 'foo' | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• index join | ||
│ table: a@a_pkey | ||
│ | ||
└── • inverted filter | ||
│ inverted column: b_inverted_key | ||
│ num spans: 1 | ||
│ | ||
└── • scan | ||
missing stats | ||
table: a@a_b_idx | ||
spans: 1 span | ||
|
||
query T | ||
EXPLAIN SELECT * FROM a@a_b_idx WHERE b @@ 'Foo' | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• index join | ||
│ table: a@a_pkey | ||
│ | ||
└── • inverted filter | ||
│ inverted column: b_inverted_key | ||
│ num spans: 1 | ||
│ | ||
└── • scan | ||
missing stats | ||
table: a@a_b_idx | ||
spans: 1 span | ||
|
||
query T | ||
EXPLAIN SELECT * FROM a@a_b_idx WHERE b @@ 'foo' OR b @@ 'bar' | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• index join | ||
│ table: a@a_pkey | ||
│ | ||
└── • inverted filter | ||
│ inverted column: b_inverted_key | ||
│ num spans: 2 | ||
│ | ||
└── • scan | ||
missing stats | ||
table: a@a_b_idx | ||
spans: 2 spans | ||
|
||
query T | ||
EXPLAIN SELECT * FROM a@a_b_idx WHERE b @@ 'foo | bar' | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• index join | ||
│ table: a@a_pkey | ||
│ | ||
└── • inverted filter | ||
│ inverted column: b_inverted_key | ||
│ num spans: 2 | ||
│ | ||
└── • scan | ||
missing stats | ||
table: a@a_b_idx | ||
spans: 2 spans | ||
|
||
query T | ||
EXPLAIN SELECT * FROM a@a_b_idx WHERE b @@ 'foo | bar' OR b @@ 'baz' | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• index join | ||
│ table: a@a_pkey | ||
│ | ||
└── • inverted filter | ||
│ inverted column: b_inverted_key | ||
│ num spans: 3 | ||
│ | ||
└── • scan | ||
missing stats | ||
table: a@a_b_idx | ||
spans: 3 spans | ||
|
||
query T | ||
EXPLAIN SELECT * FROM a@a_b_idx WHERE b @@ 'foo & bar' | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• index join | ||
│ table: a@a_pkey | ||
│ | ||
└── • inverted filter | ||
│ inverted column: b_inverted_key | ||
│ num spans: 2 | ||
│ | ||
└── • scan | ||
missing stats | ||
table: a@a_b_idx | ||
spans: 2 spans | ||
|
||
query T | ||
EXPLAIN SELECT * FROM a@a_b_idx WHERE b @@ 'foo <-> bar' | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• index join | ||
│ table: a@a_pkey | ||
│ | ||
└── • inverted filter | ||
│ inverted column: b_inverted_key | ||
│ num spans: 2 | ||
│ | ||
└── • scan | ||
missing stats | ||
table: a@a_b_idx | ||
spans: 2 spans | ||
|
||
query T | ||
EXPLAIN SELECT * FROM a@a_b_idx WHERE b @@ 'foo & !bar' | ||
---- | ||
distribution: local | ||
vectorized: true | ||
· | ||
• index join | ||
│ table: a@a_pkey | ||
│ | ||
└── • inverted filter | ||
│ inverted column: b_inverted_key | ||
│ num spans: 1 | ||
│ | ||
└── • scan | ||
missing stats | ||
table: a@a_b_idx | ||
spans: 1 span | ||
|
||
# Test that a tsvector inverted index can't accelerate the @@ operator when | ||
# neither operand is a constant (here both b and c are columns). | ||
statement error index \"a_b_idx\" is inverted and cannot be used for this query | ||
EXPLAIN SELECT * FROM a@a_b_idx WHERE b @@ c |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// Copyright 2022 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package invertedidx | ||
|
||
import ( | ||
"context" | ||
|
||
"github.com/cockroachdb/cockroach/pkg/sql/inverted" | ||
"github.com/cockroachdb/cockroach/pkg/sql/opt" | ||
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat" | ||
"github.com/cockroachdb/cockroach/pkg/sql/opt/invertedexpr" | ||
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo" | ||
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval" | ||
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree" | ||
"github.com/cockroachdb/cockroach/pkg/sql/types" | ||
"github.com/cockroachdb/errors" | ||
) | ||
|
||
// tsqueryFilterPlanner is the invertedFilterPlanner used for tsvector @@ | ||
// tsquery match expressions. Its fields are the arguments handed to | ||
// isIndexColumn when deciding which operand of a match is the indexed column. | ||
type tsqueryFilterPlanner struct { | ||
// tabID is the table ID passed to isIndexColumn. | ||
tabID opt.TableID | ||
// index is the index passed to isIndexColumn. | ||
// NOTE(review): presumably always an inverted index on a tsvector column - confirm at the construction site. | ||
index cat.Index | ||
// computedColumns is passed through to isIndexColumn. | ||
// NOTE(review): presumably maps computed column IDs to their defining expressions - confirm. | ||
computedColumns map[opt.ColumnID]opt.ScalarExpr | ||
} | ||
|
||
// Compile-time assertion that *tsqueryFilterPlanner satisfies the | ||
// invertedFilterPlanner interface. | ||
var _ invertedFilterPlanner = &tsqueryFilterPlanner{} | ||
|
||
// extractInvertedFilterConditionFromLeaf implements the invertedFilterPlanner | ||
// interface. | ||
func (t *tsqueryFilterPlanner) extractInvertedFilterConditionFromLeaf( | ||
_ context.Context, _ *eval.Context, expr opt.ScalarExpr, | ||
) ( | ||
invertedExpr inverted.Expression, | ||
remainingFilters opt.ScalarExpr, | ||
_ *invertedexpr.PreFiltererStateForInvertedFilterer, | ||
) { | ||
var constantVal opt.ScalarExpr | ||
var left, right opt.ScalarExpr | ||
switch e := expr.(type) { | ||
case *memo.TSMatchesExpr: | ||
left, right = e.Left, e.Right | ||
default: | ||
// Only the above types are supported. | ||
return inverted.NonInvertedColExpression{}, expr, nil | ||
} | ||
if isIndexColumn(t.tabID, t.index, left, t.computedColumns) && memo.CanExtractConstDatum(right) { | ||
constantVal = right | ||
} else if isIndexColumn(t.tabID, t.index, right, t.computedColumns) && memo.CanExtractConstDatum(left) { | ||
constantVal = left | ||
} else { | ||
// Can only accelerate with a single constant value. | ||
return inverted.NonInvertedColExpression{}, expr, nil | ||
} | ||
d := memo.ExtractConstDatum(constantVal) | ||
if d.ResolvedType() != types.TSQuery { | ||
panic(errors.AssertionFailedf( | ||
"trying to apply tsvector inverted index to unsupported type %s", d.ResolvedType(), | ||
)) | ||
} | ||
q := d.(*tree.DTSQuery).TSQuery | ||
var err error | ||
invertedExpr, err = q.GetInvertedExpr() | ||
if err != nil { | ||
// An inverted expression could not be extracted. | ||
return inverted.NonInvertedColExpression{}, expr, nil | ||
} | ||
|
||
// If the extracted inverted expression is not tight then remaining filters | ||
// must be applied after the inverted index scan. | ||
if !invertedExpr.IsTight() { | ||
remainingFilters = expr | ||
} | ||
|
||
// We do not currently support pre-filtering for text search indexes, so | ||
// the returned pre-filter state is nil. | ||
return invertedExpr, remainingFilters, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.