-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
invertedidx: add test for trigram span creation
Release note: None
- Loading branch information
1 parent
9a18193
commit 0bc7511
Showing
1 changed file
with
129 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
// Copyright 2022 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package invertedidx_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/cockroachdb/cockroach/pkg/settings/cluster" | ||
"github.com/cockroachdb/cockroach/pkg/sql/opt/invertedidx" | ||
"github.com/cockroachdb/cockroach/pkg/sql/opt/norm" | ||
"github.com/cockroachdb/cockroach/pkg/sql/opt/testutils" | ||
"github.com/cockroachdb/cockroach/pkg/sql/opt/testutils/testcat" | ||
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval" | ||
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestTryFilterTrigram(t *testing.T) { | ||
semaCtx := tree.MakeSemaContext() | ||
st := cluster.MakeTestingClusterSettings() | ||
evalCtx := eval.NewTestingEvalContext(st) | ||
|
||
tc := testcat.New() | ||
if _, err := tc.ExecuteDDL( | ||
"CREATE TABLE t (s STRING, INVERTED INDEX (s gin_trgm_ops))", | ||
); err != nil { | ||
t.Fatal(err) | ||
} | ||
var f norm.Factory | ||
f.Init(evalCtx, tc) | ||
md := f.Metadata() | ||
tn := tree.NewUnqualifiedTableName("t") | ||
tab := md.AddTable(tc.Table(tn), tn) | ||
trigramOrd := 1 | ||
|
||
// If we can create an inverted filter with the given filter expression and | ||
// index, ok=true. If the spans in the resulting inverted index constraint | ||
// do not have duplicate primary keys, unique=true. If the spans are tight, | ||
// tight=true and remainingFilters="". Otherwise, tight is false and | ||
// remainingFilters contains some or all of the original filters. | ||
testCases := []struct { | ||
filters string | ||
ok bool | ||
unique bool | ||
}{ | ||
// Test LIKE with percents on both sides | ||
// TODO(jordan): we could make expressions with just a single trigram | ||
// tight, because we would know for sure that we wouldn't need to recheck | ||
// the condition once the row is returned. But, it's probably not that | ||
// important of an optimization. | ||
{filters: "s LIKE '%foo%'", ok: true, unique: true}, | ||
{filters: "s LIKE '%blorp%'", ok: true, unique: false}, | ||
{filters: "s LIKE 'foo%'", ok: true, unique: true}, | ||
{filters: "s LIKE 'blorp%'", ok: true, unique: false}, | ||
{filters: "s ILIKE '%foo%'", ok: true, unique: true}, | ||
{filters: "s ILIKE '%blorp%'", ok: true, unique: false}, | ||
// Queries that are too short to have any trigrams do not produce filters. | ||
{filters: "s LIKE '%fo%'", ok: false}, | ||
{filters: "s ILIKE '%fo%'", ok: false}, | ||
{filters: "s LIKE '%fo%ab%ru%'", ok: false}, | ||
|
||
// AND and OR for two LIKE queries behave as expected. | ||
{filters: "s LIKE '%lkj%' AND s LIKE '%bla%'", ok: true, unique: true}, | ||
{filters: "s LIKE '%lkj%' OR s LIKE '%bla%'", ok: true, unique: false}, | ||
|
||
// Similarity queries. | ||
{filters: "s % 'lkjsdlkj'", ok: true, unique: false}, | ||
{filters: "s % 'lkj'", ok: true, unique: true}, | ||
// Can't generate trigrams from such a short constant. | ||
{filters: "s % 'lj'", ok: false}, | ||
|
||
// AND and OR for two similarity queries behave as expected. | ||
{filters: "s % 'lkj' AND s % 'bla'", ok: true, unique: true}, | ||
{filters: "s % 'lkj' OR s % 'bla'", ok: true, unique: false}, | ||
|
||
// Can combine similarity and LIKE queries and still get inverted | ||
// expressions. | ||
{filters: "s % 'lkj' AND s LIKE 'blort'", ok: true, unique: false}, | ||
{filters: "s % 'lkj' OR s LIKE 'blort'", ok: true, unique: false}, | ||
|
||
// Equality queries. | ||
{filters: "s = 'lkjsdlkj'", ok: true, unique: false}, | ||
{filters: "s = 'lkj'", ok: true, unique: true}, | ||
{filters: "s = 'lkj' OR s LIKE 'blah'", ok: true, unique: false}, | ||
} | ||
|
||
for _, tc := range testCases { | ||
t.Logf("test case: %v", tc) | ||
filters := testutils.BuildFilters(t, &f, &semaCtx, evalCtx, tc.filters) | ||
|
||
// We're not testing that the correct SpanExpression is returned here; | ||
// that is tested elsewhere. This is just testing that we are constraining | ||
// the index when we expect to and we have the correct values for tight, | ||
// unique, and remainingFilters. | ||
spanExpr, _, remainingFilters, _, ok := invertedidx.TryFilterInvertedIndex( | ||
evalCtx, | ||
&f, | ||
filters, | ||
nil, /* optionalFilters */ | ||
tab, | ||
md.Table(tab).Index(trigramOrd), | ||
nil, /* computedColumns */ | ||
) | ||
if tc.ok != ok { | ||
t.Fatalf("expected %v, got %v", tc.ok, ok) | ||
} | ||
if !ok { | ||
continue | ||
} | ||
|
||
if spanExpr.Tight { | ||
t.Fatalf("We never expected our inverted expression to be tight") | ||
} | ||
if tc.unique != spanExpr.Unique { | ||
t.Fatalf("For (%s), expected unique=%v, but got %v", tc.filters, tc.unique, spanExpr.Unique) | ||
} | ||
|
||
require.Equal(t, filters.String(), remainingFilters.String(), | ||
"mismatched remaining filters") | ||
} | ||
} |