Skip to content

Commit

Permalink
invertedidx: add test for trigram span creation
Browse files Browse the repository at this point in the history
Release note: None
  • Loading branch information
jordanlewis committed Jun 4, 2022
1 parent 9a18193 commit 0bc7511
Showing 1 changed file with 129 additions and 0 deletions.
129 changes: 129 additions & 0 deletions pkg/sql/opt/invertedidx/trigram_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package invertedidx_test

import (
"testing"

"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/opt/invertedidx"
"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
"github.com/cockroachdb/cockroach/pkg/sql/opt/testutils"
"github.com/cockroachdb/cockroach/pkg/sql/opt/testutils/testcat"
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/stretchr/testify/require"
)

// TestTryFilterTrigram verifies that invertedidx.TryFilterInvertedIndex
// constrains a trigram inverted index when expected for LIKE, ILIKE,
// similarity (%), and equality filters. It does not check the contents of the
// returned span expression; it only checks whether a constraint was produced,
// that the expression is never tight, whether it is unique, and that the
// remaining filters equal the original filters.
func TestTryFilterTrigram(t *testing.T) {
	semaCtx := tree.MakeSemaContext()
	st := cluster.MakeTestingClusterSettings()
	evalCtx := eval.NewTestingEvalContext(st)

	// The catalog is named cat (rather than tc) so that it is not shadowed by
	// the per-test-case loop variable below.
	cat := testcat.New()
	if _, err := cat.ExecuteDDL(
		"CREATE TABLE t (s STRING, INVERTED INDEX (s gin_trgm_ops))",
	); err != nil {
		t.Fatal(err)
	}
	var f norm.Factory
	f.Init(evalCtx, cat)
	md := f.Metadata()
	tn := tree.NewUnqualifiedTableName("t")
	tab := md.AddTable(cat.Table(tn), tn)
	// Ordinal of the trigram inverted index within table t.
	// NOTE(review): presumably ordinal 0 is the implicit primary index that the
	// test catalog creates for a table without an explicit key -- confirm.
	trigramOrd := 1

	// If we can create an inverted filter with the given filter expression and
	// index, ok=true. If the spans in the resulting inverted index constraint
	// do not have duplicate primary keys, unique=true. Trigram span expressions
	// are never expected to be tight, so for every case with ok=true the test
	// also asserts that Tight is false and that the remaining filters are
	// identical to the original filters.
	testCases := []struct {
		filters string
		ok      bool
		unique  bool
	}{
		// Test LIKE and ILIKE with a leading and/or trailing percent.
		// TODO(jordan): we could make expressions with just a single trigram
		// tight, because we would know for sure that we wouldn't need to recheck
		// the condition once the row is returned. But, it's probably not that
		// important of an optimization.
		{filters: "s LIKE '%foo%'", ok: true, unique: true},
		{filters: "s LIKE '%blorp%'", ok: true, unique: false},
		{filters: "s LIKE 'foo%'", ok: true, unique: true},
		{filters: "s LIKE 'blorp%'", ok: true, unique: false},
		{filters: "s ILIKE '%foo%'", ok: true, unique: true},
		{filters: "s ILIKE '%blorp%'", ok: true, unique: false},
		// Queries that are too short to have any trigrams do not produce filters.
		{filters: "s LIKE '%fo%'", ok: false},
		{filters: "s ILIKE '%fo%'", ok: false},
		{filters: "s LIKE '%fo%ab%ru%'", ok: false},

		// AND and OR for two LIKE queries behave as expected.
		{filters: "s LIKE '%lkj%' AND s LIKE '%bla%'", ok: true, unique: true},
		{filters: "s LIKE '%lkj%' OR s LIKE '%bla%'", ok: true, unique: false},

		// Similarity queries.
		{filters: "s % 'lkjsdlkj'", ok: true, unique: false},
		{filters: "s % 'lkj'", ok: true, unique: true},
		// Can't generate trigrams from such a short constant.
		{filters: "s % 'lj'", ok: false},

		// AND and OR for two similarity queries behave as expected.
		{filters: "s % 'lkj' AND s % 'bla'", ok: true, unique: true},
		{filters: "s % 'lkj' OR s % 'bla'", ok: true, unique: false},

		// Can combine similarity and LIKE queries and still get inverted
		// expressions.
		{filters: "s % 'lkj' AND s LIKE 'blort'", ok: true, unique: false},
		{filters: "s % 'lkj' OR s LIKE 'blort'", ok: true, unique: false},

		// Equality queries.
		{filters: "s = 'lkjsdlkj'", ok: true, unique: false},
		{filters: "s = 'lkj'", ok: true, unique: true},
		{filters: "s = 'lkj' OR s LIKE 'blah'", ok: true, unique: false},
	}

	for _, tc := range testCases {
		// Each case runs as its own subtest so that one failing filter does not
		// prevent the remaining cases from being checked and reported.
		t.Run(tc.filters, func(t *testing.T) {
			filters := testutils.BuildFilters(t, &f, &semaCtx, evalCtx, tc.filters)

			// We're not testing that the correct SpanExpression is returned here;
			// that is tested elsewhere. This is just testing that we are
			// constraining the index when we expect to and we have the correct
			// values for tight, unique, and remainingFilters.
			spanExpr, _, remainingFilters, _, ok := invertedidx.TryFilterInvertedIndex(
				evalCtx,
				&f,
				filters,
				nil, /* optionalFilters */
				tab,
				md.Table(tab).Index(trigramOrd),
				nil, /* computedColumns */
			)
			if tc.ok != ok {
				t.Fatalf("for (%s), expected ok=%v, but got %v", tc.filters, tc.ok, ok)
			}
			if !ok {
				// No inverted filter was produced, so there is nothing else to
				// check for this case.
				return
			}

			if spanExpr.Tight {
				t.Fatalf("for (%s), never expected the inverted expression to be tight", tc.filters)
			}
			if tc.unique != spanExpr.Unique {
				t.Fatalf("for (%s), expected unique=%v, but got %v", tc.filters, tc.unique, spanExpr.Unique)
			}

			// The expression is not tight, so all of the original filters must
			// remain to be re-applied.
			require.Equal(t, filters.String(), remainingFilters.String(),
				"mismatched remaining filters")
		})
	}
}

0 comments on commit 0bc7511

Please sign in to comment.