From b028691dd4a6ab6eb5f5bef5f1c38dd90f1edf4f Mon Sep 17 00:00:00 2001 From: Mark Sirek Date: Sat, 25 Mar 2023 15:32:56 -0700 Subject: [PATCH 1/2] tree: apply functions to TEXT expressions compared with the @@ operator The TSQuery and TSVector "matches" operator "@@" returns different results on CRDB vs. Postgres when one of the arguments is a TEXT expression. CRDB always applies a CAST, and only for constants. The rules at https://www.postgresql.org/docs/current/textsearch-intro.html#TEXTSEARCH-MATCHING specify: > The form text @@ tsquery is equivalent to to_tsvector(x) @@ y. > The form text @@ text is equivalent to to_tsvector(x) @@ plainto_tsquery(y). This PR adds these implicit function calls in these "matches" comparison expressions during type checking as well as a cast of TEXT to TSQuery when the other argument is a TSVector, which allows variable expressions to be handled. Fixes #98875 Fixes #98804 Release note (bug fix): This allows the text search @@ ("matches") operator to work with variable expressions and fixes incorrect results when one of the arguments is a TEXT expression and the other argument is a TEXT or TSQuery expression. --- .../logictest/testdata/logic_test/tsvector | 64 ++++++++++++++ pkg/sql/sem/tree/overload.go | 2 +- pkg/sql/sem/tree/type_check.go | 85 +++++++++++++++++-- pkg/sql/sem/tree/type_check_test.go | 7 ++ 4 files changed, 148 insertions(+), 10 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/tsvector b/pkg/sql/logictest/testdata/logic_test/tsvector index 6e4bbff5b1b2..078ba3c51238 100644 --- a/pkg/sql/logictest/testdata/logic_test/tsvector +++ b/pkg/sql/logictest/testdata/logic_test/tsvector @@ -360,3 +360,67 @@ LIMIT 2 ---- 0 + +subtest 98804_regression_test + +statement ok +RESET default_text_search_config + +statement ok +CREATE TABLE ab (a TEXT, b TEXT) + +statement ok +INSERT INTO ab VALUES('fat rats', 'fat cats chased fat, out of shape rats'); + +query B +SELECT a @@ b FROM ab +---- +false + +query B +SELECT b @@ a FROM ab +---- +true + +query B +SELECT 'fat rats' @@ b FROM ab +---- +false + +query B +SELECT b @@ 'fat rats' FROM ab +---- +true + +query B +SELECT a @@ 'fat cats ate fat bats' FROM ab +---- +false + +query B +SELECT 'fat cats ate fat bats' @@ a FROM ab +---- +false + +statement error pq: syntax error in TSQuery: fat cats chased fat, out of shape rats +SELECT b @@ a::tsvector FROM ab + +statement error pq: syntax error in TSQuery: fat cats chased fat, out of shape rats +SELECT a::tsvector @@ b FROM ab + +query B +SELECT 'fat bat cat' @@ 'bats fats' +---- +true + +query B +SELECT 'bats fats' @@ 'fat bat cat' +---- +false + +statement error pq: syntax error in TSQuery: fat cats chased fat, out of shape rats +SELECT 'fat cats chased fat, out of shape rats' @@ 'fat rats'::tsvector + +statement error pq: syntax error in TSQuery: fat cats chased fat, out of shape rats +SELECT 'fat rats'::tsvector @@ 'fat cats chased fat, out of shape rats' + diff --git a/pkg/sql/sem/tree/overload.go b/pkg/sql/sem/tree/overload.go index 8cc2be0648b5..7a0be0d6ead7 100644 --- a/pkg/sql/sem/tree/overload.go +++ b/pkg/sql/sem/tree/overload.go @@ -997,7 +997,7 @@ func (s *overloadTypeChecker) typeCheckOverloadedExprs( return err } - // The fourth heuristic is to prefer candidates that accepts the "best" + // The fourth heuristic is to prefer candidates that accept the "best" // mutual type in the resolvable type set of all constants. if bestConstType, ok := commonConstantType(s.exprs, s.constIdxs); ok { // In case all overloads are filtered out at this step, diff --git a/pkg/sql/sem/tree/type_check.go b/pkg/sql/sem/tree/type_check.go index fa71894b2fbd..b51ddac77b86 100644 --- a/pkg/sql/sem/tree/type_check.go +++ b/pkg/sql/sem/tree/type_check.go @@ -2204,6 +2204,38 @@ func typeCheckComparisonOp( _, leftIsTuple := foldedLeft.(*Tuple) _, rightIsTuple := foldedRight.(*Tuple) _, rightIsSubquery := foldedRight.(SubqueryExpr) + var tsMatchesWithText bool + var typedLeft, typedRight TypedExpr + var leftFamily, rightFamily types.Family + var err error + + // Do an initial check for TEXT @@ XXX special cases which might need to + // inject a to_tsvector or plainto_tsquery function call. + if op.Symbol == treecmp.TSMatches { + if switched { + // The order of operators matters as to which function call to apply. + foldedLeft, foldedRight = foldedRight, foldedLeft + switched = false + } + disallowSwitch = true + typedLeft, err = foldedLeft.TypeCheck(ctx, semaCtx, types.Any) + if err != nil { + sigWithErr := fmt.Sprintf(compExprsFmt, left, op, right, err) + return nil, nil, nil, false, + pgerror.Newf(pgcode.InvalidParameterValue, unsupportedCompErrFmt, sigWithErr) + } + typedRight, err = foldedRight.TypeCheck(ctx, semaCtx, types.Any) + if err != nil { + sigWithErr := fmt.Sprintf(compExprsFmt, left, op, right, err) + return nil, nil, nil, false, + pgerror.Newf(pgcode.InvalidParameterValue, unsupportedCompErrFmt, sigWithErr) + } + leftFamily = typedLeft.ResolvedType().Family() + rightFamily = typedRight.ResolvedType().Family() + if leftFamily == types.StringFamily || rightFamily == types.StringFamily { + tsMatchesWithText = true + } + } handleTupleTypeMismatch := false switch { @@ -2227,7 +2259,7 @@ func typeCheckComparisonOp( pgerror.Newf(pgcode.InvalidParameterValue, unsupportedCompErrFmt, sig) } - typedLeft := typedSubExprs[0] + typedLeft = typedSubExprs[0] typedSubExprs = typedSubExprs[1:] rightTuple.typ = types.MakeTuple(make([]*types.T, len(typedSubExprs))) @@ -2241,7 +2273,7 @@ func typeCheckComparisonOp( return typedLeft, rightTuple, fn, false, nil case foldedOp.Symbol == treecmp.In && rightIsSubquery: - typedLeft, err := foldedLeft.TypeCheck(ctx, semaCtx, types.Any) + typedLeft, err = foldedLeft.TypeCheck(ctx, semaCtx, types.Any) if err != nil { sigWithErr := fmt.Sprintf(compExprsFmt, left, op, right, err) return nil, nil, nil, false, @@ -2257,14 +2289,14 @@ func typeCheckComparisonOp( } desired := types.MakeTuple([]*types.T{typ}) - typedRight, err := foldedRight.TypeCheck(ctx, semaCtx, desired) + typedRight, err = foldedRight.TypeCheck(ctx, semaCtx, desired) if err != nil { sigWithErr := fmt.Sprintf(compExprsFmt, left, op, right, err) return nil, nil, nil, false, pgerror.Newf(pgcode.InvalidParameterValue, unsupportedCompErrFmt, sigWithErr) } - if err := typeCheckSubqueryWithIn( + if err = typeCheckSubqueryWithIn( typedLeft.ResolvedType(), typedRight.ResolvedType(), ); err != nil { return nil, nil, nil, false, err @@ -2279,16 +2311,17 @@ func typeCheckComparisonOp( pgerror.Newf(pgcode.InvalidParameterValue, unsupportedCompErrFmt, sig) } // Using non-folded left and right to avoid having to swap later. - typedLeft, typedRight, err := typeCheckTupleComparison(ctx, semaCtx, op, left.(*Tuple), right.(*Tuple)) + typedLeft, typedRight, err = typeCheckTupleComparison(ctx, semaCtx, op, left.(*Tuple), right.(*Tuple)) if err != nil { return nil, nil, nil, false, err } return typedLeft, typedRight, fn, false, nil case leftIsTuple || rightIsTuple: + var errLeft, errRight error // Tuple must compare with a tuple type, as handled above. - typedLeft, errLeft := foldedLeft.TypeCheck(ctx, semaCtx, types.Any) - typedRight, errRight := foldedRight.TypeCheck(ctx, semaCtx, types.Any) + typedLeft, errLeft = foldedLeft.TypeCheck(ctx, semaCtx, types.Any) + typedRight, errRight = foldedRight.TypeCheck(ctx, semaCtx, types.Any) if errLeft == nil && errRight == nil && ((typedLeft.ResolvedType().Family() == types.TupleFamily && typedRight.ResolvedType().Family() != types.TupleFamily) || @@ -2296,6 +2329,40 @@ func typeCheckComparisonOp( typedLeft.ResolvedType().Family() != types.TupleFamily)) { handleTupleTypeMismatch = true } + case tsMatchesWithText: + // Apply rules from: + // https://www.postgresql.org/docs/current/textsearch-intro.html#TEXTSEARCH-MATCHING + // Perform the following type conversions: + // initial | result + // ------------------------------------------------------------------------- + // a::TEXT @@ b::TEXT | to_tsvector(a) @@ plainto_tsquery(b) + // a::TEXT @@ b::TSQUERY | to_tsvector(a) @@ b + // a::TSQUERY @@ b::TEXT | a @@ to_tsvector(b) + // a::TSVECTOR @@ b::TEXT | a @@ b::TSQUERY + // a::TEXT @@ b::TSVECTOR | a::TSQUERY @@ b + if leftFamily == types.StringFamily { + if rightFamily == types.StringFamily || rightFamily == types.TSQueryFamily { + leftExprs := make(Exprs, 1) + leftExprs[0] = typedLeft + foldedLeft = &FuncExpr{Func: WrapFunction("to_tsvector"), Exprs: leftExprs, AggType: GeneralAgg} + } else if rightFamily == types.TSVectorFamily { + foldedLeft = &CastExpr{Expr: typedLeft, Type: types.TSQuery, SyntaxMode: CastShort} + } + } + + funcName := "plainto_tsquery" + if rightFamily == types.StringFamily { + if leftFamily == types.StringFamily || leftFamily == types.TSQueryFamily { + if leftFamily == types.TSQueryFamily { + funcName = "to_tsvector" + } + rightExprs := make(Exprs, 1) + rightExprs[0] = typedRight + foldedRight = &FuncExpr{Func: WrapFunction(funcName), Exprs: rightExprs, AggType: GeneralAgg} + } else if leftFamily == types.TSVectorFamily { + foldedRight = &CastExpr{Expr: typedRight, Type: types.TSQuery, SyntaxMode: CastShort} + } + } } // For comparisons, we do not stimulate the typing of untyped NULL with the @@ -2356,8 +2423,8 @@ func typeCheckComparisonOp( } leftReturn := leftExpr.ResolvedType() rightReturn := rightExpr.ResolvedType() - leftFamily := leftReturn.Family() - rightFamily := rightReturn.Family() + leftFamily = leftReturn.Family() + rightFamily = rightReturn.Family() // Return early if at least one overload is possible, NULL is an argument, // and none of the overloads accept NULL. diff --git a/pkg/sql/sem/tree/type_check_test.go b/pkg/sql/sem/tree/type_check_test.go index ea9910e1db10..6b7ce62d4c54 100644 --- a/pkg/sql/sem/tree/type_check_test.go +++ b/pkg/sql/sem/tree/type_check_test.go @@ -213,6 +213,13 @@ func TestTypeCheck(t *testing.T) { // String preference. {`st_geomfromgeojson($1)`, `st_geomfromgeojson($1:::STRING):::GEOMETRY`}, + + // TSQuery and TSVector + {`'a' @@ 'b'`, `to_tsvector('a':::STRING) @@ plainto_tsquery('b':::STRING)`}, + {`'a' @@ 'b':::TSQUERY`, `to_tsvector('a':::STRING) @@ '''b''':::TSQUERY`}, + {`'a':::TSQUERY @@ 'b'`, `'''a''':::TSQUERY @@ to_tsvector('b':::STRING)`}, + {`'a' @@ 'b':::TSVECTOR`, `'a':::STRING::TSQUERY @@ '''b''':::TSVECTOR`}, + {`'a':::TSVECTOR @@ 'b'`, `'''a''':::TSVECTOR @@ 'b':::STRING::TSQUERY`}, } ctx := context.Background() for _, d := range testData { From d2e3c38a6518a5339917088498016f87e3e50ab4 Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Wed, 5 Apr 2023 10:56:06 -0400 Subject: [PATCH 2/2] sql: remove unnecessary casts in `@@` operator type-checking Release note: None --- .../logictest/testdata/logic_test/tsvector | 21 ++++++++++++++----- pkg/sql/sem/tree/type_check.go | 4 ---- pkg/sql/sem/tree/type_check_test.go | 4 ++-- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/tsvector b/pkg/sql/logictest/testdata/logic_test/tsvector index 078ba3c51238..f910ec6bcf33 100644 --- a/pkg/sql/logictest/testdata/logic_test/tsvector +++ b/pkg/sql/logictest/testdata/logic_test/tsvector @@ -402,10 +402,10 @@ SELECT 'fat cats ate fat bats' @@ a FROM ab ---- false -statement error pq: syntax error in TSQuery: fat cats chased fat, out of shape rats +statement error pgcode 22023 unsupported comparison operator: @@ SELECT b @@ a::tsvector FROM ab -statement error pq: syntax error in TSQuery: fat cats chased fat, out of shape rats +statement error pgcode 22023 unsupported comparison operator: @@ SELECT a::tsvector @@ b FROM ab query B @@ -418,9 +418,20 @@ SELECT 'bats fats' @@ 'fat bat cat' ---- false -statement error pq: syntax error in TSQuery: fat cats chased fat, out of shape rats +query B +SELECT 'fat' @@ 'fat rats'::tsvector +---- +true + +# TODO(#75101): The error should be "syntax error in TSQuery". +statement error pgcode 22023 unsupported comparison operator: @@ SELECT 'fat cats chased fat, out of shape rats' @@ 'fat rats'::tsvector -statement error pq: syntax error in TSQuery: fat cats chased fat, out of shape rats -SELECT 'fat rats'::tsvector @@ 'fat cats chased fat, out of shape rats' +query B +SELECT 'fat rats'::tsvector @@ 'fat' +---- +true +# TODO(#75101): The error should be "syntax error in TSQuery". +statement error pgcode 22023 unsupported comparison operator: @@ +SELECT 'fat rats'::tsvector @@ 'fat cats chased fat, out of shape rats' diff --git a/pkg/sql/sem/tree/type_check.go b/pkg/sql/sem/tree/type_check.go index b51ddac77b86..85f4482a8051 100644 --- a/pkg/sql/sem/tree/type_check.go +++ b/pkg/sql/sem/tree/type_check.go @@ -2345,8 +2345,6 @@ func typeCheckComparisonOp( leftExprs := make(Exprs, 1) leftExprs[0] = typedLeft foldedLeft = &FuncExpr{Func: WrapFunction("to_tsvector"), Exprs: leftExprs, AggType: GeneralAgg} - } else if rightFamily == types.TSVectorFamily { - foldedLeft = &CastExpr{Expr: typedLeft, Type: types.TSQuery, SyntaxMode: CastShort} } } @@ -2359,8 +2357,6 @@ func typeCheckComparisonOp( rightExprs := make(Exprs, 1) rightExprs[0] = typedRight foldedRight = &FuncExpr{Func: WrapFunction(funcName), Exprs: rightExprs, AggType: GeneralAgg} - } else if leftFamily == types.TSVectorFamily { - foldedRight = &CastExpr{Expr: typedRight, Type: types.TSQuery, SyntaxMode: CastShort} } } } diff --git a/pkg/sql/sem/tree/type_check_test.go b/pkg/sql/sem/tree/type_check_test.go index 6b7ce62d4c54..58a71e41ffdb 100644 --- a/pkg/sql/sem/tree/type_check_test.go +++ b/pkg/sql/sem/tree/type_check_test.go @@ -218,8 +218,8 @@ func TestTypeCheck(t *testing.T) { {`'a' @@ 'b'`, `to_tsvector('a':::STRING) @@ plainto_tsquery('b':::STRING)`}, {`'a' @@ 'b':::TSQUERY`, `to_tsvector('a':::STRING) @@ '''b''':::TSQUERY`}, {`'a':::TSQUERY @@ 'b'`, `'''a''':::TSQUERY @@ to_tsvector('b':::STRING)`}, - {`'a' @@ 'b':::TSVECTOR`, `'a':::STRING::TSQUERY @@ '''b''':::TSVECTOR`}, - {`'a':::TSVECTOR @@ 'b'`, `'''a''':::TSVECTOR @@ 'b':::STRING::TSQUERY`}, + {`'a' @@ 'b':::TSVECTOR`, `'''a''':::TSQUERY @@ '''b''':::TSVECTOR`}, + {`'a':::TSVECTOR @@ 'b'`, `'''a''':::TSVECTOR @@ '''b''':::TSQUERY`}, } ctx := context.Background() for _, d := range testData {