From 47ca688f3f6c542a3e71580efa332708278c99cb Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Mon, 6 Feb 2023 16:07:01 -0800 Subject: [PATCH] parser: fix GetTypeFromValidSQLSyntax for collated strings Previously, `GetTypeFromValidSQLSyntax` would result in an error when attempting to parse a collated string. This is due to an expression like `1::STRING COLLATE en` being parsed as `CollateExpr(CastExpr)` whereas the function expected just `CastExpr`. This is now fixed by having a special case for the collated strings. Additionally, this commit adds a couple of testing improvements around the collated strings: - a representative collated string type is now included into `randgen.SeedTypes` (this actually exposed a minor bug in recently introduced `tree.DatumPrev` and `tree.DatumNext` methods, now fixed in this commit) - a new test is introduced that asserts that for all (with a few exceptions) type families at least one representative type is included into `randgen.SeedTypes`. Release note (bug fix): Previously, `ALTER TABLE ... INJECT STATISTICS` command would fail if a column with COLLATED STRING type had histograms to be injected, and this is now fixed. The bug has been present since at least 21.2. --- pkg/internal/sqlsmith/BUILD.bazel | 1 + pkg/internal/sqlsmith/sqlsmith_test.go | 4 ++ pkg/sql/parser/BUILD.bazel | 3 ++ pkg/sql/parser/parse.go | 13 +++++ pkg/sql/parser/parse_test.go | 21 ++++++++ pkg/sql/randgen/BUILD.bazel | 5 +- pkg/sql/randgen/mutator.go | 6 +++ pkg/sql/randgen/type.go | 8 +++ pkg/sql/randgen/types_test.go | 54 +++++++++++++++++++ pkg/sql/sem/tree/datum.go | 33 +++--------- .../schemachange/operation_generator.go | 10 ++-- 11 files changed, 125 insertions(+), 33 deletions(-) create mode 100644 pkg/sql/randgen/types_test.go diff --git a/pkg/internal/sqlsmith/BUILD.bazel b/pkg/internal/sqlsmith/BUILD.bazel index 8966d13303f7..f5a5b83a65cc 100644 --- a/pkg/internal/sqlsmith/BUILD.bazel +++ b/pkg/internal/sqlsmith/BUILD.bazel @@ -62,6 +62,7 @@ go_test( "//pkg/testutils/sqlutils", "//pkg/testutils/testcluster", "//pkg/util/leaktest", + "//pkg/util/log", "//pkg/util/randutil", ], ) diff --git a/pkg/internal/sqlsmith/sqlsmith_test.go b/pkg/internal/sqlsmith/sqlsmith_test.go index 45df6ddb06b3..b07c9ab4c7e3 100644 --- a/pkg/internal/sqlsmith/sqlsmith_test.go +++ b/pkg/internal/sqlsmith/sqlsmith_test.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/randutil" ) @@ -37,6 +38,7 @@ var ( // TestSetups verifies that all setups generate executable SQL. func TestSetups(t *testing.T) { defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) defer utilccl.TestingEnableEnterprise()() for name, setup := range Setups { @@ -82,6 +84,7 @@ func TestSetups(t *testing.T) { // false-negative. func TestRandTableInserts(t *testing.T) { defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) ctx := context.Background() s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{}) @@ -145,6 +148,7 @@ func TestRandTableInserts(t *testing.T) { // sometimes put them into bad states that the parser would never do. func TestGenerateParse(t *testing.T) { defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) defer utilccl.TestingEnableEnterprise()() ctx := context.Background() diff --git a/pkg/sql/parser/BUILD.bazel b/pkg/sql/parser/BUILD.bazel index 64582afbecfd..566303ca6054 100644 --- a/pkg/sql/parser/BUILD.bazel +++ b/pkg/sql/parser/BUILD.bazel @@ -61,6 +61,7 @@ go_test( deps = [ "//pkg/sql/lexbase", "//pkg/sql/pgwire/pgerror", + "//pkg/sql/randgen", "//pkg/sql/sem/builtins", "//pkg/sql/sem/tree", "//pkg/sql/sem/tree/treebin", @@ -69,9 +70,11 @@ go_test( "//pkg/testutils/sqlutils", "//pkg/util/leaktest", "//pkg/util/log", + "//pkg/util/randutil", "@com_github_cockroachdb_datadriven//:datadriven", "@com_github_cockroachdb_errors//:errors", "@com_github_stretchr_testify//assert", + "@com_github_stretchr_testify//require", ], ) diff --git a/pkg/sql/parser/parse.go b/pkg/sql/parser/parse.go index 998d93c8a11d..b4f5715871f4 100644 --- a/pkg/sql/parser/parse.go +++ b/pkg/sql/parser/parse.go @@ -415,6 +415,19 @@ func GetTypeFromValidSQLSyntax(sql string) (tree.ResolvableTypeReference, error) if err != nil { return nil, err } + return GetTypeFromCastOrCollate(expr) +} + +// GetTypeFromCastOrCollate returns the type of the given tree.Expr. The method +// assumes that the expression is either tree.CastExpr or tree.CollateExpr +// (which wraps the tree.CastExpr). +func GetTypeFromCastOrCollate(expr tree.Expr) (tree.ResolvableTypeReference, error) { + // COLLATE clause has lower precedence than the cast, so if we have + // something like `1::STRING COLLATE en`, it'll be parsed as + // CollateExpr(CastExpr). + if collate, ok := expr.(*tree.CollateExpr); ok { + return types.MakeCollatedString(types.String, collate.Locale), nil + } cast, ok := expr.(*tree.CastExpr) if !ok { diff --git a/pkg/sql/parser/parse_test.go b/pkg/sql/parser/parse_test.go index 6575aa1bf740..ae2b33ce1d28 100644 --- a/pkg/sql/parser/parse_test.go +++ b/pkg/sql/parser/parse_test.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/parser" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" + "github.com/cockroachdb/cockroach/pkg/sql/randgen" _ "github.com/cockroachdb/cockroach/pkg/sql/sem/builtins" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree/treebin" @@ -28,9 +29,11 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" _ "github.com/cockroachdb/cockroach/pkg/util/log" // for flags + "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/datadriven" "github.com/cockroachdb/errors" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) var issueLinkRE = regexp.MustCompile("https://go.crdb.dev/issue-v/([0-9]+)/.*") @@ -746,3 +749,21 @@ func BenchmarkParse(b *testing.B) { }) } } + +func TestGetTypeFromValidSQLSyntax(t *testing.T) { + rng, _ := randutil.NewTestRand() + + const numRuns = 1000 + for i := 0; i < numRuns; i++ { + orig := randgen.RandType(rng) + typeRef, err := parser.GetTypeFromValidSQLSyntax(orig.SQLString()) + require.NoError(t, err) + actual, ok := tree.GetStaticallyKnownType(typeRef) + require.True(t, ok) + // TODO(yuzefovich): ideally, we'd assert that the returned type is + // equal to the original one; however, there are some subtle differences + // at the moment (like the width might only be set on the returned + // type), so we simply assert that the OIDs are the same. + require.Equal(t, orig.Oid(), actual.Oid()) + } +} diff --git a/pkg/sql/randgen/BUILD.bazel b/pkg/sql/randgen/BUILD.bazel index 58709ccd3ed8..efa057bed0a3 100644 --- a/pkg/sql/randgen/BUILD.bazel +++ b/pkg/sql/randgen/BUILD.bazel @@ -60,10 +60,11 @@ go_test( "main_test.go", "mutator_test.go", "schema_test.go", + "types_test.go", ], args = ["-test.timeout=295s"], + embed = [":randgen"], deps = [ - ":randgen", "//pkg/base", "//pkg/ccl", "//pkg/ccl/utilccl", @@ -71,11 +72,13 @@ go_test( "//pkg/security/securitytest", "//pkg/server", "//pkg/sql/sem/tree", + "//pkg/sql/types", "//pkg/testutils/serverutils", "//pkg/testutils/sqlutils", "//pkg/testutils/testcluster", "//pkg/util/leaktest", "//pkg/util/randutil", + "@com_github_cockroachdb_errors//:errors", "@com_github_stretchr_testify//require", ], ) diff --git a/pkg/sql/randgen/mutator.go b/pkg/sql/randgen/mutator.go index 9cc4986fc80a..259bc7c394ac 100644 --- a/pkg/sql/randgen/mutator.go +++ b/pkg/sql/randgen/mutator.go @@ -218,6 +218,12 @@ func statisticsMutator( return } colType := tree.MustBeStaticallyKnownType(col.Type) + if colType.Family() == types.CollatedStringFamily { + // Collated strings are not roundtrippable during + // encoding/decoding, so we cannot always make a valid + // histogram. + return + } h := randHistogram(rng, colType) stat := colStats[col.Name] if err := stat.SetHistogram(&h); err != nil { diff --git a/pkg/sql/randgen/type.go b/pkg/sql/randgen/type.go index 8aeaa5b8e126..a97e8c1fc739 100644 --- a/pkg/sql/randgen/type.go +++ b/pkg/sql/randgen/type.go @@ -57,6 +57,14 @@ func init() { } } + // Add a collated string separately (since it shares the oid with the STRING + // type and, thus, wasn't included above). + collatedStringType := types.MakeCollatedString(types.String, "en" /* locale */) + SeedTypes = append(SeedTypes, collatedStringType) + if IsAllowedForArray(collatedStringType) { + arrayContentsTypes = append(arrayContentsTypes, collatedStringType) + } + // Sort these so randomly chosen indexes always point to the same element. sort.Slice(SeedTypes, func(i, j int) bool { return SeedTypes[i].String() < SeedTypes[j].String() diff --git a/pkg/sql/randgen/types_test.go b/pkg/sql/randgen/types_test.go new file mode 100644 index 000000000000..fc1ca32b5249 --- /dev/null +++ b/pkg/sql/randgen/types_test.go @@ -0,0 +1,54 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package randgen + +import ( + "fmt" + "testing" + + "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/errors" +) + +// TestSeedTypes verifies that at least one representative type is included into +// SeedTypes for all (with a few exceptions) type families. +func TestSeedTypes(t *testing.T) { + defer leaktest.AfterTest(t)() + + noFamilyRepresentative := make(map[types.Family]struct{}) +loop: + for id := range types.Family_name { + familyID := types.Family(id) + switch familyID { + case types.EnumFamily: + // Enums need to created separately. + continue loop + case types.EncodedKeyFamily: + // It's not a real type. + continue loop + case types.UnknownFamily, types.AnyFamily: + // These are not included on purpose. + continue loop + } + noFamilyRepresentative[familyID] = struct{}{} + } + for _, typ := range SeedTypes { + delete(noFamilyRepresentative, typ.Family()) + } + if len(noFamilyRepresentative) > 0 { + s := "no representative for " + for f := range noFamilyRepresentative { + s += fmt.Sprintf("%s (%d) ", types.Family_name[int32(f)], f) + } + t.Fatal(errors.Errorf("%s", s)) + } +} diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go index 77985c3eb9e6..ac8e19e32028 100644 --- a/pkg/sql/sem/tree/datum.go +++ b/pkg/sql/sem/tree/datum.go @@ -5801,7 +5801,7 @@ func AdjustValueToType(typ *types.T, inVal Datum) (outVal Datum, err error) { // with a reasonable previous datum that is smaller than the given one. // // The return value is undefined if Datum.IsMin returns true or if the value is -// NaN of an infinity (for floats and decimals). +// NaN or an infinity (for floats and decimals). func DatumPrev( datum Datum, cmpCtx CompareContext, collationEnv *CollationEnvironment, ) (Datum, bool) { @@ -5838,16 +5838,6 @@ func DatumPrev( return nil, false } return NewDString(prev), true - case *DCollatedString: - prev, ok := prevString(d.Contents) - if !ok { - return nil, false - } - c, err := NewDCollatedString(prev, d.Locale, collationEnv) - if err != nil { - return nil, false - } - return c, true case *DBytes: prev, ok := prevString(string(*d)) if !ok { @@ -5860,8 +5850,8 @@ func DatumPrev( return NewDInterval(prev, types.DefaultIntervalTypeMetadata), true default: // TODO(yuzefovich): consider adding support for other datums that don't - // have Datum.Prev implementation (DBitArray, DGeography, DGeometry, - // DBox2D, DJSON, DArray). + // have Datum.Prev implementation (DCollatedString, DBitArray, + // DGeography, DGeometry, DBox2D, DJSON, DArray). return datum.Prev(cmpCtx) } } @@ -5872,7 +5862,7 @@ func DatumPrev( // with a reasonable next datum that is greater than the given one. // // The return value is undefined if Datum.IsMax returns true or if the value is -// NaN of an infinity (for floats and decimals). +// NaN or an infinity (for floats and decimals). func DatumNext( datum Datum, cmpCtx CompareContext, collationEnv *CollationEnvironment, ) (Datum, bool) { @@ -5890,24 +5880,13 @@ func DatumNext( return nil, false } return &next, true - case *DCollatedString: - s := NewDString(d.Contents) - next, ok := s.Next(cmpCtx) - if !ok { - return nil, false - } - c, err := NewDCollatedString(string(*next.(*DString)), d.Locale, collationEnv) - if err != nil { - return nil, false - } - return c, true case *DInterval: next := d.Add(duration.MakeDuration(1000000 /* nanos */, 0 /* days */, 0 /* months */)) return NewDInterval(next, types.DefaultIntervalTypeMetadata), true default: // TODO(yuzefovich): consider adding support for other datums that don't - // have Datum.Next implementation (DGeography, DGeometry, DBox2D, - // DJSON). + // have Datum.Next implementation (DCollatedString, DGeography, + // DGeometry, DBox2D, DJSON). return datum.Next(cmpCtx) } } diff --git a/pkg/workload/schemachange/operation_generator.go b/pkg/workload/schemachange/operation_generator.go index 2db8a6b7f866..4ef6d51342ce 100644 --- a/pkg/workload/schemachange/operation_generator.go +++ b/pkg/workload/schemachange/operation_generator.go @@ -3606,11 +3606,11 @@ func (og *operationGenerator) typeFromTypeName( if err != nil { return nil, errors.Wrapf(err, "typeFromTypeName: %s", typeName) } - typ, err := tree.ResolveType( - ctx, - stmt.AST.(*tree.Select).Select.(*tree.SelectClause).Exprs[0].Expr.(*tree.CastExpr).Type, - &txTypeResolver{tx: tx}, - ) + typRef, err := parser.GetTypeFromCastOrCollate(stmt.AST.(*tree.Select).Select.(*tree.SelectClause).Exprs[0].Expr) + if err != nil { + return nil, errors.Wrapf(err, "GetTypeFromCastOrCollate: %s", typeName) + } + typ, err := tree.ResolveType(ctx, typRef, &txTypeResolver{tx: tx}) if err != nil { return nil, errors.Wrapf(err, "ResolveType: %v", typeName) }