Skip to content

Commit

Permalink
parser: fix GetTypeFromValidSQLSyntax for collated strings
Browse files Browse the repository at this point in the history
Previously, `GetTypeFromValidSQLSyntax` would result in an error when
attempting to parse a collated string. This is due to an expression like
`1::STRING COLLATE en` being parsed as `CollateExpr(CastExpr)` whereas
the function expected just `CastExpr`. This is now fixed by having
a special case for the collated strings.

Additionally, this commit adds a couple of testing improvements around
the collated strings:
- a representative collated string type is now included into
`randgen.SeedTypes` (this actually exposed a minor bug in recently
introduced `tree.DatumPrev` and `tree.DatumNext` methods, now fixed in
this commit)
- a new test is introduced that asserts that for all (with a few
exceptions) type families at least one representative type is included
into `randgen.SeedTypes`.

Release note (bug fix): Previously, `ALTER TABLE ... INJECT STATISTICS`
command would fail if a column with COLLATED STRING type had histograms
to be injected, and this is now fixed. The bug has been present since at
least 21.2.
  • Loading branch information
yuzefovich committed Feb 23, 2023
1 parent 27e4d3c commit 47ca688
Show file tree
Hide file tree
Showing 11 changed files with 125 additions and 33 deletions.
1 change: 1 addition & 0 deletions pkg/internal/sqlsmith/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ go_test(
"//pkg/testutils/sqlutils",
"//pkg/testutils/testcluster",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/randutil",
],
)
Expand Down
4 changes: 4 additions & 0 deletions pkg/internal/sqlsmith/sqlsmith_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

Expand All @@ -37,6 +38,7 @@ var (
// TestSetups verifies that all setups generate executable SQL.
func TestSetups(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
defer utilccl.TestingEnableEnterprise()()

for name, setup := range Setups {
Expand Down Expand Up @@ -82,6 +84,7 @@ func TestSetups(t *testing.T) {
// false-negative.
func TestRandTableInserts(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

ctx := context.Background()
s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{})
Expand Down Expand Up @@ -145,6 +148,7 @@ func TestRandTableInserts(t *testing.T) {
// sometimes put them into bad states that the parser would never do.
func TestGenerateParse(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
defer utilccl.TestingEnableEnterprise()()

ctx := context.Background()
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/parser/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ go_test(
deps = [
"//pkg/sql/lexbase",
"//pkg/sql/pgwire/pgerror",
"//pkg/sql/randgen",
"//pkg/sql/sem/builtins",
"//pkg/sql/sem/tree",
"//pkg/sql/sem/tree/treebin",
Expand All @@ -69,9 +70,11 @@ go_test(
"//pkg/testutils/sqlutils",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/randutil",
"@com_github_cockroachdb_datadriven//:datadriven",
"@com_github_cockroachdb_errors//:errors",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
],
)

Expand Down
13 changes: 13 additions & 0 deletions pkg/sql/parser/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,19 @@ func GetTypeFromValidSQLSyntax(sql string) (tree.ResolvableTypeReference, error)
if err != nil {
return nil, err
}
return GetTypeFromCastOrCollate(expr)
}

// GetTypeFromCastOrCollate returns the type of the given tree.Expr. The method
// assumes that the expression is either tree.CastExpr or tree.CollateExpr
// (which wraps the tree.CastExpr).
func GetTypeFromCastOrCollate(expr tree.Expr) (tree.ResolvableTypeReference, error) {
// COLLATE clause has lower precedence than the cast, so if we have
// something like `1::STRING COLLATE en`, it'll be parsed as
// CollateExpr(CastExpr).
if collate, ok := expr.(*tree.CollateExpr); ok {
return types.MakeCollatedString(types.String, collate.Locale), nil
}

cast, ok := expr.(*tree.CastExpr)
if !ok {
Expand Down
21 changes: 21 additions & 0 deletions pkg/sql/parser/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,19 @@ import (

"github.com/cockroachdb/cockroach/pkg/sql/parser"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/randgen"
_ "github.com/cockroachdb/cockroach/pkg/sql/sem/builtins"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree/treebin"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree/treecmp"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
_ "github.com/cockroachdb/cockroach/pkg/util/log" // for flags
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/datadriven"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

var issueLinkRE = regexp.MustCompile("https://go.crdb.dev/issue-v/([0-9]+)/.*")
Expand Down Expand Up @@ -746,3 +749,21 @@ func BenchmarkParse(b *testing.B) {
})
}
}

func TestGetTypeFromValidSQLSyntax(t *testing.T) {
rng, _ := randutil.NewTestRand()

const numRuns = 1000
for i := 0; i < numRuns; i++ {
orig := randgen.RandType(rng)
typeRef, err := parser.GetTypeFromValidSQLSyntax(orig.SQLString())
require.NoError(t, err)
actual, ok := tree.GetStaticallyKnownType(typeRef)
require.True(t, ok)
// TODO(yuzefovich): ideally, we'd assert that the returned type is
// equal to the original one; however, there are some subtle differences
// at the moment (like the width might only be set on the returned
// type), so we simply assert that the OIDs are the same.
require.Equal(t, orig.Oid(), actual.Oid())
}
}
5 changes: 4 additions & 1 deletion pkg/sql/randgen/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -60,22 +60,25 @@ go_test(
"main_test.go",
"mutator_test.go",
"schema_test.go",
"types_test.go",
],
args = ["-test.timeout=295s"],
embed = [":randgen"],
deps = [
":randgen",
"//pkg/base",
"//pkg/ccl",
"//pkg/ccl/utilccl",
"//pkg/security/securityassets",
"//pkg/security/securitytest",
"//pkg/server",
"//pkg/sql/sem/tree",
"//pkg/sql/types",
"//pkg/testutils/serverutils",
"//pkg/testutils/sqlutils",
"//pkg/testutils/testcluster",
"//pkg/util/leaktest",
"//pkg/util/randutil",
"@com_github_cockroachdb_errors//:errors",
"@com_github_stretchr_testify//require",
],
)
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/randgen/mutator.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,12 @@ func statisticsMutator(
return
}
colType := tree.MustBeStaticallyKnownType(col.Type)
if colType.Family() == types.CollatedStringFamily {
// Collated strings are not roundtrippable during
// encoding/decoding, so we cannot always make a valid
// histogram.
return
}
h := randHistogram(rng, colType)
stat := colStats[col.Name]
if err := stat.SetHistogram(&h); err != nil {
Expand Down
8 changes: 8 additions & 0 deletions pkg/sql/randgen/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ func init() {
}
}

// Add a collated string separately (since it shares the oid with the STRING
// type and, thus, wasn't included above).
collatedStringType := types.MakeCollatedString(types.String, "en" /* locale */)
SeedTypes = append(SeedTypes, collatedStringType)
if IsAllowedForArray(collatedStringType) {
arrayContentsTypes = append(arrayContentsTypes, collatedStringType)
}

// Sort these so randomly chosen indexes always point to the same element.
sort.Slice(SeedTypes, func(i, j int) bool {
return SeedTypes[i].String() < SeedTypes[j].String()
Expand Down
54 changes: 54 additions & 0 deletions pkg/sql/randgen/types_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package randgen

import (
"fmt"
"testing"

"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/errors"
)

// TestSeedTypes verifies that at least one representative type is included into
// SeedTypes for all (with a few exceptions) type families.
func TestSeedTypes(t *testing.T) {
defer leaktest.AfterTest(t)()

noFamilyRepresentative := make(map[types.Family]struct{})
loop:
for id := range types.Family_name {
familyID := types.Family(id)
switch familyID {
case types.EnumFamily:
// Enums need to created separately.
continue loop
case types.EncodedKeyFamily:
// It's not a real type.
continue loop
case types.UnknownFamily, types.AnyFamily:
// These are not included on purpose.
continue loop
}
noFamilyRepresentative[familyID] = struct{}{}
}
for _, typ := range SeedTypes {
delete(noFamilyRepresentative, typ.Family())
}
if len(noFamilyRepresentative) > 0 {
s := "no representative for "
for f := range noFamilyRepresentative {
s += fmt.Sprintf("%s (%d) ", types.Family_name[int32(f)], f)
}
t.Fatal(errors.Errorf("%s", s))
}
}
33 changes: 6 additions & 27 deletions pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -5801,7 +5801,7 @@ func AdjustValueToType(typ *types.T, inVal Datum) (outVal Datum, err error) {
// with a reasonable previous datum that is smaller than the given one.
//
// The return value is undefined if Datum.IsMin returns true or if the value is
// NaN of an infinity (for floats and decimals).
// NaN or an infinity (for floats and decimals).
func DatumPrev(
datum Datum, cmpCtx CompareContext, collationEnv *CollationEnvironment,
) (Datum, bool) {
Expand Down Expand Up @@ -5838,16 +5838,6 @@ func DatumPrev(
return nil, false
}
return NewDString(prev), true
case *DCollatedString:
prev, ok := prevString(d.Contents)
if !ok {
return nil, false
}
c, err := NewDCollatedString(prev, d.Locale, collationEnv)
if err != nil {
return nil, false
}
return c, true
case *DBytes:
prev, ok := prevString(string(*d))
if !ok {
Expand All @@ -5860,8 +5850,8 @@ func DatumPrev(
return NewDInterval(prev, types.DefaultIntervalTypeMetadata), true
default:
// TODO(yuzefovich): consider adding support for other datums that don't
// have Datum.Prev implementation (DBitArray, DGeography, DGeometry,
// DBox2D, DJSON, DArray).
// have Datum.Prev implementation (DCollatedString, DBitArray,
// DGeography, DGeometry, DBox2D, DJSON, DArray).
return datum.Prev(cmpCtx)
}
}
Expand All @@ -5872,7 +5862,7 @@ func DatumPrev(
// with a reasonable next datum that is greater than the given one.
//
// The return value is undefined if Datum.IsMax returns true or if the value is
// NaN of an infinity (for floats and decimals).
// NaN or an infinity (for floats and decimals).
func DatumNext(
datum Datum, cmpCtx CompareContext, collationEnv *CollationEnvironment,
) (Datum, bool) {
Expand All @@ -5890,24 +5880,13 @@ func DatumNext(
return nil, false
}
return &next, true
case *DCollatedString:
s := NewDString(d.Contents)
next, ok := s.Next(cmpCtx)
if !ok {
return nil, false
}
c, err := NewDCollatedString(string(*next.(*DString)), d.Locale, collationEnv)
if err != nil {
return nil, false
}
return c, true
case *DInterval:
next := d.Add(duration.MakeDuration(1000000 /* nanos */, 0 /* days */, 0 /* months */))
return NewDInterval(next, types.DefaultIntervalTypeMetadata), true
default:
// TODO(yuzefovich): consider adding support for other datums that don't
// have Datum.Next implementation (DGeography, DGeometry, DBox2D,
// DJSON).
// have Datum.Next implementation (DCollatedString, DGeography,
// DGeometry, DBox2D, DJSON).
return datum.Next(cmpCtx)
}
}
10 changes: 5 additions & 5 deletions pkg/workload/schemachange/operation_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -3606,11 +3606,11 @@ func (og *operationGenerator) typeFromTypeName(
if err != nil {
return nil, errors.Wrapf(err, "typeFromTypeName: %s", typeName)
}
typ, err := tree.ResolveType(
ctx,
stmt.AST.(*tree.Select).Select.(*tree.SelectClause).Exprs[0].Expr.(*tree.CastExpr).Type,
&txTypeResolver{tx: tx},
)
typRef, err := parser.GetTypeFromCastOrCollate(stmt.AST.(*tree.Select).Select.(*tree.SelectClause).Exprs[0].Expr)
if err != nil {
return nil, errors.Wrapf(err, "GetTypeFromCastOrCollate: %s", typeName)
}
typ, err := tree.ResolveType(ctx, typRef, &txTypeResolver{tx: tx})
if err != nil {
return nil, errors.Wrapf(err, "ResolveType: %v", typeName)
}
Expand Down

0 comments on commit 47ca688

Please sign in to comment.