From 9d195ead1212a46c4bb8bc8286ff5420ed238607 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Tue, 9 Aug 2022 17:20:15 -0700 Subject: [PATCH] colexecbase: fix bpchar to bpchar cast This commit fixes an identity cast between `bpchar`s or arrays of `bpchar`s. These types require special handling of trimming trailing whitespace which wasn't done in the "vanilla" identity cast. I believe that these casts don't get planned by the optimizer ever, so there should be no impact, but it fixes a test flake and addresses the corresponding TODO. Release note: None --- pkg/sql/colexec/colexecbase/BUILD.bazel | 1 - pkg/sql/colexec/colexecbase/cast.eg.go | 54 +++++++++++++++++++++++- pkg/sql/colexec/colexecbase/cast_test.go | 32 ++------------ pkg/sql/colexec/colexecbase/cast_tmpl.go | 54 +++++++++++++++++++++++- 4 files changed, 110 insertions(+), 31 deletions(-) diff --git a/pkg/sql/colexec/colexecbase/BUILD.bazel b/pkg/sql/colexec/colexecbase/BUILD.bazel index e6fb834ad91c..9ee0fe549e81 100644 --- a/pkg/sql/colexec/colexecbase/BUILD.bazel +++ b/pkg/sql/colexec/colexecbase/BUILD.bazel @@ -78,7 +78,6 @@ go_test( "//pkg/util/log", "//pkg/util/mon", "//pkg/util/randutil", - "@com_github_lib_pq//oid", "@com_github_stretchr_testify//require", ], ) diff --git a/pkg/sql/colexec/colexecbase/cast.eg.go b/pkg/sql/colexec/colexecbase/cast.eg.go index e37cf814cab7..46288b3d7344 100644 --- a/pkg/sql/colexec/colexecbase/cast.eg.go +++ b/pkg/sql/colexec/colexecbase/cast.eg.go @@ -85,7 +85,15 @@ func GetCastOperator( return &castOpNullAny{castOpBase: base}, nil } if isIdentityCast(fromType, toType) { - return &castIdentityOp{castOpBase: base}, nil + // bpchars require special handling. + if toType.Oid() == oid.T_bpchar { + return &castBPCharIdentityOp{castOpBase: base}, nil + } + // If we don't have an array of bpchars, then we use the identity, + // otherwise we'll fallback to datum-datum cast below. + if toType.Oid() != oid.T__bpchar { + return &castIdentityOp{castOpBase: base}, nil + } } isFromDatum := typeconv.TypeFamilyToCanonicalTypeFamily(fromType.Family()) == typeconv.DatumVecCanonicalTypeFamily isToDatum := typeconv.TypeFamilyToCanonicalTypeFamily(toType.Family()) == typeconv.DatumVecCanonicalTypeFamily @@ -906,6 +914,50 @@ func (c *castIdentityOp) Next() coldata.Batch { return batch } +// castBPCharIdentityOp is a specialization of castIdentityOp which handles +// casts to the bpchar type (which trims trailing whitespaces). +type castBPCharIdentityOp struct { + castOpBase +} + +var _ colexecop.ClosableOperator = &castBPCharIdentityOp{} + +func (c *castBPCharIdentityOp) Next() coldata.Batch { + batch := c.Input.Next() + n := batch.Length() + if n == 0 { + return coldata.ZeroBatch + } + inputVec := batch.ColVec(c.colIdx) + inputCol := inputVec.Bytes() + inputNulls := inputVec.Nulls() + outputVec := batch.ColVec(c.outputIdx) + outputCol := outputVec.Bytes() + outputNulls := outputVec.Nulls() + // Note that the loops below are not as optimized as in other cast operators + // since this operator should only be planned in tests. + c.allocator.PerformOperation([]coldata.Vec{outputVec}, func() { + if sel := batch.Selection(); sel != nil { + for _, i := range sel[:n] { + if inputNulls.NullAt(i) { + outputNulls.SetNull(i) + } else { + outputCol.Set(i, bytes.TrimRight(inputCol.Get(i), " ")) + } + } + } else { + for i := 0; i < n; i++ { + if inputNulls.NullAt(i) { + outputNulls.SetNull(i) + } else { + outputCol.Set(i, bytes.TrimRight(inputCol.Get(i), " ")) + } + } + } + }) + return batch +} + type castNativeToDatumOp struct { castOpBase diff --git a/pkg/sql/colexec/colexecbase/cast_test.go b/pkg/sql/colexec/colexecbase/cast_test.go index b2da1526b730..7c7501c4d0d9 100644 --- a/pkg/sql/colexec/colexecbase/cast_test.go +++ b/pkg/sql/colexec/colexecbase/cast_test.go @@ -29,7 +29,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/randutil" - "github.com/lib/pq/oid" "github.com/stretchr/testify/require" ) @@ -46,12 +45,6 @@ func TestRandomizedCast(t *testing.T) { getValidSupportedCast := func() (from, to *types.T) { for { from, to = randgen.RandType(rng), randgen.RandType(rng) - if from.Oid() == oid.T_void && to.Oid() == oid.T_bpchar { - // Skip the cast from void to char because such setup would get - // stuck forever in the datum generation (due to the TODO - // below). - continue - } if _, ok := tree.LookupCastVolatility(from, to, nil /* sessiondata */); ok { if colexecbase.IsCastSupported(from, to) { return from, to @@ -71,27 +64,10 @@ func TestRandomizedCast(t *testing.T) { toConverter := colconv.GetDatumToPhysicalFn(to) errorExpected := false for i := 0; i < numRows; i++ { - var ( - fromDatum, toDatum tree.Datum - err error - ) - // Datum generation. The loop exists only because of the TODO below. - for { - // We don't allow any NULL datums to be generated, so disable - // this ability in the RandDatum function. - fromDatum = randgen.RandDatum(rng, from, false) - toDatum, err = tree.PerformCast(&evalCtx, fromDatum, to) - if to.Oid() == oid.T_bpchar && string(*toDatum.(*tree.DString)) == "" { - // There is currently a problem when converting an empty - // string datum to a physical representation, so we skip - // such a datum and retry generation. - // TODO(yuzefovich): figure it out. When removing this - // check, remove the special casing for 'void -> char' cast - // above. - continue - } - break - } + // We don't allow any NULL datums to be generated, so disable + // this ability in the RandDatum function. + fromDatum := randgen.RandDatum(rng, from, false) + toDatum, err := tree.PerformCast(&evalCtx, fromDatum, to) var toPhys interface{} if err != nil { errorExpected = true diff --git a/pkg/sql/colexec/colexecbase/cast_tmpl.go b/pkg/sql/colexec/colexecbase/cast_tmpl.go index 28934d0f71ca..16a32188a6cf 100644 --- a/pkg/sql/colexec/colexecbase/cast_tmpl.go +++ b/pkg/sql/colexec/colexecbase/cast_tmpl.go @@ -118,7 +118,15 @@ func GetCastOperator( return &castOpNullAny{castOpBase: base}, nil } if isIdentityCast(fromType, toType) { - return &castIdentityOp{castOpBase: base}, nil + // bpchars require special handling. + if toType.Oid() == oid.T_bpchar { + return &castBPCharIdentityOp{castOpBase: base}, nil + } + // If we don't have an array of bpchars, then we use the identity, + // otherwise we'll fallback to datum-datum cast below. + if toType.Oid() != oid.T__bpchar { + return &castIdentityOp{castOpBase: base}, nil + } } isFromDatum := typeconv.TypeFamilyToCanonicalTypeFamily(fromType.Family()) == typeconv.DatumVecCanonicalTypeFamily isToDatum := typeconv.TypeFamilyToCanonicalTypeFamily(toType.Family()) == typeconv.DatumVecCanonicalTypeFamily @@ -319,6 +327,50 @@ func (c *castIdentityOp) Next() coldata.Batch { return batch } +// castBPCharIdentityOp is a specialization of castIdentityOp which handles +// casts to the bpchar type (which trims trailing whitespaces). +type castBPCharIdentityOp struct { + castOpBase +} + +var _ colexecop.ClosableOperator = &castBPCharIdentityOp{} + +func (c *castBPCharIdentityOp) Next() coldata.Batch { + batch := c.Input.Next() + n := batch.Length() + if n == 0 { + return coldata.ZeroBatch + } + inputVec := batch.ColVec(c.colIdx) + inputCol := inputVec.Bytes() + inputNulls := inputVec.Nulls() + outputVec := batch.ColVec(c.outputIdx) + outputCol := outputVec.Bytes() + outputNulls := outputVec.Nulls() + // Note that the loops below are not as optimized as in other cast operators + // since this operator should only be planned in tests. + c.allocator.PerformOperation([]coldata.Vec{outputVec}, func() { + if sel := batch.Selection(); sel != nil { + for _, i := range sel[:n] { + if inputNulls.NullAt(i) { + outputNulls.SetNull(i) + } else { + outputCol.Set(i, bytes.TrimRight(inputCol.Get(i), " ")) + } + } + } else { + for i := 0; i < n; i++ { + if inputNulls.NullAt(i) { + outputNulls.SetNull(i) + } else { + outputCol.Set(i, bytes.TrimRight(inputCol.Get(i), " ")) + } + } + } + }) + return batch +} + type castNativeToDatumOp struct { castOpBase