From 2a99ef0021cbe88908f5783eaff7abc2d5c6a8c5 Mon Sep 17 00:00:00 2001
From: Tommy Reilly
Date: Wed, 14 Dec 2022 12:50:13 -0500
Subject: [PATCH] tree: add extended vector friendly version of ParseAndRequireString

In order to build coldata.Vec's from string data a new version of
ParseAndRequireString is provided which pulls out the special types
supported by the vector engine and delegates to ParseAndRequireString
for anything else.

Informs: #91831

Release note: None
---
 pkg/col/coldataext/BUILD.bazel    |   6 +-
 pkg/col/coldataext/vec_handler.go | 115 ++++++++++++++++++++++++++++++
 pkg/sql/sem/tree/datum.go         |  24 +++++--
 pkg/sql/sem/tree/parse_string.go  | 110 +++++++++++++++++++++++++++--
 4 files changed, 243 insertions(+), 12 deletions(-)
 create mode 100644 pkg/col/coldataext/vec_handler.go

diff --git a/pkg/col/coldataext/BUILD.bazel b/pkg/col/coldataext/BUILD.bazel
index 5f258b8e9882..3558eb40d7be 100644
--- a/pkg/col/coldataext/BUILD.bazel
+++ b/pkg/col/coldataext/BUILD.bazel
@@ -6,6 +6,7 @@ go_library(
     srcs = [
         "datum_vec.go",
         "extended_column_factory.go",
+        "vec_handler.go",
     ],
     importpath = "github.com/cockroachdb/cockroach/pkg/col/coldataext",
     visibility = ["//visibility:public"],
@@ -19,7 +20,10 @@ go_library(
         "//pkg/sql/sem/eval",
         "//pkg/sql/sem/tree",
         "//pkg/sql/types",
-        "//pkg/util/buildutil",
+        "//pkg/util/duration",
+        "//pkg/util/json",
+        "//pkg/util/timeutil/pgdate",
+        "@com_github_cockroachdb_apd_v3//:apd",
         "@com_github_cockroachdb_errors//:errors",
     ],
 )
diff --git a/pkg/col/coldataext/vec_handler.go b/pkg/col/coldataext/vec_handler.go
new file mode 100644
index 000000000000..e834c25efe3f
--- /dev/null
+++ b/pkg/col/coldataext/vec_handler.go
@@ -0,0 +1,115 @@
+// Copyright 2022 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package coldataext
+
+import (
+	"time"
+
+	"github.com/cockroachdb/apd/v3"
+	"github.com/cockroachdb/cockroach/pkg/col/coldata"
+	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
+	"github.com/cockroachdb/cockroach/pkg/util/duration"
+	"github.com/cockroachdb/cockroach/pkg/util/json"
+	"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
+)
+
+// MakeVecHandler makes a tree.ValueHandler that stores values to a coldata.Vec.
+func MakeVecHandler(v coldata.Vec) tree.ValueHandler {
+	return &vecHandler{col: v}
+}
+
+// vecHandler is a tree.ValueHandler that writes each value it receives to col
+// at index row and then advances row by one.
+type vecHandler struct {
+	col coldata.Vec
+	row int
+}
+
+var _ tree.ValueHandler = (*vecHandler)(nil)
+
+// Reset rewinds the handler to row 0 so that it can be re-used across batches.
+func (v *vecHandler) Reset() {
+	v.row = 0
+}
+
+// Null is part of the tree.ValueHandler interface.
+func (v *vecHandler) Null() {
+	v.col.Nulls().SetNull(v.row)
+	v.row++
+}
+
+// Decimal is part of the tree.ValueHandler interface. It returns a pointer
+// into the vec so that the caller can fill in the value without copying.
+func (v *vecHandler) Decimal() *apd.Decimal {
+	d := &v.col.Decimal()[v.row]
+	v.row++
+	return d
+}
+
+// String is part of the tree.ValueHandler interface.
+func (v *vecHandler) String(s string) {
+	v.col.Bytes().Set(v.row, []byte(s))
+	v.row++
+}
+
+// Date is part of the tree.ValueHandler interface.
+func (v *vecHandler) Date(d pgdate.Date) {
+	v.col.Int64().Set(v.row, d.UnixEpochDaysWithOrig())
+	v.row++
+}
+
+// Datum is part of the tree.ValueHandler interface.
+func (v *vecHandler) Datum(d tree.Datum) {
+	v.col.Datum().Set(v.row, d)
+	v.row++
+}
+
+// Bool is part of the tree.ValueHandler interface.
+func (v *vecHandler) Bool(b bool) {
+	v.col.Bool().Set(v.row, b)
+	v.row++
+}
+
+// Bytes is part of the tree.ValueHandler interface.
+func (v *vecHandler) Bytes(b []byte) {
+	v.col.Bytes().Set(v.row, b)
+	v.row++
+}
+
+// Float is part of the tree.ValueHandler interface.
+func (v *vecHandler) Float(f float64) {
+	v.col.Float64().Set(v.row, f)
+	v.row++
+}
+
+// Int is part of the tree.ValueHandler interface.
+func (v *vecHandler) Int(i int64) {
+	v.col.Int64().Set(v.row, i)
+	v.row++
+}
+
+// Duration is part of the tree.ValueHandler interface.
+func (v *vecHandler) Duration(d duration.Duration) {
+	v.col.Interval().Set(v.row, d)
+	v.row++
+}
+
+// JSON is part of the tree.ValueHandler interface.
+func (v *vecHandler) JSON(j json.JSON) {
+	v.col.JSON().Set(v.row, j)
+	v.row++
+}
+
+// TimestampTZ is part of the tree.ValueHandler interface.
+func (v *vecHandler) TimestampTZ(t time.Time) {
+	v.col.Timestamp().Set(v.row, t)
+	v.row++
+}
diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go
index aa2f5e854386..9af719a385c9 100644
--- a/pkg/sql/sem/tree/datum.go
+++ b/pkg/sql/sem/tree/datum.go
@@ -1041,10 +1041,14 @@ func ParseDDecimal(s string) (*DDecimal, error) {
 // SetString sets d to s. Any non-standard NaN values are converted to a
 // normal NaN. Any negative zero is converted to positive.
 func (d *DDecimal) SetString(s string) error {
+	return setDecimalString(s, &d.Decimal)
+}
+
+func setDecimalString(s string, d *apd.Decimal) error {
 	// ExactCtx should be able to handle any decimal, but if there is any rounding
 	// or other inexact conversion, it will result in an error.
 	//_, res, err := HighPrecisionCtx.SetString(&d.Decimal, s)
-	_, res, err := ExactCtx.SetString(&d.Decimal, s)
+	_, res, err := ExactCtx.SetString(d, s)
 	if res != 0 || err != nil {
 		return MakeParseError(s, types.Decimal, err)
 	}
@@ -2860,13 +2864,20 @@ type DTimestampTZ struct {
 	time.Time
 }
 
-// MakeDTimestampTZ creates a DTimestampTZ with specified precision.
-func MakeDTimestampTZ(t time.Time, precision time.Duration) (*DTimestampTZ, error) {
+func checkTimeBounds(t time.Time, precision time.Duration) (time.Time, error) {
 	ret := t.Round(precision)
 	if ret.After(MaxSupportedTime) || ret.Before(MinSupportedTime) {
-		return nil, NewTimestampExceedsBoundsError(ret)
+		return time.Time{}, NewTimestampExceedsBoundsError(ret)
+	}
+	return ret, nil
+}
+
+// MakeDTimestampTZ creates a DTimestampTZ with specified precision.
+func MakeDTimestampTZ(t time.Time, precision time.Duration) (_ *DTimestampTZ, err error) {
+	if t, err = checkTimeBounds(t, precision); err != nil {
+		return nil, err
 	}
-	return &DTimestampTZ{Time: ret}, nil
+	return &DTimestampTZ{Time: t}, nil
 }
 
 // MustMakeDTimestampTZ wraps MakeDTimestampTZ but panics if there is an error.
@@ -2897,6 +2908,9 @@ func MakeDTimestampTZFromDate(loc *time.Location, d *DDate) (*DTimestampTZ, erro
 //
 // The dependsOnContext return value indicates if we had to consult the
 // ParseContext (either for the time or the local timezone).
+//
+// Parts of this function are inlined into ParseAndRequireStringEx; if this
+// changes materially ParseAndRequireStringEx may need to change too.
 func ParseDTimestampTZ(
 	ctx ParseContext, s string, precision time.Duration,
 ) (_ *DTimestampTZ, dependsOnContext bool, _ error) {
diff --git a/pkg/sql/sem/tree/parse_string.go b/pkg/sql/sem/tree/parse_string.go
index c7edf20370b2..26018eea4cf0 100644
--- a/pkg/sql/sem/tree/parse_string.go
+++ b/pkg/sql/sem/tree/parse_string.go
@@ -13,9 +13,17 @@ package tree
 import (
 	"strconv"
 	"strings"
+	"time"
 
+	"github.com/cockroachdb/apd/v3"
+	"github.com/cockroachdb/cockroach/pkg/sql/lex"
+	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
+	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
 	"github.com/cockroachdb/cockroach/pkg/sql/types"
 	"github.com/cockroachdb/cockroach/pkg/util"
+	"github.com/cockroachdb/cockroach/pkg/util/duration"
+	"github.com/cockroachdb/cockroach/pkg/util/json"
+	"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
 	"github.com/cockroachdb/errors"
 	"github.com/lib/pq/oid"
 )
@@ -74,12 +82,7 @@ func ParseAndRequireString(
 	case types.CollatedStringFamily:
 		d, err = NewDCollatedString(s, t.Locale(), ctx.GetCollationEnv())
 	case types.StringFamily:
-		// If the string type specifies a limit we truncate to that limit:
-		//   'hello'::CHAR(2) -> 'he'
-		// This is true of all the string type variants.
-		if t.Width() > 0 {
-			s = util.TruncateString(s, int(t.Width()))
-		}
+		s = truncateString(s, t)
 		return NewDString(s), false, nil
 	case types.TimeFamily:
 		d, dependsOnContext, err = ParseDTime(ctx, s, TimeFamilyPrecisionToRoundDuration(t.Precision()))
@@ -115,6 +118,16 @@ func ParseAndRequireString(
 	return d, dependsOnContext, err
 }
 
+// truncateString truncates s to the limit specified by the string type t, if
+// any; e.g. 'hello'::CHAR(2) -> 'he'. This is true of all the string type
+// variants.
+func truncateString(s string, t *types.T) string {
+	if t.Width() > 0 {
+		s = util.TruncateString(s, int(t.Width()))
+	}
+	return s
+}
+
 // ParseDOidAsInt parses the input and returns it as an OID. If the input
 // is not formatted as an int, an error is returned.
 func ParseDOidAsInt(s string) (*DOid, error) {
@@ -143,3 +156,100 @@ func FormatBitArrayToType(d *DBitArray, t *types.T) *DBitArray {
 	}
 	return &DBitArray{a}
 }
+
+// ValueHandler is an interface to allow raw types to be extracted from strings.
+type ValueHandler interface {
+	Null()
+	Date(d pgdate.Date)
+	Datum(d Datum)
+	Bool(b bool)
+	Bytes(b []byte)
+	Decimal() *apd.Decimal
+	Float(f float64)
+	Int(i int64)
+	Duration(d duration.Duration)
+	JSON(j json.JSON)
+	String(s string)
+	TimestampTZ(t time.Time)
+}
+
+// ParseAndRequireStringEx is the vector-friendly version of
+// ParseAndRequireString: it parses s as a value of type t and, on success,
+// hands the result to vh instead of returning it. Families with a dedicated
+// case below are passed as raw values; every other type is parsed by
+// ParseAndRequireString and passed as a Datum. ph is forwarded to
+// pgdate.ParseDate.
+//
+// Decimals are parsed in place into the value returned by vh.Decimal, which is
+// therefore called even when parsing fails; the value is zeroed in that case.
+func ParseAndRequireStringEx(
+	t *types.T, s string, ctx ParseTimeContext, vh ValueHandler, ph *pgdate.ParseHelper,
+) (err error) {
+	switch t.Family() {
+	case types.BoolFamily:
+		var b bool
+		if b, err = ParseBool(strings.TrimSpace(s)); err == nil {
+			vh.Bool(b)
+		}
+	case types.BytesFamily:
+		var res []byte
+		if res, err = lex.DecodeRawBytesToByteArrayAuto([]byte(s)); err == nil {
+			vh.Bytes(res)
+		} else {
+			err = MakeParseError(s, types.Bytes, err)
+		}
+	case types.DateFamily:
+		now := relativeParseTime(ctx)
+		var d pgdate.Date
+		if d, _, err = pgdate.ParseDate(now, dateStyle(ctx), s, ph); err == nil {
+			vh.Date(d)
+		}
+	case types.DecimalFamily:
+		dec := vh.Decimal()
+		if err = setDecimalString(s, dec); err != nil {
+			// Erase any invalid results. setDecimalString has already wrapped err
+			// in a parse error, so it is returned as is.
+			*dec = apd.Decimal{}
+		}
+	case types.FloatFamily:
+		var f float64
+		if f, err = strconv.ParseFloat(s, 64); err == nil {
+			vh.Float(f)
+		} else {
+			err = MakeParseError(s, types.Float, err)
+		}
+	case types.IntFamily:
+		var i int64
+		if i, err = strconv.ParseInt(s, 0, 64); err == nil {
+			vh.Int(i)
+		} else {
+			err = MakeParseError(s, types.Int, err)
+		}
+	case types.JsonFamily:
+		var j json.JSON
+		if j, err = json.ParseJSON(s); err == nil {
+			vh.JSON(j)
+		} else {
+			err = pgerror.Wrapf(err, pgcode.Syntax, "could not parse JSON")
+		}
+	case types.StringFamily:
+		s = truncateString(s, t)
+		vh.String(s)
+	case types.TimestampTZFamily:
+		now := relativeParseTime(ctx)
+		var ts time.Time
+		if ts, _, err = pgdate.ParseTimestamp(now, dateStyle(ctx), s); err == nil {
+			// Round to the precision of t and reject timestamps outside of the
+			// supported range.
+			if ts, err = checkTimeBounds(ts, TimeFamilyPrecisionToRoundDuration(t.Precision())); err == nil {
+				vh.TimestampTZ(ts)
+			}
+		}
+	default:
+		var d Datum
+		if d, _, err = ParseAndRequireString(t, s, ctx); err == nil {
+			vh.Datum(d)
+		}
+	}
+	return err
+}