diff --git a/pkg/col/coldata/BUILD.bazel b/pkg/col/coldata/BUILD.bazel
index b28a0bdc2c7d..9153b4693c4a 100644
--- a/pkg/col/coldata/BUILD.bazel
+++ b/pkg/col/coldata/BUILD.bazel
@@ -19,6 +19,7 @@ go_library(
         "//pkg/sql/types",
         "//pkg/util",
         "//pkg/util/duration",
+        "//pkg/util/uuid",
         "@com_github_cockroachdb_apd_v2//:apd",
         "@com_github_cockroachdb_errors//:errors",
         "@com_github_stretchr_testify//require",
diff --git a/pkg/col/coldata/bytes.go b/pkg/col/coldata/bytes.go
index 6be38f46979a..9789d12840ab 100644
--- a/pkg/col/coldata/bytes.go
+++ b/pkg/col/coldata/bytes.go
@@ -45,12 +45,24 @@ const BytesInitialAllocationFactor = 64
 // NewBytes returns a Bytes struct with enough capacity for n zero-length
 // []byte values. It is legal to call Set on the returned Bytes at this point,
 // but Get is undefined until at least one element is Set.
+// BytesInitialAllocationFactor number of bytes are allocated initially for each
+// []byte element.
 func NewBytes(n int) *Bytes {
+	return NewBytesWithAvgLength(n, BytesInitialAllocationFactor)
+}
+
+// NewBytesWithAvgLength returns a Bytes struct with enough capacity for n
+// []byte values with the average length of avgElementLength. It is legal to
+// call Set on the returned Bytes at this point, but Get is undefined until at
+// least one element is Set.
+// - avgElementLength determines the average length of a single []byte element
+//   that will be added to this Bytes.
+func NewBytesWithAvgLength(n int, avgElementLength int) *Bytes {
 	return &Bytes{
 		// Given that the []byte slices are of variable length, we multiply the
 		// number of elements by some constant factor.
 		// TODO(asubiotto): Make this tunable.
-		data:    make([]byte, 0, n*BytesInitialAllocationFactor),
+		data:    make([]byte, 0, n*avgElementLength),
 		offsets: make([]int32, n+1),
 	}
 }
diff --git a/pkg/col/coldata/vec.go b/pkg/col/coldata/vec.go
index e239817e1be9..e8ca71bc2357 100644
--- a/pkg/col/coldata/vec.go
+++ b/pkg/col/coldata/vec.go
@@ -15,6 +15,7 @@ import (
 
 	"github.com/cockroachdb/cockroach/pkg/col/typeconv"
 	"github.com/cockroachdb/cockroach/pkg/sql/types"
+	"github.com/cockroachdb/cockroach/pkg/util/uuid"
 )
 
 // Column is an interface that represents a raw array of a Go native type.
@@ -162,6 +163,9 @@ func (cf *defaultColumnFactory) MakeColumn(t *types.T, length int) Column {
 	case types.BoolFamily:
 		return make(Bools, length)
 	case types.BytesFamily:
+		if t.Family() == types.UuidFamily {
+			return NewBytesWithAvgLength(length, uuid.Size)
+		}
 		return NewBytes(length)
 	case types.IntFamily:
 		switch t.Width() {
diff --git a/pkg/sql/colmem/BUILD.bazel b/pkg/sql/colmem/BUILD.bazel
index f6b113c82c2d..4e445e2ef6a2 100644
--- a/pkg/sql/colmem/BUILD.bazel
+++ b/pkg/sql/colmem/BUILD.bazel
@@ -13,6 +13,7 @@ go_library(
         "//pkg/sql/types",
         "//pkg/util/duration",
         "//pkg/util/mon",
+        "//pkg/util/uuid",
         "@com_github_cockroachdb_apd_v2//:apd",
         "@com_github_cockroachdb_errors//:errors",
     ],
diff --git a/pkg/sql/colmem/allocator.go b/pkg/sql/colmem/allocator.go
index 1e9775539546..02d482b0dc6d 100644
--- a/pkg/sql/colmem/allocator.go
+++ b/pkg/sql/colmem/allocator.go
@@ -23,6 +23,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/sql/types"
 	"github.com/cockroachdb/cockroach/pkg/util/duration"
 	"github.com/cockroachdb/cockroach/pkg/util/mon"
+	"github.com/cockroachdb/cockroach/pkg/util/uuid"
 	"github.com/cockroachdb/errors"
 )
 
@@ -370,12 +371,19 @@ func EstimateBatchSizeBytes(vecTypes []*types.T, batchLength int) int {
 	// (excluding any Bytes vectors, those are tracked separately).
 	acc := 0
 	numBytesVectors := 0
+	// We will track Uuid vectors separately because they use a smaller initial
+	// allocation factor.
+	numUUIDVectors := 0
 	for _, t := range vecTypes {
 		switch typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()) {
 		case types.BoolFamily:
 			acc += sizeOfBool
 		case types.BytesFamily:
-			numBytesVectors++
+			if t.Family() == types.UuidFamily {
+				numUUIDVectors++
+			} else {
+				numBytesVectors++
+			}
 		case types.IntFamily:
 			switch t.Width() {
 			case 16:
@@ -416,15 +424,20 @@ func EstimateBatchSizeBytes(vecTypes []*types.T, batchLength int) int {
 			colexecerror.InternalError(errors.AssertionFailedf("unhandled type %s", t))
 		}
 	}
-	// For byte arrays, we initially allocate BytesInitialAllocationFactor
-	// number of bytes (plus an int32 for the offset) for each row, so we use
-	// the sum of two values as the estimate. However, later, the exact
-	// memory footprint will be used: whenever a modification of Bytes takes
-	// place, the Allocator will measure the old footprint and the updated
-	// one and will update the memory account accordingly. We also account for
-	// the overhead and for the additional offset value that are needed for
-	// Bytes vectors (to be in line with coldata.Bytes.Size() method).
-	bytesVectorsSize := numBytesVectors * (int(coldata.FlatBytesOverhead) +
-		coldata.BytesInitialAllocationFactor*batchLength + sizeOfInt32*(batchLength+1))
+	// For byte arrays, we initially allocate a constant number of bytes (plus
+	// an int32 for the offset) for each row, so we use the sum of two values as
+	// the estimate. However, later, the exact memory footprint will be used:
+	// whenever a modification of Bytes takes place, the Allocator will measure
+	// the old footprint and the updated one and will update the memory account
+	// accordingly. We also account for the overhead and for the additional
+	// offset value that are needed for Bytes vectors (to be in line with
+	// coldata.Bytes.Size() method).
+	var bytesVectorsSize int
+	// Add the overhead.
+	bytesVectorsSize += (numBytesVectors + numUUIDVectors) * (int(coldata.FlatBytesOverhead))
+	// Add the data for both Bytes and Uuids.
+	bytesVectorsSize += (numBytesVectors*coldata.BytesInitialAllocationFactor + numUUIDVectors*uuid.Size) * batchLength
+	// Add the offsets.
+	bytesVectorsSize += (numBytesVectors + numUUIDVectors) * sizeOfInt32 * (batchLength + 1)
 	return acc*batchLength + bytesVectorsSize
 }
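
For reference, here is a minimal standalone sketch (not CockroachDB code) of the adjusted estimate in EstimateBatchSizeBytes. It mirrors the three terms the new code sums per batch (flat overhead, initial data bytes, offsets) and shows why sizing UUID columns with uuid.Size (16 bytes) instead of BytesInitialAllocationFactor (64 bytes) shrinks the estimate. The flatBytesOverhead value below is purely illustrative; the real value comes from coldata.FlatBytesOverhead.

package main

import "fmt"

// Constants mirrored from the diff; flatBytesOverhead is a hypothetical
// placeholder for coldata.FlatBytesOverhead.
const (
	bytesInitialAllocationFactor = 64 // assumed average length of a generic []byte element
	uuidSize                     = 16 // uuid.Size: a UUID is always 16 bytes
	sizeOfInt32                  = 4
	flatBytesOverhead            = 48 // illustrative per-vector struct overhead
)

// estimateBytesVectorsSize follows the same shape as the new estimate:
// overhead, data, and offsets are summed for both generic Bytes vectors and
// UUID vectors, with UUID vectors assuming 16 data bytes per row instead of 64.
func estimateBytesVectorsSize(numBytesVectors, numUUIDVectors, batchLength int) int {
	size := 0
	// Overhead of the flat Bytes representation, once per vector.
	size += (numBytesVectors + numUUIDVectors) * flatBytesOverhead
	// Initial data allocation: 64 bytes per row for generic Bytes, 16 for UUIDs.
	size += (numBytesVectors*bytesInitialAllocationFactor + numUUIDVectors*uuidSize) * batchLength
	// Offsets: one int32 per row plus one terminating offset.
	size += (numBytesVectors + numUUIDVectors) * sizeOfInt32 * (batchLength + 1)
	return size
}

func main() {
	// One UUID column over a 1024-row batch: roughly 16 KiB of data instead of
	// the 64 KiB the old BytesInitialAllocationFactor-based estimate assumed.
	fmt.Println(estimateBytesVectorsSize(0, 1, 1024))
}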