Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

coldata: do not allocate wasteful memory for UUIDs #63231

Merged
merged 1 commit into from
Apr 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/col/coldata/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ go_library(
"//pkg/sql/types",
"//pkg/util",
"//pkg/util/duration",
"//pkg/util/uuid",
"@com_github_cockroachdb_apd_v2//:apd",
"@com_github_cockroachdb_errors//:errors",
"@com_github_stretchr_testify//require",
Expand Down
14 changes: 13 additions & 1 deletion pkg/col/coldata/bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,24 @@ const BytesInitialAllocationFactor = 64
// NewBytes returns a Bytes struct with enough capacity for n zero-length
// []byte values. It is legal to call Set on the returned Bytes at this point,
// but Get is undefined until at least one element is Set.
// BytesInitialAllocationFactor number of bytes are allocated initially for each
// []byte element.
func NewBytes(n int) *Bytes {
return NewBytesWithAvgLength(n, BytesInitialAllocationFactor)
}

// NewBytesWithAvgLength returns a Bytes struct with enough capacity for n
// []byte values with the average length of avgElementLength. It is legal to
// call Set on the returned Bytes at this point, but Get is undefined until at
// least one element is Set.
// - avgElementLength determines the average length of a single []byte element
// that will be added to this Bytes.
func NewBytesWithAvgLength(n int, avgElementLength int) *Bytes {
return &Bytes{
// Given that the []byte slices are of variable length, we multiply the
// number of elements by some constant factor.
// TODO(asubiotto): Make this tunable.
data: make([]byte, 0, n*BytesInitialAllocationFactor),
data: make([]byte, 0, n*avgElementLength),
offsets: make([]int32, n+1),
}
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/col/coldata/vec.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"github.com/cockroachdb/cockroach/pkg/col/typeconv"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
)

// Column is an interface that represents a raw array of a Go native type.
Expand Down Expand Up @@ -162,6 +163,9 @@ func (cf *defaultColumnFactory) MakeColumn(t *types.T, length int) Column {
case types.BoolFamily:
return make(Bools, length)
case types.BytesFamily:
if t.Family() == types.UuidFamily {
return NewBytesWithAvgLength(length, uuid.Size)
}
return NewBytes(length)
case types.IntFamily:
switch t.Width() {
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/colmem/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ go_library(
"//pkg/sql/types",
"//pkg/util/duration",
"//pkg/util/mon",
"//pkg/util/uuid",
"@com_github_cockroachdb_apd_v2//:apd",
"@com_github_cockroachdb_errors//:errors",
],
Expand Down
35 changes: 24 additions & 11 deletions pkg/sql/colmem/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/mon"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -370,12 +371,19 @@ func EstimateBatchSizeBytes(vecTypes []*types.T, batchLength int) int {
// (excluding any Bytes vectors, those are tracked separately).
acc := 0
numBytesVectors := 0
// We will track Uuid vectors separately because they use a smaller initial
// allocation factor.
numUUIDVectors := 0
for _, t := range vecTypes {
switch typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()) {
case types.BoolFamily:
acc += sizeOfBool
case types.BytesFamily:
numBytesVectors++
if t.Family() == types.UuidFamily {
numUUIDVectors++
} else {
numBytesVectors++
}
case types.IntFamily:
switch t.Width() {
case 16:
Expand Down Expand Up @@ -416,15 +424,20 @@ func EstimateBatchSizeBytes(vecTypes []*types.T, batchLength int) int {
colexecerror.InternalError(errors.AssertionFailedf("unhandled type %s", t))
}
}
// For byte arrays, we initially allocate BytesInitialAllocationFactor
// number of bytes (plus an int32 for the offset) for each row, so we use
// the sum of two values as the estimate. However, later, the exact
// memory footprint will be used: whenever a modification of Bytes takes
// place, the Allocator will measure the old footprint and the updated
// one and will update the memory account accordingly. We also account for
// the overhead and for the additional offset value that are needed for
// Bytes vectors (to be in line with coldata.Bytes.Size() method).
bytesVectorsSize := numBytesVectors * (int(coldata.FlatBytesOverhead) +
coldata.BytesInitialAllocationFactor*batchLength + sizeOfInt32*(batchLength+1))
// For byte arrays, we initially allocate a constant number of bytes (plus
// an int32 for the offset) for each row, so we use the sum of two values as
// the estimate. However, later, the exact memory footprint will be used:
// whenever a modification of Bytes takes place, the Allocator will measure
// the old footprint and the updated one and will update the memory account
// accordingly. We also account for the overhead and for the additional
// offset value that are needed for Bytes vectors (to be in line with
// coldata.Bytes.Size() method).
var bytesVectorsSize int
// Add the overhead.
bytesVectorsSize += (numBytesVectors + numUUIDVectors) * (int(coldata.FlatBytesOverhead))
// Add the data for both Bytes and Uuids.
bytesVectorsSize += (numBytesVectors*coldata.BytesInitialAllocationFactor + numUUIDVectors*uuid.Size) * batchLength
// Add the offsets.
bytesVectorsSize += (numBytesVectors + numUUIDVectors) * sizeOfInt32 * (batchLength + 1)
return acc*batchLength + bytesVectorsSize
}