Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

coldata: do not allocate wasteful memory for UUIDs #63231

Merged
merged 1 commit into from
Apr 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/col/coldata/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ go_library(
"//pkg/sql/types",
"//pkg/util",
"//pkg/util/duration",
"//pkg/util/uuid",
"@com_github_cockroachdb_apd_v2//:apd",
"@com_github_cockroachdb_errors//:errors",
"@com_github_stretchr_testify//require",
Expand Down
14 changes: 13 additions & 1 deletion pkg/col/coldata/bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,24 @@ const BytesInitialAllocationFactor = 64
// NewBytes returns a Bytes struct with enough capacity for n zero-length
// []byte values. It is legal to call Set on the returned Bytes at this point,
// but Get is undefined until at least one element is Set.
// BytesInitialAllocationFactor number of bytes are allocated initially for each
// []byte element.
func NewBytes(n int) *Bytes {
return NewBytesWithAvgLength(n, BytesInitialAllocationFactor)
}

// NewBytesWithAvgLength returns a Bytes struct with enough capacity for n
// []byte values with the average length of avgElementLength. It is legal to
// call Set on the returned Bytes at this point, but Get is undefined until at
// least one element is Set.
// - avgElementLength determines the average length of a single []byte element
// that will be added to this Bytes.
func NewBytesWithAvgLength(n int, avgElementLength int) *Bytes {
return &Bytes{
// Given that the []byte slices are of variable length, we multiply the
// number of elements by some constant factor.
// TODO(asubiotto): Make this tunable.
data: make([]byte, 0, n*BytesInitialAllocationFactor),
data: make([]byte, 0, n*avgElementLength),
offsets: make([]int32, n+1),
}
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/col/coldata/vec.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"github.com/cockroachdb/cockroach/pkg/col/typeconv"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
)

// Column is an interface that represents a raw array of a Go native type.
Expand Down Expand Up @@ -162,6 +163,9 @@ func (cf *defaultColumnFactory) MakeColumn(t *types.T, length int) Column {
case types.BoolFamily:
return make(Bools, length)
case types.BytesFamily:
if t.Family() == types.UuidFamily {
return NewBytesWithAvgLength(length, uuid.Size)
}
return NewBytes(length)
case types.IntFamily:
switch t.Width() {
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/colmem/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ go_library(
"//pkg/sql/types",
"//pkg/util/duration",
"//pkg/util/mon",
"//pkg/util/uuid",
"@com_github_cockroachdb_apd_v2//:apd",
"@com_github_cockroachdb_errors//:errors",
],
Expand Down
35 changes: 24 additions & 11 deletions pkg/sql/colmem/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/mon"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -370,12 +371,19 @@ func EstimateBatchSizeBytes(vecTypes []*types.T, batchLength int) int {
// (excluding any Bytes vectors, those are tracked separately).
acc := 0
numBytesVectors := 0
// We will track Uuid vectors separately because they use a smaller initial
// allocation factor.
numUUIDVectors := 0
for _, t := range vecTypes {
switch typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()) {
case types.BoolFamily:
acc += sizeOfBool
case types.BytesFamily:
numBytesVectors++
if t.Family() == types.UuidFamily {
numUUIDVectors++
} else {
numBytesVectors++
}
case types.IntFamily:
switch t.Width() {
case 16:
Expand Down Expand Up @@ -416,15 +424,20 @@ func EstimateBatchSizeBytes(vecTypes []*types.T, batchLength int) int {
colexecerror.InternalError(errors.AssertionFailedf("unhandled type %s", t))
}
}
// For byte arrays, we initially allocate BytesInitialAllocationFactor
// number of bytes (plus an int32 for the offset) for each row, so we use
// the sum of two values as the estimate. However, later, the exact
// memory footprint will be used: whenever a modification of Bytes takes
// place, the Allocator will measure the old footprint and the updated
// one and will update the memory account accordingly. We also account for
// the overhead and for the additional offset value that are needed for
// Bytes vectors (to be in line with coldata.Bytes.Size() method).
bytesVectorsSize := numBytesVectors * (int(coldata.FlatBytesOverhead) +
coldata.BytesInitialAllocationFactor*batchLength + sizeOfInt32*(batchLength+1))
// For byte arrays, we initially allocate a constant number of bytes (plus
// an int32 for the offset) for each row, so we use the sum of two values as
// the estimate. However, later, the exact memory footprint will be used:
// whenever a modification of Bytes takes place, the Allocator will measure
// the old footprint and the updated one and will update the memory account
// accordingly. We also account for the overhead and for the additional
// offset value that are needed for Bytes vectors (to be in line with
// coldata.Bytes.Size() method).
var bytesVectorsSize int
// Add the overhead.
bytesVectorsSize += (numBytesVectors + numUUIDVectors) * (int(coldata.FlatBytesOverhead))
// Add the data for both Bytes and Uuids.
bytesVectorsSize += (numBytesVectors*coldata.BytesInitialAllocationFactor + numUUIDVectors*uuid.Size) * batchLength
// Add the offsets.
bytesVectorsSize += (numBytesVectors + numUUIDVectors) * sizeOfInt32 * (batchLength + 1)
return acc*batchLength + bytesVectorsSize
}