Merge #72841 #72961 #72962

72841: sql: cleanup r=andreimatei a=andreimatei This code was unnecessarily constructing a struct. Release note: None 72961: col/coldata: batch allocate memColumn objects r=nvanbenschoten a=nvanbenschoten This commit updates `NewMemBatchWithCapacity` to batch allocate all necessary `memColumn` objects in a single chunk. Outside of #72798, this was the single largest source of heap allocations by object count (`alloc_objects`) in TPC-E. With #72798 applied, calls to `NewMemColumn` were responsible for **3.18%** of total heap allocations in the workload: ``` ----------------------------------------------------------+------------- flat flat% sum% cum cum% calls calls% + context ----------------------------------------------------------+------------- 355202097 97.92% | github.com/cockroachdb/cockroach/pkg/col/coldata.NewMemBatchWithCapacity /go/src/github.com/cockroachdb/cockroach/pkg/col/coldata/batch.go:123 7561654 2.08% | github.com/cockroachdb/cockroach/pkg/sql/colmem.(*Allocator).NewMemColumn /go/src/github.com/cockroachdb/cockroach/pkg/sql/colmem/allocator.go:217 362763751 3.18% 3.18% 362763751 3.18% | github.com/cockroachdb/cockroach/pkg/col/coldata.NewMemColumn /go/src/github.com/cockroachdb/cockroach/pkg/col/coldata/vec.go:202 ----------------------------------------------------------+------------- ``` Lower in the heap profile, we see that each of the other heap allocations that are performed once per call to `NewMemBatchWithCapacity` were observed 39625411 times. So we can estimate that the average batch contains `362763751 / 39625411 = 9.15` columns. This lines up with the improvement we see due to this change. 
Heap allocations due to `memColumn` objects drop by about a factor of 9, down to **0.33%** of all heap allocations: ``` ----------------------------------------------------------+------------- flat flat% sum% cum cum% calls calls% + context ----------------------------------------------------------+------------- 12082615 100% | github.com/cockroachdb/cockroach/pkg/sql/colmem.(*Allocator).NewMemBatchWithFixedCapacity /go/src/github.com/cockroachdb/cockroach/pkg/sql/colmem/allocator.go:131 12082615 0.33% 38.80% 12082615 0.33% | github.com/cockroachdb/cockroach/pkg/col/coldata.NewMemBatchWithCapacity /go/src/github.com/cockroachdb/cockroach/pkg/col/coldata/batch.go:122 ----------------------------------------------------------+------------- ``` Despite this change, we will still probably want to address Jordan's TODO a few lines earlier about pooling all allocations at this level: https://github.com/cockroachdb/cockroach/blob/28bb1ea049da5bfb6e15a7003cd7b678cbc4b67f/pkg/col/coldata/batch.go#L113 72962: sql/catalog/descs: implement interfaces on *DistSQLTypeResolver r=nvanbenschoten a=nvanbenschoten This commit updates `DistSQLTypeResolver` to implement all interfaces on a pointer receiver instead of a value receiver. Implementing interfaces on values is rarely the right choice, as it forces a heap allocation whenever the object is boxed into an interface, instead of just forcing the pointer onto the heap once (on its own or as part of a larger object) and then storing the pointer in the interface header. Before this commit, the use of value receivers was causing `HydrateTypeSlice` to allocate. Outside of #72798 and #72961, this was the single largest source of heap allocations in TPC-E. 
With those two PRs applied, `HydrateTypeSlice` was accounting for **2.30%** of total heap allocations in the workload: ``` ----------------------------------------------------------+------------- flat flat% sum% cum cum% calls calls% + context ----------------------------------------------------------+------------- 27722149 32.66% | github.com/cockroachdb/cockroach/pkg/sql/execinfra.(*ProcessorBase).InitWithEvalCtx /go/src/github.com/cockroachdb/cockroach/pkg/sql/execinfra/processorsbase.go:790 27460002 32.36% | github.com/cockroachdb/cockroach/pkg/sql/colflow.(*vectorizedFlowCreator).setupFlow.func1 /go/src/github.com/cockroachdb/cockroach/pkg/sql/colflow/vectorized_flow.go:1097 21266755 25.06% | github.com/cockroachdb/cockroach/pkg/sql/colflow.(*vectorizedFlowCreator).setupInput /go/src/github.com/cockroachdb/cockroach/pkg/sql/colflow/vectorized_flow.go:818 8421503 9.92% | github.com/cockroachdb/cockroach/pkg/sql/colfetcher.populateTableArgs /go/src/github.com/cockroachdb/cockroach/pkg/sql/colfetcher/cfetcher_setup.go:174 84870409 2.30% 2.30% 84870409 2.30% | github.com/cockroachdb/cockroach/pkg/sql/catalog/descs.DistSQLTypeResolver.HydrateTypeSlice /go/src/github.com/cockroachdb/cockroach/pkg/sql/catalog/descs/dist_sql_type_resolver.go:134 ----------------------------------------------------------+------------- ``` With this change, the heap allocation in `HydrateTypeSlice` disappears. With these three PRs combined, the largest source of heap allocations in the workload is `context.WithValue`, like the Go gods intended. 
``` ----------------------------------------------------------+------------- flat flat% sum% cum cum% calls calls% + context ----------------------------------------------------------+------------- 16723340 38.17% | github.com/cockroachdb/logtags.WithTags /go/src/github.com/cockroachdb/cockroach/vendor/github.com/cockroachdb/logtags/context.go:34 7405899 16.90% | google.golang.org/grpc/peer.NewContext /go/src/github.com/cockroachdb/cockroach/vendor/google.golang.org/grpc/peer/peer.go:44 3910493 8.93% | google.golang.org/grpc.NewContextWithServerTransportStream /go/src/github.com/cockroachdb/cockroach/vendor/google.golang.org/grpc/server.go:1672 3702950 8.45% | github.com/cockroachdb/cockroach/pkg/util/tracing.maybeWrapCtx /go/src/github.com/cockroachdb/cockroach/pkg/util/tracing/context.go:80 3560952 8.13% | google.golang.org/grpc/metadata.NewIncomingContext /go/src/github.com/cockroachdb/cockroach/vendor/google.golang.org/grpc/metadata/metadata.go:152 3342479 7.63% | google.golang.org/grpc.newContextWithRPCInfo /go/src/github.com/cockroachdb/cockroach/vendor/google.golang.org/grpc/rpc_util.go:791 2938326 6.71% | google.golang.org/grpc/internal/credentials.NewRequestInfoContext /go/src/github.com/cockroachdb/cockroach/vendor/google.golang.org/grpc/internal/credentials/credentials.go:29 1387235 3.17% | github.com/cockroachdb/cockroach/pkg/util/grpcutil.NewLocalRequestContext /go/src/github.com/cockroachdb/cockroach/pkg/util/grpcutil/grpc_util.go:39 655388 1.50% | github.com/cockroachdb/cockroach/pkg/sql.withStatement /go/src/github.com/cockroachdb/cockroach/pkg/sql/conn_executor.go:3197 185693 0.42% | google.golang.org/grpc/metadata.NewOutgoingContext /go/src/github.com/cockroachdb/cockroach/vendor/google.golang.org/grpc/metadata/metadata.go:159 43812755 2.20% 2.20% 43812755 2.20% | context.WithValue /usr/local/go/src/context/context.go:533 ----------------------------------------------------------+------------- ``` Co-authored-by: Andrei Matei <[email protected]> 
Co-authored-by: Nathan VanBenschoten <[email protected]>
cockroachdb · Nov 19, 2021 · e131a82 · e131a82
4 parents 8a501a2 + 785b28e + 019212f + 1b59a8c
commit e131a82
Show file tree

Hide file tree

Showing 7 changed files with 30 additions and 17 deletions.
diff --git a/pkg/col/coldata/batch.go b/pkg/col/coldata/batch.go
@@ -119,9 +119,12 @@ func NewMemBatch(typs []*types.T, factory ColumnFactory) Batch {
 // column size. Use for operators that have a precisely-sized output batch.
 func NewMemBatchWithCapacity(typs []*types.T, capacity int, factory ColumnFactory) Batch {
 	b := NewMemBatchNoCols(typs, capacity).(*MemBatch)
+	cols := make([]memColumn, len(typs))
 	for i, t := range typs {
-		b.b[i] = NewMemColumn(t, capacity, factory)
-		if b.b[i].IsBytesLike() {
+		col := &cols[i]
+		col.init(t, capacity, factory)
+		b.b[i] = col
+		if col.IsBytesLike() {
 			b.bytesVecIdxs.Add(i)
 		}
 	}

diff --git a/pkg/col/coldata/vec.go b/pkg/col/coldata/vec.go
@@ -199,7 +199,14 @@ func (cf *defaultColumnFactory) MakeColumn(t *types.T, length int) Column {
 // NewMemColumn returns a new memColumn, initialized with a length using the
 // given column factory.
 func NewMemColumn(t *types.T, length int, factory ColumnFactory) Vec {
-	return &memColumn{
+	var m memColumn
+	m.init(t, length, factory)
+	return &m
+}
+
+// init initializes the receiver with a length using the given column factory.
+func (m *memColumn) init(t *types.T, length int, factory ColumnFactory) {
+	*m = memColumn{
 		t:                   t,
 		canonicalTypeFamily: typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()),
 		col:                 factory.MakeColumn(t, length),

diff --git a/pkg/sql/backfill.go b/pkg/sql/backfill.go
@@ -701,9 +701,10 @@ func (sc *SchemaChanger) validateConstraints(
 				// print the check expression back to the user.
 				evalCtx.Txn = txn
 				// Use the DistSQLTypeResolver because we need to resolve types by ID.
-				semaCtx := tree.MakeSemaContext()
 				collection := evalCtx.Descs
-				semaCtx.TypeResolver = descs.NewDistSQLTypeResolver(collection, txn)
+				resolver := descs.NewDistSQLTypeResolver(collection, txn)
+				semaCtx := tree.MakeSemaContext()
+				semaCtx.TypeResolver = &resolver
 				// TODO (rohany): When to release this? As of now this is only going to get released
 				//  after the check is validated.
 				defer func() { collection.ReleaseAll(ctx) }()

diff --git a/pkg/sql/backfill/backfill.go b/pkg/sql/backfill/backfill.go
@@ -201,12 +201,12 @@ func (cb *ColumnBackfiller) InitForDistributedUse(
 	if err := flowCtx.Cfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
 		resolver := flowCtx.TypeResolverFactory.NewTypeResolver(txn)
 		// Hydrate all the types present in the table.
-		if err := typedesc.HydrateTypesInTableDescriptor(ctx, desc.TableDesc(), resolver); err != nil {
+		if err := typedesc.HydrateTypesInTableDescriptor(ctx, desc.TableDesc(), &resolver); err != nil {
 			return err
 		}
 		// Set up a SemaContext to type check the default and computed expressions.
 		semaCtx := tree.MakeSemaContext()
-		semaCtx.TypeResolver = resolver
+		semaCtx.TypeResolver = &resolver
 		var err error
 		defaultExprs, err = schemaexpr.MakeDefaultExprs(
 			ctx, cb.added, &transform.ExprTransformContext{}, evalCtx, &semaCtx,
@@ -600,13 +600,13 @@ func (ib *IndexBackfiller) InitForDistributedUse(
 		resolver := flowCtx.TypeResolverFactory.NewTypeResolver(txn)
 		// Hydrate all the types present in the table.
 		if err = typedesc.HydrateTypesInTableDescriptor(
-			ctx, desc.TableDesc(), resolver,
+			ctx, desc.TableDesc(), &resolver,
 		); err != nil {
 			return err
 		}
 		// Set up a SemaContext to type check the default and computed expressions.
 		semaCtx := tree.MakeSemaContext()
-		semaCtx.TypeResolver = resolver
+		semaCtx.TypeResolver = &resolver
 		// Convert any partial index predicate strings into expressions.
 		predicates, colExprs, referencedColumns, err = constructExprs(
 			ctx, desc, ib.added, ib.cols, ib.addedCols, ib.computedCols, evalCtx, &semaCtx,

diff --git a/pkg/sql/catalog/descs/dist_sql_type_resolver.go b/pkg/sql/catalog/descs/dist_sql_type_resolver.go
@@ -49,8 +49,9 @@ func (df *DistSQLTypeResolverFactory) NewTypeResolver(txn *kv.Txn) DistSQLTypeRe
 // NewSemaContext creates a new SemaContext with a TypeResolver bound to the
 // input transaction.
 func (df *DistSQLTypeResolverFactory) NewSemaContext(txn *kv.Txn) *tree.SemaContext {
+	resolver := df.NewTypeResolver(txn)
 	semaCtx := tree.MakeSemaContext()
-	semaCtx.TypeResolver = df.NewTypeResolver(txn)
+	semaCtx.TypeResolver = &resolver
 	return &semaCtx
 }
 
@@ -70,14 +71,16 @@ func NewDistSQLTypeResolver(descs *Collection, txn *kv.Txn) DistSQLTypeResolver
 }
 
 // ResolveType implements the tree.TypeReferenceResolver interface.
-func (dt DistSQLTypeResolver) ResolveType(
+func (dt *DistSQLTypeResolver) ResolveType(
 	context.Context, *tree.UnresolvedObjectName,
 ) (*types.T, error) {
 	return nil, errors.AssertionFailedf("cannot resolve types in DistSQL by name")
 }
 
 // ResolveTypeByOID implements the tree.TypeReferenceResolver interface.
-func (dt DistSQLTypeResolver) ResolveTypeByOID(ctx context.Context, oid oid.Oid) (*types.T, error) {
+func (dt *DistSQLTypeResolver) ResolveTypeByOID(
+	ctx context.Context, oid oid.Oid,
+) (*types.T, error) {
 	id, err := typedesc.UserDefinedTypeOIDToID(oid)
 	if err != nil {
 		return nil, err
@@ -90,7 +93,7 @@ func (dt DistSQLTypeResolver) ResolveTypeByOID(ctx context.Context, oid oid.Oid)
 }
 
 // GetTypeDescriptor implements the sqlbase.TypeDescriptorResolver interface.
-func (dt DistSQLTypeResolver) GetTypeDescriptor(
+func (dt *DistSQLTypeResolver) GetTypeDescriptor(
 	ctx context.Context, id descpb.ID,
 ) (tree.TypeName, catalog.TypeDescriptor, error) {
 	flags := tree.CommonLookupFlags{
@@ -129,7 +132,7 @@ func (dt DistSQLTypeResolver) GetTypeDescriptor(
 }
 
 // HydrateTypeSlice installs metadata into a slice of types.T's.
-func (dt DistSQLTypeResolver) HydrateTypeSlice(ctx context.Context, typs []*types.T) error {
+func (dt *DistSQLTypeResolver) HydrateTypeSlice(ctx context.Context, typs []*types.T) error {
 	for _, t := range typs {
 		if err := typedesc.EnsureTypeIsHydrated(ctx, t, dt); err != nil {
 			return err

diff --git a/pkg/sql/execinfra/processorsbase.go b/pkg/sql/execinfra/processorsbase.go
@@ -791,7 +791,7 @@ func (pb *ProcessorBase) InitWithEvalCtx(
 		return err
 	}
 	pb.SemaCtx = tree.MakeSemaContext()
-	pb.SemaCtx.TypeResolver = resolver
+	pb.SemaCtx.TypeResolver = &resolver
 
 	return pb.OutputHelper.Init(post, coreOutputTypes, &pb.SemaCtx, pb.EvalCtx)
 }

diff --git a/pkg/sql/explain_plan.go b/pkg/sql/explain_plan.go
@@ -90,9 +90,8 @@ func (e *explainPlanNode) startExec(params runParams) error {
 			distSQLPlanner.finalizePlanWithRowCount(planCtx, physicalPlan, plan.mainRowCount)
 			ob.AddDistribution(physicalPlan.Distribution.String())
 			flows := physicalPlan.GenerateFlowSpecs()
-			flowCtx := newFlowCtxForExplainPurposes(planCtx, params.p)
 
-			ctxSessionData := flowCtx.EvalCtx.SessionData()
+			ctxSessionData := planCtx.EvalContext().SessionData()
 			var willVectorize bool
 			if ctxSessionData.VectorizeMode == sessiondatapb.VectorizeOff {
 				willVectorize = false