diff --git a/pkg/server/config.go b/pkg/server/config.go index c38172c19d93..7d9f7d7a2217 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -517,6 +517,10 @@ type SQLConfig struct { // LocalKVServerInfo is set in configs for shared-process tenants. It contains // info for making Batch requests to the local KV server without using gRPC. LocalKVServerInfo *LocalKVServerInfo + + // NodeMetricsRecorder is the node's MetricRecorder; the tenant's metrics will + // be recorded with it. Nil if this is not a shared-process tenant. + NodeMetricsRecorder *status.MetricsRecorder } // LocalKVServerInfo is used to group information about the local KV server diff --git a/pkg/server/server_controller_new_server.go b/pkg/server/server_controller_new_server.go index a21b329120d4..b57a4e37e85b 100644 --- a/pkg/server/server_controller_new_server.go +++ b/pkg/server/server_controller_new_server.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/security/clientsecopts" "github.com/cockroachdb/cockroach/pkg/security/username" + "github.com/cockroachdb/cockroach/pkg/server/status" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/isql" @@ -145,7 +146,7 @@ func (s *Server) startTenantServerInternal( log.Infof(startCtx, "starting tenant server") // Now start the tenant proper. - tenantServer, err := NewSharedProcessTenantServer(startCtx, stopper, baseCfg, sqlCfg, s.recorder, tenantNameContainer) + tenantServer, err := NewSharedProcessTenantServer(startCtx, stopper, baseCfg, sqlCfg, tenantNameContainer) if err != nil { return nil, err } @@ -172,7 +173,7 @@ func (s *Server) makeSharedProcessTenantConfig( InternalServer: s.node, ServerInterceptors: s.grpc.serverInterceptorsInfo, } - baseCfg, sqlCfg, err := makeSharedProcessTenantServerConfig(ctx, tenantID, index, parentCfg, localServerInfo, stopper) + baseCfg, sqlCfg, err := makeSharedProcessTenantServerConfig(ctx, tenantID, index, parentCfg, localServerInfo, stopper, s.recorder) if err != nil { return BaseConfig{}, SQLConfig{}, err } @@ -188,6 +189,7 @@ func makeSharedProcessTenantServerConfig( kvServerCfg Config, kvServerInfo LocalKVServerInfo, stopper *stop.Stopper, + nodeMetricsRecorder *status.MetricsRecorder, ) (baseCfg BaseConfig, sqlCfg SQLConfig, err error) { st := cluster.MakeClusterSettings() @@ -356,6 +358,8 @@ func makeSharedProcessTenantServerConfig( // reach this KV layer without going through the network. sqlCfg.LocalKVServerInfo = &kvServerInfo + sqlCfg.NodeMetricsRecorder = nodeMetricsRecorder + return baseCfg, sqlCfg, nil } diff --git a/pkg/server/tenant.go b/pkg/server/tenant.go index f110e9b79feb..751bdf095400 100644 --- a/pkg/server/tenant.go +++ b/pkg/server/tenant.go @@ -166,7 +166,7 @@ func NewSeparateProcessTenantServer( tenantNameContainer *roachpb.TenantNameContainer, ) (*SQLServerWrapper, error) { instanceIDContainer := baseCfg.IDContainer.SwitchToSQLIDContainerForStandaloneSQLInstance() - return newTenantServer(ctx, stopper, baseCfg, sqlCfg, nil /* parentRecorder */, tenantNameContainer, instanceIDContainer) + return newTenantServer(ctx, stopper, baseCfg, sqlCfg, tenantNameContainer, instanceIDContainer) } // NewSharedProcessTenantServer creates a tenant-specific, SQL-only @@ -180,14 +180,13 @@ func NewSharedProcessTenantServer( stopper *stop.Stopper, baseCfg BaseConfig, sqlCfg SQLConfig, - parentRecorder *status.MetricsRecorder, tenantNameContainer *roachpb.TenantNameContainer, ) (*SQLServerWrapper, error) { if baseCfg.IDContainer.Get() == 0 { return nil, errors.AssertionFailedf("programming error: NewSharedProcessTenantServer called before NodeID was assigned.") } instanceIDContainer := base.NewSQLIDContainerForNode(baseCfg.IDContainer) - return newTenantServer(ctx, stopper, baseCfg, sqlCfg, parentRecorder, tenantNameContainer, instanceIDContainer) + return newTenantServer(ctx, stopper, baseCfg, sqlCfg, tenantNameContainer, instanceIDContainer) } // newTenantServer constructs a SQLServerWrapper. @@ -198,7 +197,6 @@ func newTenantServer( stopper *stop.Stopper, baseCfg BaseConfig, sqlCfg SQLConfig, - parentRecorder *status.MetricsRecorder, tenantNameContainer *roachpb.TenantNameContainer, instanceIDContainer *base.SQLIDContainer, ) (*SQLServerWrapper, error) { @@ -212,7 +210,7 @@ func newTenantServer( // Inform the server identity provider that we're operating // for a tenant server. baseCfg.idProvider.SetTenant(sqlCfg.TenantID) - args, err := makeTenantSQLServerArgs(ctx, stopper, baseCfg, sqlCfg, parentRecorder, tenantNameContainer, instanceIDContainer) + args, err := makeTenantSQLServerArgs(ctx, stopper, baseCfg, sqlCfg, tenantNameContainer, instanceIDContainer) if err != nil { return nil, err } @@ -618,6 +616,24 @@ func (s *SQLServerWrapper) PreStart(ctx context.Context) error { s.sqlServer.cfg.HTTPAdvertiseAddr, s.sqlServer.cfg.SQLAdvertiseAddr, ) + // If there's a higher-level recorder, we link our metrics registry to it. + if s.sqlCfg.NodeMetricsRecorder != nil { + s.sqlCfg.NodeMetricsRecorder.AddTenantRegistry(s.sqlCfg.TenantID, s.registry) + // TODO(aadityasondhi): Unlink the tenant registry if the tenant is stopped. + // See #97889. + } else { + // Export statistics to graphite, if enabled by configuration. We only do + // this if there isn't a higher-level recorder; if there is, that one takes + // responsibility for exporting to Graphite. + var graphiteOnce sync.Once + graphiteEndpoint.SetOnChange(&s.ClusterSettings().SV, func(context.Context) { + if graphiteEndpoint.Get(&s.ClusterSettings().SV) != "" { + graphiteOnce.Do(func() { + startGraphiteStatsExporter(workersCtx, s.stopper, s.recorder, s.ClusterSettings()) + }) + } + }) + } if !s.sqlServer.cfg.DisableRuntimeStatsMonitor { // Begin recording runtime statistics. @@ -634,16 +650,6 @@ func (s *SQLServerWrapper) PreStart(ctx context.Context) error { } } - // Export statistics to graphite, if enabled by configuration. - var graphiteOnce sync.Once - graphiteEndpoint.SetOnChange(&s.ClusterSettings().SV, func(context.Context) { - if graphiteEndpoint.Get(&s.ClusterSettings().SV) != "" { - graphiteOnce.Do(func() { - startGraphiteStatsExporter(workersCtx, s.stopper, s.recorder, s.ClusterSettings()) - }) - } - }) - // After setting modeOperational, we can block until all stores are fully // initialized. s.grpc.setMode(modeOperational) @@ -886,7 +892,6 @@ func makeTenantSQLServerArgs( stopper *stop.Stopper, baseCfg BaseConfig, sqlCfg SQLConfig, - parentRecorder *status.MetricsRecorder, tenantNameContainer *roachpb.TenantNameContainer, instanceIDContainer *base.SQLIDContainer, ) (sqlServerArgs, error) { @@ -1070,13 +1075,6 @@ func makeTenantSQLServerArgs( recorder := status.NewMetricsRecorder( sqlCfg.TenantID, tenantNameContainer, nil /* nodeLiveness */, nil, /* remoteClocks */ clock.WallClock(), st) - // Note: If the tenant is in-process, we attach this tenant's metric - // registry to the parentRecorder held by the system tenant. This - // ensures that generated Prometheus metrics from the system tenant - // include metrics from this in-process tenant. - if parentRecorder != nil { - parentRecorder.AddTenantRegistry(sqlCfg.TenantID, registry) - } var runtime *status.RuntimeStatSampler if baseCfg.RuntimeStatSampler != nil {