diff --git a/pkg/ccl/serverccl/statusccl/tenant_status_test.go b/pkg/ccl/serverccl/statusccl/tenant_status_test.go index 6c7409451487..37e3c90b0a09 100644 --- a/pkg/ccl/serverccl/statusccl/tenant_status_test.go +++ b/pkg/ccl/serverccl/statusccl/tenant_status_test.go @@ -60,7 +60,21 @@ func TestTenantStatusAPI(t *testing.T) { StoreDisableCoalesceAdjacent: true, } - testHelper := serverccl.NewTestTenantHelper(t, 3 /* tenantClusterSize */, knobs) + tenantLocalities := []roachpb.Locality{ + {Tiers: []roachpb.Tier{{Key: "region", Value: "gcp-us-west1"}}}, + {Tiers: []roachpb.Tier{{Key: "region", Value: "gcp-us-central1"}}}, + {Tiers: []roachpb.Tier{{Key: "region", Value: "gcp-us-east1"}}}, + } + + testHelper := serverccl.NewTestTenantHelperWithTenantArgs( + t, + 3, /* tenantClusterSize */ + knobs, + func(tenantIdx int, tenantArgs *base.TestTenantArgs) { + tenantArgs.Locality = tenantLocalities[tenantIdx] + }, + ) + defer testHelper.Cleanup(ctx, t) t.Run("reset_sql_stats", func(t *testing.T) { @@ -114,6 +128,10 @@ func TestTenantStatusAPI(t *testing.T) { t.Run("tenant_logs", func(t *testing.T) { testTenantLogs(ctx, t, testHelper) }) + + t.Run("tenant_nodes", func(t *testing.T) { + testTenantNodes(ctx, t, testHelper, tenantLocalities) + }) } func testTenantLogs(ctx context.Context, t *testing.T, helper serverccl.TenantTestHelper) { @@ -1239,6 +1257,37 @@ func testTenantAuthOnStatements( require.NoError(t, err) } +func testTenantNodes( + ctx context.Context, + t *testing.T, + helper serverccl.TenantTestHelper, + tenantLocalities []roachpb.Locality, +) { + // TODO(todd): Get this test working over HTTP, #93267. + // + // When I first tried, I ran into errors about the SQL user + // `authentic_user_noadmin` already existing, but at first glance it seemed + // that the sync.Once mechanism used in its setup had already accounted + // for that scenario; that we're properly passing by reference all the way + // through; and that these tests aren't being run in parallel. + tenantA := helper.TestCluster().TenantStatusSrv(0) + resp, err := tenantA.Nodes(ctx, &serverpb.NodesRequest{}) + require.NoError(t, err) + + expected := map[roachpb.NodeID]roachpb.Locality{ + 1: tenantLocalities[0], + 2: tenantLocalities[1], + 3: tenantLocalities[2], + } + + actual := make(map[roachpb.NodeID]roachpb.Locality) + for _, node := range resp.Nodes { + actual[node.Desc.NodeID] = node.Desc.Locality + } + + require.Equal(t, expected, actual) +} + // assertStartKeyInRange compares the pretty printed startKey with the provided // tenantPrefix key, ensuring that the startKey starts with the tenantPrefix. func assertStartKeyInRange(t *testing.T, startKey string, tenantPrefix roachpb.Key) { diff --git a/pkg/ccl/serverccl/tenant_test_utils.go b/pkg/ccl/serverccl/tenant_test_utils.go index 39d01ba19ef1..d955e29fdf1d 100644 --- a/pkg/ccl/serverccl/tenant_test_utils.go +++ b/pkg/ccl/serverccl/tenant_test_utils.go @@ -93,17 +93,11 @@ type TestTenant interface { var _ TestTenant = &testTenant{} func newTestTenant( - t *testing.T, - server serverutils.TestServerInterface, - tenantID roachpb.TenantID, - knobs base.TestingKnobs, + t *testing.T, server serverutils.TestServerInterface, args base.TestTenantArgs, ) TestTenant { t.Helper() - tenantParams := tests.CreateTestTenantParams(tenantID) - tenantParams.TestingKnobs = knobs - - tenant, tenantConn := serverutils.StartTenant(t, server, tenantParams) + tenant, tenantConn := serverutils.StartTenant(t, server, args) sqlDB := sqlutils.MakeSQLRunner(tenantConn) status := tenant.StatusServer().(serverpb.SQLStatusServer) sqlStats := tenant.PGServer().(*pgwire.Server).SQLServer. @@ -151,6 +145,20 @@ func NewTestTenantHelper( ) TenantTestHelper { t.Helper() + return NewTestTenantHelperWithTenantArgs(t, tenantClusterSize, knobs, func(int, *base.TestTenantArgs) {}) +} + +// NewTestTenantHelperWithTenantArgs constructs a TenantTestHelper instance, +// offering the caller the opportunity to modify the arguments passed when +// starting each tenant. +func NewTestTenantHelperWithTenantArgs( + t *testing.T, + tenantClusterSize int, + knobs base.TestingKnobs, + customizeTenantArgs func(tenantIdx int, tenantArgs *base.TestTenantArgs), +) TenantTestHelper { + t.Helper() + params, _ := tests.CreateTestServerParams() params.Knobs = knobs // We're running tenant tests, no need for a default tenant. @@ -168,6 +176,7 @@ func NewTestTenantHelper( tenantClusterSize, security.EmbeddedTenantIDs()[0], knobs, + customizeTenantArgs, ), // Spin up a small tenant cluster under a different tenant ID to test // tenant isolation. @@ -177,6 +186,7 @@ func NewTestTenantHelper( 1, /* tenantClusterSize */ security.EmbeddedTenantIDs()[1], knobs, + func(int, *base.TestTenantArgs) {}, ), } } @@ -224,13 +234,16 @@ func newTenantClusterHelper( tenantClusterSize int, tenantID uint64, knobs base.TestingKnobs, + customizeTenantArgs func(tenantIdx int, tenantArgs *base.TestTenantArgs), ) TenantClusterHelper { t.Helper() var cluster tenantCluster = make([]TestTenant, tenantClusterSize) for i := 0; i < tenantClusterSize; i++ { - cluster[i] = - newTestTenant(t, server, roachpb.MustMakeTenantID(tenantID), knobs) + args := tests.CreateTestTenantParams(roachpb.MustMakeTenantID(tenantID)) + args.TestingKnobs = knobs + customizeTenantArgs(i, &args) + cluster[i] = newTestTenant(t, server, args) } return cluster diff --git a/pkg/server/serverpb/status.go b/pkg/server/serverpb/status.go index 1885f5040993..74a4c064b874 100644 --- a/pkg/server/serverpb/status.go +++ b/pkg/server/serverpb/status.go @@ -39,6 +39,7 @@ type SQLStatusServer interface { UserSQLRoles(context.Context, *UserSQLRolesRequest) (*UserSQLRolesResponse, error) TxnIDResolution(context.Context, *TxnIDResolutionRequest) (*TxnIDResolutionResponse, error) TransactionContentionEvents(context.Context, *TransactionContentionEventsRequest) (*TransactionContentionEventsResponse, error) + Nodes(context.Context, *NodesRequest) (*NodesResponse, error) NodesList(context.Context, *NodesListRequest) (*NodesListResponse, error) ListExecutionInsights(context.Context, *ListExecutionInsightsRequest) (*ListExecutionInsightsResponse, error) LogFilesList(context.Context, *LogFilesListRequest) (*LogFilesListResponse, error) diff --git a/pkg/server/status.go b/pkg/server/status.go index d7d951ef0153..d6c22e8bdb50 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -1491,6 +1491,44 @@ func (s *statusServer) NodesList( return s.serverIterator.nodesList(ctx) } +// Nodes returns a limited subset of the full NodesResponse. +// +// Its existence is a stop-gap measure to support running much of our UI code +// on multiregion tenant clusters. Longer-term, we will need to reconsider +// the overall architecture (e.g. store node ids in sqlstats, map node id -> +// region at UI display time) because sql instances are far more ephemeral +// than cluster nodes, and this Nodes endpoint is only able to answer for +// currently-live sql instances. What I think we want is to store locality +// information directly in the sqlstats tables, sourced from some new tracing +// in the distsql flow. +func (s *statusServer) Nodes( + ctx context.Context, req *serverpb.NodesRequest, +) (*serverpb.NodesResponse, error) { + ctx = propagateGatewayMetadata(ctx) + ctx = s.AnnotateCtx(ctx) + + if err := s.privilegeChecker.requireViewActivityOrViewActivityRedactedPermission(ctx); err != nil { + // NB: not using serverError() here since the priv checker + // already returns a proper gRPC error status. + return nil, err + } + + instances, err := s.sqlServer.sqlInstanceReader.GetAllInstances(ctx) + if err != nil { + return nil, err + } + var resp serverpb.NodesResponse + for _, instance := range instances { + resp.Nodes = append(resp.Nodes, statuspb.NodeStatus{ + Desc: roachpb.NodeDescriptor{ + NodeID: roachpb.NodeID(instance.InstanceID), + Locality: instance.Locality, + }, + }) + } + return &resp, err +} + // Nodes returns all node statuses. // // Do not use this method inside the server code! Use diff --git a/pkg/ui/workspaces/cluster-ui/src/util/proto.spec.ts b/pkg/ui/workspaces/cluster-ui/src/util/proto.spec.ts index c63d1f19ae92..a84385e3b54c 100644 --- a/pkg/ui/workspaces/cluster-ui/src/util/proto.spec.ts +++ b/pkg/ui/workspaces/cluster-ui/src/util/proto.spec.ts @@ -66,5 +66,10 @@ describe("Proto utils", () => { statusWithRolledMetrics.metrics, ); }); + + it("does not explode when node fields are missing", () => { + const emptyNodeStatus: Partial = {}; + assert.deepEqual(rollupStoreMetrics(emptyNodeStatus), {}); + }); }); }); diff --git a/pkg/ui/workspaces/cluster-ui/src/util/proto.ts b/pkg/ui/workspaces/cluster-ui/src/util/proto.ts index 6e6058191a91..b48dfb9209d4 100644 --- a/pkg/ui/workspaces/cluster-ui/src/util/proto.ts +++ b/pkg/ui/workspaces/cluster-ui/src/util/proto.ts @@ -21,6 +21,9 @@ export type StatusMetrics = typeof nodeStatus.metrics; * with metrics from `store_statuses.metrics` object. */ export function rollupStoreMetrics(ns: INodeStatus): StatusMetrics { + if (!ns.store_statuses) { + return ns.metrics || {}; + } return ns.store_statuses .map(ss => ss.metrics) .reduce((acc, i) => {