Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statusccl: temporarily serve /_status/nodes to tenants #93268

Merged
merged 1 commit into from
Dec 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion pkg/ccl/serverccl/statusccl/tenant_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,21 @@ func TestTenantStatusAPI(t *testing.T) {
StoreDisableCoalesceAdjacent: true,
}

testHelper := serverccl.NewTestTenantHelper(t, 3 /* tenantClusterSize */, knobs)
tenantLocalities := []roachpb.Locality{
{Tiers: []roachpb.Tier{{Key: "region", Value: "gcp-us-west1"}}},
{Tiers: []roachpb.Tier{{Key: "region", Value: "gcp-us-central1"}}},
{Tiers: []roachpb.Tier{{Key: "region", Value: "gcp-us-east1"}}},
}

testHelper := serverccl.NewTestTenantHelperWithTenantArgs(
t,
3, /* tenantClusterSize */
knobs,
func(tenantIdx int, tenantArgs *base.TestTenantArgs) {
tenantArgs.Locality = tenantLocalities[tenantIdx]
},
)

defer testHelper.Cleanup(ctx, t)

t.Run("reset_sql_stats", func(t *testing.T) {
Expand Down Expand Up @@ -114,6 +128,10 @@ func TestTenantStatusAPI(t *testing.T) {
t.Run("tenant_logs", func(t *testing.T) {
testTenantLogs(ctx, t, testHelper)
})

t.Run("tenant_nodes", func(t *testing.T) {
testTenantNodes(ctx, t, testHelper, tenantLocalities)
})
}

func testTenantLogs(ctx context.Context, t *testing.T, helper serverccl.TenantTestHelper) {
Expand Down Expand Up @@ -1239,6 +1257,37 @@ func testTenantAuthOnStatements(
require.NoError(t, err)
}

func testTenantNodes(
ctx context.Context,
t *testing.T,
helper serverccl.TenantTestHelper,
tenantLocalities []roachpb.Locality,
) {
// TODO(todd): Get this test working over HTTP, #93267.
//
// When I first tried, I ran into errors about the SQL user
// `authentic_user_noadmin` already existing, but at first glance it seemed
// that the sync.Once mechanism used in its setup had already accounted
// for that scenario; that we're properly passing by reference all the way
// through; and that these tests aren't being run in parallel.
tenantA := helper.TestCluster().TenantStatusSrv(0)
resp, err := tenantA.Nodes(ctx, &serverpb.NodesRequest{})
require.NoError(t, err)

expected := map[roachpb.NodeID]roachpb.Locality{
1: tenantLocalities[0],
2: tenantLocalities[1],
3: tenantLocalities[2],
}

actual := make(map[roachpb.NodeID]roachpb.Locality)
for _, node := range resp.Nodes {
actual[node.Desc.NodeID] = node.Desc.Locality
}

require.Equal(t, expected, actual)
}

// assertStartKeyInRange compares the pretty printed startKey with the provided
// tenantPrefix key, ensuring that the startKey starts with the tenantPrefix.
func assertStartKeyInRange(t *testing.T, startKey string, tenantPrefix roachpb.Key) {
Expand Down
33 changes: 23 additions & 10 deletions pkg/ccl/serverccl/tenant_test_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,11 @@ type TestTenant interface {
var _ TestTenant = &testTenant{}

func newTestTenant(
t *testing.T,
server serverutils.TestServerInterface,
tenantID roachpb.TenantID,
knobs base.TestingKnobs,
t *testing.T, server serverutils.TestServerInterface, args base.TestTenantArgs,
) TestTenant {
t.Helper()

tenantParams := tests.CreateTestTenantParams(tenantID)
tenantParams.TestingKnobs = knobs

tenant, tenantConn := serverutils.StartTenant(t, server, tenantParams)
tenant, tenantConn := serverutils.StartTenant(t, server, args)
sqlDB := sqlutils.MakeSQLRunner(tenantConn)
status := tenant.StatusServer().(serverpb.SQLStatusServer)
sqlStats := tenant.PGServer().(*pgwire.Server).SQLServer.
Expand Down Expand Up @@ -151,6 +145,20 @@ func NewTestTenantHelper(
) TenantTestHelper {
t.Helper()

return NewTestTenantHelperWithTenantArgs(t, tenantClusterSize, knobs, func(int, *base.TestTenantArgs) {})
}

// NewTestTenantHelperWithTenantArgs constructs a TenantTestHelper instance,
// offering the caller the opportunity to modify the arguments passed when
// starting each tenant.
func NewTestTenantHelperWithTenantArgs(
t *testing.T,
tenantClusterSize int,
knobs base.TestingKnobs,
customizeTenantArgs func(tenantIdx int, tenantArgs *base.TestTenantArgs),
) TenantTestHelper {
t.Helper()

params, _ := tests.CreateTestServerParams()
params.Knobs = knobs
// We're running tenant tests, no need for a default tenant.
Expand All @@ -168,6 +176,7 @@ func NewTestTenantHelper(
tenantClusterSize,
security.EmbeddedTenantIDs()[0],
knobs,
customizeTenantArgs,
),
// Spin up a small tenant cluster under a different tenant ID to test
// tenant isolation.
Expand All @@ -177,6 +186,7 @@ func NewTestTenantHelper(
1, /* tenantClusterSize */
security.EmbeddedTenantIDs()[1],
knobs,
func(int, *base.TestTenantArgs) {},
),
}
}
Expand Down Expand Up @@ -224,13 +234,16 @@ func newTenantClusterHelper(
tenantClusterSize int,
tenantID uint64,
knobs base.TestingKnobs,
customizeTenantArgs func(tenantIdx int, tenantArgs *base.TestTenantArgs),
) TenantClusterHelper {
t.Helper()

var cluster tenantCluster = make([]TestTenant, tenantClusterSize)
for i := 0; i < tenantClusterSize; i++ {
cluster[i] =
newTestTenant(t, server, roachpb.MustMakeTenantID(tenantID), knobs)
args := tests.CreateTestTenantParams(roachpb.MustMakeTenantID(tenantID))
args.TestingKnobs = knobs
customizeTenantArgs(i, &args)
cluster[i] = newTestTenant(t, server, args)
}

return cluster
Expand Down
1 change: 1 addition & 0 deletions pkg/server/serverpb/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ type SQLStatusServer interface {
UserSQLRoles(context.Context, *UserSQLRolesRequest) (*UserSQLRolesResponse, error)
TxnIDResolution(context.Context, *TxnIDResolutionRequest) (*TxnIDResolutionResponse, error)
TransactionContentionEvents(context.Context, *TransactionContentionEventsRequest) (*TransactionContentionEventsResponse, error)
Nodes(context.Context, *NodesRequest) (*NodesResponse, error)
NodesList(context.Context, *NodesListRequest) (*NodesListResponse, error)
ListExecutionInsights(context.Context, *ListExecutionInsightsRequest) (*ListExecutionInsightsResponse, error)
LogFilesList(context.Context, *LogFilesListRequest) (*LogFilesListResponse, error)
Expand Down
38 changes: 38 additions & 0 deletions pkg/server/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -1491,6 +1491,44 @@ func (s *statusServer) NodesList(
return s.serverIterator.nodesList(ctx)
}

// Nodes returns a limited subset of the full NodesResponse.
//
// Its existence is a stop-gap measure to support running much of our UI code
// on multiregion tenant clusters. Longer-term, we will need to reconsider
// the overall architecture (e.g. store node ids in sqlstats, map node id ->
// region at UI display time) because sql instances are far more ephemeral
// than cluster nodes, and this Nodes endpoint is only able to answer for
// currently-live sql instances. What I think we want is to store locality
// information directly in the sqlstats tables, sourced from some new tracing
// in the distsql flow.
func (s *statusServer) Nodes(
ctx context.Context, req *serverpb.NodesRequest,
) (*serverpb.NodesResponse, error) {
ctx = propagateGatewayMetadata(ctx)
ctx = s.AnnotateCtx(ctx)

if err := s.privilegeChecker.requireViewActivityOrViewActivityRedactedPermission(ctx); err != nil {
// NB: not using serverError() here since the priv checker
// already returns a proper gRPC error status.
return nil, err
}

instances, err := s.sqlServer.sqlInstanceReader.GetAllInstances(ctx)
if err != nil {
return nil, err
}
var resp serverpb.NodesResponse
for _, instance := range instances {
resp.Nodes = append(resp.Nodes, statuspb.NodeStatus{
Desc: roachpb.NodeDescriptor{
NodeID: roachpb.NodeID(instance.InstanceID),
Locality: instance.Locality,
},
})
}
return &resp, err
}

// Nodes returns all node statuses.
//
// Do not use this method inside the server code! Use
Expand Down
5 changes: 5 additions & 0 deletions pkg/ui/workspaces/cluster-ui/src/util/proto.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,10 @@ describe("Proto utils", () => {
statusWithRolledMetrics.metrics,
);
});

it("does not explode when node fields are missing", () => {
const emptyNodeStatus: Partial<INodeStatus> = {};
assert.deepEqual(rollupStoreMetrics(emptyNodeStatus), {});
});
});
});
3 changes: 3 additions & 0 deletions pkg/ui/workspaces/cluster-ui/src/util/proto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ export type StatusMetrics = typeof nodeStatus.metrics;
* with metrics from `store_statuses.metrics` object.
*/
export function rollupStoreMetrics(ns: INodeStatus): StatusMetrics {
if (!ns.store_statuses) {
return ns.metrics || {};
}
return ns.store_statuses
.map(ss => ss.metrics)
.reduce((acc, i) => {
Expand Down