From f294257bebed1902b27d150e53b2d1113b1fe555 Mon Sep 17 00:00:00 2001 From: Alex Barganier Date: Wed, 23 Feb 2022 15:25:55 -0400 Subject: [PATCH 1/3] status: implement `TenantRanges` on tenant & KV status servers While a system tenant or host cluster has full access to the debug information provided by the `Ranges` endpoint in the KV status server, tenants currently have no way to fetch metadata about their own ranges. We'd like to expose debug information in the form of range metadata to tenants, so that they can use this information in `debug.zip`, which is currently in the process of being exposed (albeit a subset of the full functionality) to tenants. To provide this, we implement `TenantRanges` and make it accessible within the tenant status server. The endpoint will reach across the Tenant/KV boundary via the tenant Connector interface to call the associated KV status server implementation. From here, we can lean on the existing `Ranges` endpoint. We can fan out requests to `Ranges` for all nodes containing replicas of ranges within the tenant's keyspace. The caller can then transform the metadata into a more tenant-appropriate format (e.g. avoiding concepts that break the 'tenant boundary', such as node IDs, replication information, etc). Unlike the previous `Ranges` endpoint, `TenantRanges` only returns metadata for the *leaseholder* replica for each range, instead of metadata for all replicas. This, again, is to best align with the boundaries of the tenant abstraction and only provide information about the primary replica serving reads/writes at request time. The results from all nodes are then combined, and returned back to the tenant caller. This commit does not contain pagination - this functionality will come in a follow up PR where we offset based on range startKeys. Release note (api change): The status api will now have a newly exposed `_status/tenant_ranges` endpoint available to tenants, although it's not currently used except for debug.zip (see following commit). Release justification: low-risk updates to new functionality --- docs/generated/http/full.md | 172 ++++++++++++++++++ pkg/ccl/kvccl/kvtenantccl/connector.go | 20 ++ pkg/ccl/serverccl/statusccl/BUILD.bazel | 1 + .../serverccl/statusccl/tenant_status_test.go | 62 +++++++ pkg/kv/kvclient/kvtenant/connector.go | 5 + pkg/rpc/auth_tenant.go | 13 ++ pkg/server/server.go | 1 + pkg/server/server_sql.go | 4 + pkg/server/serverpb/status.go | 9 + pkg/server/serverpb/status.proto | 77 ++++++++ pkg/server/status.go | 146 +++++++++++++++ pkg/server/status_test.go | 22 +++ pkg/server/tenant.go | 1 + pkg/server/tenant_status.go | 14 ++ pkg/server/testserver.go | 10 + pkg/sql/exec_util.go | 25 +-- pkg/testutils/serverutils/test_tenant_shim.go | 4 + 17 files changed, 574 insertions(+), 12 deletions(-) diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md index 23df3b36fc2f..fa002d90ed4d 100644 --- a/docs/generated/http/full.md +++ b/docs/generated/http/full.md @@ -1623,6 +1623,178 @@ Tier represents one level of the locality hierarchy. +## TenantRanges + +`GET /_status/tenant_ranges` + +TenantRanges requests internal details about all range replicas within +the tenant's keyspace. 
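+
+A minimal sketch of how a client might consume this endpoint over gRPC
+(imports elided; `fetchTenantRanges` is an illustrative name, not part of
+the patch, and the caller must be an admin user on the tenant):
+
+```go
+// fetchTenantRanges assumes conn is an authenticated gRPC connection to a
+// server exposing the Status service.
+func fetchTenantRanges(ctx context.Context, conn *grpc.ClientConn) error {
+	client := serverpb.NewStatusClient(conn)
+	resp, err := client.TenantRanges(ctx, &serverpb.TenantRangesRequest{})
+	if err != nil {
+		return err
+	}
+	// Ranges come back grouped by locality (default tier key `az`).
+	for locality, rangeList := range resp.RangesByLocality {
+		fmt.Printf("%s: %d ranges\n", locality, len(rangeList.Ranges))
+	}
+	return nil
+}
+```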
+ +Support status: [reserved](#support-status) + +#### Request Parameters + + + + + + + + + + + + + +#### Response Parameters + + + + + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| ranges_by_locality | [TenantRangesResponse.RangesByLocalityEntry](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangesResponse.RangesByLocalityEntry) | repeated | ranges_by_locality maps each range replica to its specified availability zone, as defined within the replica's locality metadata (default key `az`). Replicas without the default available zone key set will fall under the `locality-unset` key. | [reserved](#support-status) | + + + + + + + +#### TenantRangesResponse.RangesByLocalityEntry + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| key | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | | | +| value | [TenantRangesResponse.TenantRangeList](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangesResponse.TenantRangeList) | | | | + + + + + + +#### TenantRangesResponse.TenantRangeList + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| ranges | [TenantRangeInfo](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangeInfo) | repeated | | [reserved](#support-status) | + + + + + + +#### TenantRangeInfo + +TenantRangeInfo provides metadata about a specific range replica, +where concepts not considered to be relevant within the tenant +abstraction (e.g. NodeIDs) are omitted. Instead, Locality information +is used to distinguish replicas. + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| range_id | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The ID of the Range. | [reserved](#support-status) | +| span | [PrettySpan](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.PrettySpan) | | The pretty-printed key span of the range. | [reserved](#support-status) | +| locality | [Locality](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.Locality) | | Any locality information associated with this specific replica. | [reserved](#support-status) | +| is_leaseholder | [bool](#cockroach.server.serverpb.TenantRangesResponse-bool) | | Whether the range's specific replica is a leaseholder. | [reserved](#support-status) | +| lease_valid | [bool](#cockroach.server.serverpb.TenantRangesResponse-bool) | | Whether the range's specific replica holds a valid lease. | [reserved](#support-status) | +| range_stats | [RangeStatistics](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.RangeStatistics) | | Statistics about the range replica, e.g. QPS, WPS. | [reserved](#support-status) | +| mvcc_stats | [cockroach.storage.enginepb.MVCCStats](#cockroach.server.serverpb.TenantRangesResponse-cockroach.storage.enginepb.MVCCStats) | | MVCC stats about the range replica, e.g. live_bytes. | [reserved](#support-status) | +| read_latches | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | Read count as reported by the range replica's spanlatch.Manager. | [reserved](#support-status) | +| write_latches | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | Write count as reported by the range replica's spanlatch.Manager. 
| [reserved](#support-status) | +| locks | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The number of locks as reported by the range replica's lockTable. | [reserved](#support-status) | +| locks_with_wait_queues | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The number of locks with non-empty wait-queues as reported by the range replica's lockTable | [reserved](#support-status) | +| lock_wait_queue_waiters | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The aggregate number of waiters in wait-queues across all locks as reported by the range replica's lockTable | [reserved](#support-status) | +| top_k_locks_by_wait_queue_waiters | [TenantRangeInfo.LockInfo](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.TenantRangeInfo.LockInfo) | repeated | The top-k locks with the most waiters (readers + writers) in their wait-queue, ordered in descending order. | [reserved](#support-status) | + + + + + + +#### PrettySpan + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| start_key | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | | [reserved](#support-status) | +| end_key | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | | [reserved](#support-status) | + + + + + + +#### Locality + +Locality is an ordered set of key value Tiers that describe a node's +location. The tier keys should be the same across all nodes. + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| tiers | [Tier](#cockroach.server.serverpb.TenantRangesResponse-cockroach.server.serverpb.Tier) | repeated | | [reserved](#support-status) | + + + + + + +#### Tier + +Tier represents one level of the locality hierarchy. + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| key | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | Key is the name of tier and should match all other nodes. | [reserved](#support-status) | +| value | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | Value is node specific value corresponding to the key. | [reserved](#support-status) | + + + + + + +#### RangeStatistics + +RangeStatistics describes statistics reported by a range. For internal use +only. + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| queries_per_second | [double](#cockroach.server.serverpb.TenantRangesResponse-double) | | Queries per second served by this range.

Note that queries per second will only be known by the leaseholder. All other replicas will report it as 0. | [reserved](#support-status) | +| writes_per_second | [double](#cockroach.server.serverpb.TenantRangesResponse-double) | | Writes per second served by this range. | [reserved](#support-status) | + + + + + + +#### TenantRangeInfo.LockInfo + +LockInfo provides metadata about the state of a single lock +in the range replica's lockTable. + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| pretty_key | [string](#cockroach.server.serverpb.TenantRangesResponse-string) | | The lock's key in pretty format. | [reserved](#support-status) | +| key | [bytes](#cockroach.server.serverpb.TenantRangesResponse-bytes) | | The lock's key. | [reserved](#support-status) | +| held | [bool](#cockroach.server.serverpb.TenantRangesResponse-bool) | | Is the lock actively held by a transaction, or just a reservation? | [reserved](#support-status) | +| waiters | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The number of waiters in the lock's wait queue. | [reserved](#support-status) | +| waiting_readers | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The number of waiting readers in the lock's wait queue. | [reserved](#support-status) | +| waiting_writers | [int64](#cockroach.server.serverpb.TenantRangesResponse-int64) | | The number of waiting writers in the lock's wait queue. | [reserved](#support-status) | + + + + + + ## Gossip `GET /_status/gossip/{node_id}` diff --git a/pkg/ccl/kvccl/kvtenantccl/connector.go b/pkg/ccl/kvccl/kvtenantccl/connector.go index ddce7fd20733..1b28cad4f8e5 100644 --- a/pkg/ccl/kvccl/kvtenantccl/connector.go +++ b/pkg/ccl/kvccl/kvtenantccl/connector.go @@ -120,6 +120,11 @@ var _ config.SystemConfigProvider = (*Connector)(nil) // multi-region primitives. var _ serverpb.RegionsServer = (*Connector)(nil) +// Connector is capable of finding debug information about the current +// tenant within the cluster. This is necessary for things such as +// debug zip and range reports. +var _ serverpb.TenantStatusServer = (*Connector)(nil) + // Connector is capable of accessing span configurations for secondary tenants. var _ spanconfig.KVAccessor = (*Connector)(nil) @@ -428,6 +433,21 @@ func (c *Connector) Regions( return resp, nil } +// TenantRanges implements the serverpb.TenantStatusServer interface +func (c *Connector) TenantRanges( + ctx context.Context, req *serverpb.TenantRangesRequest, +) (resp *serverpb.TenantRangesResponse, _ error) { + if err := c.withClient(ctx, func(ctx context.Context, c *client) error { + var err error + resp, err = c.TenantRanges(ctx, req) + return err + }); err != nil { + return nil, err + } + + return resp, nil +} + // FirstRange implements the kvcoord.RangeDescriptorDB interface. 
func (c *Connector) FirstRange() (*roachpb.RangeDescriptor, error) { return nil, status.Error(codes.Unauthenticated, "kvtenant.Proxy does not have access to FirstRange") diff --git a/pkg/ccl/serverccl/statusccl/BUILD.bazel b/pkg/ccl/serverccl/statusccl/BUILD.bazel index 132b69ba767b..034fefe8d642 100644 --- a/pkg/ccl/serverccl/statusccl/BUILD.bazel +++ b/pkg/ccl/serverccl/statusccl/BUILD.bazel @@ -36,6 +36,7 @@ go_test( "//pkg/ccl", "//pkg/ccl/kvccl", "//pkg/ccl/utilccl", + "//pkg/keys", "//pkg/roachpb", "//pkg/rpc", "//pkg/security", diff --git a/pkg/ccl/serverccl/statusccl/tenant_status_test.go b/pkg/ccl/serverccl/statusccl/tenant_status_test.go index 639415b8efce..9179ddc5f7dc 100644 --- a/pkg/ccl/serverccl/statusccl/tenant_status_test.go +++ b/pkg/ccl/serverccl/statusccl/tenant_status_test.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/base" _ "github.com/cockroachdb/cockroach/pkg/ccl/kvccl" + "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/security" "github.com/cockroachdb/cockroach/pkg/server/serverpb" @@ -96,6 +97,10 @@ func TestTenantStatusAPI(t *testing.T) { t.Run("txn_id_resolution", func(t *testing.T) { testTxnIDResolutionRPC(ctx, t, testHelper) }) + + t.Run("tenant_ranges", func(t *testing.T) { + testTenantRangesRPC(ctx, t, testHelper) + }) } func TestTenantCannotSeeNonTenantStats(t *testing.T) { @@ -978,3 +983,60 @@ func testTxnIDResolutionRPC(ctx context.Context, t *testing.T, helper *tenantTes run(sqlConn, status, 1 /* coordinatorNodeID */) }) } + +func testTenantRangesRPC(_ context.Context, t *testing.T, helper *tenantTestHelper) { + tenantA := helper.testCluster().tenant(0).tenant.TenantStatusServer().(serverpb.TenantStatusServer) + keyPrefixForA := keys.MakeTenantPrefix(helper.testCluster().tenant(0).tenant.RPCContext().TenantID) + keyPrefixEndForA := keyPrefixForA.PrefixEnd() + + tenantB := helper.controlCluster().tenant(0).tenant.TenantStatusServer().(serverpb.TenantStatusServer) + keyPrefixForB := keys.MakeTenantPrefix(helper.controlCluster().tenant(0).tenant.RPCContext().TenantID) + keyPrefixEndForB := keyPrefixForB.PrefixEnd() + + resp, err := tenantA.TenantRanges(context.Background(), &serverpb.TenantRangesRequest{}) + require.NoError(t, err) + require.NotEmpty(t, resp.RangesByLocality) + for localityKey, rangeList := range resp.RangesByLocality { + require.NotEmpty(t, localityKey) + for _, r := range rangeList.Ranges { + assertStartKeyInRange(t, r.Span.StartKey, keyPrefixForA) + assertEndKeyInRange(t, r.Span.EndKey, keyPrefixForA, keyPrefixEndForA) + } + } + + resp, err = tenantB.TenantRanges(context.Background(), &serverpb.TenantRangesRequest{}) + require.NoError(t, err) + require.NotEmpty(t, resp.RangesByLocality) + for localityKey, rangeList := range resp.RangesByLocality { + require.NotEmpty(t, localityKey) + for _, r := range rangeList.Ranges { + assertStartKeyInRange(t, r.Span.StartKey, keyPrefixForB) + assertEndKeyInRange(t, r.Span.EndKey, keyPrefixForB, keyPrefixEndForB) + } + } +} + +// assertStartKeyInRange compares the pretty printed startKey with the provided +// tenantPrefix key, ensuring that the startKey starts with the tenantPrefix. 
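+// The check is purely textual: both keys are compared in their
+// pretty-printed form, so a simple string prefix match suffices.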
+func assertStartKeyInRange(t *testing.T, startKey string, tenantPrefix roachpb.Key) { + require.Truef(t, strings.Index(startKey, tenantPrefix.String()) == 0, + fmt.Sprintf("start key %s is outside of the tenant's keyspace (prefix: %v)", + startKey, tenantPrefix.String())) +} + +// assertEndKeyInRange compares the pretty printed endKey with the provided +// tenantPrefix and tenantPrefixEnd keys. Ensures that the key starts with +// either the tenantPrefix, or the tenantPrefixEnd (valid as end keys are +// exclusive). +func assertEndKeyInRange( + t *testing.T, endKey string, tenantPrefix roachpb.Key, tenantPrefixEnd roachpb.Key, +) { + require.Truef(t, + strings.Index(endKey, tenantPrefix.String()) == 0 || + strings.Index(endKey, tenantPrefixEnd.String()) == 0 || + // Possible if the tenant's ranges fall at the end of the entire keyspace + // range within the cluster. + endKey == "/Max", + fmt.Sprintf("end key %s is outside of the tenant's keyspace (prefix: %v, prefixEnd: %v)", + endKey, tenantPrefix.String(), tenantPrefixEnd.String())) +} diff --git a/pkg/kv/kvclient/kvtenant/connector.go b/pkg/kv/kvclient/kvtenant/connector.go index 71756dea4c67..3db120cc5140 100644 --- a/pkg/kv/kvclient/kvtenant/connector.go +++ b/pkg/kv/kvclient/kvtenant/connector.go @@ -61,6 +61,11 @@ type Connector interface { // primitives. serverpb.RegionsServer + // TenantStatusServer is the subset of the serverpb.StatusInterface that is + // used by the SQL system to query for debug information, such as tenant-specific + // range reports. + serverpb.TenantStatusServer + // TokenBucketProvider provides access to the tenant cost control token // bucket. TokenBucketProvider diff --git a/pkg/rpc/auth_tenant.go b/pkg/rpc/auth_tenant.go index d826951ab9f4..3b37eeb1dd2e 100644 --- a/pkg/rpc/auth_tenant.go +++ b/pkg/rpc/auth_tenant.go @@ -104,6 +104,9 @@ func (a tenantAuthorizer) authorize( case "/cockroach.server.serverpb.Status/CancelQuery": return a.authTenant(tenID) + case "/cockroach.server.serverpb.Status/TenantRanges": + return a.authTenantRanges(tenID) + case "/cockroach.server.serverpb.Status/CancelLocalQuery": return a.authTenant(tenID) @@ -240,6 +243,16 @@ var gossipSubscriptionPatternAllowlist = []string{ "system-db", } +// authTenantRanges authorizes the provided tenant to invoke the +// TenantRanges RPC with the provided args. It requires that an authorized +// tenantID has been set. +func (a tenantAuthorizer) authTenantRanges(tenID roachpb.TenantID) error { + if !tenID.IsSet() { + return authErrorf("tenant ranges request with unspecified tenant not permitted.") + } + return nil +} + // authTokenBucket authorizes the provided tenant to invoke the // TokenBucket RPC with the provided args. func (a tenantAuthorizer) authTokenBucket( diff --git a/pkg/server/server.go b/pkg/server/server.go index e2219ffe5a33..539e80a637cb 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -740,6 +740,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { rangeFeedFactory: rangeFeedFactory, sqlStatusServer: sStatus, regionsServer: sStatus, + tenantStatusServer: sStatus, tenantUsageServer: tenantUsage, monitorAndMetrics: sqlMonitorAndMetrics, settingsStorage: settingsWriter, diff --git a/pkg/server/server_sql.go b/pkg/server/server_sql.go index f83c3cf4eb9a..d14ed526a22a 100644 --- a/pkg/server/server_sql.go +++ b/pkg/server/server_sql.go @@ -301,6 +301,9 @@ type sqlServerArgs struct { // Used to query valid regions on the server. 
regionsServer serverpb.RegionsServer + // Used to query status information useful for debugging on the server. + tenantStatusServer serverpb.TenantStatusServer + // Used for multi-tenant cost control (on the host cluster side). tenantUsageServer multitenant.TenantUsageServer @@ -678,6 +681,7 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) { DistSender: cfg.distSender, RPCContext: cfg.rpcContext, LeaseManager: leaseMgr, + TenantStatusServer: cfg.tenantStatusServer, Clock: cfg.clock, DistSQLSrv: distSQLServer, NodesStatusServer: cfg.nodesStatusServer, diff --git a/pkg/server/serverpb/status.go b/pkg/server/serverpb/status.go index ecd0e11cb577..eb825e785a66 100644 --- a/pkg/server/serverpb/status.go +++ b/pkg/server/serverpb/status.go @@ -73,6 +73,15 @@ type RegionsServer interface { Regions(context.Context, *RegionsRequest) (*RegionsResponse, error) } +// TenantStatusServer is the subset of the serverpb.StatusInterface that is +// used by tenants to query for debug information, such as tenant-specific +// range reports. +// +// It is available for all tenants. +type TenantStatusServer interface { + TenantRanges(context.Context, *TenantRangesRequest) (*TenantRangesResponse, error) +} + // OptionalNodesStatusServer returns the wrapped NodesStatusServer, if it is // available. If it is not, an error referring to the optionally supplied issues // is returned. diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index 0fb269c462dc..d812c149a2a5 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -411,6 +411,62 @@ message PrettySpan { string end_key = 2; } +// TenantRangeInfo provides metadata about a specific range replica, +// where concepts not considered to be relevant within the tenant +// abstraction (e.g. NodeIDs) are omitted. Instead, Locality information +// is used to distinguish replicas. +message TenantRangeInfo { + // The ID of the Range. + int64 range_id = 1 [ + (gogoproto.customname) = "RangeID", + (gogoproto.casttype) = + "github.com/cockroachdb/cockroach/pkg/roachpb.RangeID" + ]; + // The pretty-printed key span of the range. + PrettySpan span = 2 [ (gogoproto.nullable) = false ]; + // Any locality information associated with this specific replica. + Locality locality = 3; + // Whether the range's specific replica is a leaseholder. + bool is_leaseholder = 4; + // Whether the range's specific replica holds a valid lease. + bool lease_valid = 5; + // Statistics about the range replica, e.g. QPS, WPS. + RangeStatistics range_stats = 6 [ (gogoproto.nullable) = false ]; + // MVCC stats about the range replica, e.g. live_bytes. + storage.enginepb.MVCCStats mvcc_stats = 7; + // Read count as reported by the range replica's spanlatch.Manager. + int64 read_latches = 8; + // Write count as reported by the range replica's spanlatch.Manager. + int64 write_latches = 9; + // The number of locks as reported by the range replica's lockTable. + int64 locks = 10; + // The number of locks with non-empty wait-queues as reported by + // the range replica's lockTable + int64 locks_with_wait_queues = 11; + // The aggregate number of waiters in wait-queues across all locks + // as reported by the range replica's lockTable + int64 lock_wait_queue_waiters = 12; + // LockInfo provides metadata about the state of a single lock + // in the range replica's lockTable. + message LockInfo { + // The lock's key in pretty format. + string pretty_key = 1; + // The lock's key. 
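+    // Raw bytes (cast to roachpb.Key); pretty_key above carries the
+    // human-readable form.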
+    bytes key = 2 [ (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.Key" ];
+    // Is the lock actively held by a transaction, or just a reservation?
+    bool held = 3;
+    // The number of waiters in the lock's wait queue.
+    int64 waiters = 4;
+    // The number of waiting readers in the lock's wait queue.
+    int64 waiting_readers = 5;
+    // The number of waiting writers in the lock's wait queue.
+    int64 waiting_writers = 6;
+  }
+  // The top-k locks with the most waiters (readers + writers) in their
+  // wait-queue, ordered in descending order.
+  repeated LockInfo top_k_locks_by_wait_queue_waiters = 13 [ (gogoproto.nullable) = false ];
+}
+
 message RangeInfo {
   PrettySpan span = 1 [ (gogoproto.nullable) = false ];
   RaftState raft_state = 2 [ (gogoproto.nullable) = false ];
@@ -467,6 +523,19 @@ message RangesResponse {
   repeated RangeInfo ranges = 1 [ (gogoproto.nullable) = false ];
 }
 
+message TenantRangesRequest {}
+
+message TenantRangesResponse {
+  message TenantRangeList {
+    repeated TenantRangeInfo ranges = 1 [ (gogoproto.nullable) = false ];
+  }
+  // ranges_by_locality maps each range replica to its specified availability
+  // zone, as defined within the replica's locality metadata (default key `az`).
+  // Replicas without the default availability zone key set will fall under the
+  // `locality-unset` key.
+  map<string, TenantRangeList> ranges_by_locality = 1 [ (gogoproto.nullable) = false ];
+}
+
 message GossipRequest {
   // node_id is a string so that "local" can be used to specify that no
   // forwarding is necessary.
@@ -1707,6 +1776,14 @@ service Status {
     };
   }
 
+  // TenantRanges requests internal details about all range replicas within
+  // the tenant's keyspace.
+  rpc TenantRanges(TenantRangesRequest) returns (TenantRangesResponse) {
+    option (google.api.http) = {
+      get : "/_status/tenant_ranges"
+    };
+  }
+
   // Gossip retrieves gossip-level details about a given node.
   rpc Gossip(GossipRequest) returns (gossip.InfoStatus) {
     option (google.api.http) = {
diff --git a/pkg/server/status.go b/pkg/server/status.go
index 431dfd7af97f..b1f36970a5dd 100644
--- a/pkg/server/status.go
+++ b/pkg/server/status.go
@@ -37,6 +37,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
 	"github.com/cockroachdb/cockroach/pkg/keys"
 	"github.com/cockroachdb/cockroach/pkg/kv"
+	"github.com/cockroachdb/cockroach/pkg/kv/kvclient"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness"
 	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb"
@@ -2052,6 +2053,151 @@ func (s *statusServer) rangesHelper(
 	return &output, next, nil
 }
 
+func (s *statusServer) TenantRanges(
+	ctx context.Context, _ *serverpb.TenantRangesRequest,
+) (*serverpb.TenantRangesResponse, error) {
+	ctx = propagateGatewayMetadata(ctx)
+	ctx = s.AnnotateCtx(ctx)
+	if _, err := s.privilegeChecker.requireAdminUser(ctx); err != nil {
+		return nil, err
+	}
+
+	tID, ok := roachpb.TenantFromContext(ctx)
+	if !ok {
+		return nil, status.Error(codes.Internal, "no tenant ID found in context")
+	}
+
+	tenantPrefix := keys.MakeTenantPrefix(tID)
+	tenantKeySpan := roachpb.Span{
+		Key:    tenantPrefix,
+		EndKey: tenantPrefix.PrefixEnd(),
+	}
+
+	// rangeIDs contains all the `roachpb.RangeID`s found to exist within the
+	// tenant's keyspace.
+	rangeIDs := make([]roachpb.RangeID, 0)
+	// replicaNodeIDs acts as a set of `roachpb.NodeID`'s. These `NodeID`s
+	// represent all nodes with a store containing a replica for the tenant.
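+	// Gathering the set up front lets the fan-out below issue a single
+	// Ranges request per node, scoped to the collected range IDs, rather
+	// than one request per replica.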
+ replicaNodeIDs := make(map[roachpb.NodeID]struct{}) + if err := s.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { + rangeKVs, err := kvclient.ScanMetaKVs(ctx, txn, tenantKeySpan) + if err != nil { + return err + } + + for _, rangeKV := range rangeKVs { + var desc roachpb.RangeDescriptor + if err := rangeKV.ValueProto(&desc); err != nil { + return err + } + rangeIDs = append(rangeIDs, desc.RangeID) + for _, rep := range desc.Replicas().Descriptors() { + _, ok := replicaNodeIDs[rep.NodeID] + if !ok { + replicaNodeIDs[rep.NodeID] = struct{}{} + } + } + } + return nil + }); err != nil { + return nil, status.Error( + codes.Internal, + errors.Wrap(err, "there was a problem with the initial fetch of range IDs").Error()) + } + + nodeResults := make([][]serverpb.RangeInfo, 0, len(replicaNodeIDs)) + for nodeID := range replicaNodeIDs { + nodeIDString := nodeID.String() + _, local, err := s.parseNodeID(nodeIDString) + if err != nil { + return nil, status.Errorf(codes.Internal, err.Error()) + } + + req := &serverpb.RangesRequest{ + NodeId: nodeIDString, + RangeIDs: rangeIDs, + } + + var resp *serverpb.RangesResponse + if local { + resp, _, err = s.rangesHelper(ctx, req, 0, 0) + if err != nil { + return nil, err + } + } else { + statusServer, err := s.dialNode(ctx, nodeID) + if err != nil { + return nil, serverError(ctx, err) + } + + resp, err = statusServer.Ranges(ctx, req) + if err != nil { + return nil, err + } + } + + nodeResults = append(nodeResults, resp.Ranges) + } + + transformTenantRange := func( + rep serverpb.RangeInfo, + ) (string, *serverpb.TenantRangeInfo) { + topKLocksByWaiters := make([]serverpb.TenantRangeInfo_LockInfo, 0, len(rep.TopKLocksByWaitQueueWaiters)) + for _, lm := range rep.TopKLocksByWaitQueueWaiters { + topKLocksByWaiters = append(topKLocksByWaiters, serverpb.TenantRangeInfo_LockInfo{ + PrettyKey: lm.Key.String(), + Key: lm.Key, + Held: lm.Held, + Waiters: lm.Waiters, + WaitingReaders: lm.WaitingReaders, + WaitingWriters: lm.WaitingWriters, + }) + } + azKey := "az" + localityKey := "locality-unset" + for _, tier := range rep.Locality.Tiers { + if tier.Key == azKey { + localityKey = tier.Value + } + } + return localityKey, &serverpb.TenantRangeInfo{ + RangeID: rep.State.Desc.RangeID, + Span: rep.Span, + Locality: rep.Locality, + IsLeaseholder: rep.IsLeaseholder, + LeaseValid: rep.LeaseValid, + RangeStats: rep.Stats, + MvccStats: rep.State.Stats, + ReadLatches: rep.ReadLatches, + WriteLatches: rep.WriteLatches, + Locks: rep.Locks, + LocksWithWaitQueues: rep.LocksWithWaitQueues, + LockWaitQueueWaiters: rep.LockWaitQueueWaiters, + TopKLocksByWaitQueueWaiters: topKLocksByWaiters, + } + } + + resp := &serverpb.TenantRangesResponse{ + RangesByLocality: make(map[string]serverpb.TenantRangesResponse_TenantRangeList), + } + + for _, rangeMetas := range nodeResults { + for _, rangeMeta := range rangeMetas { + localityKey, rangeInfo := transformTenantRange(rangeMeta) + rangeList, ok := resp.RangesByLocality[localityKey] + if !ok { + rangeList = serverpb.TenantRangesResponse_TenantRangeList{ + Ranges: make([]serverpb.TenantRangeInfo, 0), + } + } + rangeList.Ranges = append(rangeList.Ranges, *rangeInfo) + resp.RangesByLocality[localityKey] = rangeList + } + } + + return resp, nil +} + // HotRanges returns the hottest ranges on each store on the requested node(s). 
func (s *statusServer) HotRanges( ctx context.Context, req *serverpb.HotRangesRequest, diff --git a/pkg/server/status_test.go b/pkg/server/status_test.go index 7f0fc868e7eb..e96e795b0c32 100644 --- a/pkg/server/status_test.go +++ b/pkg/server/status_test.go @@ -1172,6 +1172,28 @@ func TestRangesResponse(t *testing.T) { } } +func TestTenantRangesResponse(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + ts := startServer(t) + defer ts.Stopper().Stop(ctx) + + t.Run("returns error when TenantID not set in ctx", func(t *testing.T) { + rpcStopper := stop.NewStopper() + defer rpcStopper.Stop(ctx) + + conn, err := ts.rpcContext.GRPCDialNode(ts.ServingRPCAddr(), ts.NodeID(), rpc.DefaultClass).Connect(ctx) + if err != nil { + t.Fatal(err) + } + client := serverpb.NewStatusClient(conn) + _, err = client.TenantRanges(ctx, &serverpb.TenantRangesRequest{}) + require.Error(t, err) + require.Contains(t, err.Error(), "no tenant ID found in context") + }) +} + func TestRaftDebug(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) diff --git a/pkg/server/tenant.go b/pkg/server/tenant.go index 5cf932d88997..281ad968cbe9 100644 --- a/pkg/server/tenant.go +++ b/pkg/server/tenant.go @@ -566,6 +566,7 @@ func makeTenantSQLServerArgs( protectedtsProvider: protectedTSProvider, rangeFeedFactory: rangeFeedFactory, regionsServer: tenantConnect, + tenantStatusServer: tenantConnect, costController: costController, grpc: grpcServer, }, nil diff --git a/pkg/server/tenant_status.go b/pkg/server/tenant_status.go index 658b85c34ffe..f4942262e74e 100644 --- a/pkg/server/tenant_status.go +++ b/pkg/server/tenant_status.go @@ -1138,6 +1138,20 @@ func (t *tenantStatusServer) TxnIDResolution( return statusClient.TxnIDResolution(ctx, req) } +func (t *tenantStatusServer) TenantRanges( + ctx context.Context, req *serverpb.TenantRangesRequest, +) (*serverpb.TenantRangesResponse, error) { + ctx = propagateGatewayMetadata(ctx) + ctx = t.AnnotateCtx(ctx) + + // The tenant range report contains replica metadata which is admin-only. + if _, err := t.privilegeChecker.requireAdminUser(ctx); err != nil { + return nil, err + } + + return t.sqlServer.tenantConnect.TenantRanges(ctx, req) +} + // GetFiles returns a list of files of type defined in the request. func (t *tenantStatusServer) GetFiles( ctx context.Context, req *serverpb.GetFilesRequest, diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index 26080934b28c..1f694fda1412 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -477,6 +477,11 @@ func (ts *TestServer) TestingKnobs() *base.TestingKnobs { return nil } +// TenantStatusServer returns the TenantStatusServer used by the TestServer. +func (ts *TestServer) TenantStatusServer() interface{} { + return ts.status +} + // Start starts the TestServer by bootstrapping an in-memory store // (defaults to maximum of 100M). The server is started, launching the // node RPC server and all HTTP endpoints. Use the value of @@ -554,6 +559,11 @@ func (t *TestTenant) StatusServer() interface{} { return t.execCfg.SQLStatusServer } +// TenantStatusServer is part of TestTenantInterface. +func (t *TestTenant) TenantStatusServer() interface{} { + return t.execCfg.TenantStatusServer +} + // DistSQLServer is part of TestTenantInterface. 
func (t *TestTenant) DistSQLServer() interface{} { return t.SQLServer.distSQLServer diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index def33aefaa57..3ffacdbd608e 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -1150,18 +1150,19 @@ type ExecutorConfig struct { NodesStatusServer serverpb.OptionalNodesStatusServer // SQLStatusServer gives access to a subset of the Status service and is // available when not running as a system tenant. - SQLStatusServer serverpb.SQLStatusServer - RegionsServer serverpb.RegionsServer - MetricsRecorder nodeStatusGenerator - SessionRegistry *SessionRegistry - SQLLiveness sqlliveness.Liveness - JobRegistry *jobs.Registry - VirtualSchemas *VirtualSchemaHolder - DistSQLPlanner *DistSQLPlanner - TableStatsCache *stats.TableStatisticsCache - StatsRefresher *stats.Refresher - InternalExecutor *InternalExecutor - QueryCache *querycache.C + SQLStatusServer serverpb.SQLStatusServer + TenantStatusServer serverpb.TenantStatusServer + RegionsServer serverpb.RegionsServer + MetricsRecorder nodeStatusGenerator + SessionRegistry *SessionRegistry + SQLLiveness sqlliveness.Liveness + JobRegistry *jobs.Registry + VirtualSchemas *VirtualSchemaHolder + DistSQLPlanner *DistSQLPlanner + TableStatsCache *stats.TableStatisticsCache + StatsRefresher *stats.Refresher + InternalExecutor *InternalExecutor + QueryCache *querycache.C SchemaChangerMetrics *SchemaChangerMetrics FeatureFlagMetrics *featureflag.DenialMetrics diff --git a/pkg/testutils/serverutils/test_tenant_shim.go b/pkg/testutils/serverutils/test_tenant_shim.go index 379987b325f7..d87ebbd6c932 100644 --- a/pkg/testutils/serverutils/test_tenant_shim.go +++ b/pkg/testutils/serverutils/test_tenant_shim.go @@ -56,6 +56,10 @@ type TestTenantInterface interface { // interface{}. StatusServer() interface{} + // TenantStatusServer returns the tenant's *server.TenantStatusServer as an + // interface{}. + TenantStatusServer() interface{} + // DistSQLServer returns the *distsql.ServerImpl as an interface{}. DistSQLServer() interface{} From ca3badbdb5f107db056c138a687b6034cd768e7c Mon Sep 17 00:00:00 2001 From: Alex Barganier Date: Wed, 23 Feb 2022 15:36:17 -0400 Subject: [PATCH 2/3] pkg/cli: add tenant-specific range report to tenant debug.zip's This commit modifies the cluster-wide debug zip config to attempt a `TenantRanges` request, and generate a file from the response. This file will provide range metadata for all the leaseholder replicas available in the tenant's keyspace, which can be used by tenants for debug purposes. Release note: none Release justification: Low risk, high benefit changes to existing functionality --- pkg/cli/testdata/zip/partial1 | 3 +++ pkg/cli/testdata/zip/partial1_excluded | 3 +++ pkg/cli/testdata/zip/partial2 | 3 +++ pkg/cli/testdata/zip/testzip | 3 +++ pkg/cli/testdata/zip/testzip_concurrent | 5 +++++ pkg/cli/testdata/zip/testzip_tenant | 3 +++ pkg/cli/zip_cluster_wide.go | 7 +++++++ pkg/cli/zip_tenant_test.go | 2 ++ 8 files changed, 29 insertions(+) diff --git a/pkg/cli/testdata/zip/partial1 b/pkg/cli/testdata/zip/partial1 index 954c9075a827..866e0da8154e 100644 --- a/pkg/cli/testdata/zip/partial1 +++ b/pkg/cli/testdata/zip/partial1 @@ -9,6 +9,9 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null [cluster] requesting data for debug/rangelog... received response... converting to JSON... writing binary output: debug/rangelog.json... done [cluster] requesting data for debug/settings... received response... converting to JSON... 
writing binary output: debug/settings.json... done [cluster] requesting data for debug/reports/problemranges... received response... converting to JSON... writing binary output: debug/reports/problemranges.json... done +[cluster] requesting data for debug/tenant_ranges... received response... +[cluster] requesting data for debug/tenant_ranges: last request failed: rpc error: ... +[cluster] requesting data for debug/tenant_ranges: creating error output: debug/tenant_ranges.json.err.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_contention_events... writing output: debug/crdb_internal.cluster_contention_events.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_distsql_flows... writing output: debug/crdb_internal.cluster_distsql_flows.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_database_privileges... writing output: debug/crdb_internal.cluster_database_privileges.txt... done diff --git a/pkg/cli/testdata/zip/partial1_excluded b/pkg/cli/testdata/zip/partial1_excluded index 6356f1062a6f..90f06c98e8c3 100644 --- a/pkg/cli/testdata/zip/partial1_excluded +++ b/pkg/cli/testdata/zip/partial1_excluded @@ -9,6 +9,9 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 [cluster] requesting data for debug/rangelog... received response... converting to JSON... writing binary output: debug/rangelog.json... done [cluster] requesting data for debug/settings... received response... converting to JSON... writing binary output: debug/settings.json... done [cluster] requesting data for debug/reports/problemranges... received response... converting to JSON... writing binary output: debug/reports/problemranges.json... done +[cluster] requesting data for debug/tenant_ranges... received response... +[cluster] requesting data for debug/tenant_ranges: last request failed: rpc error: ... +[cluster] requesting data for debug/tenant_ranges: creating error output: debug/tenant_ranges.json.err.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_contention_events... writing output: debug/crdb_internal.cluster_contention_events.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_distsql_flows... writing output: debug/crdb_internal.cluster_distsql_flows.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_database_privileges... writing output: debug/crdb_internal.cluster_database_privileges.txt... done diff --git a/pkg/cli/testdata/zip/partial2 b/pkg/cli/testdata/zip/partial2 index d3c5ce374a21..c82838ad5b49 100644 --- a/pkg/cli/testdata/zip/partial2 +++ b/pkg/cli/testdata/zip/partial2 @@ -9,6 +9,9 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null [cluster] requesting data for debug/rangelog... received response... converting to JSON... writing binary output: debug/rangelog.json... done [cluster] requesting data for debug/settings... received response... converting to JSON... writing binary output: debug/settings.json... done [cluster] requesting data for debug/reports/problemranges... received response... converting to JSON... writing binary output: debug/reports/problemranges.json... done +[cluster] requesting data for debug/tenant_ranges... received response... +[cluster] requesting data for debug/tenant_ranges: last request failed: rpc error: ... +[cluster] requesting data for debug/tenant_ranges: creating error output: debug/tenant_ranges.json.err.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_contention_events... 
writing output: debug/crdb_internal.cluster_contention_events.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_distsql_flows... writing output: debug/crdb_internal.cluster_distsql_flows.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_database_privileges... writing output: debug/crdb_internal.cluster_database_privileges.txt... done diff --git a/pkg/cli/testdata/zip/testzip b/pkg/cli/testdata/zip/testzip index 618100494679..c3d503201a52 100644 --- a/pkg/cli/testdata/zip/testzip +++ b/pkg/cli/testdata/zip/testzip @@ -9,6 +9,9 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null [cluster] requesting data for debug/rangelog... received response... converting to JSON... writing binary output: debug/rangelog.json... done [cluster] requesting data for debug/settings... received response... converting to JSON... writing binary output: debug/settings.json... done [cluster] requesting data for debug/reports/problemranges... received response... converting to JSON... writing binary output: debug/reports/problemranges.json... done +[cluster] requesting data for debug/tenant_ranges... received response... +[cluster] requesting data for debug/tenant_ranges: last request failed: rpc error: ... +[cluster] requesting data for debug/tenant_ranges: creating error output: debug/tenant_ranges.json.err.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_contention_events... writing output: debug/crdb_internal.cluster_contention_events.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_distsql_flows... writing output: debug/crdb_internal.cluster_distsql_flows.txt... done [cluster] retrieving SQL data for crdb_internal.cluster_database_privileges... writing output: debug/crdb_internal.cluster_database_privileges.txt... done diff --git a/pkg/cli/testdata/zip/testzip_concurrent b/pkg/cli/testdata/zip/testzip_concurrent index 1536750813c5..f379a58e4474 100644 --- a/pkg/cli/testdata/zip/testzip_concurrent +++ b/pkg/cli/testdata/zip/testzip_concurrent @@ -30,6 +30,11 @@ zip [cluster] requesting data for debug/settings: done [cluster] requesting data for debug/settings: received response... [cluster] requesting data for debug/settings: writing binary output: debug/settings.json... +[cluster] requesting data for debug/tenant_ranges... +[cluster] requesting data for debug/tenant_ranges: creating error output: debug/tenant_ranges.json.err.txt... +[cluster] requesting data for debug/tenant_ranges: done +[cluster] requesting data for debug/tenant_ranges: last request failed: rpc error: ... +[cluster] requesting data for debug/tenant_ranges: received response... [cluster] requesting liveness... [cluster] requesting liveness: converting to JSON... [cluster] requesting liveness: done diff --git a/pkg/cli/testdata/zip/testzip_tenant b/pkg/cli/testdata/zip/testzip_tenant index e87787880c95..89e96d624498 100644 --- a/pkg/cli/testdata/zip/testzip_tenant +++ b/pkg/cli/testdata/zip/testzip_tenant @@ -17,6 +17,9 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null [cluster] requesting data for debug/reports/problemranges... received response... [cluster] requesting data for debug/reports/problemranges: last request failed: rpc error: ... [cluster] requesting data for debug/reports/problemranges: creating error output: debug/reports/problemranges.json.err.txt... done +[cluster] requesting data for debug/tenant_ranges... received response... +[cluster] requesting data for debug/tenant_ranges: last request failed: rpc error: ... 
+[cluster] requesting data for debug/tenant_ranges: creating error output: debug/tenant_ranges.json.err.txt... done
[cluster] retrieving SQL data for crdb_internal.cluster_contention_events... writing output: debug/crdb_internal.cluster_contention_events.txt... done
[cluster] retrieving SQL data for crdb_internal.cluster_distsql_flows... writing output: debug/crdb_internal.cluster_distsql_flows.txt... done
[cluster] retrieving SQL data for crdb_internal.cluster_database_privileges... writing output: debug/crdb_internal.cluster_database_privileges.txt... done
diff --git a/pkg/cli/zip_cluster_wide.go b/pkg/cli/zip_cluster_wide.go
index 9e3424cb57d6..702422cca249 100644
--- a/pkg/cli/zip_cluster_wide.go
+++ b/pkg/cli/zip_cluster_wide.go
@@ -32,6 +32,7 @@ const (
 	schemaPrefix      = debugBase + "/schema"
 	settingsName      = debugBase + "/settings"
 	problemRangesName = reportsPrefix + "/problemranges"
+	tenantRangesName  = debugBase + "/tenant_ranges"
 )
 
 // makeClusterWideZipRequests defines the zipRequests that are to be
@@ -66,6 +67,12 @@ func makeClusterWideZipRequests(
 		},
 		pathName: problemRangesName,
 	},
+	{
+		fn: func(ctx context.Context) (interface{}, error) {
+			return status.TenantRanges(ctx, &serverpb.TenantRangesRequest{})
+		},
+		pathName: tenantRangesName,
+	},
 	}
 }
 
diff --git a/pkg/cli/zip_tenant_test.go b/pkg/cli/zip_tenant_test.go
index 7e69de1f1ac1..099fff97a0a6 100644
--- a/pkg/cli/zip_tenant_test.go
+++ b/pkg/cli/zip_tenant_test.go
@@ -46,6 +46,8 @@ func TestTenantZip(t *testing.T) {
 		StoreSpecs: []base.StoreSpec{{
 			Path: hostDir,
 		}},
+		// TODO(abarganier): Switch to secure mode once underlying infra has been
+		// updated to support it. See: https://github.com/cockroachdb/cockroach/issues/77173
 		Insecure: true,
 		TenantArgs: &tenantArgs,
 	})

From 726f83d0a4f43a8264251837df067d4c68f690a8 Mon Sep 17 00:00:00 2001
From: Tobias Grieger
Date: Thu, 3 Mar 2022 15:04:21 +0100
Subject: [PATCH 3/3] kvserver: downgrade & augment "slow raft ready" message

It doesn't rise to the level of a `Warning`; rather, it is
informational. While I was here, I also extended the message to note
that seeing it can indicate that the node (or its storage) is
overloaded.

Triggered by an internal question[^1] about this message.

[^1]: https://cockroachlabs.slack.com/archives/CHKQGKYEM/p1646245983917929

Release justification: low-risk logging improvement.

Release note: None
---
 pkg/kv/kvserver/store_raft.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/kv/kvserver/store_raft.go b/pkg/kv/kvserver/store_raft.go
index 95215b88bd4b..5bb8d6fd7642 100644
--- a/pkg/kv/kvserver/store_raft.go
+++ b/pkg/kv/kvserver/store_raft.go
@@ -519,7 +519,7 @@ func (s *Store) processReady(rangeID roachpb.RangeID) {
 	// processing time means we'll have starved local replicas of ticks and
 	// remote replicas will likely start campaigning.
 	if elapsed >= defaultReplicaRaftMuWarnThreshold {
-		log.Warningf(ctx, "handle raft ready: %.1fs [applied=%d, batches=%d, state_assertions=%d]",
+		log.Infof(ctx, "handle raft ready: %.1fs [applied=%d, batches=%d, state_assertions=%d]; node might be overloaded",
 			elapsed.Seconds(), stats.entriesProcessed, stats.batchesProcessed, stats.stateAssertions)
 	}
 }