From c248ac64fc3bafb61f6ab8ed40186d680f6dc262 Mon Sep 17 00:00:00 2001 From: Andrii Vorobiov Date: Sun, 2 Jan 2022 21:56:56 +0200 Subject: [PATCH] server: hot ranges api This change implements the second version of the hot ranges API, which is required for the UI to render the enhanced Hot Ranges page. Before, the Hot Ranges report (under the Advanced Debugging page) used the former version of the HotRanges API, which provided information about hot ranges together with internal/sensitive data (see: https://github.com/cockroachdb/cockroach/issues/53212) that should not be exposed to users. Now, in addition to the existing endpoint, a new one is implemented that is based on the former version and provides only the hot ranges information that is needed for the Hot Ranges page (range id, qps, and the table, db, and index names for a particular range). The list of hot ranges and their QPS is provided by the `HotRanges` service; information such as the DB, table, and index names is then retrieved from the range's `StartKey`, which may or may not include this info (it may not, for instance, for a meta range or a range that stores the index itself). The `HotRanges` and `HotRangesV2` services expect the same request type as an argument but return different responses: the `HotRangesV2` service returns a flat list of hot ranges instead of ranges grouped per node/store. Release note: None server: add leaseholder node id to hot ranges api This change extends `serverpb.HotRangesResponse` with a `LeaseholderNodeID` field that indicates the ID of the node holding the leaseholder replica for the hot range. The change is made in the `localHotRanges` function (which is used by `HotRanges`, which in turn is used by the `HotRangesV2` service) to reuse the existing logic that iterates over the stores and queries hot ranges; it extends that function's response with the `LeaseholderNodeID` field. Otherwise, the same logic would have to be reimplemented in the `HotRangesV2` service by calling the `VisitStores` iterator. 
Release note: None Release justification: bug fixes and low-risk updates to new functionality --- docs/generated/http/full.md | 66 +++++++++++++ docs/generated/http/hotranges-other.md | 1 + pkg/server/BUILD.bazel | 1 + pkg/server/serverpb/status.proto | 52 +++++++++++ pkg/server/status.go | 92 +++++++++++++++++++ pkg/server/status_test.go | 25 +++++ .../views/reports/containers/debug/index.tsx | 8 +- 7 files changed, 241 insertions(+), 4 deletions(-) diff --git a/docs/generated/http/full.md b/docs/generated/http/full.md index 3059c4f009ec..9e410296d420 100644 --- a/docs/generated/http/full.md +++ b/docs/generated/http/full.md @@ -3209,6 +3209,72 @@ target node(s) selected in a HotRangesRequest. | ----- | ---- | ----- | ----------- | -------------- | | desc | [cockroach.roachpb.RangeDescriptor](#cockroach.server.serverpb.HotRangesResponse-cockroach.roachpb.RangeDescriptor) | | Desc is the descriptor of the range for which the report was produced.

TODO(knz): This field should be removed. See: https://github.com/cockroachdb/cockroach/issues/53212 | [reserved](#support-status) | | queries_per_second | [double](#cockroach.server.serverpb.HotRangesResponse-double) | | QueriesPerSecond is the recent number of queries per second on this range. | [alpha](#support-status) | +| leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponse-int32) | | LeaseholderNodeID indicates on Node ID that contains replica that is leaseholder | [reserved](#support-status) | + + + + + + +## HotRangesV2 + +`GET /_status/v2/hotranges` + + + +Support status: [reserved](#support-status) + +#### Request Parameters + + + + +HotRangesRequest queries one or more cluster nodes for a list +of ranges currently considered “hot” by the node(s). + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| node_id | [string](#cockroach.server.serverpb.HotRangesRequest-string) | | NodeID indicates which node to query for a hot range report. It is possible to populate any node ID; if the node receiving the request is not the target node, it will forward the request to the target node.

If left empty, the request is forwarded to every node in the cluster. | [alpha](#support-status) | + + + + + + + +#### Response Parameters + + + + + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| ranges | [HotRangesResponseV2.HotRange](#cockroach.server.serverpb.HotRangesResponseV2-cockroach.server.serverpb.HotRangesResponseV2.HotRange) | repeated | ranges contain list of hot ranges info that has highest number of QPS | [reserved](#support-status) | + + + + + + + +#### HotRangesResponseV2.HotRange + + + +| Field | Type | Label | Description | Support status | +| ----- | ---- | ----- | ----------- | -------------- | +| range_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | range_id indicates Range ID that's identified as hot range | [reserved](#support-status) | +| node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | node_id indicates on node that contains current hot range | [reserved](#support-status) | +| qps | [double](#cockroach.server.serverpb.HotRangesResponseV2-double) | | qps (queries per second) shows the amount of queries that interact with current range | [reserved](#support-status) | +| table_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | table_name indicates table which data is stored in this hot range | [reserved](#support-status) | +| database_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | database_name indicates on database that has current hot range | [reserved](#support-status) | +| index_name | [string](#cockroach.server.serverpb.HotRangesResponseV2-string) | | index_name indicates the index name for current range | [reserved](#support-status) | +| replica_node_ids | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | repeated | replica_node_ids specifies the list of node ids that contain replicas with current hot range | [reserved](#support-status) | +| 
leaseholder_node_id | [int32](#cockroach.server.serverpb.HotRangesResponseV2-int32) | | leaseholder_node_id indicates on Node ID that contains replica that is a leaseholder | [reserved](#support-status) | diff --git a/docs/generated/http/hotranges-other.md b/docs/generated/http/hotranges-other.md index 55397521a649..7d5f0b096b97 100644 --- a/docs/generated/http/hotranges-other.md +++ b/docs/generated/http/hotranges-other.md @@ -62,5 +62,6 @@ Support status: [alpha](#support-status) | ----- | ---- | ----- | ----------- | -------------- | | desc | [cockroach.roachpb.RangeDescriptor](#cockroach.roachpb.RangeDescriptor) | | Desc is the descriptor of the range for which the report was produced.

TODO(knz): This field should be removed. See: https://github.com/cockroachdb/cockroach/issues/53212 | [reserved](#support-status) | | queries_per_second | [double](#double) | | QueriesPerSecond is the recent number of queries per second on this range. | [alpha](#support-status) | +| leaseholder_node_id | [int32](#int32) | | LeaseholderNodeID indicates on Node ID that contains replica that is leaseholder | [reserved](#support-status) | diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index f58762595581..e970f1f1bc24 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -142,6 +142,7 @@ go_library( "//pkg/spanconfig/spanconfigsqltranslator", "//pkg/spanconfig/spanconfigsqlwatcher", "//pkg/sql", + "//pkg/sql/catalog", "//pkg/sql/catalog/bootstrap", "//pkg/sql/catalog/catalogkeys", "//pkg/sql/catalog/catconstants", diff --git a/pkg/server/serverpb/status.proto b/pkg/server/serverpb/status.proto index a45a6d50d078..57c40d4d3d96 100644 --- a/pkg/server/serverpb/status.proto +++ b/pkg/server/serverpb/status.proto @@ -1140,6 +1140,12 @@ message HotRangesResponse { // on this range. 
// API: PUBLIC ALPHA double queries_per_second = 2; + // LeaseholderNodeID indicates on Node ID that contains replica that is leaseholder + int32 leaseholder_node_id = 3 [ + (gogoproto.customname) = "LeaseholderNodeID", + (gogoproto.casttype) = + "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" + ]; } // StoreResponse contains the part of a hot ranges report that @@ -1188,6 +1194,45 @@ message HotRangesResponse { ]; } +message HotRangesResponseV2 { + message HotRange { + // range_id indicates Range ID that's identified as hot range + int32 range_id = 1 [ + (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.RangeID", + (gogoproto.customname) = "RangeID" + ]; + // node_id indicates on node that contains current hot range + int32 node_id = 2 [ + (gogoproto.customname) = "NodeID", + (gogoproto.casttype) = + "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" + ]; + // qps (queries per second) shows the amount of queries that interact with current range + double qps = 3 [ + (gogoproto.customname) = "QPS" + ]; + // table_name indicates table which data is stored in this hot range + string table_name = 4; + // database_name indicates on database that has current hot range + string database_name = 5; + // index_name indicates the index name for current range + string index_name = 6; + // replica_node_ids specifies the list of node ids that contain replicas with current hot range + repeated int32 replica_node_ids = 7 [ + (gogoproto.casttype) = + "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" + ]; + // leaseholder_node_id indicates on Node ID that contains replica that is a leaseholder + int32 leaseholder_node_id = 8 [ + (gogoproto.customname) = "LeaseholderNodeID", + (gogoproto.casttype) = + "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID" + ]; + } + // ranges contain list of hot ranges info that has highest number of QPS + repeated HotRange ranges = 1; +} + message RangeRequest { int64 range_id = 1; } @@ -1874,6 +1919,13 @@ service 
Status { get : "/_status/hotranges" }; } + + rpc HotRangesV2(HotRangesRequest) returns (HotRangesResponseV2) { + option (google.api.http) = { + get : "/_status/v2/hotranges" + }; + } + rpc Range(RangeRequest) returns (RangeResponse) { option (google.api.http) = { get : "/_status/range/{range_id}" diff --git a/pkg/server/status.go b/pkg/server/status.go index 1f8d7c9b01cb..40e89f5c72ec 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -49,7 +49,9 @@ import ( "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql" + "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/catconstants" + "github.com/cockroachdb/cockroach/pkg/sql/catalog/descs" "github.com/cockroachdb/cockroach/pkg/sql/contention" "github.com/cockroachdb/cockroach/pkg/sql/contentionpb" "github.com/cockroachdb/cockroach/pkg/sql/flowinfra" @@ -2105,6 +2107,92 @@ func (s *statusServer) HotRanges( return response, nil } +func (s *statusServer) HotRangesV2( + ctx context.Context, req *serverpb.HotRangesRequest, +) (*serverpb.HotRangesResponseV2, error) { + resp, err := s.HotRanges(ctx, req) + if err != nil { + return nil, err + } + + dbNames := make(map[uint32]string) + tableNames := make(map[uint32]string) + indexNames := make(map[uint32]map[uint32]string) + parents := make(map[uint32]uint32) + + var descrs []catalog.Descriptor + if err = s.sqlServer.distSQLServer.CollectionFactory.Txn( + ctx, s.sqlServer.internalExecutor, s.db, + func(ctx context.Context, txn *kv.Txn, descriptors *descs.Collection) error { + all, err := descriptors.GetAllDescriptors(ctx, txn) + if err != nil { + return err + } + descrs = all.OrderedDescriptors() + return nil + }); err != nil { + return nil, err + } + + for _, desc := range descrs { + id := uint32(desc.GetID()) + switch desc := desc.(type) { + case catalog.TableDescriptor: + parents[id] = uint32(desc.GetParentID()) + 
tableNames[id] = desc.GetName() + indexNames[id] = make(map[uint32]string) + for _, idx := range desc.AllIndexes() { + indexNames[id][uint32(idx.GetID())] = idx.GetName() + } + case catalog.DatabaseDescriptor: + dbNames[id] = desc.GetName() + } + } + + var ranges []*serverpb.HotRangesResponseV2_HotRange + // TODO (koorosh): how to flatten triple nested loop? + for nodeID, hr := range resp.HotRangesByNodeID { + for _, store := range hr.Stores { + for _, r := range store.HotRanges { + var ( + dbName, tableName, indexName string + replicaNodeIDs []roachpb.NodeID + ) + _, tableID, err := s.sqlServer.execCfg.Codec.DecodeTablePrefix(r.Desc.StartKey.AsRawKey()) + if err != nil { + continue + } + parent := parents[tableID] + if parent != 0 { + tableName = tableNames[tableID] + dbName = dbNames[parent] + } else { + dbName = dbNames[tableID] + } + _, _, idxID, err := s.sqlServer.execCfg.Codec.DecodeIndexPrefix(r.Desc.StartKey.AsRawKey()) + if err == nil { + indexName = indexNames[tableID][idxID] + } + for _, repl := range r.Desc.Replicas().Descriptors() { + replicaNodeIDs = append(replicaNodeIDs, repl.NodeID) + } + ranges = append(ranges, &serverpb.HotRangesResponseV2_HotRange{ + RangeID: r.Desc.RangeID, + NodeID: nodeID, + QPS: r.QueriesPerSecond, + TableName: tableName, + DatabaseName: dbName, + IndexName: indexName, + ReplicaNodeIds: replicaNodeIDs, + LeaseholderNodeID: r.LeaseholderNodeID, + }) + } + } + } + + return &serverpb.HotRangesResponseV2{Ranges: ranges}, nil +} + func (s *statusServer) localHotRanges(ctx context.Context) serverpb.HotRangesResponse_NodeResponse { var resp serverpb.HotRangesResponse_NodeResponse err := s.stores.VisitStores(func(store *kvserver.Store) error { @@ -2114,6 +2202,10 @@ func (s *statusServer) localHotRanges(ctx context.Context) serverpb.HotRangesRes HotRanges: make([]serverpb.HotRangesResponse_HotRange, len(ranges)), } for i, r := range ranges { + replica, err := store.GetReplica(r.Desc.GetRangeID()) + if err == nil { + 
storeResp.HotRanges[i].LeaseholderNodeID = replica.State(ctx).Lease.Replica.NodeID + } storeResp.HotRanges[i].Desc = *r.Desc storeResp.HotRanges[i].QueriesPerSecond = r.QPS } diff --git a/pkg/server/status_test.go b/pkg/server/status_test.go index 0e3dd78617f5..ae14b903ea4b 100644 --- a/pkg/server/status_test.go +++ b/pkg/server/status_test.go @@ -1102,6 +1102,31 @@ func TestHotRangesResponse(t *testing.T) { } } +func TestHotRanges2Response(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ts := startServer(t) + defer ts.Stopper().Stop(context.Background()) + + var hotRangesResp serverpb.HotRangesResponseV2 + if err := getStatusJSONProto(ts, "v2/hotranges", &hotRangesResp); err != nil { + t.Fatal(err) + } + if len(hotRangesResp.Ranges) == 0 { + t.Fatalf("didn't get hot range responses from any nodes") + } + lastQPS := math.MaxFloat64 + for _, r := range hotRangesResp.Ranges { + if r.RangeID == 0 { + t.Errorf("unexpected empty range id: %d", r.RangeID) + } + if r.QPS > lastQPS { + t.Errorf("unexpected increase in qps between ranges; prev=%.2f, current=%.2f", lastQPS, r.QPS) + } + lastQPS = r.QPS + } +} + func TestRangesResponse(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) diff --git a/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx b/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx index 2b57bd4616d3..c6d2d4bf8442 100644 --- a/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx +++ b/pkg/ui/workspaces/db-console/src/views/reports/containers/debug/index.tsx @@ -505,13 +505,13 @@ export default function Debug() {