diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index e5b1504101c0..3aecf3fe1a0c 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -58,6 +58,7 @@ server.clock.persist_upper_bound_interval duration 0s the interval between persi server.eventlog.enabled boolean true if set, logged notable events are also stored in the table system.eventlog tenant-rw server.eventlog.ttl duration 2160h0m0s if nonzero, entries in system.eventlog older than this duration are periodically purged tenant-rw server.host_based_authentication.configuration string host-based authentication configuration to use during connection authentication tenant-rw +server.hot_ranges.node.timeout duration 5m0s the duration allowed for a single node to return hot range data before the request is cancelled; if set to 0, there is no timeout tenant-rw server.hsts.enabled boolean false if true, HSTS headers will be sent along with all HTTP requests. The headers will contain a max-age setting of one year. Browsers honoring the header will always use HTTPS to access the DB Console. Ensure that TLS is correctly configured prior to enabling. tenant-rw server.identity_map.configuration string system-identity to database-username mappings tenant-rw server.log_gc.max_deletions_per_cycle integer 1000 the maximum number of entries to delete on each purge of log-like system tables tenant-rw diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 2cb7dd3d216b..0be1e6e3d762 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -88,6 +88,7 @@
server.eventlog.enabled
booleantrueif set, logged notable events are also stored in the table system.eventlogServerless/Dedicated/Self-Hosted
server.eventlog.ttl
duration2160h0m0sif nonzero, entries in system.eventlog older than this duration are periodically purgedServerless/Dedicated/Self-Hosted
server.host_based_authentication.configuration
stringhost-based authentication configuration to use during connection authenticationServerless/Dedicated/Self-Hosted +
server.hot_ranges.node.timeout
duration5m0sthe duration allowed for a single node to return hot range data before the request is cancelled; if set to 0, there is no timeoutServerless/Dedicated/Self-Hosted
server.hsts.enabled
booleanfalseif true, HSTS headers will be sent along with all HTTP requests. The headers will contain a max-age setting of one year. Browsers honoring the header will always use HTTPS to access the DB Console. Ensure that TLS is correctly configured prior to enabling.Serverless/Dedicated/Self-Hosted
server.identity_map.configuration
stringsystem-identity to database-username mappingsServerless/Dedicated/Self-Hosted
server.log_gc.max_deletions_per_cycle
integer1000the maximum number of entries to delete on each purge of log-like system tablesServerless/Dedicated/Self-Hosted diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index eea4f5485b35..9c064a3ab809 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -27,6 +27,7 @@ go_library( "fanout_clients.go", "grpc_gateway.go", "grpc_server.go", + "hot_ranges.go", "import_ts.go", "index_usage_stats.go", "init.go", diff --git a/pkg/server/api_v2_ranges.go b/pkg/server/api_v2_ranges.go index a6589bcbe511..ea146f5323a1 100644 --- a/pkg/server/api_v2_ranges.go +++ b/pkg/server/api_v2_ranges.go @@ -566,8 +566,9 @@ func (a *apiV2Server) listHotRanges(w http.ResponseWriter, r *http.Request) { }) } + timeout := HotRangesNodeTimeout.Get(&a.status.st.SV) next, err := a.status.paginatedIterateNodes( - ctx, "hot ranges", limit, start, requestedNodes, dialFn, + ctx, "hot ranges", limit, start, requestedNodes, timeout, dialFn, nodeFn, responseFn, errorFn) if err != nil { diff --git a/pkg/server/hot_ranges.go b/pkg/server/hot_ranges.go new file mode 100644 index 000000000000..511a18ebeb39 --- /dev/null +++ b/pkg/server/hot_ranges.go @@ -0,0 +1,25 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package server + +import ( + "time" + + "github.com/cockroachdb/cockroach/pkg/settings" +) + +var HotRangesNodeTimeout = settings.RegisterDurationSetting( + settings.TenantWritable, + "server.hot_ranges.node.timeout", + "the duration allowed for a single node to return hot range data before the request is cancelled; if set to 0, there is no timeout", + time.Minute*5, + settings.NonNegativeDuration, +).WithPublic() diff --git a/pkg/server/pagination.go b/pkg/server/pagination.go index ce7a7f075755..6f734035a6b1 100644 --- a/pkg/server/pagination.go +++ b/pkg/server/pagination.go @@ -23,6 +23,7 @@ import ( "strings" "sync" "sync/atomic" + "time" "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb" @@ -323,10 +324,14 @@ func (r *rpcNodePaginator) init() { r.responseChan = make(chan paginatedNodeResponse, r.numNodes) } +const noTimeout time.Duration = 0 + // queryNode queries the given node, and sends the responses back through responseChan // in order of idx (i.e. when all nodes with a lower idx have already sent theirs). // Safe for concurrent use. -func (r *rpcNodePaginator) queryNode(ctx context.Context, nodeID roachpb.NodeID, idx int) { +func (r *rpcNodePaginator) queryNode( + ctx context.Context, nodeID roachpb.NodeID, idx int, timeout time.Duration, +) { if atomic.LoadInt32(&r.done) != 0 { // There are more values than we need. currentLen >= limit. return @@ -379,7 +384,18 @@ func (r *rpcNodePaginator) queryNode(ctx context.Context, nodeID roachpb.NodeID, return } - res, err := r.nodeFn(ctx, client, nodeID) + var res interface{} + var err error + if timeout == noTimeout { + res, err = r.nodeFn(ctx, client, nodeID) + } else { + err = timeutil.RunWithTimeout(ctx, "node fn", timeout, func(ctx context.Context) error { + var _err error + res, _err = r.nodeFn(ctx, client, nodeID) + return _err + }) + } + if err != nil { err = errors.Wrapf(err, "error requesting %s from node %d (%s)", r.errorCtx, nodeID, r.nodeStatuses[serverID(nodeID)]) diff --git a/pkg/server/pagination_test.go b/pkg/server/pagination_test.go index 7fb21eeb500a..912a24b0bb01 100644 --- a/pkg/server/pagination_test.go +++ b/pkg/server/pagination_test.go @@ -19,9 +19,11 @@ import ( "testing" "time" + "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/testutils/datapathutils" + "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/datadriven" @@ -389,7 +391,7 @@ func TestRPCPaginator(t *testing.T) { // Issue requests in parallel. for idx, nodeID := range nodesToQuery { go func(nodeID roachpb.NodeID, idx int) { - paginator.queryNode(ctx, nodeID, idx) + paginator.queryNode(ctx, nodeID, idx, noTimeout) }(nodeID, idx) } @@ -412,3 +414,54 @@ func TestRPCPaginator(t *testing.T) { } } + +func TestRPCPaginatorWithTimeout(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + server, _, _ := serverutils.StartServer(t, base.TestServerArgs{}) + defer server.Stopper().Stop(ctx) + + s := server.StatusServer().(*systemStatusServer) + + dialFn := func(ctx context.Context, nodeID roachpb.NodeID) (interface{}, error) { + client, err := s.dialNode(ctx, nodeID) + return client, err + } + nodeFn := func(ctx context.Context, client interface{}, nodeID roachpb.NodeID) (interface{}, error) { + select { + case <-time.After(time.Second * 10): + case <-ctx.Done(): + break + } + + // Return an error that mimics the error returned when a rpc's context is cancelled: + return nil, errors.New("some error") + } + responseFn := func(nodeID roachpb.NodeID, resp interface{}) { + // noop + } + + var timeoutError error + errorFn := func(nodeID roachpb.NodeID, err error) { + timeoutError = err + log.Infof(ctx, "error from node %d: %v", nodeID, err) + } + + pagState := paginationState{} + + _, _ = s.paginatedIterateNodes( + ctx, + "test-paginate-with-timeout", + 1000, + pagState, + []roachpb.NodeID{}, + time.Second*2, + dialFn, + nodeFn, + responseFn, + errorFn, + ) + + require.ErrorContains(t, timeoutError, "operation \"node fn\" timed out") +} diff --git a/pkg/server/status.go b/pkg/server/status.go index af46ff780eac..12bb8a8d1998 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -2809,8 +2809,9 @@ func (s *systemStatusServer) HotRangesV2( response.ErrorsByNodeID[nodeID] = err.Error() } + timeout := HotRangesNodeTimeout.Get(&s.st.SV) next, err := s.paginatedIterateNodes( - ctx, "hotRanges", size, start, requestedNodes, dialFn, + ctx, "hotRanges", size, start, requestedNodes, timeout, dialFn, nodeFn, responseFn, errorFn) if err != nil { @@ -3087,12 +3088,14 @@ func (s *statusServer) iterateNodes( // and nodeError on every error result. It returns the next `limit` results // after `start`. If `requestedNodes` is specified and non-empty, iteration is // only done on that subset of nodes in addition to any nodes already in pagState. +// If non-zero, nodeFn will run with a timeout specified by nodeFnTimeout. func (s *statusServer) paginatedIterateNodes( ctx context.Context, errorCtx string, limit int, pagState paginationState, requestedNodes []roachpb.NodeID, + nodeFnTimeout time.Duration, dialFn func(ctx context.Context, nodeID roachpb.NodeID) (interface{}, error), nodeFn func(ctx context.Context, client interface{}, nodeID roachpb.NodeID) (interface{}, error), responseFn func(nodeID roachpb.NodeID, resp interface{}), @@ -3154,7 +3157,9 @@ func (s *statusServer) paginatedIterateNodes( Sem: sem, WaitForSem: true, }, - func(ctx context.Context) { paginator.queryNode(ctx, nodeID, idx) }, + func(ctx context.Context) { + paginator.queryNode(ctx, nodeID, idx, nodeFnTimeout) + }, ); err != nil { return pagState, err } @@ -3208,7 +3213,7 @@ func (s *statusServer) listSessionsHelper( var err error var pagState paginationState if pagState, err = s.paginatedIterateNodes( - ctx, "session list", limit, start, nil, dialFn, nodeFn, responseFn, errorFn); err != nil { + ctx, "session list", limit, start, nil, noTimeout, dialFn, nodeFn, responseFn, errorFn); err != nil { err := serverpb.ListSessionsError{Message: err.Error()} response.Errors = append(response.Errors, err) } diff --git a/pkg/ui/workspaces/db-console/src/views/hotRanges/index.tsx b/pkg/ui/workspaces/db-console/src/views/hotRanges/index.tsx index 9a0d197e0472..da68d2417fd6 100644 --- a/pkg/ui/workspaces/db-console/src/views/hotRanges/index.tsx +++ b/pkg/ui/workspaces/db-console/src/views/hotRanges/index.tsx @@ -55,16 +55,6 @@ const HotRangesPage = () => { } }, [dispatch, isValid]); - useEffect(() => { - dispatch( - refreshHotRanges( - new HotRangesRequest({ - page_size: 1000, - }), - ), - ); - }, [dispatch]); - const [filteredHotRanges, setFilteredHotRanges] = useState(hotRanges);