From d2e63eebe056a3d6494286b6ca722e0b1dd47e80 Mon Sep 17 00:00:00 2001
From: Yahor Yuzefovich
Date: Mon, 18 Dec 2023 14:53:46 -0800
Subject: [PATCH 1/2] logictest: fix rare flake

This commit fixes a rare flake in `TestCCLLogic_crdb_internal` where the
test ensures that a newly-created secondary tenant doesn't have a tenant
capability by default, then grants that capability and verifies that the
in-memory cache is updated accordingly. The problem was that this test
used the "can_admin_split" capability for the check and was added before
879bd1c9efb9c4e8f11910dac5db7c01ebf84c79 was merged. That commit made
the "can_admin_split" capability granted by default, which made the
existing test racy: the race is between the tenant capabilities watcher
propagating the grant and the logic test framework querying the
in-memory cache. The fix is simple: use a capability that is not granted
by default ("can_view_node_info").

Release note: None
---
 .../testdata/logic_test/crdb_internal | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal
index 2e58cad5190a..02074c148e72 100644
--- a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal
+++ b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal
@@ -188,21 +188,22 @@ statement ok
 SELECT crdb_internal.create_tenant(5)
 
 # Use retry because source data is eventually consistent.
-query ITT colnames,retry
-SELECT * FROM crdb_internal.node_tenant_capabilities_cache WHERE capability_name = 'can_admin_split'
+query ITT colnames,retry,rowsort
+SELECT * FROM crdb_internal.node_tenant_capabilities_cache WHERE capability_name = 'can_view_node_info'
 ----
-tenant_id  capability_name  capability_value
-1          can_admin_split  true
+tenant_id  capability_name     capability_value
+1          can_view_node_info  true
+5          can_view_node_info  false
 
 statement ok
-ALTER TENANT [5] GRANT CAPABILITY can_admin_split
+ALTER TENANT [5] GRANT CAPABILITY can_view_node_info
 
 # Use retry because source data is eventually consistent.
 query ITT colnames,retry,rowsort
-SELECT * FROM crdb_internal.node_tenant_capabilities_cache WHERE capability_name = 'can_admin_split'
+SELECT * FROM crdb_internal.node_tenant_capabilities_cache WHERE capability_name = 'can_view_node_info'
 ----
 tenant_id  capability_name     capability_value
-1          can_admin_split     true
-5          can_admin_split     true
+1          can_view_node_info  true
+5          can_view_node_info  true
 
 subtest end

From 8a6e4e18e6c3575a44ae35708dbc2154f1f4d754 Mon Sep 17 00:00:00 2001
From: Yahor Yuzefovich
Date: Mon, 18 Dec 2023 16:25:31 -0800
Subject: [PATCH 2/2] kvstreamer: avoid an allocation on the hot path

This commit avoids an allocation in the `logStatistics` call when
verbose logging is not enabled (previously, we would always allocate in
the `humanizeutil.IBytes` call). It also avoids a few atomic loads.

(Noticed this while looking at the CPU profile of one of the production
clusters.)

Release note: None
---
 pkg/kv/kvclient/kvstreamer/streamer.go | 38 ++++++++++++++------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/pkg/kv/kvclient/kvstreamer/streamer.go b/pkg/kv/kvclient/kvstreamer/streamer.go
index a79a1e9efb85..333a496e170f 100644
--- a/pkg/kv/kvclient/kvstreamer/streamer.go
+++ b/pkg/kv/kvclient/kvstreamer/streamer.go
@@ -932,24 +932,26 @@ func (w *workerCoordinator) mainLoop(ctx context.Context) {
 // tracing span of the Streamer's user. Some time has been spent to figure it
 // out but led to no success. This should be cleaned up.
 func (w *workerCoordinator) logStatistics(ctx context.Context) {
-	avgResponseSize, _ := w.getAvgResponseSize()
-	log.VEventf(
-		ctx, 1,
-		"enqueueCalls=%d enqueuedRequests=%d enqueuedSingleRangeRequests=%d kvPairsRead=%d "+
-			"batchRequestsIssued=%d resumeBatchRequests=%d resumeSingleRangeRequests=%d "+
-			"numSpilledResults=%d emptyBatchResponses=%d droppedBatchResponses=%d avgResponseSize=%s",
-		w.s.enqueueCalls,
-		w.s.enqueuedRequests,
-		w.s.enqueuedSingleRangeRequests,
-		atomic.LoadInt64(w.s.atomics.kvPairsRead),
-		atomic.LoadInt64(w.s.atomics.batchRequestsIssued),
-		atomic.LoadInt64(&w.s.atomics.resumeBatchRequests),
-		atomic.LoadInt64(&w.s.atomics.resumeSingleRangeRequests),
-		w.s.results.numSpilledResults(),
-		atomic.LoadInt64(&w.s.atomics.emptyBatchResponses),
-		atomic.LoadInt64(&w.s.atomics.droppedBatchResponses),
-		humanizeutil.IBytes(avgResponseSize),
-	)
+	if log.ExpensiveLogEnabled(ctx, 1) {
+		avgResponseSize, _ := w.getAvgResponseSize()
+		log.Eventf(
+			ctx,
+			"enqueueCalls=%d enqueuedRequests=%d enqueuedSingleRangeRequests=%d kvPairsRead=%d "+
+				"batchRequestsIssued=%d resumeBatchRequests=%d resumeSingleRangeRequests=%d "+
+				"numSpilledResults=%d emptyBatchResponses=%d droppedBatchResponses=%d avgResponseSize=%s",
+			w.s.enqueueCalls,
+			w.s.enqueuedRequests,
+			w.s.enqueuedSingleRangeRequests,
+			atomic.LoadInt64(w.s.atomics.kvPairsRead),
+			atomic.LoadInt64(w.s.atomics.batchRequestsIssued),
+			atomic.LoadInt64(&w.s.atomics.resumeBatchRequests),
+			atomic.LoadInt64(&w.s.atomics.resumeSingleRangeRequests),
+			w.s.results.numSpilledResults(),
+			atomic.LoadInt64(&w.s.atomics.emptyBatchResponses),
+			atomic.LoadInt64(&w.s.atomics.droppedBatchResponses),
+			humanizeutil.IBytes(avgResponseSize),
+		)
+	}
 }
 
 // waitForRequests blocks until there is at least one request to be served.
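
Note on the pattern in PATCH 2/2: the allocation goes away because Go
evaluates the arguments to `log.VEventf` at the call site, before the
verbosity check inside it can drop the message, so `humanizeutil.IBytes`
(and the atomic loads) always ran. Gating the whole call behind
`log.ExpensiveLogEnabled` skips that argument construction entirely. A
minimal self-contained sketch of the same pattern follows; the
`maybeLogStats` helper and its parameters are hypothetical illustrations,
while `log.ExpensiveLogEnabled`, `log.Eventf`, and `humanizeutil.IBytes`
are the same utilities used in the patch.

package example

import (
	"context"
	"sync/atomic"

	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
)

// maybeLogStats is a hypothetical helper: when verbose logging is off,
// the early return skips the atomic load and the allocation inside
// humanizeutil.IBytes, so the hot path pays only for the verbosity check.
func maybeLogStats(ctx context.Context, requests int64, totalBytes *int64) {
	if !log.ExpensiveLogEnabled(ctx, 1) {
		return
	}
	log.Eventf(ctx, "requests=%d totalBytes=%s",
		requests, humanizeutil.IBytes(atomic.LoadInt64(totalBytes)))
}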