Skip to content

Commit

Permalink
Merge #108852
Browse files Browse the repository at this point in the history
108852: server, sql: span stats returns logical MVCC stats r=zachlite a=zachlite

Previously, the MVCC stats returned by span stats were mistakenly accumulated from each node. This commit fixes that, and adds clarifying comments.

Resolves: #108766 Epic: CRDB-30635
Release note (bug fix): A bug was fixed where a SpanStatsRequest would return post-replicated MVCC stats. Now, a SpanStatsRequest returns the logical MVCC stats for the requested span.

Co-authored-by: zachlite <[email protected]>
  • Loading branch information
craig[bot] and zachlite committed Aug 21, 2023
2 parents 1679936 + 3f3074f commit eb78af2
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 18 deletions.
7 changes: 0 additions & 7 deletions pkg/roachpb/span_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,3 @@ var RangeDescPageSize = settings.RegisterIntSetting(
100,
settings.IntInRange(5, 25000),
)

func (m *SpanStats) Add(other *SpanStats) {
m.TotalStats.Add(other.TotalStats)
m.ApproximateDiskBytes += other.ApproximateDiskBytes
m.RemoteFileBytes += other.RemoteFileBytes
m.ExternalFileBytes += other.ExternalFileBytes
}
6 changes: 5 additions & 1 deletion pkg/roachpb/span_stats.proto
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ message SpanStatsRequest {
}

message SpanStats {
// TotalStats are the logical MVCC stats for the requested span.
cockroach.storage.enginepb.MVCCStats total_stats = 1 [(gogoproto.nullable) = false];
// range_count measures the number of ranges that the request span falls within.
// A SpanStatsResponse for a span that lies within a range, and whose start
Expand All @@ -46,15 +47,18 @@ message SpanStats {
// ApproximateDiskBytes is the approximate size "on-disk" in all files of the
// data in the span. NB; this *includes* files stored remotely, not just on
// _local_ disk; see the RemoteFileBytes field below.
// It represents a physical value across all replicas.
// NB: The explicit jsontag prevents 'omitempty` from being added by default.
uint64 approximate_disk_bytes = 3 [(gogoproto.jsontag) = "approximate_disk_bytes"];

// RemoteFileBytes is the subset of ApproximateDiskBytes which are stored in
// "remote" files (i.e. shared files and external files).
// "remote" files (i.e. shared files and external files). It represents a
// physical value across all replicas.
uint64 remote_file_bytes = 5;

// ExternalFileBytes is the subset of RemoteFileBytes that are in "external"
// files (not written/owned by this cluster, such as in restored backups).
// It represents a physical value across all replicas.
uint64 external_file_bytes = 6;

// NEXT ID: 7.
Expand Down
21 changes: 17 additions & 4 deletions pkg/server/span_stats_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ func (s *systemStatusServer) spanStatsFanOut(
res.SpanToStats[sp.String()] = &roachpb.SpanStats{}
}

responses := make(map[string]struct{})

spansPerNode, err := s.getSpansPerNode(ctx, req)
if err != nil {
return nil, err
Expand Down Expand Up @@ -101,13 +103,24 @@ func (s *systemStatusServer) spanStatsFanOut(

nodeResponse := resp.(*roachpb.SpanStatsResponse)

// Values of ApproximateDiskBytes, RemoteFileBytes, and ExternalFileBytes should be physical values, but
// TotalStats (MVCC stats) should be the logical, pre-replicated value.
// Note: This implementation can return arbitrarily stale values, because instead of getting
// MVCC stats from the leaseholder, MVCC stats are taken from the node that responded first.
// See #108779.
for spanStr, spanStats := range nodeResponse.SpanToStats {
// We are not counting replicas, so only consider range count
// if it has not been set.
if res.SpanToStats[spanStr].RangeCount == 0 {
// Accumulate physical values across all replicas:
res.SpanToStats[spanStr].ApproximateDiskBytes += spanStats.ApproximateDiskBytes
res.SpanToStats[spanStr].RemoteFileBytes += spanStats.RemoteFileBytes
res.SpanToStats[spanStr].ExternalFileBytes += spanStats.ExternalFileBytes

// Logical values: take the values from the node that responded first.
// TODO: This should really be read from the leaseholder.
if _, ok := responses[spanStr]; !ok {
res.SpanToStats[spanStr].TotalStats = spanStats.TotalStats
res.SpanToStats[spanStr].RangeCount = spanStats.RangeCount
responses[spanStr] = struct{}{}
}
res.SpanToStats[spanStr].Add(spanStats)
}
}

Expand Down
12 changes: 6 additions & 6 deletions pkg/server/span_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,12 @@ func TestSpanStatsFanOut(t *testing.T) {
}

testCases := []testCase{
{spans[0], 4, int64(numNodes * 6)},
{spans[1], 1, int64(numNodes * 3)},
{spans[2], 2, int64(numNodes * 5)},
{spans[3], 2, int64(numNodes * 1)},
{spans[4], 2, int64(numNodes * 3)},
{spans[5], 1, int64(numNodes * 2)},
{spans[0], 4, int64(6)},
{spans[1], 1, int64(3)},
{spans[2], 2, int64(5)},
{spans[3], 2, int64(1)},
{spans[4], 2, int64(3)},
{spans[5], 1, int64(2)},
}

testutils.SucceedsSoon(t, func() error {
Expand Down

0 comments on commit eb78af2

Please sign in to comment.