Skip to content

Commit

Permalink
server, statusccl: de-flake span stats fan-out tests
Browse files Browse the repository at this point in the history
This commit fixes flaky behavior while running
`TestSpanStatsFanOut` and `TestTenantSpanStats` under stress.

Both tests have been updated to ensure the following behavior:
- The tests make sure range splits occur before proceeding.
- The tests will retry their assertions to give the new key-value pairs
time to replicate.

Additionally, `NewTestTenantHelper` was updated to accept a parameter
for the number of nodes in the test host cluster. `TestTenantSpanStats`
now uses a 3-node cluster to test a real fan-out.

Resolves #99770
Resolves #99559
Epic:none
Release note: None
  • Loading branch information
zachlite committed Jul 11, 2023
1 parent e2af0b8 commit f1bc66e
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 77 deletions.
71 changes: 35 additions & 36 deletions pkg/ccl/serverccl/statusccl/tenant_status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,6 @@ func TestTenantStatusAPI(t *testing.T) {
})

t.Run("tenant_span_stats", func(t *testing.T) {
skip.UnderStressWithIssue(t, 99559)
skip.UnderDeadlockWithIssue(t, 99770)
testTenantSpanStats(ctx, t, testHelper)
})

Expand Down Expand Up @@ -212,18 +210,22 @@ func testTenantSpanStats(ctx context.Context, t *testing.T, helper serverccl.Ten
return bytes.Join(keys, nil)
}

controlStats, err := tenantA.TenantStatusSrv().(serverpb.TenantStatusServer).SpanStats(ctx,
&roachpb.SpanStatsRequest{
NodeID: "0", // 0 indicates we want stats from all nodes.
Spans: []roachpb.Span{aSpan},
})
require.NoError(t, err)

// Create a new range in this tenant.
_, _, err = helper.HostCluster().Server(0).SplitRange(makeKey(tPrefix, roachpb.Key("c")))
newRangeKey := makeKey(tPrefix, roachpb.Key("c"))
_, newDesc, err := helper.HostCluster().Server(0).SplitRange(newRangeKey)
require.NoError(t, err)

// Wait for the split to finish and propagate.
// Wait until the range split occurs.
testutils.SucceedsSoon(t, func() error {
desc, err := helper.HostCluster().LookupRange(newRangeKey)
require.NoError(t, err)
if !desc.StartKey.Equal(newDesc.StartKey) {
return errors.New("range has not split")
}
return nil
})

// Wait for the new range to replicate.
err = helper.HostCluster().WaitForFullReplication()
require.NoError(t, err)

Expand All @@ -237,19 +239,6 @@ func testTenantSpanStats(ctx context.Context, t *testing.T, helper serverccl.Ten
}
}

stats, err := tenantA.TenantStatusSrv().(serverpb.TenantStatusServer).SpanStats(ctx,
&roachpb.SpanStatsRequest{
NodeID: "0", // 0 indicates we want stats from all nodes.
Spans: []roachpb.Span{aSpan},
})

require.NoError(t, err)

controlSpanStats := controlStats.SpanToStats[aSpan.String()]
testSpanStats := stats.SpanToStats[aSpan.String()]
require.Equal(t, controlSpanStats.RangeCount+1, testSpanStats.RangeCount)
require.Equal(t, controlSpanStats.TotalStats.LiveCount+int64(len(incKeys)), testSpanStats.TotalStats.LiveCount)

// Make a multi-span call
type spanCase struct {
span roachpb.Span
Expand Down Expand Up @@ -301,19 +290,29 @@ func testTenantSpanStats(ctx context.Context, t *testing.T, helper serverccl.Ten
spans = append(spans, sc.span)
}

stats, err = tenantA.TenantStatusSrv().(serverpb.TenantStatusServer).SpanStats(ctx,
&roachpb.SpanStatsRequest{
NodeID: "0", // 0 indicates we want stats from all nodes.
Spans: spans,
})
testutils.SucceedsSoon(t, func() error {
stats, err := tenantA.TenantStatusSrv().(serverpb.TenantStatusServer).SpanStats(ctx,
&roachpb.SpanStatsRequest{
NodeID: "0", // 0 indicates we want stats from all nodes.
Spans: spans,
})

require.NoError(t, err)

for _, sc := range spanCases {
spanStats := stats.SpanToStats[sc.span.String()]
if sc.expectedRangeCount != spanStats.RangeCount {
return errors.Newf("mismatch on expected range count for span case with span %v", sc.span.String())
}

if sc.expectedLiveCount != spanStats.TotalStats.LiveCount {
return errors.Newf("mismatch on expected live count for span case with span %v", sc.span.String())
}
}

return nil
})

require.NoError(t, err)
// Check each span has their expected values.
for _, sc := range spanCases {
spanStats := stats.SpanToStats[sc.span.String()]
require.Equal(t, spanStats.RangeCount, sc.expectedRangeCount, fmt.Sprintf("mismatch on expected range count for span case with span %v", sc.span.String()))
require.Equal(t, spanStats.TotalStats.LiveCount, sc.expectedLiveCount, fmt.Sprintf("mismatch on expected live count for span case with span %v", sc.span.String()))
}
})

}
Expand Down
81 changes: 40 additions & 41 deletions pkg/server/span_stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,11 @@ func TestSpanStatsMetaScan(t *testing.T) {
}
}

func TestLocalSpanStats(t *testing.T) {
func TestSpanStatsFanOut(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
skip.UnderStress(t)
ctx := context.Background()
numNodes := 3
const numNodes = 3
tc := testcluster.StartTestCluster(t, numNodes, base.TestClusterArgs{})
defer tc.Stopper().Stop(ctx)

Expand Down Expand Up @@ -149,47 +148,47 @@ func TestLocalSpanStats(t *testing.T) {
{spans[4], 2, int64(numNodes * 3)},
{spans[5], 1, int64(numNodes * 2)},
}
// Multi-span request
multiResult, err := s.StatusServer().(serverpb.StatusServer).SpanStats(ctx,
&roachpb.SpanStatsRequest{
NodeID: "0",
Spans: spans,
},
)
require.NoError(t, err)

// Verify stats across different spans.
for _, tcase := range testCases {
rSpan, err := keys.SpanAddr(tcase.span)
require.NoError(t, err)
testutils.SucceedsSoon(t, func() error {

// Assert expected values from multi-span request
spanStats := multiResult.SpanToStats[tcase.span.String()]
require.Equal(
t,
tcase.expectedRanges,
spanStats.RangeCount,
fmt.Sprintf(
"Multi-span: expected %d ranges in span [%s - %s], found %d",
tcase.expectedRanges,
rSpan.Key.String(),
rSpan.EndKey.String(),
spanStats.RangeCount,
),
)
require.Equal(
t,
tcase.expectedKeys,
spanStats.TotalStats.LiveCount,
fmt.Sprintf(
"Multi-span: expected %d keys in span [%s - %s], found %d",
tcase.expectedKeys,
rSpan.Key.String(),
rSpan.EndKey.String(),
spanStats.TotalStats.LiveCount,
),
// Multi-span request
multiResult, err := s.StatusServer().(serverpb.StatusServer).SpanStats(ctx,
&roachpb.SpanStatsRequest{
NodeID: "0",
Spans: spans,
},
)
}
require.NoError(t, err)

// Verify stats across different spans.
for _, tcase := range testCases {
rSpan, err := keys.SpanAddr(tcase.span)
require.NoError(t, err)

// Assert expected values from multi-span request
spanStats := multiResult.SpanToStats[tcase.span.String()]
if tcase.expectedRanges != spanStats.RangeCount {
return errors.Newf("Multi-span: expected %d ranges in span [%s - %s], found %d",
tcase.expectedRanges,
rSpan.Key.String(),
rSpan.EndKey.String(),
spanStats.RangeCount,
)
}
if tcase.expectedKeys != spanStats.TotalStats.LiveCount {
return errors.Newf(
"Multi-span: expected %d keys in span [%s - %s], found %d",
tcase.expectedKeys,
rSpan.Key.String(),
rSpan.EndKey.String(),
spanStats.TotalStats.LiveCount,
)
}
}

return nil
})

}

// BenchmarkSpanStats measures the cost of collecting span statistics.
Expand Down

0 comments on commit f1bc66e

Please sign in to comment.