From bb185044df2bc98df34218816df58fe9f60a624c Mon Sep 17 00:00:00 2001 From: Thomas Hardy Date: Tue, 4 Oct 2022 14:52:41 -0400 Subject: [PATCH] scheduledlogging: shorten TestCaptureIndexUsageStats run time Previously, TestCaptureIndexUsageStats ran through 4 iterations of 20 seconds for a run time of over a minute. This change reduces the run time of the test to under 10 seconds. Release note: None --- pkg/sql/scheduledlogging/BUILD.bazel | 2 - .../captured_index_usage_stats.go | 30 ++--- .../captured_index_usage_stats_test.go | 115 ++++++++++-------- 3 files changed, 80 insertions(+), 67 deletions(-) diff --git a/pkg/sql/scheduledlogging/BUILD.bazel b/pkg/sql/scheduledlogging/BUILD.bazel index 051a04d5cbb5..50484a27d8b5 100644 --- a/pkg/sql/scheduledlogging/BUILD.bazel +++ b/pkg/sql/scheduledlogging/BUILD.bazel @@ -38,9 +38,7 @@ go_test( "//pkg/security/securitytest", "//pkg/server", "//pkg/settings/cluster", - "//pkg/testutils", "//pkg/testutils/serverutils", - "//pkg/testutils/skip", "//pkg/testutils/sqlutils", "//pkg/testutils/testcluster", "//pkg/util/leaktest", diff --git a/pkg/sql/scheduledlogging/captured_index_usage_stats.go b/pkg/sql/scheduledlogging/captured_index_usage_stats.go index 596729eca7c6..d8f0c9d17285 100644 --- a/pkg/sql/scheduledlogging/captured_index_usage_stats.go +++ b/pkg/sql/scheduledlogging/captured_index_usage_stats.go @@ -72,6 +72,9 @@ type CaptureIndexUsageStatsTestingKnobs struct { // scheduled interval in the case that the logging duration exceeds the // default scheduled interval duration. getOverlapDuration func() time.Duration + // onScheduleComplete allows tests to hook into when the current schedule + // is completed to check for the expected logs. + onScheduleComplete func() } // ModuleTestingKnobs implements base.ModuleTestingKnobs interface. @@ -104,17 +107,14 @@ func (s *CaptureIndexUsageStatsLoggingScheduler) durationOnOverlap() time.Durati } func (s *CaptureIndexUsageStatsLoggingScheduler) durationUntilNextInterval() time.Duration { - // If telemetry is disabled, return the disabled interval duration. - if !telemetryCaptureIndexUsageStatsEnabled.Get(&s.st.SV) { - return telemetryCaptureIndexUsageStatsStatusCheckEnabledInterval.Get(&s.st.SV) - } + loggingDur := s.getLoggingDuration() // If the previous logging operation took longer than or equal to the set // schedule interval, schedule the next interval immediately. - if s.getLoggingDuration() >= telemetryCaptureIndexUsageStatsInterval.Get(&s.st.SV) { + if loggingDur >= telemetryCaptureIndexUsageStatsInterval.Get(&s.st.SV) { return s.durationOnOverlap() } // Otherwise, schedule the next interval normally. - return telemetryCaptureIndexUsageStatsInterval.Get(&s.st.SV) + return telemetryCaptureIndexUsageStatsInterval.Get(&s.st.SV) - loggingDur } // Start starts the capture index usage statistics logging scheduler. @@ -146,7 +146,15 @@ func (s *CaptureIndexUsageStatsLoggingScheduler) start(ctx context.Context, stop case <-stopper.ShouldQuiesce(): return case <-timer.C: + if !telemetryCaptureIndexUsageStatsEnabled.Get(&s.st.SV) { + timer.Reset(telemetryCaptureIndexUsageStatsStatusCheckEnabledInterval.Get(&s.st.SV)) + continue + } s.currentCaptureStartTime = timeutil.Now() + err := captureIndexUsageStats(ctx, s.ie, stopper, telemetryCaptureIndexUsageStatsLoggingDelay.Get(&s.st.SV)) + if err != nil { + log.Warningf(ctx, "error capturing index usage stats: %+v", err) + } dur := s.durationUntilNextInterval() if dur < time.Second { // Avoid intervals that are too short, to prevent a hot @@ -154,14 +162,8 @@ func (s *CaptureIndexUsageStatsLoggingScheduler) start(ctx context.Context, stop dur = time.Second } timer.Reset(dur) - - if !telemetryCaptureIndexUsageStatsEnabled.Get(&s.st.SV) { - continue - } - - err := captureIndexUsageStats(ctx, s.ie, stopper, telemetryCaptureIndexUsageStatsLoggingDelay.Get(&s.st.SV)) - if err != nil { - log.Warningf(ctx, "error capturing index usage stats: %+v", err) + if s.knobs != nil && s.knobs.onScheduleComplete != nil { + s.knobs.onScheduleComplete() } } } diff --git a/pkg/sql/scheduledlogging/captured_index_usage_stats_test.go b/pkg/sql/scheduledlogging/captured_index_usage_stats_test.go index 654006586ee8..e23023119059 100644 --- a/pkg/sql/scheduledlogging/captured_index_usage_stats_test.go +++ b/pkg/sql/scheduledlogging/captured_index_usage_stats_test.go @@ -22,9 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/settings/cluster" - "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" - "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -66,26 +64,22 @@ func (s *stubDurations) getOverlapDuration() time.Duration { func TestCaptureIndexUsageStats(t *testing.T) { defer leaktest.AfterTest(t)() - - skip.UnderShort(t, "#87772") - sc := log.ScopeWithoutShowLogs(t) defer sc.Close(t) cleanup := logtestutils.InstallTelemetryLogFileSink(sc, t) defer cleanup() + firstScheduleLoggingDuration := 1 * time.Second sd := stubDurations{} - sd.setLoggingDuration(1 * time.Second) - sd.setOverlapDuration(10 * time.Second) - stubScheduleInterval := 20 * time.Second - stubScheduleCheckEnabledInterval := 1 * time.Second + sd.setLoggingDuration(firstScheduleLoggingDuration) + sd.setOverlapDuration(time.Second) + stubScheduleInterval := 2 * time.Second + stubScheduleCheckEnabledInterval := time.Second stubLoggingDelay := 0 * time.Second - // timeBuffer is a short time buffer to account for delays in the schedule - // timings when running tests. The time buffer is smaller than the difference - // between each schedule interval to ensure that there is no overlap. - timeBuffer := 5 * time.Second + // timeBuffer is a short time buffer to account for non-determinism in the logging timings. + timeBuffer := time.Second settings := cluster.MakeTestingClusterSettings() // Configure capture index usage statistics to be disabled. This is to test @@ -103,12 +97,18 @@ func TestCaptureIndexUsageStats(t *testing.T) { // Configure the delay between each emission of index usage stats logs. telemetryCaptureIndexUsageStatsLoggingDelay.Override(context.Background(), &settings.SV, stubLoggingDelay) + scheduleCompleteChan := make(chan struct{}) + s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{ Settings: settings, Knobs: base.TestingKnobs{ CapturedIndexUsageStatsKnobs: &CaptureIndexUsageStatsTestingKnobs{ getLoggingDuration: sd.getLoggingDuration, getOverlapDuration: sd.getOverlapDuration, + onScheduleComplete: func() { + scheduleCompleteChan <- struct{}{} + <-scheduleCompleteChan + }, }, }, }) @@ -157,18 +157,19 @@ func TestCaptureIndexUsageStats(t *testing.T) { expectedTotalNumEntriesInSingleInterval := 8 expectedNumberOfIndividualIndexEntriesInSingleInterval := 1 - // Expect index usage statistics logs once the schedule disabled interval has passed. - // Assert that we have the expected number of total logs and expected number - // of logs for each index. - testutils.SucceedsWithin(t, func() error { - return checkNumTotalEntriesAndNumIndexEntries(t, - expectedIndexNames, - expectedTotalNumEntriesInSingleInterval, - expectedNumberOfIndividualIndexEntriesInSingleInterval, - ) - }, stubScheduleCheckEnabledInterval+timeBuffer) - - // Verify that a second schedule has run after the enabled interval has passed. + // Wait for channel value from end of 1st schedule. + <-scheduleCompleteChan + + // Check the expected number of entries from the 1st schedule. + require.NoError(t, checkNumTotalEntriesAndNumIndexEntries(t, + expectedIndexNames, + expectedTotalNumEntriesInSingleInterval, + expectedNumberOfIndividualIndexEntriesInSingleInterval, + scheduleCompleteChan, + ), "error encountered checking the number of total entries and number of index entries") + + scheduleCompleteChan <- struct{}{} + // Expect number of total entries to hold 2 times the number of entries in a // single interval. expectedTotalNumEntriesAfterTwoIntervals := expectedTotalNumEntriesInSingleInterval * 2 @@ -180,18 +181,19 @@ func TestCaptureIndexUsageStats(t *testing.T) { stubLoggingDuration := stubScheduleInterval * 2 sd.setLoggingDuration(stubLoggingDuration) - // Expect index usage statistics logs once the schedule enabled interval has passed. - // Assert that we have the expected number of total logs and expected number - // of logs for each index. - testutils.SucceedsWithin(t, func() error { - return checkNumTotalEntriesAndNumIndexEntries(t, - expectedIndexNames, - expectedTotalNumEntriesAfterTwoIntervals, - expectedNumberOfIndividualIndexEntriesAfterTwoIntervals, - ) - }, stubScheduleInterval+timeBuffer) - - // Verify that a third schedule has run after the overlap duration has passed. + // Wait for channel value from end of 2nd schedule. + <-scheduleCompleteChan + + // Check the expected number of entries from the 2nd schedule. + require.NoError(t, checkNumTotalEntriesAndNumIndexEntries(t, + expectedIndexNames, + expectedTotalNumEntriesAfterTwoIntervals, + expectedNumberOfIndividualIndexEntriesAfterTwoIntervals, + scheduleCompleteChan, + ), "error encountered checking the number of total entries and number of index entries") + + scheduleCompleteChan <- struct{}{} + // Expect number of total entries to hold 3 times the number of entries in a // single interval. expectedTotalNumEntriesAfterThreeIntervals := expectedTotalNumEntriesInSingleInterval * 3 @@ -199,18 +201,22 @@ func TestCaptureIndexUsageStats(t *testing.T) { // entries in a single interval. expectedNumberOfIndividualIndexEntriesAfterThreeIntervals := expectedNumberOfIndividualIndexEntriesInSingleInterval * 3 - // Assert that we have the expected number of total logs and expected number - // of logs for each index. - testutils.SucceedsWithin(t, func() error { - return checkNumTotalEntriesAndNumIndexEntries(t, - expectedIndexNames, - expectedTotalNumEntriesAfterThreeIntervals, - expectedNumberOfIndividualIndexEntriesAfterThreeIntervals, - ) - }, sd.getOverlapDuration()+timeBuffer) + // Wait for channel value from end of 3rd schedule. + <-scheduleCompleteChan + + // Check the expected number of entries from the 3rd schedule. + require.NoError(t, checkNumTotalEntriesAndNumIndexEntries(t, + expectedIndexNames, + expectedTotalNumEntriesAfterThreeIntervals, + expectedNumberOfIndividualIndexEntriesAfterThreeIntervals, + scheduleCompleteChan, + ), "error encountered checking the number of total entries and number of index entries") + // Stop capturing index usage statistics. telemetryCaptureIndexUsageStatsEnabled.Override(context.Background(), &settings.SV, false) + scheduleCompleteChan <- struct{}{} + // Iterate through entries, ensure that the timestamp difference between each // schedule is as expected. startTimestamp := int64(0) @@ -234,7 +240,7 @@ func TestCaptureIndexUsageStats(t *testing.T) { testData := []time.Duration{ 0 * time.Second, // the difference in number of seconds between first and second schedule - stubScheduleInterval, + stubScheduleInterval - firstScheduleLoggingDuration, // the difference in number of seconds between second and third schedule sd.getOverlapDuration(), } @@ -257,9 +263,9 @@ func TestCaptureIndexUsageStats(t *testing.T) { } actualDuration := time.Duration(currentTimestamp-previousTimestamp) * time.Nanosecond - // Use a time window to afford some non-determinisim in the test. - require.Greater(t, expectedDuration, actualDuration-time.Second) - require.Greater(t, actualDuration+time.Second, expectedDuration) + // Use a time window to afford some non-determinism in the test. + require.Greater(t, expectedDuration, actualDuration-timeBuffer) + require.Greater(t, actualDuration+timeBuffer, expectedDuration) previousTimestamp = currentTimestamp } } @@ -273,7 +279,9 @@ func checkNumTotalEntriesAndNumIndexEntries( expectedIndexNames []string, expectedTotalEntries int, expectedIndividualIndexEntries int, + scheduleCompleteChan chan struct{}, ) error { + log.Flush() // Fetch log entries. entries, err := log.FetchEntriesFromFiles( 0, @@ -283,11 +291,13 @@ func checkNumTotalEntriesAndNumIndexEntries( log.WithMarkedSensitiveData, ) if err != nil { + close(scheduleCompleteChan) return err } // Assert that we have the correct number of entries. if expectedTotalEntries != len(entries) { + close(scheduleCompleteChan) return errors.Newf("expected %d total entries, got %d", expectedTotalEntries, len(entries)) } @@ -305,6 +315,7 @@ func checkNumTotalEntriesAndNumIndexEntries( } err := json.Unmarshal([]byte(e.Message), &s) if err != nil { + close(scheduleCompleteChan) t.Fatal(err) } countByIndex[s.IndexName] = countByIndex[s.IndexName] + 1 @@ -313,11 +324,13 @@ func checkNumTotalEntriesAndNumIndexEntries( t.Logf("found index counts: %+v", countByIndex) if expected, actual := expectedTotalEntries/expectedIndividualIndexEntries, len(countByIndex); actual != expected { + close(scheduleCompleteChan) return errors.Newf("expected %d indexes, got %d", expected, actual) } for idxName, c := range countByIndex { if c != expectedIndividualIndexEntries { + close(scheduleCompleteChan) return errors.Newf("for index %s: expected entry count %d, got %d", idxName, expectedIndividualIndexEntries, c) } @@ -325,9 +338,9 @@ func checkNumTotalEntriesAndNumIndexEntries( for _, idxName := range expectedIndexNames { if _, ok := countByIndex[idxName]; !ok { + close(scheduleCompleteChan) return errors.Newf("no entry found for index %s", idxName) } } - return nil }