From bb185044df2bc98df34218816df58fe9f60a624c Mon Sep 17 00:00:00 2001
From: Thomas Hardy <thardy@cockroachlabs.com>
Date: Tue, 4 Oct 2022 14:52:41 -0400
Subject: [PATCH] scheduledlogging: shorten TestCaptureIndexUsageStats run time

Previously, TestCaptureIndexUsageStats ran through 4 iterations of 20
seconds for a run time of over a minute. This change reduces the run
time of the test to under 10 seconds.

Release note: None
---
 pkg/sql/scheduledlogging/BUILD.bazel          |   2 -
 .../captured_index_usage_stats.go             |  30 ++---
 .../captured_index_usage_stats_test.go        | 115 ++++++++++--------
 3 files changed, 80 insertions(+), 67 deletions(-)

diff --git a/pkg/sql/scheduledlogging/BUILD.bazel b/pkg/sql/scheduledlogging/BUILD.bazel
index 051a04d5cbb5..50484a27d8b5 100644
--- a/pkg/sql/scheduledlogging/BUILD.bazel
+++ b/pkg/sql/scheduledlogging/BUILD.bazel
@@ -38,9 +38,7 @@ go_test(
         "//pkg/security/securitytest",
         "//pkg/server",
         "//pkg/settings/cluster",
-        "//pkg/testutils",
         "//pkg/testutils/serverutils",
-        "//pkg/testutils/skip",
         "//pkg/testutils/sqlutils",
         "//pkg/testutils/testcluster",
         "//pkg/util/leaktest",
diff --git a/pkg/sql/scheduledlogging/captured_index_usage_stats.go b/pkg/sql/scheduledlogging/captured_index_usage_stats.go
index 596729eca7c6..d8f0c9d17285 100644
--- a/pkg/sql/scheduledlogging/captured_index_usage_stats.go
+++ b/pkg/sql/scheduledlogging/captured_index_usage_stats.go
@@ -72,6 +72,9 @@ type CaptureIndexUsageStatsTestingKnobs struct {
 	// scheduled interval in the case that the logging duration exceeds the
 	// default scheduled interval duration.
 	getOverlapDuration func() time.Duration
+	// onScheduleComplete allows tests to hook into when the current schedule
+	// is completed to check for the expected logs.
+	onScheduleComplete func()
 }
 
 // ModuleTestingKnobs implements base.ModuleTestingKnobs interface.
@@ -104,17 +107,14 @@ func (s *CaptureIndexUsageStatsLoggingScheduler) durationOnOverlap() time.Durati
 }
 
 func (s *CaptureIndexUsageStatsLoggingScheduler) durationUntilNextInterval() time.Duration {
-	// If telemetry is disabled, return the disabled interval duration.
-	if !telemetryCaptureIndexUsageStatsEnabled.Get(&s.st.SV) {
-		return telemetryCaptureIndexUsageStatsStatusCheckEnabledInterval.Get(&s.st.SV)
-	}
+	loggingDur := s.getLoggingDuration()
 	// If the previous logging operation took longer than or equal to the set
 	// schedule interval, schedule the next interval immediately.
-	if s.getLoggingDuration() >= telemetryCaptureIndexUsageStatsInterval.Get(&s.st.SV) {
+	if loggingDur >= telemetryCaptureIndexUsageStatsInterval.Get(&s.st.SV) {
 		return s.durationOnOverlap()
 	}
 	// Otherwise, schedule the next interval normally.
-	return telemetryCaptureIndexUsageStatsInterval.Get(&s.st.SV)
+	return telemetryCaptureIndexUsageStatsInterval.Get(&s.st.SV) - loggingDur
 }
 
 // Start starts the capture index usage statistics logging scheduler.
@@ -146,7 +146,15 @@ func (s *CaptureIndexUsageStatsLoggingScheduler) start(ctx context.Context, stop
 			case <-stopper.ShouldQuiesce():
 				return
 			case <-timer.C:
+				if !telemetryCaptureIndexUsageStatsEnabled.Get(&s.st.SV) {
+					timer.Reset(telemetryCaptureIndexUsageStatsStatusCheckEnabledInterval.Get(&s.st.SV))
+					continue
+				}
 				s.currentCaptureStartTime = timeutil.Now()
+				err := captureIndexUsageStats(ctx, s.ie, stopper, telemetryCaptureIndexUsageStatsLoggingDelay.Get(&s.st.SV))
+				if err != nil {
+					log.Warningf(ctx, "error capturing index usage stats: %+v", err)
+				}
 				dur := s.durationUntilNextInterval()
 				if dur < time.Second {
 					// Avoid intervals that are too short, to prevent a hot
@@ -154,14 +162,8 @@ func (s *CaptureIndexUsageStatsLoggingScheduler) start(ctx context.Context, stop
 					dur = time.Second
 				}
 				timer.Reset(dur)
-
-				if !telemetryCaptureIndexUsageStatsEnabled.Get(&s.st.SV) {
-					continue
-				}
-
-				err := captureIndexUsageStats(ctx, s.ie, stopper, telemetryCaptureIndexUsageStatsLoggingDelay.Get(&s.st.SV))
-				if err != nil {
-					log.Warningf(ctx, "error capturing index usage stats: %+v", err)
+				if s.knobs != nil && s.knobs.onScheduleComplete != nil {
+					s.knobs.onScheduleComplete()
 				}
 			}
 		}
diff --git a/pkg/sql/scheduledlogging/captured_index_usage_stats_test.go b/pkg/sql/scheduledlogging/captured_index_usage_stats_test.go
index 654006586ee8..e23023119059 100644
--- a/pkg/sql/scheduledlogging/captured_index_usage_stats_test.go
+++ b/pkg/sql/scheduledlogging/captured_index_usage_stats_test.go
@@ -22,9 +22,7 @@ import (
 
 	"github.com/cockroachdb/cockroach/pkg/base"
 	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
-	"github.com/cockroachdb/cockroach/pkg/testutils"
 	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
-	"github.com/cockroachdb/cockroach/pkg/testutils/skip"
 	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
 	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
 	"github.com/cockroachdb/cockroach/pkg/util/log"
@@ -66,26 +64,22 @@ func (s *stubDurations) getOverlapDuration() time.Duration {
 
 func TestCaptureIndexUsageStats(t *testing.T) {
 	defer leaktest.AfterTest(t)()
-
-	skip.UnderShort(t, "#87772")
-
 	sc := log.ScopeWithoutShowLogs(t)
 	defer sc.Close(t)
 
 	cleanup := logtestutils.InstallTelemetryLogFileSink(sc, t)
 	defer cleanup()
 
+	firstScheduleLoggingDuration := 1 * time.Second
 	sd := stubDurations{}
-	sd.setLoggingDuration(1 * time.Second)
-	sd.setOverlapDuration(10 * time.Second)
-	stubScheduleInterval := 20 * time.Second
-	stubScheduleCheckEnabledInterval := 1 * time.Second
+	sd.setLoggingDuration(firstScheduleLoggingDuration)
+	sd.setOverlapDuration(time.Second)
+	stubScheduleInterval := 2 * time.Second
+	stubScheduleCheckEnabledInterval := time.Second
 	stubLoggingDelay := 0 * time.Second
 
-	// timeBuffer is a short time buffer to account for delays in the schedule
-	// timings when running tests. The time buffer is smaller than the difference
-	// between each schedule interval to ensure that there is no overlap.
-	timeBuffer := 5 * time.Second
+	// timeBuffer is a short time buffer to account for non-determinism in the logging timings.
+	timeBuffer := time.Second
 
 	settings := cluster.MakeTestingClusterSettings()
 	// Configure capture index usage statistics to be disabled. This is to test
@@ -103,12 +97,18 @@ func TestCaptureIndexUsageStats(t *testing.T) {
 	// Configure the delay between each emission of index usage stats logs.
 	telemetryCaptureIndexUsageStatsLoggingDelay.Override(context.Background(), &settings.SV, stubLoggingDelay)
 
+	scheduleCompleteChan := make(chan struct{})
+
 	s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{
 		Settings: settings,
 		Knobs: base.TestingKnobs{
 			CapturedIndexUsageStatsKnobs: &CaptureIndexUsageStatsTestingKnobs{
 				getLoggingDuration: sd.getLoggingDuration,
 				getOverlapDuration: sd.getOverlapDuration,
+				onScheduleComplete: func() {
+					scheduleCompleteChan <- struct{}{}
+					<-scheduleCompleteChan
+				},
 			},
 		},
 	})
@@ -157,18 +157,19 @@ func TestCaptureIndexUsageStats(t *testing.T) {
 	expectedTotalNumEntriesInSingleInterval := 8
 	expectedNumberOfIndividualIndexEntriesInSingleInterval := 1
 
-	// Expect index usage statistics logs once the schedule disabled interval has passed.
-	// Assert that we have the expected number of total logs and expected number
-	// of logs for each index.
-	testutils.SucceedsWithin(t, func() error {
-		return checkNumTotalEntriesAndNumIndexEntries(t,
-			expectedIndexNames,
-			expectedTotalNumEntriesInSingleInterval,
-			expectedNumberOfIndividualIndexEntriesInSingleInterval,
-		)
-	}, stubScheduleCheckEnabledInterval+timeBuffer)
-
-	// Verify that a second schedule has run after the enabled interval has passed.
+	// Wait for channel value from end of 1st schedule.
+	<-scheduleCompleteChan
+
+	// Check the expected number of entries from the 1st schedule.
+	require.NoError(t, checkNumTotalEntriesAndNumIndexEntries(t,
+		expectedIndexNames,
+		expectedTotalNumEntriesInSingleInterval,
+		expectedNumberOfIndividualIndexEntriesInSingleInterval,
+		scheduleCompleteChan,
+	), "error encountered checking the number of total entries and number of index entries")
+
+	scheduleCompleteChan <- struct{}{}
+
 	// Expect number of total entries to hold 2 times the number of entries in a
 	// single interval.
 	expectedTotalNumEntriesAfterTwoIntervals := expectedTotalNumEntriesInSingleInterval * 2
@@ -180,18 +181,19 @@ func TestCaptureIndexUsageStats(t *testing.T) {
 	stubLoggingDuration := stubScheduleInterval * 2
 	sd.setLoggingDuration(stubLoggingDuration)
 
-	// Expect index usage statistics logs once the schedule enabled interval has passed.
-	// Assert that we have the expected number of total logs and expected number
-	// of logs for each index.
-	testutils.SucceedsWithin(t, func() error {
-		return checkNumTotalEntriesAndNumIndexEntries(t,
-			expectedIndexNames,
-			expectedTotalNumEntriesAfterTwoIntervals,
-			expectedNumberOfIndividualIndexEntriesAfterTwoIntervals,
-		)
-	}, stubScheduleInterval+timeBuffer)
-
-	// Verify that a third schedule has run after the overlap duration has passed.
+	// Wait for channel value from end of 2nd schedule.
+	<-scheduleCompleteChan
+
+	// Check the expected number of entries from the 2nd schedule.
+	require.NoError(t, checkNumTotalEntriesAndNumIndexEntries(t,
+		expectedIndexNames,
+		expectedTotalNumEntriesAfterTwoIntervals,
+		expectedNumberOfIndividualIndexEntriesAfterTwoIntervals,
+		scheduleCompleteChan,
+	), "error encountered checking the number of total entries and number of index entries")
+
+	scheduleCompleteChan <- struct{}{}
+
 	// Expect number of total entries to hold 3 times the number of entries in a
 	// single interval.
 	expectedTotalNumEntriesAfterThreeIntervals := expectedTotalNumEntriesInSingleInterval * 3
@@ -199,18 +201,22 @@ func TestCaptureIndexUsageStats(t *testing.T) {
 	// entries in a single interval.
 	expectedNumberOfIndividualIndexEntriesAfterThreeIntervals := expectedNumberOfIndividualIndexEntriesInSingleInterval * 3
 
-	// Assert that we have the expected number of total logs and expected number
-	// of logs for each index.
-	testutils.SucceedsWithin(t, func() error {
-		return checkNumTotalEntriesAndNumIndexEntries(t,
-			expectedIndexNames,
-			expectedTotalNumEntriesAfterThreeIntervals,
-			expectedNumberOfIndividualIndexEntriesAfterThreeIntervals,
-		)
-	}, sd.getOverlapDuration()+timeBuffer)
+	// Wait for channel value from end of 3rd schedule.
+	<-scheduleCompleteChan
+
+	// Check the expected number of entries from the 3rd schedule.
+	require.NoError(t, checkNumTotalEntriesAndNumIndexEntries(t,
+		expectedIndexNames,
+		expectedTotalNumEntriesAfterThreeIntervals,
+		expectedNumberOfIndividualIndexEntriesAfterThreeIntervals,
+		scheduleCompleteChan,
+	), "error encountered checking the number of total entries and number of index entries")
+
 	// Stop capturing index usage statistics.
 	telemetryCaptureIndexUsageStatsEnabled.Override(context.Background(), &settings.SV, false)
 
+	scheduleCompleteChan <- struct{}{}
+
 	// Iterate through entries, ensure that the timestamp difference between each
 	// schedule is as expected.
 	startTimestamp := int64(0)
@@ -234,7 +240,7 @@ func TestCaptureIndexUsageStats(t *testing.T) {
 	testData := []time.Duration{
 		0 * time.Second,
 		// the difference in number of seconds between first and second schedule
-		stubScheduleInterval,
+		stubScheduleInterval - firstScheduleLoggingDuration,
 		// the difference in number of seconds between second and third schedule
 		sd.getOverlapDuration(),
 	}
@@ -257,9 +263,9 @@ func TestCaptureIndexUsageStats(t *testing.T) {
 		}
 
 		actualDuration := time.Duration(currentTimestamp-previousTimestamp) * time.Nanosecond
-		// Use a time window to afford some non-determinisim in the test.
-		require.Greater(t, expectedDuration, actualDuration-time.Second)
-		require.Greater(t, actualDuration+time.Second, expectedDuration)
+		// Use a time window to afford some non-determinism in the test.
+		require.Greater(t, expectedDuration, actualDuration-timeBuffer)
+		require.Greater(t, actualDuration+timeBuffer, expectedDuration)
 		previousTimestamp = currentTimestamp
 	}
 }
@@ -273,7 +279,9 @@ func checkNumTotalEntriesAndNumIndexEntries(
 	expectedIndexNames []string,
 	expectedTotalEntries int,
 	expectedIndividualIndexEntries int,
+	scheduleCompleteChan chan struct{},
 ) error {
+	log.Flush()
 	// Fetch log entries.
 	entries, err := log.FetchEntriesFromFiles(
 		0,
@@ -283,11 +291,13 @@ func checkNumTotalEntriesAndNumIndexEntries(
 		log.WithMarkedSensitiveData,
 	)
 	if err != nil {
+		close(scheduleCompleteChan)
 		return err
 	}
 
 	// Assert that we have the correct number of entries.
 	if expectedTotalEntries != len(entries) {
+		close(scheduleCompleteChan)
 		return errors.Newf("expected %d total entries, got %d", expectedTotalEntries, len(entries))
 	}
 
@@ -305,6 +315,7 @@ func checkNumTotalEntriesAndNumIndexEntries(
 		}
 		err := json.Unmarshal([]byte(e.Message), &s)
 		if err != nil {
+			close(scheduleCompleteChan)
 			t.Fatal(err)
 		}
 		countByIndex[s.IndexName] = countByIndex[s.IndexName] + 1
@@ -313,11 +324,13 @@ func checkNumTotalEntriesAndNumIndexEntries(
 	t.Logf("found index counts: %+v", countByIndex)
 
 	if expected, actual := expectedTotalEntries/expectedIndividualIndexEntries, len(countByIndex); actual != expected {
+		close(scheduleCompleteChan)
 		return errors.Newf("expected %d indexes, got %d", expected, actual)
 	}
 
 	for idxName, c := range countByIndex {
 		if c != expectedIndividualIndexEntries {
+			close(scheduleCompleteChan)
 			return errors.Newf("for index %s: expected entry count %d, got %d",
 				idxName, expectedIndividualIndexEntries, c)
 		}
@@ -325,9 +338,9 @@ func checkNumTotalEntriesAndNumIndexEntries(
 
 	for _, idxName := range expectedIndexNames {
 		if _, ok := countByIndex[idxName]; !ok {
+			close(scheduleCompleteChan)
 			return errors.Newf("no entry found for index %s", idxName)
 		}
 	}
-
 	return nil
 }