From fbcd656a2936e25f10643d8a27285542537b534c Mon Sep 17 00:00:00 2001
From: Andy Yang
Date: Wed, 2 Aug 2023 17:36:04 -0400
Subject: [PATCH] sql/sem/builtins: deflake
 TestSerialNormalizationWithUniqueUnorderedID

This patch changes the test to run its main logic multiple times
(currently set to 10) and only fail due to non-uniformity if we see
statistically significant non-uniformity in a majority of the runs.
For context, an occasional failure is not unexpected given that this is
a statistical test, but failures in a majority of the runs would be
more suggestive of an actual problem with the uniformity of generated
IDs.

Release note: None
---
 pkg/sql/sem/builtins/BUILD.bazel      |   2 -
 pkg/sql/sem/builtins/builtins_test.go | 164 ++++++++++++++------------
 2 files changed, 88 insertions(+), 78 deletions(-)

diff --git a/pkg/sql/sem/builtins/BUILD.bazel b/pkg/sql/sem/builtins/BUILD.bazel
index ec9ca5ac2607..5292399d19b0 100644
--- a/pkg/sql/sem/builtins/BUILD.bazel
+++ b/pkg/sql/sem/builtins/BUILD.bazel
@@ -214,10 +214,8 @@ go_test(
         "//pkg/util/log",
         "//pkg/util/mon",
         "//pkg/util/randutil",
-        "//pkg/util/retry",
         "//pkg/util/syncutil",
         "//pkg/util/timeutil",
-        "@com_github_cockroachdb_errors//:errors",
        "@com_github_lib_pq//:pq",
         "@com_github_lib_pq//oid",
         "@com_github_stretchr_testify//assert",
diff --git a/pkg/sql/sem/builtins/builtins_test.go b/pkg/sql/sem/builtins/builtins_test.go
index 4c55e0581108..8517601884c3 100644
--- a/pkg/sql/sem/builtins/builtins_test.go
+++ b/pkg/sql/sem/builtins/builtins_test.go
@@ -14,6 +14,7 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"math"
 	"math/bits"
 	"math/rand"
 	"testing"
@@ -32,9 +33,7 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/util/duration"
 	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
 	"github.com/cockroachdb/cockroach/pkg/util/log"
-	"github.com/cockroachdb/cockroach/pkg/util/retry"
 	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
-	"github.com/cockroachdb/errors"
 	"github.com/lib/pq"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -97,8 +96,8 @@ func TestMapToUniqueUnorderedID(t *testing.T) {
 }
 
 // TestSerialNormalizationWithUniqueUnorderedID makes sure that serial
-// normalization can use unique_unordered_id() and a split in a table followed
-// by insertions guarantees a (somewhat) uniform distribution of the data.
+// normalization can use the unordered_rowid mode and a large number of
+// insertions (80k) results in a (somewhat) uniform distribution of the data.
 func TestSerialNormalizationWithUniqueUnorderedID(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
@@ -107,25 +106,33 @@ func TestSerialNormalizationWithUniqueUnorderedID(t *testing.T) {
 		"assumes large N")
 
 	ctx := context.Background()
-	const attempts = 2
-	// This test can flake when the random data is not distributed evenly.
-	err := retry.WithMaxAttempts(ctx, retry.Options{}, attempts, func() error {
-
-		params := base.TestServerArgs{
-			Knobs: base.TestingKnobs{
-				SQLEvalContext: &eval.TestingKnobs{
-					// We disable the randomization of some batch sizes because
-					// with some low values the test takes much longer.
-					ForceProductionValues: true,
+
+	// Since this is a statistical test, an occasional observation of non-uniformity
+	// is not in and of itself a huge concern. As such, we'll only fail the test if
+	// a majority of our observations show statistically significant amounts of
+	// non-uniformity.
+	const numObservations = 10
+	numNonUniformObservations := 0
+	for i := 0; i < numObservations; i++ {
+		// We use an anonymous function because of the defer in each iteration.
+		func() {
+			t.Logf("starting observation %d", i)
+
+			params := base.TestServerArgs{
+				Knobs: base.TestingKnobs{
+					SQLEvalContext: &eval.TestingKnobs{
+						// We disable the randomization of some batch sizes because
+						// with some low values the test takes much longer.
+						ForceProductionValues: true,
+					},
 				},
-			},
-		}
-		s, db, _ := serverutils.StartServer(t, params)
-		defer s.Stopper().Stop(ctx)
+			}
+			s, db, _ := serverutils.StartServer(t, params)
+			defer s.Stopper().Stop(ctx)
 
-		tdb := sqlutils.MakeSQLRunner(db)
-		// Create a new table with serial primary key i (unordered_rowid) and int j (index).
-		tdb.Exec(t, `
+			tdb := sqlutils.MakeSQLRunner(db)
+			// Create a new table with serial primary key i (unordered_rowid) and int j (index).
+			tdb.Exec(t, `
 SET serial_normalization TO 'unordered_rowid';
 CREATE DATABASE t;
 USE t;
@@ -134,30 +141,29 @@ CREATE TABLE t (
   j INT
 )`)
 
-		numberOfRows := 100000
-
-		// Insert rows.
-		tdb.Exec(t, fmt.Sprintf(`
+			// Insert rows.
+			numberOfRows := 80000
+			tdb.Exec(t, fmt.Sprintf(`
 INSERT INTO t(j) SELECT * FROM generate_series(1, %d);
 `, numberOfRows))
 
-		// Build an equi-width histogram over the key range. The below query will
-		// generate the key bounds for each high-order bit pattern as defined by
-		// prefixBits. For example, if this were 3, we'd get the following groups:
-		//
-		//          low          |         high
-		// ----------------------+----------------------
-		//                     0 |  2305843009213693952
-		//   2305843009213693952 |  4611686018427387904
-		//   4611686018427387904 |  6917529027641081856
-		//   6917529027641081856 |  9223372036854775807
-		//
-		const prefixBits = 4
-		var keyCounts pq.Int64Array
-		tdb.QueryRow(t, `
+			// Build an equi-width histogram over the key range. The below query will
+			// generate the key bounds for each high-order bit pattern as defined by
+			// prefixBits. For example, if this were 3, we'd get the following groups:
+			//
+			//          low          |         high
+			// ----------------------+----------------------
+			//                     0 |  2305843009213693952
+			//   2305843009213693952 |  4611686018427387904
+			//   4611686018427387904 |  6917529027641081856
+			//   6917529027641081856 |  9223372036854775807
+			//
+			const prefixBits = 4
+			var keyCounts pq.Int64Array
+			tdb.QueryRow(t, `
 WITH boundaries AS (
-  SELECT i << (64 - $1) AS p FROM ROWS FROM (generate_series(0, (1<<($1-1)) -1)) AS t (i)
-  UNION ALL SELECT (((1 << 62) - 1) << 1)+1 -- int63 max value
+  SELECT i << (64 - $1) AS p FROM ROWS FROM (generate_series(0, (1 << ($1 - 1)) - 1)) AS t (i)
+  UNION ALL SELECT (((1 << 62) - 1) << 1) + 1 -- int63 max value
 ),
 groups AS (
   SELECT *
@@ -167,55 +173,61 @@ INSERT INTO t(j) SELECT * FROM generate_series(1, %d);
 counts AS (
   SELECT count(i) AS c
     FROM t, groups
-   WHERE low <= i AND high > i
+   WHERE low < i AND i <= high
    GROUP BY (low, high)
 )
 SELECT array_agg(c) FROM counts;`, prefixBits).Scan(&keyCounts)
 
-		t.Log("Key counts in each split range")
-		for i, keyCount := range keyCounts {
-			t.Logf("range %d: %d\n", i, keyCount)
-		}
-		require.Len(t, keyCounts, 1<<(prefixBits-1))
-
-		// To check that the distribution over ranges is not uniform, we use a
-		// chi-square goodness of fit statistic. We'll set our null hypothesis as
-		// 'each range in the distribution should have the same probability of getting
-		// a row inserted' and we'll check if we can reject the null hypothesis if
-		// chi-square is greater than the critical value we currently set as 35.2585,
-		// a deliberate choice that gives us a p-value of 0.00001 according to
-		// https://www.fourmilab.ch/rpkp/experiments/analysis/chiCalc.html. If we are
-		// able to reject the null hypothesis, then the distribution is not uniform,
-		// and we raise an error. This test has 7 degrees of freedom (categories - 1).
-		chiSquared := discreteUniformChiSquared(keyCounts)
-		criticalValue := 35.2585
-		if chiSquared >= criticalValue {
-			return errors.Newf("chiSquared value of %f must be"+
-				" less than criticalVal %f to guarantee distribution is relatively uniform",
-				chiSquared, criticalValue)
-		}
-		return nil
-	})
-	require.NoError(t, err)
+			t.Log("Key counts in each split range")
+			for i, keyCount := range keyCounts {
+				t.Logf("range %d: %d\n", i, keyCount)
+			}
+			require.Len(t, keyCounts, 1<<(prefixBits-1))
+
+			// To check that the distribution over ranges is not uniform, we use a
+			// chi-square goodness of fit statistic. We'll set our null hypothesis as
+			// 'each range in the distribution should have the same probability of getting
+			// a row inserted' and we'll check if we can reject the null hypothesis if
+			// chi-squared is greater than the critical value we currently set as 35.2585,
+			// a deliberate choice that gives us a p-value of 0.00001 according to
+			// https://www.fourmilab.ch/rpkp/experiments/analysis/chiCalc.html. If we are
+			// able to reject the null hypothesis, then the distribution is not uniform,
+			// and we raise an error. This test has 7 degrees of freedom (categories - 1).
+			chiSquared := discreteUniformChiSquared(keyCounts)
+			criticalValue := 35.2585
+			if chiSquared > criticalValue {
+				t.Logf("chiSquared value of %f > criticalVal %f indicates that the distribution "+
+					"is non-uniform (statistically significant, p < 0.00001)",
+					chiSquared, criticalValue)
+				numNonUniformObservations += 1
+			} else {
+				t.Logf("chiSquared value of %f <= criticalVal %f indicates that the distribution "+
+					"is relatively uniform",
+					chiSquared, criticalValue)
+			}
+		}()
+	}
+
+	if numNonUniformObservations >= numObservations/2 {
+		t.Fatalf("a majority of our observations indicate the distribution is non-uniform")
+	}
 }
 
 // discreteUniformChiSquared calculates the chi-squared statistic (ref:
 // https://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm) to be used
 // in our hypothesis testing for the distribution of rows among ranges.
-func discreteUniformChiSquared(counts []int64) float64 {
+func discreteUniformChiSquared(buckets []int64) float64 {
 	var n int64
-	for _, c := range counts {
+	for _, c := range buckets {
 		n += c
 	}
-	p := float64(1) / float64(len(counts))
-	var stat float64
-	for _, c := range counts {
-		oSubE := float64(c)/float64(n) - p
-		stat += (oSubE * oSubE) / p
+	expectedPerBucket := float64(n) / float64(len(buckets))
+	var chiSquared float64
+	for _, c := range buckets {
+		chiSquared += math.Pow(float64(c)-expectedPerBucket, 2) / expectedPerBucket
 	}
-	stat *= float64(n)
-	return stat
+	return chiSquared
 }
 
 func TestStringToArrayAndBack(t *testing.T) {
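For reference, below is a small, self-contained Go sketch (not part of the commit) of the per-observation uniformity check the patched test performs: it mirrors the discreteUniformChiSquared helper above and compares the statistic against the same critical value of 35.2585 (p = 0.00001 at 7 degrees of freedom). The per-range counts here are made-up illustration data, not output from the test.

// uniformitycheck_sketch.go: illustrative only; mirrors the helper in
// builtins_test.go under the assumptions stated above.
package main

import (
	"fmt"
	"math"
)

// discreteUniformChiSquared sums (observed - expected)^2 / expected over all
// buckets, where the expected count per bucket assumes a uniform distribution.
func discreteUniformChiSquared(buckets []int64) float64 {
	var n int64
	for _, c := range buckets {
		n += c
	}
	expectedPerBucket := float64(n) / float64(len(buckets))
	var chiSquared float64
	for _, c := range buckets {
		chiSquared += math.Pow(float64(c)-expectedPerBucket, 2) / expectedPerBucket
	}
	return chiSquared
}

func main() {
	// Hypothetical counts for 8 key ranges (80k rows total, roughly uniform).
	counts := []int64{10021, 9987, 10102, 9899, 10010, 9975, 10050, 9956}
	const criticalValue = 35.2585 // p = 0.00001 at 7 degrees of freedom
	chiSquared := discreteUniformChiSquared(counts)
	if chiSquared > criticalValue {
		fmt.Printf("chi-squared %.4f > %.4f: statistically significant non-uniformity\n",
			chiSquared, criticalValue)
	} else {
		fmt.Printf("chi-squared %.4f <= %.4f: consistent with a uniform distribution\n",
			chiSquared, criticalValue)
	}
}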