From de478e1fea199de49bd7305f3a54ccce50f5424a Mon Sep 17 00:00:00 2001 From: Marylia Gutierrez Date: Thu, 11 Aug 2022 15:12:22 -0400 Subject: [PATCH] sql: fix aggregation of statistics Previously, because we were using a join, we were double counting statistics when we had the same fingerprint in memory and persisted that had more than one index recommendation. This commit adds a `DISTINCT` so we only count them once. Fixes #85958 Release note: None --- pkg/sql/crdb_internal.go | 2 +- .../testdata/logic_test/create_statements | 4 +- .../sqlstats/persistedsqlstats/reader_test.go | 61 +++++++++++++++++++ 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/pkg/sql/crdb_internal.go b/pkg/sql/crdb_internal.go index 129a456edbea..9827b26634f1 100644 --- a/pkg/sql/crdb_internal.go +++ b/pkg/sql/crdb_internal.go @@ -5571,7 +5571,7 @@ SELECT plan_hash, app_name, max(metadata) as metadata, - crdb_internal.merge_statement_stats(array_agg(statistics)), + crdb_internal.merge_statement_stats(array_agg(DISTINCT statistics)), max(sampled_plan), aggregation_interval, array_remove(array_agg(index_rec), NULL) AS index_recommendations diff --git a/pkg/sql/logictest/testdata/logic_test/create_statements b/pkg/sql/logictest/testdata/logic_test/create_statements index 543ad9394a3e..ab9260c2eb0a 100644 --- a/pkg/sql/logictest/testdata/logic_test/create_statements +++ b/pkg/sql/logictest/testdata/logic_test/create_statements @@ -1512,7 +1512,7 @@ CREATE VIEW crdb_internal.statement_statistics ( plan_hash, app_name, max(metadata) AS metadata, - crdb_internal.merge_statement_stats(array_agg(statistics)), + crdb_internal.merge_statement_stats(array_agg(DISTINCT statistics)), max(sampled_plan), aggregation_interval, array_remove(array_agg(index_rec), NULL) AS index_recommendations @@ -1571,7 +1571,7 @@ CREATE VIEW crdb_internal.statement_statistics ( plan_hash, app_name, max(metadata) AS metadata, - crdb_internal.merge_statement_stats(array_agg(statistics)), + crdb_internal.merge_statement_stats(array_agg(DISTINCT statistics)), max(sampled_plan), aggregation_interval, array_remove(array_agg(index_rec), NULL) AS index_recommendations diff --git a/pkg/sql/sqlstats/persistedsqlstats/reader_test.go b/pkg/sql/sqlstats/persistedsqlstats/reader_test.go index 1580d542c5c6..75bd15527702 100644 --- a/pkg/sql/sqlstats/persistedsqlstats/reader_test.go +++ b/pkg/sql/sqlstats/persistedsqlstats/reader_test.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sqlstats" "github.com/cockroachdb/cockroach/pkg/sql/sqlstats/persistedsqlstats" "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" + "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -143,6 +144,66 @@ func TestPersistedSQLStatsRead(t *testing.T) { }) } +// Testing same fingerprint having more than one index recommendation and +// checking the aggregation on the crdb_internal.statement_statistics table. +// Testing for issue #85958. +func TestSQLStatsWithMultipleIdxRec(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + skip.UnderStressRace(t, "expensive tests") + + fakeTime := stubTime{ + aggInterval: time.Hour, + } + fakeTime.setTime(timeutil.Now()) + + testCluster := serverutils.StartNewTestCluster(t, 3 /* numNodes */, base.TestClusterArgs{ + ServerArgs: base.TestServerArgs{ + Knobs: base.TestingKnobs{ + SQLStatsKnobs: &sqlstats.TestingKnobs{ + StubTimeNow: fakeTime.Now, + AOSTClause: "AS OF SYSTEM TIME '-1us'", + }, + }, + }, + }) + ctx := context.Background() + defer testCluster.Stopper().Stop(ctx) + + sqlConn := sqlutils.MakeSQLRunner(testCluster.ServerConn(0 /* idx */)) + + sqlConn.Exec(t, "CREATE TABLE t1 (k INT, i INT, f FLOAT, s STRING)") + sqlConn.Exec(t, "CREATE TABLE t2 (k INT, i INT, s STRING)") + + query := "SELECT t1.k FROM t1 JOIN t2 ON t1.k = t2.k WHERE t1.i > 3 AND t2.i > 3" + memorySelect := "SELECT statistics -> 'statistics' ->> 'cnt' as count, " + + "array_length(index_recommendations, 1) FROM " + + "crdb_internal.cluster_statement_statistics WHERE metadata ->> 'query' = " + + "'SELECT t1.k FROM t1 JOIN t2 ON t1.k = t2.k WHERE (t1.i > _) AND (t2.i > _)'" + combinedSelect := "SELECT statistics -> 'statistics' ->> 'cnt' as count, " + + "array_length(index_recommendations, 1) FROM " + + "crdb_internal.statement_statistics WHERE metadata ->> 'query' = " + + "'SELECT t1.k FROM t1 JOIN t2 ON t1.k = t2.k WHERE (t1.i > _) AND (t2.i > _)'" + + // Execute enough times to have index recommendations generated. + // This will generate two recommendations. + for i := 0; i < 6; i++ { + sqlConn.Exec(t, query) + } + var cnt int64 + var recs int64 + // It must have the same count 6 on both in-memory and combined tables. + // This test when there are more than one recommendation, so adding this + // example that has 2 recommendations; + sqlConn.QueryRow(t, memorySelect).Scan(&cnt, &recs) + require.Equal(t, int64(6), cnt) + require.Equal(t, int64(2), recs) + sqlConn.QueryRow(t, combinedSelect).Scan(&cnt, &recs) + require.Equal(t, int64(6), cnt) + require.Equal(t, int64(2), recs) +} + func verifyStoredStmtFingerprints( t *testing.T, expectedStmtFingerprints map[string]int64,