From 3442f62d1ced19e8d4334d470b5aaeaae5e45ee5 Mon Sep 17 00:00:00 2001
From: Rebecca Taft
Date: Fri, 2 Jun 2023 18:09:37 -0500
Subject: [PATCH 1/2] sql: make transaction_rows_read_err prevent large scans

Prior to this commit, setting transaction_rows_read_err to a non-zero
value would cause a transaction to fail as soon as a statement caused
the total number of rows read to exceed transaction_rows_read_err.
However, it was possible that each statement could read many more than
transaction_rows_read_err rows.

This commit adds logic so that a single scan never reads more than
transaction_rows_read_err+1 rows if transaction_rows_read_err is set.

Informs #70473

Release note (performance improvement): If transaction_rows_read_err is
set to a non-zero value, we now ensure that any single scan never reads
more than transaction_rows_read_err+1 rows. This prevents transactions
that would error due to the transaction_rows_read_err setting from
causing a large performance overhead due to large scans.
---
 .../tests/3node-tenant/generated_test.go      |   7 +
 .../logictest/testdata/logic_test/guardrails  |  74 ++++++++++
 .../tests/fakedist-disk/generated_test.go     |   7 +
 .../tests/fakedist-vec-off/generated_test.go  |   7 +
 .../tests/fakedist/generated_test.go          |   7 +
 .../generated_test.go                         |   7 +
 .../local-mixed-22.2-23.1/generated_test.go   |   7 +
 .../tests/local-vec-off/generated_test.go     |   7 +
 .../logictest/tests/local/generated_test.go   |   7 +
 pkg/sql/opt/exec/execbuilder/relational.go    |  18 ++-
 .../opt/exec/execbuilder/testdata/guardrails  | 133 ++++++++++++++++++
 .../execbuilder/tests/local/generated_test.go |   7 +
 pkg/sql/opt/memo/memo.go                      |   3 +
 pkg/sql/opt/memo/memo_test.go                 |   6 +
 14 files changed, 293 insertions(+), 4 deletions(-)
 create mode 100644 pkg/sql/logictest/testdata/logic_test/guardrails
 create mode 100644 pkg/sql/opt/exec/execbuilder/testdata/guardrails
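The rule described in the release note can be summarized with a small sketch. This is an illustrative, standalone function, not the actual CockroachDB code (the real change lives in the scanParams hunks of pkg/sql/opt/exec/execbuilder/relational.go below); the name clampHardLimit and the standalone form are assumptions made for the example.

package main

import "fmt"

// clampHardLimit sketches the guardrail rule added by this patch: inject a
// hard limit of guardrail+1 rows, but only when the guardrail is enabled,
// no equal-or-tighter hard limit already exists, and the scan is not already
// known (via its index constraints) to return at most guardrail+1 rows.
// Reading guardrail+1 rows is just enough for the conn_executor to notice
// that the limit was exceeded and return the usual error.
func clampHardLimit(hardLimit, guardrail int64, maxResults uint64, maxResultsOk bool) int64 {
	if guardrail > 0 &&
		(hardLimit == 0 || hardLimit > guardrail+1) &&
		(!maxResultsOk || maxResults > uint64(guardrail+1)) {
		return guardrail + 1
	}
	return hardLimit
}

func main() {
	// Full scan with transaction_rows_read_err = 1: limited to 2 rows.
	fmt.Println(clampHardLimit(0, 1, 0, false)) // 2
	// Point lookup known to return at most 1 row: no limit injected.
	fmt.Println(clampHardLimit(0, 1, 1, true)) // 0
	// LIMIT 50 with the guardrail at 50: the existing, tighter limit is kept.
	fmt.Println(clampHardLimit(50, 50, 0, false)) // 50
}

These three cases correspond to the scenarios exercised by the new logic test and EXPLAIN output below.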
diff --git a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go
index fed1f1e8c579..c84b4abed8a7 100644
--- a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go
+++ b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go
@@ -927,6 +927,13 @@ func TestTenantLogic_group_join(
 	runLogicTest(t, "group_join")
 }
 
+func TestTenantLogic_guardrails(
+	t *testing.T,
+) {
+	defer leaktest.AfterTest(t)()
+	runLogicTest(t, "guardrails")
+}
+
 func TestTenantLogic_hash_join(
 	t *testing.T,
 ) {
diff --git a/pkg/sql/logictest/testdata/logic_test/guardrails b/pkg/sql/logictest/testdata/logic_test/guardrails
new file mode 100644
index 000000000000..b80720435648
--- /dev/null
+++ b/pkg/sql/logictest/testdata/logic_test/guardrails
@@ -0,0 +1,74 @@
+statement ok
+CREATE TABLE guardrails (i INT PRIMARY KEY);
+INSERT INTO guardrails SELECT generate_series(1, 100)
+
+# When the transaction_rows_read_err guardrail is set to 1, we apply a limit
+# of 2 in all cases except for when we know at most 2 rows are returned.
+statement ok
+SET transaction_rows_read_err = 1
+
+query error txn has read 2 rows, which is above the limit
+SELECT * FROM guardrails
+
+query error txn has read 2 rows, which is above the limit
+SELECT * FROM guardrails LIMIT 50
+
+statement ok
+SELECT * FROM guardrails WHERE i = 1
+
+statement error txn has read 2 rows, which is above the limit
+SELECT * FROM guardrails WHERE i IN (1, 2)
+
+query error txn has read 2 rows, which is above the limit
+SELECT * FROM guardrails WHERE i > 0 AND i <= 10
+
+# When the transaction_rows_read_err guardrail is set to 50, we only apply a
+# limit if it's possible that more than 51 rows may be returned.
+statement ok
+SET transaction_rows_read_err = 50
+
+query error txn has read 51 rows, which is above the limit
+SELECT * FROM guardrails
+
+statement ok
+SELECT * FROM guardrails LIMIT 50
+
+statement ok
+SELECT * FROM guardrails WHERE i = 1
+
+statement ok
+SELECT * FROM guardrails WHERE i > 0 AND i <= 10
+
+statement ok
+SET transaction_rows_read_err = 150
+
+statement ok
+CREATE TABLE guardrails2 (i INT PRIMARY KEY);
+INSERT INTO guardrails2 SELECT generate_series(1, 150)
+
+statement ok
+BEGIN
+
+# A full scan shouldn't error if it only scans transaction_rows_read_err rows.
+statement ok
+SELECT * FROM guardrails2
+
+statement ok
+COMMIT
+
+statement ok
+BEGIN
+
+statement ok
+SELECT * FROM guardrails
+
+# The whole transaction has now read more than transaction_rows_read_err rows,
+# so error.
+query error txn has read 250 rows, which is above the limit
+SELECT * FROM guardrails2
+
+statement ok
+ROLLBACK
+
+statement ok
+RESET transaction_rows_read_err
diff --git a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go
index 5a470be9fb1b..cfa85f74dfb3 100644
--- a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go
+++ b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go
@@ -898,6 +898,13 @@ func TestLogic_group_join(
 	runLogicTest(t, "group_join")
 }
 
+func TestLogic_guardrails(
+	t *testing.T,
+) {
+	defer leaktest.AfterTest(t)()
+	runLogicTest(t, "guardrails")
+}
+
 func TestLogic_hash_join(
 	t *testing.T,
 ) {
diff --git a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go
index b6762a993ccb..f488a6d9c37e 100644
--- a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go
+++ b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go
@@ -898,6 +898,13 @@ func TestLogic_group_join(
 	runLogicTest(t, "group_join")
 }
 
+func TestLogic_guardrails(
+	t *testing.T,
+) {
+	defer leaktest.AfterTest(t)()
+	runLogicTest(t, "guardrails")
+}
+
 func TestLogic_hash_join(
 	t *testing.T,
 ) {
diff --git a/pkg/sql/logictest/tests/fakedist/generated_test.go b/pkg/sql/logictest/tests/fakedist/generated_test.go
index 4ab35a0298e4..8975cef885f8 100644
--- a/pkg/sql/logictest/tests/fakedist/generated_test.go
+++ b/pkg/sql/logictest/tests/fakedist/generated_test.go
@@ -905,6 +905,13 @@ func TestLogic_group_join(
 	runLogicTest(t, "group_join")
 }
 
+func TestLogic_guardrails(
+	t *testing.T,
+) {
+	defer leaktest.AfterTest(t)()
+	runLogicTest(t, "guardrails")
+}
+
 func TestLogic_hash_join(
 	t *testing.T,
 ) {
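The guardrail applies per transaction, so the behavior covered by the logic test above can also be observed from an ordinary client session. The following is a minimal, hypothetical sketch using Go's database/sql with the lib/pq driver against an assumed local, insecure single-node cluster; the connection string and the table name t are illustrative assumptions, not part of this patch.

package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"

	_ "github.com/lib/pq" // any Postgres-wire-compatible driver works
)

func main() {
	ctx := context.Background()
	// Assumed local single-node cluster; adjust the DSN for your environment.
	db, err := sql.Open("postgres",
		"postgresql://root@localhost:26257/defaultdb?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Use a single connection so the SET applies to the later statements.
	conn, err := db.Conn(ctx)
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	for _, stmt := range []string{
		"CREATE TABLE IF NOT EXISTS t (i INT PRIMARY KEY)",
		"INSERT INTO t SELECT generate_series(1, 100) ON CONFLICT (i) DO NOTHING",
		"SET transaction_rows_read_err = 50",
	} {
		if _, err := conn.ExecContext(ctx, stmt); err != nil {
			log.Fatal(err)
		}
	}

	// With the guardrail at 50, a full scan of the 100-row table now stops
	// after reading 51 rows and fails with a "txn has read ... rows" error,
	// rather than scanning the whole table before erroring.
	rows, err := conn.QueryContext(ctx, "SELECT * FROM t")
	if err == nil {
		for rows.Next() {
			// Drain the result; the error may only surface during iteration.
		}
		err = rows.Err()
		rows.Close()
	}
	fmt.Println("full scan error:", err)
}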
diff --git a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go
index 95b98e1740b4..54a9fb9df046 100644
--- a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go
+++ b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go
@@ -884,6 +884,13 @@ func TestLogic_group_join(
 	runLogicTest(t, "group_join")
 }
 
+func TestLogic_guardrails(
+	t *testing.T,
+) {
+	defer leaktest.AfterTest(t)()
+	runLogicTest(t, "guardrails")
+}
+
 func TestLogic_hash_join(
 	t *testing.T,
 ) {
diff --git a/pkg/sql/logictest/tests/local-mixed-22.2-23.1/generated_test.go b/pkg/sql/logictest/tests/local-mixed-22.2-23.1/generated_test.go
index 58e2ecbcf2ab..ab8177fa56fc 100644
--- a/pkg/sql/logictest/tests/local-mixed-22.2-23.1/generated_test.go
+++ b/pkg/sql/logictest/tests/local-mixed-22.2-23.1/generated_test.go
@@ -884,6 +884,13 @@ func TestLogic_group_join(
 	runLogicTest(t, "group_join")
 }
 
+func TestLogic_guardrails(
+	t *testing.T,
+) {
+	defer leaktest.AfterTest(t)()
+	runLogicTest(t, "guardrails")
+}
+
 func TestLogic_hash_join(
 	t *testing.T,
 ) {
diff --git a/pkg/sql/logictest/tests/local-vec-off/generated_test.go b/pkg/sql/logictest/tests/local-vec-off/generated_test.go
index 565a69fbfcf6..521ac1440511 100644
--- a/pkg/sql/logictest/tests/local-vec-off/generated_test.go
+++ b/pkg/sql/logictest/tests/local-vec-off/generated_test.go
@@ -905,6 +905,13 @@ func TestLogic_group_join(
 	runLogicTest(t, "group_join")
 }
 
+func TestLogic_guardrails(
+	t *testing.T,
+) {
+	defer leaktest.AfterTest(t)()
+	runLogicTest(t, "guardrails")
+}
+
 func TestLogic_hash_join(
 	t *testing.T,
 ) {
diff --git a/pkg/sql/logictest/tests/local/generated_test.go b/pkg/sql/logictest/tests/local/generated_test.go
index 9269558ff59e..996a2556b24f 100644
--- a/pkg/sql/logictest/tests/local/generated_test.go
+++ b/pkg/sql/logictest/tests/local/generated_test.go
@@ -982,6 +982,13 @@ func TestLogic_group_join(
 	runLogicTest(t, "group_join")
 }
 
+func TestLogic_guardrails(
+	t *testing.T,
+) {
+	defer leaktest.AfterTest(t)()
+	runLogicTest(t, "guardrails")
+}
+
 func TestLogic_hash_join(
 	t *testing.T,
 ) {
diff --git a/pkg/sql/opt/exec/execbuilder/relational.go b/pkg/sql/opt/exec/execbuilder/relational.go
index 78f3c6732149..97c2c298bb83 100644
--- a/pkg/sql/opt/exec/execbuilder/relational.go
+++ b/pkg/sql/opt/exec/execbuilder/relational.go
@@ -645,6 +645,17 @@ func (b *Builder) scanParams(
 	softLimit := reqProps.LimitHintInt64()
 	hardLimit := scan.HardLimit.RowCount()
 
+	maxResults, maxResultsOk := b.indexConstraintMaxResults(scan, relProps)
+
+	// If the txn_rows_read_err guardrail is set, make sure that we never read
+	// more than txn_rows_read_err+1 rows on any single scan. Adding a hard limit
+	// of txn_rows_read_err+1 ensures that the results will still be correct since
+	// the conn_executor will return an error if the limit is actually reached.
+	if txnRowsReadErr := b.evalCtx.SessionData().TxnRowsReadErr; txnRowsReadErr > 0 &&
+		(hardLimit == 0 || hardLimit > txnRowsReadErr+1) &&
+		(!maxResultsOk || maxResults > uint64(txnRowsReadErr+1)) {
+		hardLimit = txnRowsReadErr + 1
+	}
 
 	// If this is a bounded staleness query, check that it touches at most one
 	// range.
@@ -659,8 +670,7 @@ func (b *Builder) scanParams(
 			// multiple ranges if the first range is empty.
 			valid = false
 		} else {
-			maxResults, ok := b.indexConstraintMaxResults(scan, relProps)
-			valid = ok && maxResults == 1
+			valid = maxResultsOk && maxResults == 1
 		}
 		if !valid {
 			return exec.ScanParams{}, opt.ColMap{}, unimplemented.NewWithIssuef(67562,
@@ -672,8 +682,8 @@ func (b *Builder) scanParams(
 
 	parallelize := false
 	if hardLimit == 0 && softLimit == 0 {
-		maxResults, ok := b.indexConstraintMaxResults(scan, relProps)
-		if ok && maxResults < getParallelScanResultThreshold(b.evalCtx.TestingKnobs.ForceProductionValues) {
+		if maxResultsOk &&
+			maxResults < getParallelScanResultThreshold(b.evalCtx.TestingKnobs.ForceProductionValues) {
 			// Don't set the flag when we have a single span which returns a single
 			// row: it does nothing in this case except litter EXPLAINs.
 			// There are still cases where the flag doesn't do anything when the spans
diff --git a/pkg/sql/opt/exec/execbuilder/testdata/guardrails b/pkg/sql/opt/exec/execbuilder/testdata/guardrails
new file mode 100644
index 000000000000..4a0162f6ff5b
--- /dev/null
+++ b/pkg/sql/opt/exec/execbuilder/testdata/guardrails
@@ -0,0 +1,133 @@
+# LogicTest: local
+
+statement ok
+CREATE TABLE guardrails (i INT PRIMARY KEY)
+
+# When the transaction_rows_read_err guardrail is set to 1, we apply a limit
+# of 2 in all cases except for when we know at most 2 rows are returned.
+statement ok
+SET transaction_rows_read_err = 1
+
+query T
+EXPLAIN (VERBOSE) SELECT * FROM guardrails
+----
+distribution: local
+vectorized: true
+·
+• scan
+  columns: (i)
+  estimated row count: 1,000 (missing stats)
+  table: guardrails@guardrails_pkey
+  spans: LIMITED SCAN
+  limit: 2
+
+query T
+EXPLAIN (VERBOSE) SELECT * FROM guardrails LIMIT 50
+----
+distribution: local
+vectorized: true
+·
+• scan
+  columns: (i)
+  estimated row count: 50 (missing stats)
+  table: guardrails@guardrails_pkey
+  spans: LIMITED SCAN
+  limit: 2
+
+query T
+EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i = 1
+----
+distribution: local
+vectorized: true
+·
+• scan
+  columns: (i)
+  estimated row count: 1 (missing stats)
+  table: guardrails@guardrails_pkey
+  spans: /1/0
+
+query T
+EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i IN (1, 2)
+----
+distribution: local
+vectorized: true
+·
+• scan
+  columns: (i)
+  estimated row count: 2 (missing stats)
+  table: guardrails@guardrails_pkey
+  spans: /1-/3
+  parallel
+
+query T
+EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i > 0 AND i <= 10
+----
+distribution: local
+vectorized: true
+·
+• scan
+  columns: (i)
+  estimated row count: 10 (missing stats)
+  table: guardrails@guardrails_pkey
+  spans: /1-/11
+  limit: 2
+
+# When the transaction_rows_read_err guardrail is set to 50, we only apply a
+# limit if it's possible that more than 51 rows may be returned.
+statement ok
+SET transaction_rows_read_err = 50
+
+query T
+EXPLAIN (VERBOSE) SELECT * FROM guardrails
+----
+distribution: local
+vectorized: true
+·
+• scan
+  columns: (i)
+  estimated row count: 1,000 (missing stats)
+  table: guardrails@guardrails_pkey
+  spans: LIMITED SCAN
+  limit: 51
+
+query T
+EXPLAIN (VERBOSE) SELECT * FROM guardrails LIMIT 50
+----
+distribution: local
+vectorized: true
+·
+• scan
+  columns: (i)
+  estimated row count: 50 (missing stats)
+  table: guardrails@guardrails_pkey
+  spans: LIMITED SCAN
+  limit: 50
+
+query T
+EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i = 1
+----
+distribution: local
+vectorized: true
+·
+• scan
+  columns: (i)
+  estimated row count: 1 (missing stats)
+  table: guardrails@guardrails_pkey
+  spans: /1/0
+
+query T
+EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i > 0 AND i <= 10
+----
+distribution: local
+vectorized: true
+·
+• scan
+  columns: (i)
+  estimated row count: 10 (missing stats)
+  table: guardrails@guardrails_pkey
+  spans: /1-/11
+  parallel
+
+statement ok
+RESET transaction_rows_read_err
diff --git a/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go b/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go
index 4237d46387bb..d0823b07dac0 100644
--- a/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go
+++ b/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go
@@ -245,6 +245,13 @@ func TestExecBuild_group_join(
 	runExecBuildLogicTest(t, "group_join")
 }
 
+func TestExecBuild_guardrails(
+	t *testing.T,
+) {
+	defer leaktest.AfterTest(t)()
+	runExecBuildLogicTest(t, "guardrails")
+}
+
 func TestExecBuild_hash_sharded_index(
 	t *testing.T,
 ) {
diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go
index 9bde4b4aba82..9cd060dae529 100644
--- a/pkg/sql/opt/memo/memo.go
+++ b/pkg/sql/opt/memo/memo.go
@@ -151,6 +151,7 @@ type Memo struct {
 	propagateInputOrdering                 bool
 	disallowFullTableScans                 bool
 	largeFullScanRows                      float64
+	txnRowsReadErr                         int64
 	nullOrderedLast                        bool
 	costScansWithDefaultColSize            bool
 	allowUnconstrainedNonCoveringIndexScan bool
@@ -211,6 +212,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) {
 		propagateInputOrdering:                 evalCtx.SessionData().PropagateInputOrdering,
 		disallowFullTableScans:                 evalCtx.SessionData().DisallowFullTableScans,
 		largeFullScanRows:                      evalCtx.SessionData().LargeFullScanRows,
+		txnRowsReadErr:                         evalCtx.SessionData().TxnRowsReadErr,
 		nullOrderedLast:                        evalCtx.SessionData().NullOrderedLast,
 		costScansWithDefaultColSize:            evalCtx.SessionData().CostScansWithDefaultColSize,
 		allowUnconstrainedNonCoveringIndexScan: evalCtx.SessionData().UnconstrainedNonCoveringIndexScanEnabled,
@@ -355,6 +357,7 @@ func (m *Memo) IsStale(
 		m.propagateInputOrdering != evalCtx.SessionData().PropagateInputOrdering ||
 		m.disallowFullTableScans != evalCtx.SessionData().DisallowFullTableScans ||
 		m.largeFullScanRows != evalCtx.SessionData().LargeFullScanRows ||
+		m.txnRowsReadErr != evalCtx.SessionData().TxnRowsReadErr ||
 		m.nullOrderedLast != evalCtx.SessionData().NullOrderedLast ||
 		m.costScansWithDefaultColSize != evalCtx.SessionData().CostScansWithDefaultColSize ||
 		m.allowUnconstrainedNonCoveringIndexScan != evalCtx.SessionData().UnconstrainedNonCoveringIndexScanEnabled ||
diff --git a/pkg/sql/opt/memo/memo_test.go b/pkg/sql/opt/memo/memo_test.go
index d804a3d5719f..d0810de9df62 100644
--- a/pkg/sql/opt/memo/memo_test.go
+++ b/pkg/sql/opt/memo/memo_test.go
@@ -276,6 +276,12 @@ func TestMemoIsStale(t *testing.T) {
 	evalCtx.SessionData().LargeFullScanRows = 0
 	notStale()
 
+	// Stale txn rows read error.
+	evalCtx.SessionData().TxnRowsReadErr = 1000
+	stale()
+	evalCtx.SessionData().TxnRowsReadErr = 0
+	notStale()
+
 	// Stale null ordered last.
 	evalCtx.SessionData().NullOrderedLast = true
 	stale()

From 1d51268ce7884d39da1427a5307756650e8f1bb6 Mon Sep 17 00:00:00 2001
From: j82w
Date: Mon, 5 Jun 2023 10:58:44 -0400
Subject: [PATCH 2/2] sql: fix reset_sql_stats to truncate activity tables

Previously, resetting SQL stats from the UI or via
crdb_internal.reset_sql_stats() would only reset the
statement_statistics and transaction_statistics tables. This would
leave the sql_activity tables with old data.

Resetting the stats now truncates the sql_activity tables as well.

Fixes: https://github.com/cockroachdb/cockroach/issues/104321

Epic: none

Release note (sql change): Fix crdb_internal.reset_sql_stats() to clean
up the sql_activity tables, which work as a cache for the stats.
---
 pkg/sql/sql_activity_update_job_test.go       | 28 +++++++++++++++++++
 .../sqlstats/persistedsqlstats/BUILD.bazel    |  1 +
 .../sqlstats/persistedsqlstats/controller.go  | 16 ++++++++++-
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/pkg/sql/sql_activity_update_job_test.go b/pkg/sql/sql_activity_update_job_test.go
index 545fc46af1d1..355aaefc3764 100644
--- a/pkg/sql/sql_activity_update_job_test.go
+++ b/pkg/sql/sql_activity_update_job_test.go
@@ -186,6 +186,34 @@ func TestSqlActivityUpdateJob(t *testing.T) {
 	err = row.Scan(&count)
 	require.NoError(t, err)
 	require.Equal(t, count, 1, "statement_activity after transfer: expect:1, actual:%d", count)
+
+	// Reset the stats and verify the activity tables are empty.
+	_, err = db.ExecContext(ctx, "SELECT crdb_internal.reset_sql_stats()")
+	require.NoError(t, err)
+
+	row = db.QueryRowContext(ctx, "SELECT count_rows() "+
+		"FROM system.public.transaction_activity")
+	err = row.Scan(&count)
+	require.NoError(t, err)
+	require.Zero(t, count, "transaction_activity after transfer: expect:0, actual:%d", count)
+
+	row = db.QueryRowContext(ctx, "SELECT count_rows() "+
+		"FROM system.public.statement_activity")
+	err = row.Scan(&count)
+	require.NoError(t, err)
+	require.Zero(t, count, "statement_activity after transfer: expect:0, actual:%d", count)
+
+	row = db.QueryRowContext(ctx, "SELECT count_rows() "+
+		"FROM crdb_internal.transaction_activity")
+	err = row.Scan(&count)
+	require.NoError(t, err)
+	require.Zero(t, count, "transaction_activity after transfer: expect:0, actual:%d", count)
+
+	row = db.QueryRowContext(ctx, "SELECT count_rows() "+
+		"FROM crdb_internal.statement_activity")
+	err = row.Scan(&count)
+	require.NoError(t, err)
+	require.Zero(t, count, "statement_activity after transfer: expect:0, actual:%d", count)
 }
 
 // TestSqlActivityUpdateJob verifies that the
diff --git a/pkg/sql/sqlstats/persistedsqlstats/BUILD.bazel b/pkg/sql/sqlstats/persistedsqlstats/BUILD.bazel
index 4aca0403cbc5..cdd63e2ab86a 100644
--- a/pkg/sql/sqlstats/persistedsqlstats/BUILD.bazel
+++ b/pkg/sql/sqlstats/persistedsqlstats/BUILD.bazel
@@ -24,6 +24,7 @@ go_library(
     visibility = ["//visibility:public"],
     deps = [
         "//pkg/base",
+        "//pkg/clusterversion",
         "//pkg/jobs",
         "//pkg/jobs/jobspb",
         "//pkg/scheduledjobs",
diff --git a/pkg/sql/sqlstats/persistedsqlstats/controller.go b/pkg/sql/sqlstats/persistedsqlstats/controller.go
index 5c6a760ec86c..77d725496675 100644
--- a/pkg/sql/sqlstats/persistedsqlstats/controller.go
+++ b/pkg/sql/sqlstats/persistedsqlstats/controller.go
@@ -13,6 +13,7 @@ package persistedsqlstats
 import (
 	"context"
 
+	"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/server/serverpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/isql" @@ -68,9 +69,22 @@ func (s *Controller) ResetClusterSQLStats(ctx context.Context) error { "TRUNCATE "+tableName) return err } + if err := resetSysTableStats("system.statement_statistics"); err != nil { return err } - return resetSysTableStats("system.transaction_statistics") + if err := resetSysTableStats("system.transaction_statistics"); err != nil { + return err + } + + if !s.st.Version.IsActive(ctx, clusterversion.V23_1CreateSystemActivityUpdateJob) { + return nil + } + + if err := resetSysTableStats("system.statement_activity"); err != nil { + return err + } + + return resetSysTableStats("system.transaction_activity") }