Commit

104290: sql: make transaction_rows_read_err prevent large scans r=rytaft a=rytaft

Prior to this commit, setting `transaction_rows_read_err` to a non-zero value would cause a transaction to fail as soon as a statement caused the total number of rows read to exceed `transaction_rows_read_err`. However, it was possible that each statement could read many more than `transaction_rows_read_err` rows. This commit adds logic so that a single scan never reads more than `transaction_rows_read_err+1` rows if `transaction_rows_read_err` is set.

Informs cockroachdb#70473

Release note (performance improvement): If `transaction_rows_read_err` is set to a non-zero value, we now ensure that any single scan never reads more than `transaction_rows_read_err+1` rows. This prevents transactions that would error due to the `transaction_rows_read_err` setting from causing a large performance overhead due to large scans.
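
As a rough sketch of the resulting behavior (mirroring the new logic tests below; the table name is illustrative):

SET transaction_rows_read_err = 50;

-- The full table scan is now planned as a LIMITED SCAN with a hard limit of
-- 51 rows, so the statement fails shortly after crossing the limit instead of
-- reading the entire table first.
SELECT * FROM big_table;
-- ERROR: txn has read 51 rows, which is above the limit

RESET transaction_rows_read_err;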

104337: sql: fix reset_sql_stats to truncate activity tables r=j82w a=j82w

Previously:
The reset stats button on the UI and crdb_internal.reset_sql_stats() would only reset the statement_statistics and transaction_statistics tables, leaving the sql_activity table with old data. Reset stats now truncates the sql_activity table as well.

Fixes: cockroachdb#104321
Epic: none

Release note (sql change): Fix crdb_internal.reset_sql_stats() to
clean up the sql_activity table, which works as a cache for the stats.
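
A minimal usage sketch of the fixed behavior (the verification query is illustrative):

-- Clears the persisted statement/transaction statistics and, with this fix,
-- also truncates the cached sql_activity data.
SELECT crdb_internal.reset_sql_stats();

-- Right after the reset, stats surfaced through crdb_internal should be empty
-- rather than showing stale cached rows.
SELECT count(*) FROM crdb_internal.statement_statistics;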

Co-authored-by: Rebecca Taft <[email protected]>
Co-authored-by: j82w <[email protected]>
3 people committed Jun 5, 2023
3 parents 89887ee + 3442f62 + 1d51268 commit 7207c46
Showing 17 changed files with 337 additions and 5 deletions.
7 changes: 7 additions & 0 deletions pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go

74 changes: 74 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/guardrails
@@ -0,0 +1,74 @@
statement ok
CREATE TABLE guardrails (i INT PRIMARY KEY);
INSERT INTO guardrails SELECT generate_series(1, 100)

# When the transaction_rows_read_err guardrail is set to 1, we apply a limit
# of 2 in all cases except for when we know at most 2 rows are returned.
statement ok
SET transaction_rows_read_err = 1

query error txn has read 2 rows, which is above the limit
SELECT * FROM guardrails

query error txn has read 2 rows, which is above the limit
SELECT * FROM guardrails LIMIT 50

statement ok
SELECT * FROM guardrails WHERE i = 1

statement error txn has read 2 rows, which is above the limit
SELECT * FROM guardrails WHERE i IN (1, 2)

query error txn has read 2 rows, which is above the limit
SELECT * FROM guardrails WHERE i > 0 AND i <= 10

# When the transaction_rows_read_err guardrail is set to 50, we only apply a
# limit if it's possible that more than 51 rows may be returned.
statement ok
SET transaction_rows_read_err = 50

query error txn has read 51 rows, which is above the limit
SELECT * FROM guardrails

statement ok
SELECT * FROM guardrails LIMIT 50

statement ok
SELECT * FROM guardrails WHERE i = 1

statement ok
SELECT * FROM guardrails WHERE i > 0 AND i <= 10

statement ok
SET transaction_rows_read_err = 150

statement ok
CREATE TABLE guardrails2 (i INT PRIMARY KEY);
INSERT INTO guardrails2 SELECT generate_series(1, 150)

statement ok
BEGIN

# A full scan shouldn't error if it only scans transaction_rows_read_err rows.
statement ok
SELECT * FROM guardrails2

statement ok
COMMIT

statement ok
BEGIN

statement ok
SELECT * FROM guardrails

# The whole transaction has now read more than transaction_rows_read_err rows,
# so error.
query error txn has read 250 rows, which is above the limit
SELECT * FROM guardrails2

statement ok
ROLLBACK

statement ok
RESET transaction_rows_read_err
7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/fakedist-disk/generated_test.go

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/fakedist/generated_test.go

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/local-vec-off/generated_test.go

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/local/generated_test.go

18 changes: 14 additions & 4 deletions pkg/sql/opt/exec/execbuilder/relational.go
@@ -645,6 +645,17 @@ func (b *Builder) scanParams(

softLimit := reqProps.LimitHintInt64()
hardLimit := scan.HardLimit.RowCount()
+maxResults, maxResultsOk := b.indexConstraintMaxResults(scan, relProps)
+
+// If the txn_rows_read_err guardrail is set, make sure that we never read
+// more than txn_rows_read_err+1 rows on any single scan. Adding a hard limit
+// of txn_rows_read_err+1 ensures that the results will still be correct since
+// the conn_executor will return an error if the limit is actually reached.
+if txnRowsReadErr := b.evalCtx.SessionData().TxnRowsReadErr; txnRowsReadErr > 0 &&
+	(hardLimit == 0 || hardLimit > txnRowsReadErr+1) &&
+	(!maxResultsOk || maxResults > uint64(txnRowsReadErr+1)) {
+	hardLimit = txnRowsReadErr + 1
+}

// If this is a bounded staleness query, check that it touches at most one
// range.
@@ -659,8 +670,7 @@
// multiple ranges if the first range is empty.
valid = false
} else {
-maxResults, ok := b.indexConstraintMaxResults(scan, relProps)
-valid = ok && maxResults == 1
+valid = maxResultsOk && maxResults == 1
}
if !valid {
return exec.ScanParams{}, opt.ColMap{}, unimplemented.NewWithIssuef(67562,
@@ -672,8 +682,8 @@

parallelize := false
if hardLimit == 0 && softLimit == 0 {
-maxResults, ok := b.indexConstraintMaxResults(scan, relProps)
-if ok && maxResults < getParallelScanResultThreshold(b.evalCtx.TestingKnobs.ForceProductionValues) {
+if maxResultsOk &&
+	maxResults < getParallelScanResultThreshold(b.evalCtx.TestingKnobs.ForceProductionValues) {
// Don't set the flag when we have a single span which returns a single
// row: it does nothing in this case except litter EXPLAINs.
// There are still cases where the flag doesn't do anything when the spans
133 changes: 133 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/guardrails
@@ -0,0 +1,133 @@
# LogicTest: local

statement ok
CREATE TABLE guardrails (i INT PRIMARY KEY)

# When the transaction_rows_read_err guardrail is set to 1, we apply a limit
# of 2 in all cases except for when we know at most 2 rows are returned.
statement ok
SET transaction_rows_read_err = 1

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 1,000 (missing stats)
table: guardrails@guardrails_pkey
spans: LIMITED SCAN
limit: 2

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails LIMIT 50
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 50 (missing stats)
table: guardrails@guardrails_pkey
spans: LIMITED SCAN
limit: 2

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i = 1
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 1 (missing stats)
table: guardrails@guardrails_pkey
spans: /1/0

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i IN (1, 2)
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 2 (missing stats)
table: guardrails@guardrails_pkey
spans: /1-/3
parallel


query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i > 0 AND i <= 10
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 10 (missing stats)
table: guardrails@guardrails_pkey
spans: /1-/11
limit: 2

# When the transaction_rows_read_err guardrail is set to 50, we only apply a
# limit if it's possible that more than 51 rows may be returned.
statement ok
SET transaction_rows_read_err = 50

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 1,000 (missing stats)
table: guardrails@guardrails_pkey
spans: LIMITED SCAN
limit: 51

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails LIMIT 50
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 50 (missing stats)
table: guardrails@guardrails_pkey
spans: LIMITED SCAN
limit: 50

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i = 1
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 1 (missing stats)
table: guardrails@guardrails_pkey
spans: /1/0

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i > 0 AND i <= 10
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 10 (missing stats)
table: guardrails@guardrails_pkey
spans: /1-/11
parallel

statement ok
RESET transaction_rows_read_err
7 changes: 7 additions & 0 deletions pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go

3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/memo.go
@@ -151,6 +151,7 @@ type Memo struct {
propagateInputOrdering bool
disallowFullTableScans bool
largeFullScanRows float64
+txnRowsReadErr int64
nullOrderedLast bool
costScansWithDefaultColSize bool
allowUnconstrainedNonCoveringIndexScan bool
@@ -211,6 +212,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) {
propagateInputOrdering: evalCtx.SessionData().PropagateInputOrdering,
disallowFullTableScans: evalCtx.SessionData().DisallowFullTableScans,
largeFullScanRows: evalCtx.SessionData().LargeFullScanRows,
+txnRowsReadErr: evalCtx.SessionData().TxnRowsReadErr,
nullOrderedLast: evalCtx.SessionData().NullOrderedLast,
costScansWithDefaultColSize: evalCtx.SessionData().CostScansWithDefaultColSize,
allowUnconstrainedNonCoveringIndexScan: evalCtx.SessionData().UnconstrainedNonCoveringIndexScanEnabled,
@@ -355,6 +357,7 @@ func (m *Memo) IsStale(
m.propagateInputOrdering != evalCtx.SessionData().PropagateInputOrdering ||
m.disallowFullTableScans != evalCtx.SessionData().DisallowFullTableScans ||
m.largeFullScanRows != evalCtx.SessionData().LargeFullScanRows ||
+m.txnRowsReadErr != evalCtx.SessionData().TxnRowsReadErr ||
m.nullOrderedLast != evalCtx.SessionData().NullOrderedLast ||
m.costScansWithDefaultColSize != evalCtx.SessionData().CostScansWithDefaultColSize ||
m.allowUnconstrainedNonCoveringIndexScan != evalCtx.SessionData().UnconstrainedNonCoveringIndexScanEnabled ||
