Skip to content

Commit

Permalink
sql: make transaction_rows_read_err prevent large scans
Browse files Browse the repository at this point in the history
Prior to this commit, setting transaction_rows_read_err to a non-zero
value would cause a transaction to fail as soon as a statement caused
the total number of rows read to exceed transaction_rows_read_err. However,
it was possible that each statement could read many more than
transaction_rows_read_err rows. This commit adds logic so that a single
scan never reads more than transaction_rows_read_err+1 rows if
transaction_rows_read_err is set.

Informs cockroachdb#70473

Release note (performance improvement): If transaction_rows_read_err is
set to a non-zero value, we now ensure that any single scan never reads
more than transaction_rows_read_err+1 rows. This prevents transactions that
would error due to the transaction_rows_read_err setting from causing
a large performance overhead due to large scans.
  • Loading branch information
rytaft committed Jun 5, 2023
1 parent 4d78501 commit 70a143a
Show file tree
Hide file tree
Showing 13 changed files with 286 additions and 4 deletions.
7 changes: 7 additions & 0 deletions pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

74 changes: 74 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/guardrails
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
statement ok
CREATE TABLE guardrails (i INT PRIMARY KEY);
INSERT INTO guardrails SELECT generate_series(1, 100)

# When the transaction_rows_read_err guardrail is set to 1, we apply a limit
# of 2 in all cases except for when we know at most 2 rows are returned.
statement ok
SET transaction_rows_read_err = 1

query error txn has read 2 rows, which is above the limit
SELECT * FROM guardrails

query error txn has read 2 rows, which is above the limit
SELECT * FROM guardrails LIMIT 50

statement ok
SELECT * FROM guardrails WHERE i = 1

statement error txn has read 2 rows, which is above the limit
SELECT * FROM guardrails WHERE i IN (1, 2)

query error txn has read 2 rows, which is above the limit
SELECT * FROM guardrails WHERE i > 0 AND i <= 10

# When the transaction_rows_read_err guardrail is set to 50, we only apply a
# limit if it's possible that more than 51 rows may be returned.
statement ok
SET transaction_rows_read_err = 50

query error txn has read 51 rows, which is above the limit
SELECT * FROM guardrails

statement ok
SELECT * FROM guardrails LIMIT 50

statement ok
SELECT * FROM guardrails WHERE i = 1

statement ok
SELECT * FROM guardrails WHERE i > 0 AND i <= 10

statement ok
SET transaction_rows_read_err = 150

statement ok
CREATE TABLE guardrails2 (i INT PRIMARY KEY);
INSERT INTO guardrails2 SELECT generate_series(1, 150)

statement ok
BEGIN

# A full scan shouldn't error if it only scans transaction_rows_read_err rows.
statement ok
SELECT * FROM guardrails2

statement ok
COMMIT

statement ok
BEGIN

statement ok
SELECT * FROM guardrails

# The whole transaction has now read more than transaction_rows_read_err rows,
# so error.
query error txn has read 250 rows, which is above the limit
SELECT * FROM guardrails2

statement ok
ROLLBACK

statement ok
RESET transaction_rows_read_err
7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/fakedist-disk/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/fakedist/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/local-vec-off/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/sql/logictest/tests/local/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 14 additions & 4 deletions pkg/sql/opt/exec/execbuilder/relational.go
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,17 @@ func (b *Builder) scanParams(

softLimit := reqProps.LimitHintInt64()
hardLimit := scan.HardLimit.RowCount()
maxResults, maxResultsOk := b.indexConstraintMaxResults(scan, relProps)

// If the txn_rows_read_err guardrail is set, make sure that we never read
// more than txn_rows_read_err+1 rows on any single scan. Adding a hard limit
// of txn_rows_read_err+1 ensures that the results will still be correct since
// the conn_executor will return an error if the limit is actually reached.
if txnRowsReadErr := b.evalCtx.SessionData().TxnRowsReadErr; txnRowsReadErr > 0 &&
(hardLimit == 0 || hardLimit > txnRowsReadErr+1) &&
(!maxResultsOk || maxResults > uint64(txnRowsReadErr+1)) {
hardLimit = txnRowsReadErr + 1
}

// If this is a bounded staleness query, check that it touches at most one
// range.
Expand All @@ -667,8 +678,7 @@ func (b *Builder) scanParams(
// multiple ranges if the first range is empty.
valid = false
} else {
maxResults, ok := b.indexConstraintMaxResults(scan, relProps)
valid = ok && maxResults == 1
valid = maxResultsOk && maxResults == 1
}
if !valid {
return exec.ScanParams{}, opt.ColMap{}, unimplemented.NewWithIssuef(67562,
Expand All @@ -680,8 +690,8 @@ func (b *Builder) scanParams(

parallelize := false
if hardLimit == 0 && softLimit == 0 {
maxResults, ok := b.indexConstraintMaxResults(scan, relProps)
if ok && maxResults < getParallelScanResultThreshold(b.evalCtx.TestingKnobs.ForceProductionValues) {
if maxResultsOk &&
maxResults < getParallelScanResultThreshold(b.evalCtx.TestingKnobs.ForceProductionValues) {
// Don't set the flag when we have a single span which returns a single
// row: it does nothing in this case except litter EXPLAINs.
// There are still cases where the flag doesn't do anything when the spans
Expand Down
133 changes: 133 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/guardrails
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# LogicTest: local

statement ok
CREATE TABLE guardrails (i INT PRIMARY KEY)

# When the transaction_rows_read_err guardrail is set to 1, we apply a limit
# of 2 in all cases except for when we know at most 2 rows are returned.
statement ok
SET transaction_rows_read_err = 1

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 1,000 (missing stats)
table: guardrails@guardrails_pkey
spans: LIMITED SCAN
limit: 2

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails LIMIT 50
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 50 (missing stats)
table: guardrails@guardrails_pkey
spans: LIMITED SCAN
limit: 2

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i = 1
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 1 (missing stats)
table: guardrails@guardrails_pkey
spans: /1/0

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i IN (1, 2)
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 2 (missing stats)
table: guardrails@guardrails_pkey
spans: /1-/3
parallel


query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i > 0 AND i <= 10
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 10 (missing stats)
table: guardrails@guardrails_pkey
spans: /1-/11
limit: 2

# When the transaction_rows_read_err guardrail is set to 50, we only apply a
# limit if it's possible that more than 51 rows may be returned.
statement ok
SET transaction_rows_read_err = 50

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 1,000 (missing stats)
table: guardrails@guardrails_pkey
spans: LIMITED SCAN
limit: 51

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails LIMIT 50
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 50 (missing stats)
table: guardrails@guardrails_pkey
spans: LIMITED SCAN
limit: 50

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i = 1
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 1 (missing stats)
table: guardrails@guardrails_pkey
spans: /1/0

query T
EXPLAIN (VERBOSE) SELECT * FROM guardrails WHERE i > 0 AND i <= 10
----
distribution: local
vectorized: true
·
• scan
columns: (i)
estimated row count: 10 (missing stats)
table: guardrails@guardrails_pkey
spans: /1-/11
parallel

statement ok
RESET transaction_rows_read_err
7 changes: 7 additions & 0 deletions pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ type Memo struct {
propagateInputOrdering bool
disallowFullTableScans bool
largeFullScanRows float64
txnRowsReadErr int64
nullOrderedLast bool
costScansWithDefaultColSize bool
allowUnconstrainedNonCoveringIndexScan bool
Expand Down Expand Up @@ -210,6 +211,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) {
propagateInputOrdering: evalCtx.SessionData().PropagateInputOrdering,
disallowFullTableScans: evalCtx.SessionData().DisallowFullTableScans,
largeFullScanRows: evalCtx.SessionData().LargeFullScanRows,
txnRowsReadErr: evalCtx.SessionData().TxnRowsReadErr,
nullOrderedLast: evalCtx.SessionData().NullOrderedLast,
costScansWithDefaultColSize: evalCtx.SessionData().CostScansWithDefaultColSize,
allowUnconstrainedNonCoveringIndexScan: evalCtx.SessionData().UnconstrainedNonCoveringIndexScanEnabled,
Expand Down Expand Up @@ -354,6 +356,7 @@ func (m *Memo) IsStale(
m.propagateInputOrdering != evalCtx.SessionData().PropagateInputOrdering ||
m.disallowFullTableScans != evalCtx.SessionData().DisallowFullTableScans ||
m.largeFullScanRows != evalCtx.SessionData().LargeFullScanRows ||
m.txnRowsReadErr != evalCtx.SessionData().TxnRowsReadErr ||
m.nullOrderedLast != evalCtx.SessionData().NullOrderedLast ||
m.costScansWithDefaultColSize != evalCtx.SessionData().CostScansWithDefaultColSize ||
m.allowUnconstrainedNonCoveringIndexScan != evalCtx.SessionData().UnconstrainedNonCoveringIndexScanEnabled ||
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().LargeFullScanRows = 0
notStale()

// Stale txn rows read error.
evalCtx.SessionData().TxnRowsReadErr = 1000
stale()
evalCtx.SessionData().TxnRowsReadErr = 0
notStale()

// Stale null ordered last.
evalCtx.SessionData().NullOrderedLast = true
stale()
Expand Down

0 comments on commit 70a143a

Please sign in to comment.