From 56a18e0ac52bfe11399b342d8fbf2e3ae3798ed3 Mon Sep 17 00:00:00 2001 From: Mark Sirek Date: Fri, 6 Oct 2023 16:21:10 -0700 Subject: [PATCH] constraint: perform cancel checking when combining constraints Previously, the combining of constraint spans when one constraint is a suffix of the other could take a long time and cause the `statement_timeout` session setting to not be honored when each constraint has hundreds or thousands of spans. The issue is that `constraint.Combine` has double nested loops to consider every combination of one span from one constraint with one span of the other constraint. The building of possibly millions of spans may take excessive CPU time and allocate excessive amounts of memory. The fix is to maintain a counter in `constraint.Combine` and call the query cancel check function every 16 iterations. The cancel check function itself will check for query timeout every 1024 iterations, so effectively every 16K iterations `constraint.Combine` will perform cancel checking and abort the query if the timeout has been reached. Epic: none Fixes: #111862 Release note (bug fix): This patch fixes an issue where the optimizer fails to honor the `statement_timeout` session setting when generating constrained index scans for queries with large IN lists or `= ANY` predicates on multiple index key columns, which may lead to an out of memory condition on the node. --- pkg/sql/opt/constraint/BUILD.bazel | 2 + pkg/sql/opt/constraint/constraint.go | 14 +++- pkg/sql/opt/constraint/constraint_test.go | 79 ++++++++++++++++++- .../opt/idxconstraint/index_constraints.go | 4 +- 4 files changed, 95 insertions(+), 4 deletions(-) diff --git a/pkg/sql/opt/constraint/BUILD.bazel b/pkg/sql/opt/constraint/BUILD.bazel index 70ac3e783c82..89a5c1de4450 100644 --- a/pkg/sql/opt/constraint/BUILD.bazel +++ b/pkg/sql/opt/constraint/BUILD.bazel @@ -51,5 +51,7 @@ go_test( "//pkg/util/intsets", "//pkg/util/leaktest", "//pkg/util/randutil", + "@com_github_cockroachdb_errors//:errors", + "@com_github_stretchr_testify//require", ], ) diff --git a/pkg/sql/opt/constraint/constraint.go b/pkg/sql/opt/constraint/constraint.go index fa2e9be2a095..85c61e9113d9 100644 --- a/pkg/sql/opt/constraint/constraint.go +++ b/pkg/sql/opt/constraint/constraint.go @@ -20,6 +20,8 @@ import ( "github.com/cockroachdb/errors" ) +const CancelCheckInterval = 16 + // Constraint specifies the possible set of values that one or more columns // will have in the result set. If this is a single column constraint, then // that column's value will always be part of one of the spans in this @@ -344,7 +346,7 @@ func (c *Constraint) findIntersectingSpan(keyCtx *KeyContext, sp *Span) (_ *Span // c: /a/b: [/1 - /2] [/4 - /4] // other: /b: [/5 - /5] // result: /a/b: [/1/5 - /2/5] [/4/5 - /4/5] -func (c *Constraint) Combine(evalCtx *eval.Context, other *Constraint) { +func (c *Constraint) Combine(evalCtx *eval.Context, other *Constraint, checkCancellation func()) { if !other.Columns.IsStrictSuffixOf(&c.Columns) { // Note: we don't want to let the c and other pointers escape by passing // them directly to Sprintf. @@ -366,7 +368,15 @@ func (c *Constraint) Combine(evalCtx *eval.Context, other *Constraint) { var resultInitialized bool keyCtx := KeyContext{Columns: c.Columns, EvalCtx: evalCtx} + numIterations := 0 + cancelCheck := func() { + numIterations++ + if checkCancellation != nil && (numIterations%CancelCheckInterval) == 0 { + checkCancellation() + } + } for i := 0; i < c.Spans.Count(); i++ { + cancelCheck() sp := *c.Spans.Get(i) startLen, endLen := sp.start.Length(), sp.end.Length() @@ -402,6 +412,7 @@ func (c *Constraint) Combine(evalCtx *eval.Context, other *Constraint) { } } for j := 0; j < other.Spans.Count(); j++ { + cancelCheck() extSp := other.Spans.Get(j) var newSp Span newSp.Init( @@ -450,6 +461,7 @@ func (c *Constraint) Combine(evalCtx *eval.Context, other *Constraint) { resultInitialized = true result.Alloc(c.Spans.Count()) for j := 0; j < i; j++ { + cancelCheck() result.Append(c.Spans.Get(j)) } } diff --git a/pkg/sql/opt/constraint/constraint_test.go b/pkg/sql/opt/constraint/constraint_test.go index 2b54f95abc32..3e62016ff892 100644 --- a/pkg/sql/opt/constraint/constraint_test.go +++ b/pkg/sql/opt/constraint/constraint_test.go @@ -22,6 +22,8 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/util/intsets" "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/errors" + "github.com/stretchr/testify/require" ) func TestConstraintUnion(t *testing.T) { @@ -452,7 +454,7 @@ func TestConstraintCombine(t *testing.T) { t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { a := ParseConstraint(&evalCtx, tc.a) b := ParseConstraint(&evalCtx, tc.b) - a.Combine(&evalCtx, &b) + a.Combine(&evalCtx, &b, nil /* checkCancellation */) if res := a.String(); res != tc.e { t.Errorf("expected\n %s; got\n %s", tc.e, res) } @@ -460,6 +462,81 @@ func TestConstraintCombine(t *testing.T) { } } +// TestCancelLargeConstraintCombine tests that combining constraints can be +// successfully cancelled with a cancel function. +func TestCancelLargeConstraintCombine(t *testing.T) { + st := cluster.MakeTestingClusterSettings() + evalCtx := eval.MakeTestingEvalContext(st) + + testData := []struct { + a, b, c, d, e, f, g, h, j, k, l, m, n, o, p, q string + }{ + { + a: "/1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + b: "/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + c: "/3/4/5/6/7/8/9/10/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + d: "/4/5/6/7/8/9/10/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + e: "/5/6/7/8/9/10/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + f: "/6/7/8/9/10/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + g: "/7/8/9/10/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + h: "/8/9/10/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + j: "/9/10/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + k: "/10/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + l: "/11/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + m: "/12/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + n: "/13/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + o: "/14/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + p: "/15/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + q: "/16: [/10 - /10] [/20 - /20] [/30 - /30] [/40 - /40] [/50 - /50] [/60 - /60] [/70 - /70] [/80 - /80] [/90 - /90]", + }, + } + + iterations := 0 + cancelFunc := func() { + iterations++ + if iterations > 2000 { + panic(errors.AssertionFailedf("We successfully cancelled the operation!")) + } + } + for i, tc := range testData { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + a := ParseConstraint(&evalCtx, tc.a) + b := ParseConstraint(&evalCtx, tc.b) + c := ParseConstraint(&evalCtx, tc.c) + d := ParseConstraint(&evalCtx, tc.d) + e := ParseConstraint(&evalCtx, tc.e) + f := ParseConstraint(&evalCtx, tc.f) + g := ParseConstraint(&evalCtx, tc.g) + h := ParseConstraint(&evalCtx, tc.h) + j := ParseConstraint(&evalCtx, tc.j) + k := ParseConstraint(&evalCtx, tc.k) + l := ParseConstraint(&evalCtx, tc.l) + m := ParseConstraint(&evalCtx, tc.m) + n := ParseConstraint(&evalCtx, tc.n) + o := ParseConstraint(&evalCtx, tc.o) + p := ParseConstraint(&evalCtx, tc.p) + q := ParseConstraint(&evalCtx, tc.q) + a.Combine(&evalCtx, &b, cancelFunc) + c.Combine(&evalCtx, &d, cancelFunc) + e.Combine(&evalCtx, &f, cancelFunc) + g.Combine(&evalCtx, &h, cancelFunc) + j.Combine(&evalCtx, &k, cancelFunc) + l.Combine(&evalCtx, &m, cancelFunc) + n.Combine(&evalCtx, &o, cancelFunc) + p.Combine(&evalCtx, &q, cancelFunc) + a.Combine(&evalCtx, &c, cancelFunc) + e.Combine(&evalCtx, &g, cancelFunc) + j.Combine(&evalCtx, &l, cancelFunc) + n.Combine(&evalCtx, &p, cancelFunc) + // The next Combine call will panic within a second or two. + require.Panics(t, func() { a.Combine(&evalCtx, &e, cancelFunc) }) + // This Combine call panics right away since the iterations target has + // already been reached. + require.Panics(t, func() { j.Combine(&evalCtx, &n, cancelFunc) }) + }) + } +} + func TestConsolidateSpans(t *testing.T) { defer leaktest.AfterTest(t)() st := cluster.MakeTestingClusterSettings() diff --git a/pkg/sql/opt/idxconstraint/index_constraints.go b/pkg/sql/opt/idxconstraint/index_constraints.go index 68028488b45d..efa4da6c58be 100644 --- a/pkg/sql/opt/idxconstraint/index_constraints.go +++ b/pkg/sql/opt/idxconstraint/index_constraints.go @@ -845,11 +845,11 @@ func (c *indexConstraintCtx) makeSpansForAnd( } ofsC.IntersectWith(c.evalCtx, &exprConstraint) } - out.Combine(c.evalCtx, &ofsC) + out.Combine(c.evalCtx, &ofsC, c.checkCancellation) numIterations++ // In case we can't exit this loop, allow the cancel checker to cancel // this query. - if (numIterations % 16) == 0 { + if (numIterations % constraint.CancelCheckInterval) == 0 { c.checkCancellation() } }