diff --git a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row index 9167026d3636..720958fb03f1 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row +++ b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row @@ -876,6 +876,42 @@ SELECT * FROM [EXPLAIN SELECT * FROM regional_by_row_table WHERE pk = 1] OFFSET table: regional_by_row_table@primary spans: [/'ap-southeast-2'/1 - /'ap-southeast-2'/1] [/'us-east-1'/1 - /'us-east-1'/1] +# Query with more than one key. +query T +SELECT * FROM [EXPLAIN (DISTSQL) SELECT * FROM regional_by_row_table WHERE pk IN (1, 4)] OFFSET 2 +---- +· +• union all +│ limit: 2 +│ +├── • scan +│ missing stats +│ table: regional_by_row_table@primary +│ spans: [/'ap-southeast-2'/1 - /'ap-southeast-2'/1] [/'ap-southeast-2'/4 - /'ap-southeast-2'/4] +│ +└── • scan + missing stats + table: regional_by_row_table@primary + spans: [/'ca-central-1'/1 - /'ca-central-1'/1] [/'ca-central-1'/4 - /'ca-central-1'/4] [/'us-east-1'/1 - /'us-east-1'/1] [/'us-east-1'/4 - /'us-east-1'/4] +· +Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJykkV-L1DAQwN_9FMM8eRJp0z1B8lTRLi6s27MtKFzLkmuGtdhNapLiHUu_u7R9uKvcCrc-zp_fzG-SE7pfLQrMk23ysYA3sM7SL3CbfL_Zftjs4PWnTV7kX7dXsGywdGiMlu3-7mFvze-9l3ctwbfPSZZA9xNGkjO4vqogXa_zpIAIGWqjaCeP5FDcIseKYWdNTc4ZO6ZOU8NG3aMIGTa66_2YrhjWxhKKE_rGt4QCi3FbRlKRDUJkqMjLpp3GPisWd7Y5SvuADPNOaicgKLEs79-HJQY8CEFqBSsw_gdZhwzT3guIOYsjFq9YfM3id1gNDE3vH6WclwdCwQd2mTi_VDx-Is1n6Rc6R2edH1V7bawiS2qhWQ3PXLUzb00XRMt7ts2x8RCddQhf8m4Zuc5oR3-5nJtcMSR1oPkgZ3pb04019bRmDtOJmxKKnJ-r0Rxs9FSaPvYpzP8Hjv4JrxZwOFTDqz8BAAD__1J1KGk= + +statement ok +SET tracing = on,kv,results; SELECT * FROM regional_by_row_table WHERE pk IN (1, 4); SET tracing = off + +# Both rows are found in the local region, so the other regions are not searched. +query T +SELECT message FROM [SHOW KV TRACE FOR SESSION] WITH ORDINALITY + WHERE message LIKE 'fetched:%' OR message LIKE 'output row%' + OR message LIKE 'Scan%' + ORDER BY ordinality ASC +---- +Scan /Table/72/1/"@"/1/0, /Table/72/1/"@"/4/0 +fetched: /regional_by_row_table/primary/'ap-southeast-2'/1/pk2/a/b/j -> /1/2/3/'{"a": "b"}' +output row: [1 1 2 3 '{"a": "b"}'] +fetched: /regional_by_row_table/primary/'ap-southeast-2'/4/pk2/a/b/j -> /4/5/6/'{"c": "d"}' +output row: [4 4 5 6 '{"c": "d"}'] + # Tests using locality optimized search for lookup joins (including foreign # key checks). statement ok diff --git a/pkg/sql/opt/exec/execbuilder/relational.go b/pkg/sql/opt/exec/execbuilder/relational.go index e2cc7da35e0b..7daac54e44d5 100644 --- a/pkg/sql/opt/exec/execbuilder/relational.go +++ b/pkg/sql/opt/exec/execbuilder/relational.go @@ -1482,11 +1482,6 @@ func (b *Builder) buildSetOp(set memo.RelExpr) (execPlan, error) { // child. // TODO(rytaft): Store the limit in the expression. hardLimit = uint64(set.Relational().Cardinality.Max) - if hardLimit > 1 { - panic(errors.AssertionFailedf( - "locality optimized search is not yet supported for more than one row at a time", - )) - } } ep := execPlan{} diff --git a/pkg/sql/opt/xform/scan_funcs.go b/pkg/sql/opt/xform/scan_funcs.go index 872a7850b31d..4724a8e51a4c 100644 --- a/pkg/sql/opt/xform/scan_funcs.go +++ b/pkg/sql/opt/xform/scan_funcs.go @@ -146,11 +146,14 @@ func (c *CustomFuncs) CanMaybeGenerateLocalityOptimizedScan(scanPrivate *memo.Sc func (c *CustomFuncs) GenerateLocalityOptimizedScan( grp memo.RelExpr, scanPrivate *memo.ScanPrivate, ) { - // We can only generate a locality optimized scan if we know there is at - // most one row produced by the local spans. - // TODO(rytaft): We may be able to expand this to allow any number of rows, - // as long as there is a hard upper bound. - if !grp.Relational().Cardinality.IsZeroOrOne() { + // We can only generate a locality optimized scan if we know there is a hard + // upper bound on the number of rows produced by the local spans. We use + // a limit of 10,000 rows since that is the kv batch size, and it's probably + // better to use DistSQL once we're scanning multiple batches. + // TODO(rytaft): Revisit this when we have a more accurate cost model for data + // distribution. + const localityOptScanMaxRows = 10000 + if grp.Relational().Cardinality.Max > localityOptScanMaxRows { return } diff --git a/pkg/sql/opt/xform/testdata/rules/scan b/pkg/sql/opt/xform/testdata/rules/scan index 9186899fa7ee..5287d7d99169 100644 --- a/pkg/sql/opt/xform/testdata/rules/scan +++ b/pkg/sql/opt/xform/testdata/rules/scan @@ -725,23 +725,62 @@ index-join abc_part ├── key: () └── fd: ()-->(17-19,21) -# b is not constrained to a single value. -opt locality=(region=east) expect-not=GenerateLocalityOptimizedScan +# b is constrained to multiple values. +opt locality=(region=east) expect=GenerateLocalityOptimizedScan SELECT a FROM abc_part WHERE b IN (1, 2) ---- project ├── columns: a:3!null ├── cardinality: [0 - 2] ├── key: (3) - └── scan abc_part@b_idx + └── locality-optimized-search ├── columns: a:3!null b:4!null - ├── constraint: /1/4 - │ ├── [/'central'/1 - /'central'/2] - │ ├── [/'east'/1 - /'east'/2] - │ └── [/'west'/1 - /'west'/2] + ├── left columns: a:11 b:12 + ├── right columns: a:19 b:20 ├── cardinality: [0 - 2] ├── key: (3) - └── fd: (3)-->(4), (4)-->(3) + ├── fd: (3)-->(4), (4)-->(3) + ├── scan abc_part@b_idx + │ ├── columns: a:11!null b:12!null + │ ├── constraint: /9/12: [/'east'/1 - /'east'/2] + │ ├── cardinality: [0 - 2] + │ ├── key: (11) + │ └── fd: (11)-->(12), (12)-->(11) + └── scan abc_part@b_idx + ├── columns: a:19!null b:20!null + ├── constraint: /17/20 + │ ├── [/'central'/1 - /'central'/2] + │ └── [/'west'/1 - /'west'/2] + ├── cardinality: [0 - 4] + ├── key: (19) + └── fd: (19)-->(20), (20)-->(19) + +# b is constrained to more than 10000 values. +opt locality=(region=east) expect-not=GenerateLocalityOptimizedScan +SELECT a FROM abc_part WHERE b >= 0 AND b < 10001 +---- +project + ├── columns: a:3!null + ├── cardinality: [0 - 10001] + ├── key: (3) + └── select + ├── columns: a:3!null b:4!null + ├── cardinality: [0 - 10001] + ├── key: (3) + ├── fd: (3)-->(4), (4)-->(3) + ├── index-join abc_part + │ ├── columns: a:3!null b:4 + │ ├── key: (3) + │ ├── fd: (3)-->(4), (4)~~>(3) + │ └── scan abc_part@c_idx + │ ├── columns: a:3!null + │ ├── constraint: /1/2/5 + │ │ ├── [/'central'/1 - /'central'/3] + │ │ ├── [/'east'/1 - /'east'/3] + │ │ └── [/'west'/1 - /'west'/3] + │ └── key: (3) + └── filters + └── (b:4 >= 0) AND (b:4 < 10001) [outer=(4), constraints=(/4: [/0 - /10000]; tight)] # The spans target all remote partitions. opt locality=(region=east) expect-not=GenerateLocalityOptimizedScan diff --git a/pkg/sql/opt_exec_factory.go b/pkg/sql/opt_exec_factory.go index edb494fb188b..ed51b74d871e 100644 --- a/pkg/sql/opt_exec_factory.go +++ b/pkg/sql/opt_exec_factory.go @@ -558,11 +558,6 @@ func (ef *execFactory) ConstructStreamingSetOp( func (ef *execFactory) ConstructUnionAll( left, right exec.Node, reqOrdering exec.OutputOrdering, hardLimit uint64, ) (exec.Node, error) { - if hardLimit > 1 { - return nil, errors.AssertionFailedf( - "locality optimized search is not yet supported for more than one row at a time", - ) - } return ef.planner.newUnionNode( tree.UnionOp, true, /* all */