diff --git a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row index 8de23485a20e..17a602c9a3b9 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row +++ b/pkg/ccl/logictestccl/testdata/logic_test/regional_by_row @@ -971,6 +971,42 @@ SELECT * FROM [EXPLAIN SELECT * FROM regional_by_row_table WHERE pk = 1] OFFSET table: regional_by_row_table@primary spans: [/'ap-southeast-2'/1 - /'ap-southeast-2'/1] [/'us-east-1'/1 - /'us-east-1'/1] +# Query with more than one key. +query T +SELECT * FROM [EXPLAIN (DISTSQL) SELECT * FROM regional_by_row_table WHERE pk IN (1, 4)] OFFSET 2 +---- +· +• union all +│ limit: 2 +│ +├── • scan +│ missing stats +│ table: regional_by_row_table@primary +│ spans: [/'ap-southeast-2'/1 - /'ap-southeast-2'/1] [/'ap-southeast-2'/4 - /'ap-southeast-2'/4] +│ +└── • scan + missing stats + table: regional_by_row_table@primary + spans: [/'ca-central-1'/1 - /'ca-central-1'/1] [/'ca-central-1'/4 - /'ca-central-1'/4] [/'us-east-1'/1 - /'us-east-1'/1] [/'us-east-1'/4 - /'us-east-1'/4] +· +Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJy0ksGK2zAQhu99imFO3aJiy_FC0cmldWggTbZxoIWNCVprSEUdyZVkukvwuxc7h21KGjYJPWo0n-eb39qh_1mjwCKf5h-W8AbGi_lnuM-_3U3fT2bw-uOkWBZfpjdw2OBoo62R9frhae3sr3WQDzXB10_5IofmB_QkZ5DelDAfj4t8CQkyNFbRTG7Jo7hHjiXDxtmKvLeuL-2Ghol6RBEz1KZpQ18uGVbWEYodBh1qQoHLftqCpCIXxchQUZC6Hj57VCxrnN5K94QMi0YaLyBa4Wr1-C5eYcSjGKRRwMGG7-SQ4bwNAjLOsoRlI5alLLvFsmNo2_Ds5IPcEArescu8-RXe1f_0ftZtjXWKHKkD1bI7stnMvrVNlPzVeHx0cmlko0sjy66Oa_SCuDw5LWs4O7X0cK2p3uoAyT9V4nPiW5BvrPH0oh8T946kNrTfy9vWVXTnbDWM2R_nAzcUFPmwv032h4kZroan9SfMr4GTk3B6Gh5dA6cn4dsDOO7K7tXvAAAA__8ZxrdU + +statement ok +SET tracing = on,kv,results; SELECT * FROM regional_by_row_table WHERE pk IN (1, 4); SET tracing = off + +# Both rows are found in the local region, so the other regions are not searched. +query T +SELECT message FROM [SHOW KV TRACE FOR SESSION] WITH ORDINALITY + WHERE message LIKE 'fetched:%' OR message LIKE 'output row%' + OR message LIKE 'Scan%' + ORDER BY ordinality ASC +---- +Scan /Table/73/1/"@"/1/0, /Table/73/1/"@"/4/0 +fetched: /regional_by_row_table/primary/'ap-southeast-2'/1/pk2/a/b/j -> /1/2/3/'{"a": "b"}' +output row: [1 1 2 3 '{"a": "b"}'] +fetched: /regional_by_row_table/primary/'ap-southeast-2'/4/pk2/a/b/j -> /4/5/6/'{"c": "d"}' +output row: [4 4 5 6 '{"c": "d"}'] + # Tests using locality optimized search for lookup joins (including foreign # key checks). statement ok diff --git a/pkg/sql/opt/exec/execbuilder/relational.go b/pkg/sql/opt/exec/execbuilder/relational.go index 5db0f9b7a14c..19c54d1799cb 100644 --- a/pkg/sql/opt/exec/execbuilder/relational.go +++ b/pkg/sql/opt/exec/execbuilder/relational.go @@ -1485,11 +1485,6 @@ func (b *Builder) buildSetOp(set memo.RelExpr) (execPlan, error) { // child. // TODO(rytaft): Store the limit in the expression. hardLimit = uint64(set.Relational().Cardinality.Max) - if hardLimit > 1 { - panic(errors.AssertionFailedf( - "locality optimized search is not yet supported for more than one row at a time", - )) - } } ep := execPlan{} diff --git a/pkg/sql/opt/xform/BUILD.bazel b/pkg/sql/opt/xform/BUILD.bazel index d8b16139a6fb..d4bd8cf76a4c 100644 --- a/pkg/sql/opt/xform/BUILD.bazel +++ b/pkg/sql/opt/xform/BUILD.bazel @@ -39,6 +39,7 @@ go_library( "//pkg/sql/opt/partialidx", "//pkg/sql/opt/props", "//pkg/sql/opt/props/physical", + "//pkg/sql/rowinfra", "//pkg/sql/sem/tree", "//pkg/sql/types", "//pkg/util", diff --git a/pkg/sql/opt/xform/scan_funcs.go b/pkg/sql/opt/xform/scan_funcs.go index 872a7850b31d..64bde1cdc4fd 100644 --- a/pkg/sql/opt/xform/scan_funcs.go +++ b/pkg/sql/opt/xform/scan_funcs.go @@ -15,6 +15,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" "github.com/cockroachdb/cockroach/pkg/sql/opt/constraint" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" + "github.com/cockroachdb/cockroach/pkg/sql/rowinfra" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/errors" ) @@ -146,11 +147,13 @@ func (c *CustomFuncs) CanMaybeGenerateLocalityOptimizedScan(scanPrivate *memo.Sc func (c *CustomFuncs) GenerateLocalityOptimizedScan( grp memo.RelExpr, scanPrivate *memo.ScanPrivate, ) { - // We can only generate a locality optimized scan if we know there is at - // most one row produced by the local spans. - // TODO(rytaft): We may be able to expand this to allow any number of rows, - // as long as there is a hard upper bound. - if !grp.Relational().Cardinality.IsZeroOrOne() { + // We can only generate a locality optimized scan if we know there is a hard + // upper bound on the number of rows produced by the local spans. We use the + // kv batch size as the limit, since it's probably better to use DistSQL once + // we're scanning multiple batches. + // TODO(rytaft): Revisit this when we have a more accurate cost model for data + // distribution. + if rowinfra.KeyLimit(grp.Relational().Cardinality.Max) > rowinfra.ProductionKVBatchSize { return } diff --git a/pkg/sql/opt/xform/testdata/rules/scan b/pkg/sql/opt/xform/testdata/rules/scan index 9186899fa7ee..bc4709c2cdf6 100644 --- a/pkg/sql/opt/xform/testdata/rules/scan +++ b/pkg/sql/opt/xform/testdata/rules/scan @@ -725,23 +725,62 @@ index-join abc_part ├── key: () └── fd: ()-->(17-19,21) -# b is not constrained to a single value. -opt locality=(region=east) expect-not=GenerateLocalityOptimizedScan +# b is constrained to multiple values. +opt locality=(region=east) expect=GenerateLocalityOptimizedScan SELECT a FROM abc_part WHERE b IN (1, 2) ---- project ├── columns: a:3!null ├── cardinality: [0 - 2] ├── key: (3) - └── scan abc_part@b_idx + └── locality-optimized-search ├── columns: a:3!null b:4!null - ├── constraint: /1/4 - │ ├── [/'central'/1 - /'central'/2] - │ ├── [/'east'/1 - /'east'/2] - │ └── [/'west'/1 - /'west'/2] + ├── left columns: a:11 b:12 + ├── right columns: a:19 b:20 ├── cardinality: [0 - 2] ├── key: (3) - └── fd: (3)-->(4), (4)-->(3) + ├── fd: (3)-->(4), (4)-->(3) + ├── scan abc_part@b_idx + │ ├── columns: a:11!null b:12!null + │ ├── constraint: /9/12: [/'east'/1 - /'east'/2] + │ ├── cardinality: [0 - 2] + │ ├── key: (11) + │ └── fd: (11)-->(12), (12)-->(11) + └── scan abc_part@b_idx + ├── columns: a:19!null b:20!null + ├── constraint: /17/20 + │ ├── [/'central'/1 - /'central'/2] + │ └── [/'west'/1 - /'west'/2] + ├── cardinality: [0 - 4] + ├── key: (19) + └── fd: (19)-->(20), (20)-->(19) + +# b is constrained to more than 100000 values (the kv batch size). +opt locality=(region=east) expect-not=GenerateLocalityOptimizedScan +SELECT a FROM abc_part WHERE b >= 0 AND b < 100001 +---- +project + ├── columns: a:3!null + ├── cardinality: [0 - 100001] + ├── key: (3) + └── select + ├── columns: a:3!null b:4!null + ├── cardinality: [0 - 100001] + ├── key: (3) + ├── fd: (3)-->(4), (4)-->(3) + ├── index-join abc_part + │ ├── columns: a:3!null b:4 + │ ├── key: (3) + │ ├── fd: (3)-->(4), (4)~~>(3) + │ └── scan abc_part@c_idx + │ ├── columns: a:3!null + │ ├── constraint: /1/2/5 + │ │ ├── [/'central'/1 - /'central'/3] + │ │ ├── [/'east'/1 - /'east'/3] + │ │ └── [/'west'/1 - /'west'/3] + │ └── key: (3) + └── filters + └── (b:4 >= 0) AND (b:4 < 100001) [outer=(4), constraints=(/4: [/0 - /100000]; tight)] # The spans target all remote partitions. opt locality=(region=east) expect-not=GenerateLocalityOptimizedScan diff --git a/pkg/sql/opt_exec_factory.go b/pkg/sql/opt_exec_factory.go index edb494fb188b..ed51b74d871e 100644 --- a/pkg/sql/opt_exec_factory.go +++ b/pkg/sql/opt_exec_factory.go @@ -558,11 +558,6 @@ func (ef *execFactory) ConstructStreamingSetOp( func (ef *execFactory) ConstructUnionAll( left, right exec.Node, reqOrdering exec.OutputOrdering, hardLimit uint64, ) (exec.Node, error) { - if hardLimit > 1 { - return nil, errors.AssertionFailedf( - "locality optimized search is not yet supported for more than one row at a time", - ) - } return ef.planner.newUnionNode( tree.UnionOp, true, /* all */