Skip to content

Commit

Permalink
opt: locality optimized scan for queries with a LIMIT clause
Browse files Browse the repository at this point in the history
This commit adds locality optimized scan support for queries which
place a hard limit on the number of rows returned via the LIMIT clause.
This optimization benefits tables with REGIONAL BY ROW locality by
splitting the spans accessed into a local spans set and a remote spans
set, combined via a UNION ALL operation where each branch of the UNION
ALL has the same hard limit as the original SELECT query block. If the
limit is reached by scanning just the local spans, then latency is
improved.

The optimization is not applied if the LIMIT is more than the KV batch
size of 100000 rows or if the number of spans in the scan exceeds 10000.

This commit also adds an improvement to span merging to avoid merging
local spans with remote spans in order to maximize the number of queries
that can utilize locality optimized scan.

Fixes #64862

Release note (Performance Improvement): Queries with a LIMIT clause
applied against a single table, either explicitly written, or implicit
such as in an uncorrelated EXISTS subquery, now scan that table with
improved latency if the table is defined with LOCALITY REGIONAL BY ROW
and the number of qualified rows residing in the local region is less
than or equal to the hard limit (sum of the LIMIT clause and optional
OFFSET clause values). This optimization is only applied if the hard
limit is 100000 or less.
  • Loading branch information
Mark Sirek committed Feb 16, 2022
1 parent ffd8b81 commit 2fdd9ae
Show file tree
Hide file tree
Showing 28 changed files with 2,421 additions and 506 deletions.
1 change: 1 addition & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ ALL_TESTS = [
"//pkg/sql/opt/optgen/lang:lang_test",
"//pkg/sql/opt/ordering:ordering_test",
"//pkg/sql/opt/partialidx:partialidx_test",
"//pkg/sql/opt/partition:partition_test",
"//pkg/sql/opt/props/physical:physical_test",
"//pkg/sql/opt/props:props_test",
"//pkg/sql/opt/testutils/opttester:opttester_test",
Expand Down
132 changes: 132 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/regional_by_row
Original file line number Diff line number Diff line change
Expand Up @@ -1674,3 +1674,135 @@ statement ok
DROP TABLE regional_by_row;
DROP TABLE regional_by_row_as;
ALTER DATABASE drop_regions DROP REGION "ca-central-1";

##############################################
# Locality optimized scans with LIMIT clause #
##############################################
# In this section we are checking expected results of queries similar to those
# in pkg/sql/opt/xform/testdata/rules/scan and
# pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_query_behavior
# where query plans and rule firing is checked.

statement ok
SET database = multi_region_test_db

# LIMIT clause enables locality optimized scan on a REGIONAL BY ROW table
query IIIIT
SELECT
pk, pk2, a, b, crdb_region
FROM
regional_by_row_table
LIMIT
1
----
1 1 2 3 ap-southeast-2

## Need to scan to remote region. Only 4 rows in local region.
query IIIIT
SELECT
pk, pk2, a, b, crdb_region
FROM
regional_by_row_table
LIMIT
1
OFFSET
4
----
7 7 8 9 ca-central-1

query IIII
SELECT
pk, pk2, a, b
FROM
regional_by_row_table AS a
WHERE
pk
IN (SELECT pk FROM regional_by_row_table AS b LIMIT 3)
----
1 1 2 3
4 4 5 6
23 23 24 25

# Test partitioning on an index column
statement ok
CREATE TABLE regional_by_row_table_as4 (
pk
INT8 PRIMARY KEY,
a
INT8,
crdb_region_col
crdb_internal_region
AS (
CASE
WHEN (a % 3) = 0 THEN 'ap-southeast-2'
WHEN (a % 3) = 1 THEN 'ca-central-1'
ELSE 'us-east-1'
END
) VIRTUAL
NOT NULL,
INDEX a_idx (a),
FAMILY (pk, a)
)
LOCALITY REGIONAL BY ROW AS crdb_region_col

statement ok
INSERT
INTO
regional_by_row_table_as4
SELECT
g, g
FROM
ROWS FROM (generate_series(1, 1000)) AS g (g)

# Locality optimized scan with a range query
query I nodeidx=0
SET database = multi_region_test_db;
SELECT
count(*)
FROM
(
SELECT
*
FROM
regional_by_row_table_as4@a_idx
WHERE
a BETWEEN 1 AND 100
LIMIT
10
)
----
10

# REGIONAL BY ROW AS table with an explicit crdb_internal_region check
# constraint.
statement ok
CREATE TABLE regional_by_row_table_as1 (
pk int PRIMARY KEY,
a int,
b int,
crdb_region_col1 crdb_internal_region NOT NULL AS (
CASE
WHEN pk <= 10 THEN 'ca-central-1'
ELSE 'us-east-1'
END
) VIRTUAL CHECK(crdb_region_col1 BETWEEN 'ap-southeast-2' AND 'us-east-1'),
crdb_region_col crdb_internal_region NOT NULL AS (
CASE
WHEN pk <= 1 THEN 'ca-central-1'
ELSE 'us-east-1'
END
) VIRTUAL,
INDEX (a),
UNIQUE (b),
FAMILY (pk, a, b)
) LOCALITY REGIONAL BY ROW AS crdb_region_col1

statement ok
INSERT INTO regional_by_row_table_as1 (pk) VALUES (1), (2), (3), (10), (20)

query IIITT
SELECT pk, a, b, crdb_region_col, crdb_region_col1 FROM regional_by_row_table_as1 LIMIT 3
----
1 NULL NULL ca-central-1 ca-central-1
2 NULL NULL us-east-1 ca-central-1
3 NULL NULL us-east-1 ca-central-1
Loading

0 comments on commit 2fdd9ae

Please sign in to comment.