Skip to content

Commit

Permalink
Merge #44492
Browse files Browse the repository at this point in the history
44492: opt: make the partitioned constraints examples more readable r=RaduBerinde a=RaduBerinde

The examples around the code for constraining scans using partitions
are very hard to read because of the very long timestamps. Changing
relevant examples and testcases to use an integer column instead, and
realinging some of the comments to make them more readable.

Release note: None

Co-authored-by: Radu Berinde <[email protected]>
  • Loading branch information
craig[bot] and RaduBerinde committed Jan 29, 2020
2 parents 553c9a2 + b95fbff commit 98f00d5
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,18 @@ select
└── filters
└── b = 3

# The orders example that inspired the partitioning index scan.
# The orders example that inspired the partitioning index scan (with creation
# timestamp replaced with sequence number to reduce clutter in spans).
statement ok
CREATE TABLE orders (
region STRING NOT NULL, id INT8 NOT NULL, total DECIMAL NOT NULL, created_at TIMESTAMP NOT NULL,
region STRING NOT NULL, id INT8 NOT NULL, total DECIMAL NOT NULL, seq_num INT NOT NULL,
PRIMARY KEY (region, id)
)

# Create an index with the appropriate partitions.
statement ok
CREATE INDEX orders_by_created_at
ON orders (region, created_at, id)
CREATE INDEX orders_by_seq_num
ON orders (region, seq_num, id)
STORING (total)
PARTITION BY LIST (region)
(
Expand All @@ -91,25 +92,25 @@ CREATE INDEX orders_by_created_at

# The index is used instead of the table scan.
query T
EXPLAIN (OPT) SELECT sum(total) FROM "orders" WHERE created_at >= '2019-05-04' AND created_at < '2019-05-05';
EXPLAIN (OPT) SELECT sum(total) FROM "orders" WHERE seq_num >= 100 AND seq_num < 200;
----
scalar-group-by
├── select
│ ├── scan orders@orders_by_created_at
│ │ └── constraint: /1/4/2: [ - /'europe-west2') [/'europe-west2'/'2019-05-04 00:00:00+00:00' - /'europe-west2'/'2019-05-04 23:59:59.999999+00:00'] [/e'europe-west2\x00'/'2019-05-04 00:00:00+00:00' - /'us-east1') [/'us-east1'/'2019-05-04 00:00:00+00:00' - /'us-east1'/'2019-05-04 23:59:59.999999+00:00'] [/e'us-east1\x00'/'2019-05-04 00:00:00+00:00' - /'us-west1') [/'us-west1'/'2019-05-04 00:00:00+00:00' - /'us-west1'/'2019-05-04 23:59:59.999999+00:00'] [/e'us-west1\x00'/'2019-05-04 00:00:00+00:00' - ]
│ ├── scan orders@orders_by_seq_num
│ │ └── constraint: /1/4/2: [ - /'europe-west2') [/'europe-west2'/100 - /'europe-west2'/199] [/e'europe-west2\x00'/100 - /'us-east1') [/'us-east1'/100 - /'us-east1'/199] [/e'us-east1\x00'/100 - /'us-west1') [/'us-west1'/100 - /'us-west1'/199] [/e'us-west1\x00'/100 - ]
│ └── filters
│ └── (created_at >= '2019-05-04 00:00:00+00:00') AND (created_at < '2019-05-05 00:00:00+00:00')
│ └── (seq_num >= 100) AND (seq_num < 200)
└── aggregations
└── sum
└── variable: total

# The partition values are not required as the index is constrained as is.
query T
EXPLAIN (OPT) SELECT sum(total) FROM "orders" WHERE region = 'us-east1' AND created_at >= '2019-05-04' AND created_at < '2019-05-05';
EXPLAIN (OPT) SELECT sum(total) FROM "orders" WHERE region = 'us-east1' AND seq_num >= 100 AND seq_num < 200;
----
scalar-group-by
├── scan orders@orders_by_created_at
│ └── constraint: /1/4/2: [/'us-east1'/'2019-05-04 00:00:00+00:00' - /'us-east1'/'2019-05-04 23:59:59.999999+00:00']
├── scan orders@orders_by_seq_num
│ └── constraint: /1/4/2: [/'us-east1'/100 - /'us-east1'/199]
└── aggregations
└── sum
└── variable: total
Expand Down
49 changes: 30 additions & 19 deletions pkg/sql/opt/xform/custom_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ func (c *CustomFuncs) GenerateConstrainedScans(
// also add the spans for the in between ranges. Consider the following index
// and its partition:
//
// CREATE INDEX orders_by_created_at
// ON orders (region, created_at, id)
// CREATE INDEX orders_by_seq_num
// ON orders (region, seq_num, id)
// STORING (total)
// PARTITION BY LIST (region)
// (
Expand All @@ -262,10 +262,11 @@ func (c *CustomFuncs) GenerateConstrainedScans(
// )
//
// The constraint generated for the query:
// SELECT sum(total) FROM orders WHERE created_at >= '2019-05-04' AND created_at < '2019-05-05'
// SELECT sum(total) FROM orders WHERE seq_num >= 100 AND seq_num < 200
// is:
//
// [/'europe-west2'/'2019-05-04 00:00:00+00:00' - /'europe-west2'/'2019-05-04 23:59:59.999999+00:00'] [/'us-east1'/'2019-05-04 00:00:00+00:00' - /'us-east1'/'2019-05-04 23:59:59.999999+00:00'] [/'us-west1'/'2019-05-04 00:00:00+00:00' - /'us-west1'/'2019-05-04 23:59:59.999999+00:00']
// [/'europe-west2'/100 - /'europe-west2'/199]
// [/'us-east1'/100 - /'us-east1'/199]
// [/'us-west1'/100 - /'us-west1'/199]
//
// You'll notice that the spans before europe-west2, after us-west1 and in between
// the defined partitions are missing. We must add these spans now, appropriately
Expand All @@ -277,15 +278,20 @@ func (c *CustomFuncs) GenerateConstrainedScans(
// Using the partitioning example and the query above, if we added the in between
// spans at the same time as the partitioned ones, we would end up with a span that
// looked like:
// [ - /'europe-west2'/99]
//
// [ - /'europe-west2'/'2019-05-04 23:59:59.999999+00:00'] ...
//
// However, allowing the partition spans to be constrained further and then adding the
// spans give us a more constrained index scan as shown below:
// Allowing the partition spans to be constrained further and then adding
// the spans give us a more constrained index scan as shown below:
// [ - /'europe-west2')
// [/'europe-west2'/100 - /'europe-west2'/199]
// [/e'europe-west2\x00'/100 - /'us-east1')
// [/'us-east1'/100 - /'us-east1'/199]
// [/e'us-east1\x00'/100 - /'us-west1')
// [/'us-west1'/100 - /'us-west1'/199]
// [/e'us-west1\x00'/100 - ]
//
// [ - /'europe-west2') [/'europe-west2'/'2019-05-04 00:00:00+00:00' - /'europe-west2'/'2019-05-04 23:59:59.999999+00:00'] ...
// Notice how we 'skip' all the europe-west2 rows with seq_num < 100.
//
// Notice how we 'skip' all the europe-west2 values that satisfy (created_at < '2019-05-04')
if isIndexPartitioned {
inBetweenConstraint, inBetweenRemainingFilters, ok := c.tryConstrainIndex(
constrainedInBetweenFilters, scanPrivate.Table, iter.indexOrdinal, false /* isInverted */)
Expand Down Expand Up @@ -782,12 +788,12 @@ func (c *CustomFuncs) constructOr(conditions memo.ScalarListExpr) opt.ScalarExpr
// For example consider the following table and partitioned index:
//
// CREATE TABLE orders (
// region STRING NOT NULL, id INT8 NOT NULL, total DECIMAL NOT NULL, created_at TIMESTAMP NOT NULL,
// region STRING NOT NULL, id INT8 NOT NULL, total DECIMAL NOT NULL, seq_num INT NOT NULL,
// PRIMARY KEY (region, id)
// )
//
// CREATE INDEX orders_by_created_at
// ON orders (region, created_at, id)
// CREATE INDEX orders_by_seq_num
// ON orders (region, seq_num, id)
// STORING (total)
// PARTITION BY LIST (region)
// (
Expand All @@ -797,20 +803,25 @@ func (c *CustomFuncs) constructOr(conditions memo.ScalarListExpr) opt.ScalarExpr
// )
//
// Now consider the following query:
// SELECT sum(total) FROM orders WHERE created_at >= '2019-05-04' AND created_at < '2019-05-05'
// SELECT sum(total) FROM orders WHERE seq_num >= 100 AND seq_num < 200
//
// Normally, the index would not be utilized but because we know what the
// partition values are for the prefix of the index, we can generate
// filters that allow us to use the index (adding the appropriate in-between
// filters to catch all the values that are not part of the partitions).
// By doing so, we get the following plan:
// ----
// scalar-group-by
// ├── select
// │ ├── scan orders@orders_by_created_at
// │ │ └── constraint: /1/4/2: [ - /'europe-west2') [/'europe-west2'/'2019-05-04 00:00:00+00:00' - /'europe-west2'/'2019-05-04 23:59:59.999999+00:00'] [/e'europe-west2\x00'/'2019-05-04 00:00:00+00:00' - /'us-east1') [/'us-east1'/'2019-05-04 00:00:00+00:00' - /'us-east1'/'2019-05-04 23:59:59.999999+00:00'] [/e'us-east1\x00'/'2019-05-04 00:00:00+00:00' - /'us-west1') [/'us-west1'/'2019-05-04 00:00:00+00:00' - /'us-west1'/'2019-05-04 23:59:59.999999+00:00'] [/e'us-west1\x00'/'2019-05-04 00:00:00+00:00' - ]
// │ ├── scan orders@orders_by_seq_num
// │ │ └── constraint: /1/4/2: [ - /'europe-west2')
// │ │ [/'europe-west2'/100 - /'europe-west2'/199]
// │ │ [/e'europe-west2\x00'/100 - /'us-east1')
// │ │ [/'us-east1'/100 - /'us-east1'/199]
// │ │ [/e'us-east1\x00'/100 - /'us-west1')
// │ │ [/'us-west1'/100 - /'us-west1'/199]
// │ │ [/e'us-west1\x00'/100 - ]
// │ └── filters
// │ └── (created_at >= '2019-05-04 00:00:00+00:00') AND (created_at < '2019-05-05 00:00:00+00:00')
// │ └── (seq_num >= 100) AND (seq_num < 200)
// └── aggregations
// └── sum
// └── variable: total
Expand Down
16 changes: 9 additions & 7 deletions pkg/sql/opt/xform/testdata/rules/scan
Original file line number Diff line number Diff line change
Expand Up @@ -531,14 +531,16 @@ select
└── s COLLATE en = 'hello' COLLATE en [type=bool, outer=(1)]

# Realistic example where using constraints as filters help.
# An even more realistic exmple would have a creation timestamp instead of a
# seq_num integer, but that makes the plans much more cluttered.
exec-ddl
CREATE TABLE "orders" (
region STRING NOT NULL,
id INT NOT NULL,
total DECIMAL NOT NULL,
created_at TIMESTAMP NOT NULL,
seq_num INT NOT NULL,
PRIMARY KEY (region, id),
UNIQUE INDEX orders_by_created_at (region, created_at, id) STORING (total),
UNIQUE INDEX orders_by_seq_num (region, seq_num, id) STORING (total),
CHECK (region IN ('us-east1', 'us-west1', 'europe-west2'))
)
----
Expand Down Expand Up @@ -567,7 +569,7 @@ ALTER TABLE "orders" INJECT STATISTICS '[
"created_at": "2018-01-01 1:00:00.00000+00:00"
},
{
"columns": ["created_at"],
"columns": ["seq_num"],
"distinct_count": 50,
"null_count": 0,
"row_count": 100,
Expand All @@ -577,16 +579,16 @@ ALTER TABLE "orders" INJECT STATISTICS '[
----

opt
SELECT sum(total) FROM "orders" WHERE created_at >= '2019-05-04' AND created_at < '2019-05-05'
SELECT sum(total) FROM "orders" WHERE seq_num >= 100 AND seq_num < 200
----
scalar-group-by
├── columns: sum:5(decimal)
├── cardinality: [1 - 1]
├── key: ()
├── fd: ()-->(5)
├── scan orders@orders_by_created_at
│ ├── columns: total:3(decimal!null) created_at:4(timestamp!null)
│ └── constraint: /1/4/2: [/'europe-west2'/'2019-05-04 00:00:00+00:00' - /'europe-west2'/'2019-05-04 23:59:59.999999+00:00'] [/'us-east1'/'2019-05-04 00:00:00+00:00' - /'us-east1'/'2019-05-04 23:59:59.999999+00:00'] [/'us-west1'/'2019-05-04 00:00:00+00:00' - /'us-west1'/'2019-05-04 23:59:59.999999+00:00']
├── scan orders@orders_by_seq_num
│ ├── columns: total:3(decimal!null) seq_num:4(int!null)
│ └── constraint: /1/4/2: [/'europe-west2'/100 - /'europe-west2'/199] [/'us-east1'/100 - /'us-east1'/199] [/'us-west1'/100 - /'us-west1'/199]
└── aggregations
└── sum [type=decimal, outer=(3)]
└── variable: total [type=decimal]
Expand Down

0 comments on commit 98f00d5

Please sign in to comment.