Skip to content

Commit

Permalink
sql: Add support for specifying window frames for window functions
Browse files Browse the repository at this point in the history
WIP on #26464:
ROWS mode is fully supported whereas RANGE works only with
UNBOUNDED PRECEDING/CURRENT ROW/UNBOUNDED FOLLOWING boundaries
(same as in PostgreSQL). Current implementation of aggregate functions
is naive (it simply computes the value of aggregate directly and
discards all the previous computations which results in quadratic time).

Release note (sql change): CockroachDB now supports custom frame
specification for window functions using ROWS (fully-supported)
and RANGE ('value' PRECEDING and 'value' FOLLOWING are not supported)
modes.
  • Loading branch information
yuzefovich committed Jun 13, 2018
1 parent 941cdc4 commit 939b7f5
Show file tree
Hide file tree
Showing 10 changed files with 794 additions and 123 deletions.
183 changes: 182 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/window
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# LogicTest: default opt parallel-stmts distsql distsql-opt distsql-metadata
# LogicTest: default

statement ok
CREATE TABLE kv (
Expand Down Expand Up @@ -1514,3 +1514,184 @@ SELECT MAX(i) * (1/j) * (ROW_NUMBER() OVER (ORDER BY MAX(i))) FROM (SELECT 1 AS
# regression test for #23798 until #10495 is fixed.
statement error function reserved for internal use
SELECT final_variance(1.2, 1.2, 123) OVER (PARTITION BY k) FROM kv


statement ok
CREATE TABLE products (
group_id serial PRIMARY KEY,
group_name VARCHAR (255) NOT NULL,
product_name VARCHAR (255) NOT NULL,
price DECIMAL (11, 2),
priceInt INT,
priceFloat FLOAT
)

statement ok
INSERT INTO products (group_name, product_name, price, priceInt, priceFloat) VALUES
('Smartphone', 'Microsoft Lumia', 200, 200, 200),
('Smartphone', 'HTC One', 400, 400, 400),
('Smartphone', 'Nexus', 500, 500, 500),
('Smartphone', 'iPhone', 900, 900, 900),
('Laptop', 'HP Elite', 1200, 1200, 1200),
('Laptop', 'Lenovo Thinkpad', 700, 700, 700),
('Laptop', 'Sony VAIO', 700, 700, 700),
('Laptop', 'Dell', 800, 800, 800),
('Tablet', 'iPad', 700, 700, 700),
('Tablet', 'Kindle Fire', 150, 150, 150),
('Tablet', 'Samsung', 200, 200, 200)

statement error cannot copy window "w" because it has a frame clause
SELECT price, max(price) OVER (w ORDER BY price) AS max_price FROM products WINDOW w AS (PARTITION BY price ROWS UNBOUNDED PRECEDING);

statement error frame start cannot be UNBOUNDED FOLLOWING
SELECT price, avg(price) OVER (w ORDER BY price ROWS BETWEEN UNBOUNDED FOLLOWING AND UNBOUNDED FOLLOWING) AS avg_price FROM products WINDOW w AS (PARTITION BY price);

statement error frame end cannot be UNBOUNDED PRECEDING
SELECT price, avg(price) OVER (w ORDER BY price ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED PRECEDING) AS avg_price FROM products WINDOW w AS (PARTITION BY price);

statement error frame starting from following row cannot end with current row
SELECT price, avg(price) OVER (w ORDER BY price ROWS 1 FOLLOWING) AS avg_price FROM products WINDOW w AS (PARTITION BY price);

statement error frame starting from current row cannot have preceding rows
SELECT price, avg(price) OVER (w ORDER BY price ROWS BETWEEN CURRENT ROW AND 1 PRECEDING) AS avg_price FROM products WINDOW w AS (PARTITION BY price);

statement error frame starting from following row cannot have preceding rows
SELECT price, avg(price) OVER (w ORDER BY price ROWS BETWEEN 1 FOLLOWING AND CURRENT ROW) AS avg_price FROM products WINDOW w AS (PARTITION BY price);

statement error frame starting from current row cannot have preceding rows
SELECT price, avg(price) OVER (w ORDER BY price ROWS BETWEEN CURRENT ROW AND 0 PRECEDING) AS avg_price FROM products WINDOW w AS (PARTITION BY price);

statement error frame starting from following row cannot have preceding rows
SELECT price, avg(price) OVER (PARTITION BY price ROWS BETWEEN 1 FOLLOWING AND 1 PRECEDING) AS avg_price FROM products;

statement error frame starting offset must not be negative
SELECT price, avg(price) OVER (PARTITION BY price ROWS -1 PRECEDING) AS avg_price FROM products;

statement error frame ending offset must not be negative
SELECT price, avg(price) OVER (PARTITION BY price ROWS BETWEEN 1 FOLLOWING AND -1 FOLLOWING) AS avg_price FROM products;

statement error RANGE PRECEDING is only supported with UNBOUNDED
SELECT product_name, price, last_value(product_name) OVER (PARTITION BY price ORDER BY price RANGE BETWEEN 100 PRECEDING AND CURRENT ROW) AS first FROM products ORDER BY price;

statement error RANGE FOLLOWING is only supported with UNBOUNDED
SELECT product_name, price, last_value(product_name) OVER (PARTITION BY price ORDER BY price RANGE BETWEEN CURRENT ROW AND 100 FOLLOWING) AS first FROM products ORDER BY price;

statement error RANGE PRECEDING is only supported with UNBOUNDED
SELECT product_name, price, last_value(product_name) OVER (PARTITION BY price ORDER BY price RANGE BETWEEN 100 PRECEDING AND 100 FOLLOWING) AS first FROM products ORDER BY price;

query TRT
SELECT product_name, price, first_value(product_name) OVER w AS first FROM products WHERE price = 200 OR price = 700 WINDOW w as (PARTITION BY price ORDER BY product_name RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY price, product_name;
----
Microsoft Lumia 200.00 Microsoft Lumia
Samsung 200.00 Microsoft Lumia
Lenovo Thinkpad 700.00 Lenovo Thinkpad
Sony VAIO 700.00 Lenovo Thinkpad
iPad 700.00 Lenovo Thinkpad

query TRT
SELECT product_name, price, last_value(product_name) OVER w AS last FROM products WHERE price = 200 OR price = 700 WINDOW w as (PARTITION BY price ORDER BY product_name RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY price, product_name;
----
Microsoft Lumia 200.00 Samsung
Samsung 200.00 Samsung
Lenovo Thinkpad 700.00 iPad
Sony VAIO 700.00 iPad
iPad 700.00 iPad

query TRT
SELECT product_name, price, nth_value(product_name, 2) OVER w AS second FROM products WHERE price = 200 OR price = 700 WINDOW w as (PARTITION BY price ORDER BY product_name RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) ORDER BY price, product_name;
----
Microsoft Lumia 200.00 Samsung
Samsung 200.00 NULL
Lenovo Thinkpad 700.00 Sony VAIO
Sony VAIO 700.00 iPad
iPad 700.00 NULL

query TTRR
SELECT product_name, group_name, price, avg(price) OVER (PARTITION BY group_name ORDER BY price, product_name ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS avg_of_three FROM products ORDER BY group_name, price, product_name;
----
Lenovo Thinkpad Laptop 700.00 700.00
Sony VAIO Laptop 700.00 733.33333333333333333
Dell Laptop 800.00 900.00
HP Elite Laptop 1200.00 1000.00
Microsoft Lumia Smartphone 200.00 300.00
HTC One Smartphone 400.00 366.66666666666666667
Nexus Smartphone 500.00 600.00
iPhone Smartphone 900.00 700.00
Kindle Fire Tablet 150.00 175.00
Samsung Tablet 200.00 350.00
iPad Tablet 700.00 450.00

query TTRR
SELECT product_name, group_name, price, avg(priceFloat) OVER (PARTITION BY group_name ORDER BY price, product_name ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS avg_of_three_floats FROM products ORDER BY group_name, price, product_name;
----
Lenovo Thinkpad Laptop 700.00 700
Sony VAIO Laptop 700.00 733.3333333333334
Dell Laptop 800.00 900
HP Elite Laptop 1200.00 1000
Microsoft Lumia Smartphone 200.00 300
HTC One Smartphone 400.00 366.6666666666667
Nexus Smartphone 500.00 600
iPhone Smartphone 900.00 700
Kindle Fire Tablet 150.00 175
Samsung Tablet 200.00 350
iPad Tablet 700.00 450

query TTRR
SELECT product_name, group_name, price, avg(priceInt) OVER (PARTITION BY group_name ORDER BY price, product_name ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS avg_of_three_ints FROM products ORDER BY group_name, price, product_name;
----
Lenovo Thinkpad Laptop 700.00 700
Sony VAIO Laptop 700.00 733.33333333333333333
Dell Laptop 800.00 900
HP Elite Laptop 1200.00 1000
Microsoft Lumia Smartphone 200.00 300
HTC One Smartphone 400.00 366.66666666666666667
Nexus Smartphone 500.00 600
iPhone Smartphone 900.00 700
Kindle Fire Tablet 150.00 175
Samsung Tablet 200.00 350
iPad Tablet 700.00 450

query TTRR
SELECT group_name, product_name, price, avg(price) OVER (PARTITION BY group_name ROWS (SELECT COUNT(*) FROM PRODUCTS WHERE price = 200) PRECEDING) AS running_avg_of_three FROM products ORDER BY group_id;
----
Smartphone Microsoft Lumia 200.00 200.00
Smartphone HTC One 400.00 300.00
Smartphone Nexus 500.00 366.66666666666666667
Smartphone iPhone 900.00 600.00
Laptop HP Elite 1200.00 1200.00
Laptop Lenovo Thinkpad 700.00 950.00
Laptop Sony VAIO 700.00 866.66666666666666667
Laptop Dell 800.00 733.33333333333333333
Tablet iPad 700.00 700.00
Tablet Kindle Fire 150.00 425.00
Tablet Samsung 200.00 350.00

query TTRR
SELECT group_name, product_name, price, sum(price) OVER (PARTITION BY group_name ROWS 2 PRECEDING) AS running_sum FROM products ORDER BY group_id;
----
Smartphone Microsoft Lumia 200.00 200.00
Smartphone HTC One 400.00 600.00
Smartphone Nexus 500.00 1100.00
Smartphone iPhone 900.00 1800.00
Laptop HP Elite 1200.00 1200.00
Laptop Lenovo Thinkpad 700.00 1900.00
Laptop Sony VAIO 700.00 2600.00
Laptop Dell 800.00 2200.00
Tablet iPad 700.00 700.00
Tablet Kindle Fire 150.00 850.00
Tablet Samsung 200.00 1050.00

query TTRT
SELECT group_name, product_name, price, array_agg(price) OVER (PARTITION BY group_name ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING) AS array_agg_price FROM products ORDER BY group_id;
----
Smartphone Microsoft Lumia 200.00 {200.00,400.00,500.00}
Smartphone HTC One 400.00 {200.00,400.00,500.00,900.00}
Smartphone Nexus 500.00 {400.00,500.00,900.00}
Smartphone iPhone 900.00 {500.00,900.00}
Laptop HP Elite 1200.00 {1200.00,700.00,700.00}
Laptop Lenovo Thinkpad 700.00 {1200.00,700.00,700.00,800.00}
Laptop Sony VAIO 700.00 {700.00,700.00,800.00}
Laptop Dell 800.00 {700.00,800.00}
Tablet iPad 700.00 {700.00,150.00,200.00}
Tablet Kindle Fire 150.00 {700.00,150.00,200.00}
Tablet Samsung 200.00 {150.00,200.00}
121 changes: 110 additions & 11 deletions pkg/sql/parser/sql.y
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,15 @@ func (u *sqlSymUnion) orders() []*tree.Order {
func (u *sqlSymUnion) groupBy() tree.GroupBy {
return u.val.(tree.GroupBy)
}
func (u *sqlSymUnion) windowFrame() *tree.WindowFrame {
return u.val.(*tree.WindowFrame)
}
func (u *sqlSymUnion) windowFrameBounds() *tree.WindowFrameBounds {
return u.val.(*tree.WindowFrameBounds)
}
func (u *sqlSymUnion) windowFrameBound() *tree.WindowFrameBound {
return u.val.(*tree.WindowFrameBound)
}
func (u *sqlSymUnion) distinctOn() tree.DistinctOn {
return u.val.(tree.DistinctOn)
}
Expand Down Expand Up @@ -926,7 +935,9 @@ func newNameFromStr(s string) *tree.Name {
%type <tree.Window> window_clause window_definition_list
%type <*tree.WindowDef> window_definition over_clause window_specification
%type <str> opt_existing_window_name
%type <empty> opt_frame_clause frame_extent frame_bound
%type <*tree.WindowFrame> opt_frame_clause
%type <*tree.WindowFrameBounds> frame_extent
%type <*tree.WindowFrameBound> frame_bound

%type <[]tree.ColumnID> opt_tableref_col_list tableref_col_list

Expand Down Expand Up @@ -7072,6 +7083,7 @@ window_specification:
RefName: tree.Name($2),
Partitions: $3.exprs(),
OrderBy: $4.orderBy(),
Frame: $5.windowFrame(),
}
}

Expand Down Expand Up @@ -7106,23 +7118,110 @@ opt_partition_clause:
// This is only a subset of the full SQL:2008 frame_clause grammar. We don't
// support <window frame exclusion> yet.
opt_frame_clause:
RANGE frame_extent { return unimplemented(sqllex, "frame range") }
| ROWS frame_extent { return unimplemented(sqllex, "frame rows") }
| /* EMPTY */ {}
RANGE frame_extent
{
bounds := $2.windowFrameBounds()
startBound := bounds.StartBound
endBound := bounds.EndBound
switch {
case startBound.BoundType == tree.ValuePreceding:
sqllex.Error("RANGE PRECEDING is only supported with UNBOUNDED")
return 1
case startBound.BoundType == tree.ValueFollowing:
sqllex.Error("RANGE FOLLOWING is only supported with UNBOUNDED")
return 1
case endBound != nil && endBound.BoundType == tree.ValuePreceding:
sqllex.Error("RANGE PRECEDING is only supported with UNBOUNDED")
return 1
case endBound != nil && endBound.BoundType == tree.ValueFollowing:
sqllex.Error("RANGE FOLLOWING is only supported with UNBOUNDED")
return 1
}
$$.val = &tree.WindowFrame{
Mode: tree.RANGE,
Bounds: bounds,
}
}
| ROWS frame_extent
{
$$.val = &tree.WindowFrame{
Mode: tree.ROWS,
Bounds: $2.windowFrameBounds(),
}
}
| /* EMPTY */
{
$$.val = (*tree.WindowFrame)(nil)
}
frame_extent:
frame_bound { return unimplemented(sqllex, "frame_extent") }
| BETWEEN frame_bound AND frame_bound { return unimplemented(sqllex, "frame_extent") }
frame_bound
{
startBound := $1.windowFrameBound()
switch {
case startBound.BoundType == tree.UnboundedFollowing:
sqllex.Error("frame start cannot be UNBOUNDED FOLLOWING")
return 1
case startBound.BoundType == tree.ValueFollowing:
sqllex.Error("frame starting from following row cannot end with current row")
return 1
}
$$.val = &tree.WindowFrameBounds{StartBound: startBound}
}
| BETWEEN frame_bound AND frame_bound
{
startBound := $2.windowFrameBound()
endBound := $4.windowFrameBound()
switch {
case startBound.BoundType == tree.UnboundedFollowing:
sqllex.Error("frame start cannot be UNBOUNDED FOLLOWING")
return 1
case endBound.BoundType == tree.UnboundedPreceding:
sqllex.Error("frame end cannot be UNBOUNDED PRECEDING")
return 1
case startBound.BoundType == tree.CurrentRow && endBound.BoundType == tree.ValuePreceding:
sqllex.Error("frame starting from current row cannot have preceding rows")
return 1
case startBound.BoundType == tree.ValueFollowing && endBound.BoundType == tree.ValuePreceding:
sqllex.Error("frame starting from following row cannot have preceding rows")
return 1
case startBound.BoundType == tree.ValueFollowing && endBound.BoundType == tree.CurrentRow:
sqllex.Error("frame starting from following row cannot have preceding rows")
return 1
}
$$.val = &tree.WindowFrameBounds{StartBound: startBound, EndBound: endBound}
}
// This is used for both frame start and frame end, with output set up on the
// assumption it's frame start; the frame_extent productions must reject
// invalid cases.
frame_bound:
UNBOUNDED PRECEDING { return unimplemented(sqllex, "frame_bound") }
| UNBOUNDED FOLLOWING { return unimplemented(sqllex, "frame_bound") }
| CURRENT ROW { return unimplemented(sqllex, "frame_bound") }
| a_expr PRECEDING { return unimplemented(sqllex, "frame_bound") }
| a_expr FOLLOWING { return unimplemented(sqllex, "frame_bound") }
UNBOUNDED PRECEDING
{
$$.val = &tree.WindowFrameBound{BoundType: tree.UnboundedPreceding}
}
| UNBOUNDED FOLLOWING
{
$$.val = &tree.WindowFrameBound{BoundType: tree.UnboundedFollowing}
}
| CURRENT ROW
{
$$.val = &tree.WindowFrameBound{BoundType: tree.CurrentRow}
}
| a_expr PRECEDING
{
$$.val = &tree.WindowFrameBound{
OffsetExpr: $1.expr(),
BoundType: tree.ValuePreceding,
}
}
| a_expr FOLLOWING
{
$$.val = &tree.WindowFrameBound{
OffsetExpr: $1.expr(),
BoundType: tree.ValueFollowing,
}
}

// Supporting nonterminals for expressions.

Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/sem/builtins/aggregate_builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ var aggregates = map[string]builtinDefinition{
ReturnType: tree.FixedReturnType(types.Int),
AggregateFunc: newCountRowsAggregate,
WindowFunc: func(params []types.T, evalCtx *tree.EvalContext) tree.WindowFunc {
return newAggregateWindow(newCountRowsAggregate(params, evalCtx))
return newFramableAggregateWindow(newCountRowsAggregate(params, evalCtx))
},
Info: "Calculates the number of rows.",
},
Expand Down Expand Up @@ -318,7 +318,7 @@ func makeAggOverloadWithReturnType(
ReturnType: retType,
AggregateFunc: f,
WindowFunc: func(params []types.T, evalCtx *tree.EvalContext) tree.WindowFunc {
return newAggregateWindow(f(params, evalCtx))
return newFramableAggregateWindow(f(params, evalCtx))
},
Info: info,
}
Expand Down
Loading

0 comments on commit 939b7f5

Please sign in to comment.