Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

executor: support window function row number #9098

Merged
merged 12 commits into from
Feb 18, 2019
32 changes: 22 additions & 10 deletions executor/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/pingcap/tidb/distsql"
"github.com/pingcap/tidb/domain"
"github.com/pingcap/tidb/executor/aggfuncs"
"github.com/pingcap/tidb/executor/windowfunc"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/expression/aggregation"
"github.com/pingcap/tidb/infoschema"
Expand Down Expand Up @@ -1885,17 +1886,28 @@ func (b *executorBuilder) buildWindow(v *plannercore.PhysicalWindow) *WindowExec
for _, item := range v.PartitionBy {
groupByItems = append(groupByItems, item.Col)
}
aggDesc := aggregation.NewAggFuncDesc(b.ctx, v.WindowFuncDesc.Name, v.WindowFuncDesc.Args, false)
resultColIdx := len(v.Schema().Columns) - 1
agg := aggfuncs.Build(b.ctx, aggDesc, resultColIdx)
if agg == nil {
b.err = errors.Trace(errors.New("window evaluator only support aggregation functions without frame now"))
return nil
var processor windowProcessor
if aggregation.IsAggFuncs(v.WindowFuncDesc.Name) {
aggDesc := aggregation.NewAggFuncDesc(b.ctx, v.WindowFuncDesc.Name, v.WindowFuncDesc.Args, false)
agg := aggfuncs.Build(b.ctx, aggDesc, resultColIdx)
processor = &aggWindowProcessor{
windowFunc: agg,
partialResult: agg.AllocPartialResult(),
}
} else {
var wf windowfuncs.WindowFunc
wf, b.err = windowfuncs.Build(b.ctx, v.WindowFuncDesc, resultColIdx)
if b.err != nil {
return nil
}
processor = &noFrameWindowProcessor{
windowFunc: wf,
partialResult: wf.AllocPartialResult(),
}
}
e := &WindowExec{baseExecutor: base,
windowFunc: agg,
partialResult: agg.AllocPartialResult(),
groupChecker: newGroupChecker(b.ctx.GetSessionVars().StmtCtx, groupByItems),
return &WindowExec{baseExecutor: base,
processor: processor,
groupChecker: newGroupChecker(b.ctx.GetSessionVars().StmtCtx, groupByItems),
}
return e
}
105 changes: 85 additions & 20 deletions executor/window.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@ import (
"context"
"time"

"github.com/cznic/mathutil"
"github.com/opentracing/opentracing-go"
"github.com/pingcap/errors"
"github.com/pingcap/tidb/executor/aggfuncs"
"github.com/pingcap/tidb/executor/windowfunc"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/util/chunk"
)

// WindowExec is the executor for window functions. Note that it only supports aggregation without frame clause now.
// WindowExec is the executor for window functions.
type WindowExec struct {
baseExecutor

Expand All @@ -32,12 +35,11 @@ type WindowExec struct {
inputRow chunk.Row
groupRows []chunk.Row
childResults []*chunk.Chunk
windowFunc aggfuncs.AggFunc
partialResult aggfuncs.PartialResult
executed bool
meetNewGroup bool
remainingRowsInGroup int64
remainingRowsInGroup int
remainingRowsInChunk int
processor windowProcessor
}

// Close implements the Executor Close interface.
Expand Down Expand Up @@ -84,7 +86,7 @@ func (e *WindowExec) consumeOneGroup(ctx context.Context, chk *chunk.Chunk) erro
return errors.Trace(err)
}
if e.meetNewGroup {
err := e.consumeGroupRows()
err := e.consumeGroupRows(chk)
if err != nil {
return errors.Trace(err)
}
Expand All @@ -93,6 +95,7 @@ func (e *WindowExec) consumeOneGroup(ctx context.Context, chk *chunk.Chunk) erro
return errors.Trace(err)
}
}
e.remainingRowsInGroup++
e.groupRows = append(e.groupRows, e.inputRow)
if e.meetNewGroup {
e.inputRow = e.inputIter.Next()
Expand All @@ -102,16 +105,19 @@ func (e *WindowExec) consumeOneGroup(ctx context.Context, chk *chunk.Chunk) erro
return nil
}

func (e *WindowExec) consumeGroupRows() error {
func (e *WindowExec) consumeGroupRows(chk *chunk.Chunk) (err error) {
if len(e.groupRows) == 0 {
return nil
}
err := e.windowFunc.UpdatePartialResult(e.ctx, e.groupRows, e.partialResult)
e.copyChk(chk)
remained := mathutil.Min(e.remainingRowsInChunk, e.remainingRowsInGroup)
oldRemained := remained
e.groupRows, remained, err = e.processor.consumeGroupRows(e.ctx, e.groupRows, chk, remained)
if err != nil {
return errors.Trace(err)
}
e.remainingRowsInGroup += int64(len(e.groupRows))
e.groupRows = e.groupRows[:0]
e.remainingRowsInGroup -= oldRemained - remained
e.remainingRowsInChunk -= oldRemained - remained
return nil
}

Expand All @@ -121,7 +127,7 @@ func (e *WindowExec) fetchChildIfNecessary(ctx context.Context, chk *chunk.Chunk
}

// Before fetching a new batch of input, we should consume the last group rows.
err = e.consumeGroupRows()
err = e.consumeGroupRows(chk)
if err != nil {
return errors.Trace(err)
}
Expand All @@ -145,19 +151,18 @@ func (e *WindowExec) fetchChildIfNecessary(ctx context.Context, chk *chunk.Chunk
}

// appendResult2Chunk appends result of the window function to the result chunk.
func (e *WindowExec) appendResult2Chunk(chk *chunk.Chunk) error {
func (e *WindowExec) appendResult2Chunk(chk *chunk.Chunk) (err error) {
e.copyChk(chk)
for e.remainingRowsInGroup > 0 && e.remainingRowsInChunk > 0 {
// TODO: We can extend the agg func interface to avoid the `for` loop here.
err := e.windowFunc.AppendFinalResult2Chunk(e.ctx, e.partialResult, chk)
if err != nil {
return err
}
e.remainingRowsInGroup--
e.remainingRowsInChunk--
remained := mathutil.Min(e.remainingRowsInChunk, e.remainingRowsInGroup)
oldRemained := remained
e.groupRows, remained, err = e.processor.appendResult2Chunk(e.ctx, e.groupRows, chk, remained)
if err != nil {
return err
}
e.remainingRowsInGroup -= oldRemained - remained
e.remainingRowsInChunk -= oldRemained - remained
if e.remainingRowsInGroup == 0 {
e.windowFunc.ResetPartialResult(e.partialResult)
e.processor.resetPartialResult()
}
return nil
}
Expand All @@ -174,3 +179,63 @@ func (e *WindowExec) copyChk(chk *chunk.Chunk) {
chk.MakeRefTo(i, childResult, col.Index)
}
}

// windowProcessor is the interface for processing different kinds of window functions.
type windowProcessor interface {
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
consumeGroupRows(ctx sessionctx.Context, rows []chunk.Row, chk *chunk.Chunk, remained int) ([]chunk.Row, int, error)
appendResult2Chunk(ctx sessionctx.Context, rows []chunk.Row, chk *chunk.Chunk, remained int) ([]chunk.Row, int, error)
resetPartialResult()
}

type aggWindowProcessor struct {
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
windowFunc aggfuncs.AggFunc
partialResult aggfuncs.PartialResult
}

func (p *aggWindowProcessor) consumeGroupRows(ctx sessionctx.Context, rows []chunk.Row, chk *chunk.Chunk, remained int) ([]chunk.Row, int, error) {
err := p.windowFunc.UpdatePartialResult(ctx, rows, p.partialResult)
rows = rows[:0]
return rows, remained, err
}

func (p *aggWindowProcessor) appendResult2Chunk(ctx sessionctx.Context, rows []chunk.Row, chk *chunk.Chunk, remained int) ([]chunk.Row, int, error) {
for remained > 0 {
// TODO: We can extend the agg func interface to avoid the `for` loop here.
err := p.windowFunc.AppendFinalResult2Chunk(ctx, p.partialResult, chk)
if err != nil {
return rows, remained, err
}
remained--
}
return rows, remained, nil
}

func (p *aggWindowProcessor) resetPartialResult() {
p.windowFunc.ResetPartialResult(p.partialResult)
}

type noFrameWindowProcessor struct {
windowFunc windowfuncs.WindowFunc
partialResult windowfuncs.PartialResult
}

func (p *noFrameWindowProcessor) consumeGroupRows(ctx sessionctx.Context, rows []chunk.Row, chk *chunk.Chunk, remained int) ([]chunk.Row, int, error) {
var err error
rows, remained, err = p.windowFunc.ProcessOneChunk(ctx, rows, p.partialResult, chk, remained)
return rows, remained, err
}

func (p *noFrameWindowProcessor) appendResult2Chunk(ctx sessionctx.Context, rows []chunk.Row, chk *chunk.Chunk, remained int) ([]chunk.Row, int, error) {
var err error
for remained > 0 {
rows, remained, err = p.windowFunc.ExhaustResult(ctx, rows, p.partialResult, chk, remained)
if err != nil {
return rows, remained, err
}
}
return rows, remained, err
}

func (p *noFrameWindowProcessor) resetPartialResult() {
p.windowFunc.ResetPartialResult(p.partialResult)
}
5 changes: 5 additions & 0 deletions executor/window_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,9 @@ func (s *testSuite2) TestWindowFunctions(c *C) {
result.Check(testkit.Rows("21", "21", "21", "21", "21", "21", "21", "21", "21"))
result = tk.MustQuery("select _tidb_rowid, sum(t.a) over() from t")
result.Check(testkit.Rows("1 7", "2 7", "3 7"))

result = tk.MustQuery("select a, row_number() over() from t")
result.Check(testkit.Rows("1 1", "4 2", "2 3"))
result = tk.MustQuery("select a, row_number() over(partition by a) from t")
result.Check(testkit.Rows("1 1", "2 1", "4 1"))
}
34 changes: 34 additions & 0 deletions executor/windowfunc/builder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package windowfuncs

import (
"github.com/pingcap/parser/ast"
"github.com/pingcap/tidb/expression/aggregation"
"github.com/pingcap/tidb/sessionctx"
"github.com/pkg/errors"
alivxxx marked this conversation as resolved.
Show resolved Hide resolved
)

// Build builds window functions according to the window functions description.
func Build(sctx sessionctx.Context, windowFuncDesc *aggregation.WindowFuncDesc, ordinal int) (WindowFunc, error) {
eurekaka marked this conversation as resolved.
Show resolved Hide resolved
switch windowFuncDesc.Name {
case ast.WindowFuncRowNumber:
return buildRowNumber(ordinal)
}
return nil, errors.Errorf("not supported window function %s", windowFuncDesc.Name)
}

func buildRowNumber(ordinal int) (WindowFunc, error) {
return &rowNumber{baseWindowFunc: baseWindowFunc{ordinal: ordinal}}, nil
}
51 changes: 51 additions & 0 deletions executor/windowfunc/func_row_number.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package windowfuncs

import (
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/util/chunk"
)

type rowNumber struct {
baseWindowFunc
}

type partialResult4RowNumber struct {
curIdx int64
}

func (wf *rowNumber) ProcessOneChunk(sctx sessionctx.Context, rows []chunk.Row, pr PartialResult, dest *chunk.Chunk, remained int) ([]chunk.Row, int, error) {
p := (*partialResult4RowNumber)(pr)
processedRows, numRows := 0, len(rows)
for processedRows < numRows && processedRows < remained {
p.curIdx++
processedRows++
dest.AppendInt64(wf.ordinal, p.curIdx)
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
}
return rows[processedRows:], remained - processedRows, nil
}

func (wf *rowNumber) ExhaustResult(sctx sessionctx.Context, rows []chunk.Row, pr PartialResult, dest *chunk.Chunk, remained int) ([]chunk.Row, int, error) {
return rows, remained, nil
}

func (wf *rowNumber) AllocPartialResult() PartialResult {
return PartialResult(&partialResult4RowNumber{})
}

func (wf *rowNumber) ResetPartialResult(pr PartialResult) {
p := (*partialResult4RowNumber)(pr)
p.curIdx = 0
}
49 changes: 49 additions & 0 deletions executor/windowfunc/window_funcs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright 2019 PingCAP, Inc.
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package windowfuncs

import (
"unsafe"

"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/util/chunk"
)

// PartialResult represents data structure to store the partial result for the
// aggregate functions. Here we use unsafe.Pointer to allow the partial result
// to be any type.
type PartialResult unsafe.Pointer
qw4990 marked this conversation as resolved.
Show resolved Hide resolved

// WindowFunc is the interface for processing window functions.
type WindowFunc interface {
// ProcessOneChunk processes one chunk and write results to chunk.
ProcessOneChunk(sctx sessionctx.Context, rows []chunk.Row, pr PartialResult, dest *chunk.Chunk, remainedRows int) ([]chunk.Row, int, error)
zz-jason marked this conversation as resolved.
Show resolved Hide resolved
// ExhaustResult exhausts result to chunk.
ExhaustResult(sctx sessionctx.Context, rows []chunk.Row, pr PartialResult, dest *chunk.Chunk, remainedRows int) ([]chunk.Row, int, error)
// AllocPartialResult allocates a specific data structure to store the partial result.
AllocPartialResult() PartialResult
// ResetPartialResult resets the partial result.
ResetPartialResult(pr PartialResult)
}

type baseWindowFunc struct {
// args stores the input arguments for an aggregate function, we should
// call arg.EvalXXX to get the actual input data for this function.
args []expression.Expression

// ordinal stores the ordinal of the columns in the output chunk, which is
// used to append the final result of this function.
ordinal int
}
Loading