Skip to content

Commit

Permalink
Parse summarize operator
Browse files Browse the repository at this point in the history
Fixes #6
Updates #11
  • Loading branch information
zombiezen committed Feb 1, 2024
1 parent aed80e9 commit 0156667
Show file tree
Hide file tree
Showing 3 changed files with 353 additions and 0 deletions.
40 changes: 40 additions & 0 deletions parser/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,46 @@ func (op *ProjectColumn) Span() Span {
return newSpan(op.Name.NameSpan.Start, op.X.Span().End)
}

// SummarizeOperator represents a `| summarize` operator in a [TabularExpr].
// It implements [TabularOperator].
//
// A summarize operator has a list of columns, optionally followed by
// the `by` keyword and a second list of columns to group by.
type SummarizeOperator struct {
// Pipe is the span of the pipe token that introduces the operator.
Pipe Span
// Keyword is the span of the operator's keyword token.
Keyword Span
// Cols holds the columns that appear before the `by` keyword, in source order.
// It may be empty.
Cols []*SummarizeColumn
// By is the span of the `by` keyword.
// The parser sets it to the null span when the keyword is absent.
By Span
// GroupBy holds the columns that appear after the `by` keyword, in source order.
GroupBy []*SummarizeColumn
}

// tabularOperator is an empty marker method:
// it exists so that *SummarizeOperator can be used as a [TabularOperator].
func (op *SummarizeOperator) tabularOperator() {}

// Span returns the source range covered by the operator.
// The range always starts at the pipe token.
// It ends at the last group-by column if there is one,
// otherwise at the last column before `by`.
func (op *SummarizeOperator) Span() Span {
	// An operator with no columns at all is not valid,
	// but falling back to the end of the keyword keeps Span from panicking on one.
	end := op.Keyword.End
	if n := len(op.GroupBy); n > 0 {
		end = op.GroupBy[n-1].Span().End
	} else if n := len(op.Cols); n > 0 {
		end = op.Cols[n-1].Span().End
	}
	return newSpan(op.Pipe.Start, end)
}

// A SummarizeColumn is a single column term in a [SummarizeOperator].
// It consists of an expression, optionally preceded by a column name.
// If the column name is omitted, one is derived from the expression.
type SummarizeColumn struct {
// Name is the explicitly written column name.
// It is nil when the term consists of an expression only.
Name *Ident
// Assign is the span of the assignment token between Name and X.
// The parser sets it to the null span when Name is nil.
Assign Span
// X is the column's expression.
X Expr
}

// Span returns the source range covered by the column term.
// The range is that of the expression,
// widened to start at the column name when one is present.
func (op *SummarizeColumn) Span() Span {
	exprSpan := op.X.Span()
	if op.Name != nil {
		return newSpan(op.Name.NameSpan.Start, exprSpan.End)
	}
	return exprSpan
}

// Expr is the interface implemented by all expression AST node types.
type Expr interface {
Node
Expand Down
110 changes: 110 additions & 0 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,12 @@ func (p *parser) tabularExpr() (*TabularExpr, error) {
expr.Operators = append(expr.Operators, op)
}
returnedError = joinErrors(returnedError, err)
case "summarize":
op, err := p.summarizeOperator(pipeToken, operatorName)
if op != nil {
expr.Operators = append(expr.Operators, op)
}
returnedError = joinErrors(returnedError, err)
default:
returnedError = joinErrors(returnedError, &parseError{
source: p.source,
Expand Down Expand Up @@ -343,6 +349,110 @@ func (p *parser) projectOperator(pipe, keyword Token) (*ProjectOperator, error)
}
}

// summarizeOperator parses the body of a summarize operator.
// pipe and keyword are the tokens that introduced the operator;
// their spans are recorded on the returned node.
//
// The body is a comma-separated list of columns,
// optionally followed by the `by` keyword and a comma-separated list of group-by columns.
// The first list may be empty only if `by` follows,
// and once `by` has been seen at least one group-by column is required.
//
// The returned operator is never nil:
// when an error is returned it holds whatever was parsed before the failure.
func (p *parser) summarizeOperator(pipe, keyword Token) (*SummarizeOperator, error) {
	summarize := &SummarizeOperator{
		Pipe:    pipe.Span,
		Keyword: keyword.Span,
		By:      nullSpan(),
	}

	// Columns before `by`.
	for moreCols := true; moreCols; {
		col, err := p.summarizeColumn()
		if isNotFound(err) {
			// Nothing that looks like a column starts here.
			// That ends the list, whether it is empty or we just passed a comma.
			break
		}
		if col != nil {
			// Keep partially parsed columns too, so the tree reflects the input.
			summarize.Cols = append(summarize.Cols, col)
		}
		if err != nil {
			return summarize, makeErrorOpaque(err)
		}

		tok, ok := p.next()
		if !ok {
			// End of input right after a complete column.
			return summarize, nil
		}
		if tok.Kind != TokenComma {
			// Not a list separator: put it back for the `by` check below.
			p.prev()
			moreCols = false
		}
	}

	// Optional `by` keyword.
	byTok, ok := p.next()
	switch {
	case !ok:
		if len(summarize.Cols) > 0 {
			return summarize, nil
		}
		return summarize, &parseError{
			source: p.source,
			span:   byTok.Span,
			err:    fmt.Errorf("expected expression or 'by', got EOF"),
		}
	case byTok.Kind != TokenBy:
		// The token belongs to whatever follows the operator.
		p.prev()
		if len(summarize.Cols) > 0 {
			return summarize, nil
		}
		return summarize, &parseError{
			source: p.source,
			span:   byTok.Span,
			err:    fmt.Errorf("expected expression or 'by', got %s", formatToken(p.source, byTok)),
		}
	}
	summarize.By = byTok.Span

	// Group-by columns. Unlike the first list, a missing column here is an error.
	for {
		col, err := p.summarizeColumn()
		if col != nil && !isNotFound(err) {
			summarize.GroupBy = append(summarize.GroupBy, col)
		}
		if err != nil {
			return summarize, makeErrorOpaque(err)
		}

		tok, ok := p.next()
		if ok && tok.Kind == TokenComma {
			continue
		}
		if ok {
			// Leave the non-comma token for the caller.
			p.prev()
		}
		return summarize, nil
	}
}

// summarizeColumn parses a single column term of a summarize operator:
// an expression, optionally preceded by a column name and an assignment token.
//
// If an identifier is read but is not followed by an assignment token,
// the parser rewinds to where the term started
// and the whole term is parsed as an expression instead.
//
// The returned column is never nil, even when an error is returned.
// Its Name is set only once the assignment token has been seen.
// If the expression cannot be parsed after a "name =" prefix,
// the error is passed through makeErrorOpaque,
// which callers rely on to tell it apart from a not-found error.
func (p *parser) summarizeColumn() (*SummarizeColumn, error) {
	restorePos := p.pos

	col := &SummarizeColumn{
		Assign: nullSpan(),
	}

	name, err := p.ident()
	switch {
	case err == nil:
		// The ok result of next is deliberately ignored:
		// presumably the token returned at end of input never has Kind TokenAssign,
		// so the Kind check covers that case as well.
		if assign, _ := p.next(); assign.Kind == TokenAssign {
			col.Name = name
			col.Assign = assign.Span
		} else {
			// A bare identifier is the start of an expression, not a column name.
			p.pos = restorePos
		}
	case !isNotFound(err):
		// A malformed identifier: report whatever was recovered as the expression.
		// Only store it if it is non-nil, because assigning a nil *Ident to the
		// Expr interface would produce a non-nil interface value wrapping nil.
		if name != nil {
			col.X = name
		}
		return col, makeErrorOpaque(err)
	}

	col.X, err = p.expr()
	if col.Name != nil {
		err = makeErrorOpaque(err)
	}
	return col, err
}

// exprList parses one or more comma-separated expressions.
func (p *parser) exprList() ([]Expr, error) {
first, err := p.expr()
Expand Down
203 changes: 203 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,209 @@ func TestParse(t *testing.T) {
},
},
},
{
name: "UniqueCombination",
query: "StormEvents | summarize by State, EventType",
want: &TabularExpr{
Source: &TableRef{
Table: &Ident{
Name: "StormEvents",
NameSpan: newSpan(0, 11),
},
},
Operators: []TabularOperator{
&SummarizeOperator{
Pipe: newSpan(12, 13),
Keyword: newSpan(14, 23),
By: newSpan(24, 26),
GroupBy: []*SummarizeColumn{
{
Assign: nullSpan(),
X: &Ident{
Name: "State",
NameSpan: newSpan(27, 32),
},
},
{
Assign: nullSpan(),
X: &Ident{
Name: "EventType",
NameSpan: newSpan(34, 43),
},
},
},
},
},
},
},
{
name: "MinAndMax",
query: "StormEvents | summarize Min = min(Duration), Max = max(Duration)",
want: &TabularExpr{
Source: &TableRef{
Table: &Ident{
Name: "StormEvents",
NameSpan: newSpan(0, 11),
},
},
Operators: []TabularOperator{
&SummarizeOperator{
Pipe: newSpan(12, 13),
Keyword: newSpan(14, 23),
Cols: []*SummarizeColumn{
{
Name: &Ident{
Name: "Min",
NameSpan: newSpan(24, 27),
},
Assign: newSpan(28, 29),
X: &CallExpr{
Func: &Ident{
Name: "min",
NameSpan: newSpan(30, 33),
},
Lparen: newSpan(33, 34),
Args: []Expr{&Ident{
Name: "Duration",
NameSpan: newSpan(34, 42),
}},
Rparen: newSpan(42, 43),
},
},
{
Name: &Ident{
Name: "Max",
NameSpan: newSpan(45, 48),
},
Assign: newSpan(49, 50),
X: &CallExpr{
Func: &Ident{
Name: "max",
NameSpan: newSpan(51, 54),
},
Lparen: newSpan(54, 55),
Args: []Expr{&Ident{
Name: "Duration",
NameSpan: newSpan(55, 63),
}},
Rparen: newSpan(63, 64),
},
},
},
By: nullSpan(),
},
},
},
},
{
name: "DistinctCount",
query: "StormEvents | summarize TypesOfStorms=dcount(EventType) by State",
want: &TabularExpr{
Source: &TableRef{
Table: &Ident{
Name: "StormEvents",
NameSpan: newSpan(0, 11),
},
},
Operators: []TabularOperator{
&SummarizeOperator{
Pipe: newSpan(12, 13),
Keyword: newSpan(14, 23),
Cols: []*SummarizeColumn{
{
Name: &Ident{
Name: "TypesOfStorms",
NameSpan: newSpan(24, 37),
},
Assign: newSpan(37, 38),
X: &CallExpr{
Func: &Ident{
Name: "dcount",
NameSpan: newSpan(38, 44),
},
Lparen: newSpan(44, 45),
Args: []Expr{&Ident{
Name: "EventType",
NameSpan: newSpan(45, 54),
}},
Rparen: newSpan(54, 55),
},
},
},
By: newSpan(56, 58),
GroupBy: []*SummarizeColumn{
{
Assign: nullSpan(),
X: &Ident{
Name: "State",
NameSpan: newSpan(59, 64),
},
},
},
},
},
},
},
{
name: "ShortSummarize",
query: "StormEvents | summarize",
err: true,
want: &TabularExpr{
Source: &TableRef{
Table: &Ident{
Name: "StormEvents",
NameSpan: newSpan(0, 11),
},
},
Operators: []TabularOperator{
&SummarizeOperator{
Pipe: newSpan(12, 13),
Keyword: newSpan(14, 23),
By: nullSpan(),
},
},
},
},
{
name: "SummarizeByTerminated",
query: "StormEvents | summarize by",
err: true,
want: &TabularExpr{
Source: &TableRef{
Table: &Ident{
Name: "StormEvents",
NameSpan: newSpan(0, 11),
},
},
Operators: []TabularOperator{
&SummarizeOperator{
Pipe: newSpan(12, 13),
Keyword: newSpan(14, 23),
By: newSpan(24, 26),
},
},
},
},
{
name: "SummarizeRandomToken",
query: "StormEvents | summarize and",
err: true,
want: &TabularExpr{
Source: &TableRef{
Table: &Ident{
Name: "StormEvents",
NameSpan: newSpan(0, 11),
},
},
Operators: []TabularOperator{
&SummarizeOperator{
Pipe: newSpan(12, 13),
Keyword: newSpan(14, 23),
By: nullSpan(),
},
},
},
},
}

equateInvalidSpans := cmp.FilterValues(func(span1, span2 Span) bool {
Expand Down

0 comments on commit 0156667

Please sign in to comment.