diff --git a/packages/kbn-esql-ast/src/ast/helpers.ts b/packages/kbn-esql-ast/src/ast/helpers.ts index 74a7b5c0991e8..2520b6ffabdb2 100644 --- a/packages/kbn-esql-ast/src/ast/helpers.ts +++ b/packages/kbn-esql-ast/src/ast/helpers.ts @@ -14,6 +14,7 @@ import type { ESQLFunction, ESQLIntegerLiteral, ESQLLiteral, + ESQLParamLiteral, ESQLProperNode, } from '../types'; import { BinaryExpressionGroup } from './constants'; @@ -48,6 +49,9 @@ export const isIntegerLiteral = (node: unknown): node is ESQLIntegerLiteral => export const isDoubleLiteral = (node: unknown): node is ESQLIntegerLiteral => isLiteral(node) && node.literalType === 'double'; +export const isParamLiteral = (node: unknown): node is ESQLParamLiteral => + isLiteral(node) && node.literalType === 'param'; + export const isColumn = (node: unknown): node is ESQLColumn => isProperNode(node) && node.type === 'column'; diff --git a/packages/kbn-esql-ast/src/mutate/README.md b/packages/kbn-esql-ast/src/mutate/README.md index 7dfd3d77a1395..546032d248cca 100644 --- a/packages/kbn-esql-ast/src/mutate/README.md +++ b/packages/kbn-esql-ast/src/mutate/README.md @@ -60,3 +60,21 @@ console.log(src); // FROM index METADATA _lang, _id - `.remove()` — Remove a `LIMIT` command by index. - `.set()` — Set the limit value of a specific `LIMIT` command. - `.upsert()` — Insert a `LIMIT` command, or update the limit value if it already exists. + - `.stats` + - `.list()` — List all `STATS` commands. + - `.byIndex()` — Find a `STATS` command by index. + - `.summarize()` — Summarize all `STATS` commands. + - `.summarizeCommand()` — Summarize a specific `STATS` command. + + +## Examples + +Extract all "new" and "used" fields from all `STATS` commands: + +```ts +const query = EsqlQuery.fromSrc('FROM index | STATS a = max(b), agg(c) BY d'); +const summary = mutate.commands.stats.summarize(query); + +console.log(summary.newFields); // [ 'a', '`agg(c)`' ] +console.log(summary.usedFields); // [ 'b', 'c', 'd' ] +``` diff --git a/packages/kbn-esql-ast/src/mutate/commands/index.ts b/packages/kbn-esql-ast/src/mutate/commands/index.ts index 9e2599c493459..8068aa5d3ef94 100644 --- a/packages/kbn-esql-ast/src/mutate/commands/index.ts +++ b/packages/kbn-esql-ast/src/mutate/commands/index.ts @@ -10,5 +10,6 @@ import * as from from './from'; import * as limit from './limit'; import * as sort from './sort'; +import * as stats from './stats'; -export { from, limit, sort }; +export { from, limit, sort, stats }; diff --git a/packages/kbn-esql-ast/src/mutate/commands/stats/index.test.ts b/packages/kbn-esql-ast/src/mutate/commands/stats/index.test.ts new file mode 100644 index 0000000000000..950633c3a4448 --- /dev/null +++ b/packages/kbn-esql-ast/src/mutate/commands/stats/index.test.ts @@ -0,0 +1,394 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +import * as commands from '..'; +import { EsqlQuery } from '../../../query'; + +describe('commands.stats', () => { + describe('.list()', () => { + it('lists all "STATS" commands', () => { + const src = 'FROM index | LIMIT 1 | STATS agg() | LIMIT 2 | STATS max()'; + const query = EsqlQuery.fromSrc(src); + + const nodes = [...commands.stats.list(query.ast)]; + + expect(nodes).toMatchObject([ + { + type: 'command', + name: 'stats', + args: [ + { + type: 'function', + name: 'agg', + }, + ], + }, + { + type: 'command', + name: 'stats', + args: [ + { + type: 'function', + name: 'max', + }, + ], + }, + ]); + }); + }); + + describe('.byIndex()', () => { + it('retrieves the specific "STATS" command by index', () => { + const src = 'FROM index | LIMIT 1 | STATS agg() | LIMIT 2 | STATS max()'; + const query = EsqlQuery.fromSrc(src); + + const node1 = commands.stats.byIndex(query.ast, 1); + const node2 = commands.stats.byIndex(query.ast, 0); + + expect(node1).toMatchObject({ + type: 'command', + name: 'stats', + args: [ + { + type: 'function', + name: 'max', + }, + ], + }); + expect(node2).toMatchObject({ + type: 'command', + name: 'stats', + args: [ + { + type: 'function', + name: 'agg', + }, + ], + }); + }); + }); + + describe('.summarizeCommand()', () => { + it('returns summary of a simple field, defined through assignment', () => { + const src = 'FROM index | STATS foo = agg(bar)'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary).toMatchObject({ + command, + aggregates: { + foo: { + arg: { + type: 'function', + name: '=', + }, + field: 'foo', + column: { + type: 'column', + name: 'foo', + }, + definition: { + type: 'function', + name: 'agg', + args: [ + { + type: 'column', + name: 'bar', + }, + ], + }, + terminals: [ + { + type: 'column', + name: 'bar', + }, + ], + usedFields: new Set(['bar']), + }, + }, + }); + }); + + it('can summarize field defined without assignment', () => { + const src = 'FROM index | STATS agg( /* haha 😅 */ max(foo), bar, baz)'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary).toMatchObject({ + command, + aggregates: { + '`agg( /* haha 😅 */ max(foo), bar, baz)`': { + arg: { + type: 'function', + name: 'agg', + }, + field: '`agg( /* haha 😅 */ max(foo), bar, baz)`', + column: { + type: 'column', + name: '`agg( /* haha 😅 */ max(foo), bar, baz)`', + }, + definition: { + type: 'function', + name: 'agg', + }, + terminals: [ + { + type: 'column', + name: 'foo', + }, + { + type: 'column', + name: 'bar', + }, + { + type: 'column', + name: 'baz', + }, + ], + usedFields: new Set(['foo', 'bar', 'baz']), + }, + }, + }); + }); + + it('returns a map of stats about two fields', () => { + const src = 'FROM index | STATS foo = agg(f1) + agg(f2), a.b = agg(f3)'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary).toMatchObject({ + aggregates: { + foo: { + field: 'foo', + usedFields: new Set(['f1', 'f2']), + }, + 'a.b': { + field: 'a.b', + usedFields: new Set(['f3']), + }, + }, + }); + expect(summary.usedFields).toEqual(new Set(['f1', 'f2', 'f3'])); + }); + + it('can get de-duplicated list of used fields', () => { + const src = 'FROM index | STATS foo = agg(f1) + agg(f2), a.b = agg(f1)'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary.usedFields).toEqual(new Set(['f1', 'f2'])); + }); + + describe('params', () => { + it('can use params as source field names', () => { + const src = 'FROM index | STATS foo = agg(f1.?aha) + ?aha(?nested.?param), a.b = agg(f1)'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary).toMatchObject({ + aggregates: { + foo: { + usedFields: new Set(['f1.?aha', '?nested.?param']), + }, + 'a.b': { + usedFields: new Set(['f1']), + }, + }, + }); + expect(summary.usedFields).toEqual(new Set(['f1.?aha', '?nested.?param', 'f1'])); + }); + + it('can use params as destination field names', () => { + const src = 'FROM index | STATS ?dest = agg(asdf) BY asdf'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary).toMatchObject({ + aggregates: { + '?dest': { + usedFields: new Set(['asdf']), + }, + }, + }); + expect(summary.usedFields).toEqual(new Set(['asdf'])); + }); + }); + + describe('BY option', () => { + it('can collect fields from the BY option', () => { + const src = 'FROM index | STATS max(1) BY abc'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary.aggregates).toEqual({ + '`max(1)`': expect.any(Object), + }); + expect(summary.usedFields).toEqual(new Set(['abc'])); + }); + + it('returns all "grouping" fields', () => { + const src = 'FROM index | STATS max(1) BY a, b, c'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary.aggregates).toEqual({ + '`max(1)`': expect.any(Object), + }); + expect(summary.grouping).toMatchObject({ + a: expect.any(Object), + b: expect.any(Object), + c: expect.any(Object), + }); + }); + + it('returns grouping destination fields', () => { + const src = 'FROM index | STATS max(1) BY a, b, c'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary.aggregates).toEqual({ + '`max(1)`': expect.any(Object), + }); + expect(summary.grouping).toMatchObject({ + a: expect.any(Object), + b: expect.any(Object), + c: expect.any(Object), + }); + expect(summary.usedFields).toEqual(new Set(['a', 'b', 'c'])); + }); + + it('returns grouping "used" fields', () => { + const src = 'FROM index | STATS max(1) BY a, b, c'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary.grouping).toMatchObject({ + a: expect.any(Object), + b: expect.any(Object), + c: expect.any(Object), + }); + expect(summary.usedFields).toEqual(new Set(['a', 'b', 'c'])); + }); + + it('can have params and quoted fields in grouping', () => { + const src = 'FROM index | STATS max(1) BY `a😎`, ?123, a.?b.?0.`😎`'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary.aggregates).toEqual({ + '`max(1)`': expect.any(Object), + }); + expect(summary.grouping).toMatchObject({ + '`a😎`': expect.any(Object), + // '?123': expect.any(Object), + 'a.?b.?0.`😎`': expect.any(Object), + }); + }); + }); + }); + + describe('.summarize()', () => { + it('can summarize multiple stats commands', () => { + const src = 'FROM index | LIMIT 1 | STATS agg() | LIMIT 2 | STATS max(a, b, c), max2(d.e)'; + const query = EsqlQuery.fromSrc(src); + const summary = commands.stats.summarize(query); + + expect(summary).toMatchObject([ + { + aggregates: { + '`agg()`': { + field: '`agg()`', + usedFields: new Set(), + }, + }, + usedFields: new Set([]), + }, + { + aggregates: { + '`max(a, b, c)`': { + field: '`max(a, b, c)`', + usedFields: new Set(['a', 'b', 'c']), + }, + '`max2(d.e)`': { + field: '`max2(d.e)`', + usedFields: new Set(['d.e']), + }, + }, + usedFields: new Set(['a', 'b', 'c', 'd.e']), + }, + ]); + }); + + it('return used fields from BY clause', () => { + const src = 'FROM index | STATS agg(1) BY x, y = z, i = max(agg(1, 2, 3, ttt))'; + const query = EsqlQuery.fromSrc(src); + const summary = commands.stats.summarize(query); + + expect(summary).toMatchObject([ + { + usedFields: new Set(['x', 'z', 'ttt']), + }, + ]); + }); + + it('correctly returns used fields', () => { + const src = + 'FROM index | LIMIT 1 | STATS agg(a, b), agg(c, a), d = agg(e) | LIMIT 2 | STATS max(a, b, c), max2(d.e) BY x, y = z, i = max(agg(1, 2, 3, ttt))'; + const query = EsqlQuery.fromSrc(src); + const summary = commands.stats.summarize(query); + + expect(summary).toMatchObject([ + { + usedFields: new Set(['a', 'b', 'c', 'e']), + }, + { + usedFields: new Set(['a', 'b', 'c', 'd.e', 'x', 'z', 'ttt']), + }, + ]); + }); + + it('correctly returns new fields', () => { + const src = + 'FROM index | LIMIT 1 | STATS agg(a, b), agg(c, a), d = agg(e) | LIMIT 2 | STATS max(a, b, c), max2(d.e) BY x, y = z, i = max(agg(1, 2, 3, ttt))'; + const query = EsqlQuery.fromSrc(src); + const summary = commands.stats.summarize(query); + + expect(summary).toMatchObject([ + { + newFields: new Set(['`agg(a, b)`', '`agg(c, a)`', 'd']), + }, + { + newFields: new Set(['`max(a, b, c)`', '`max2(d.e)`', 'x', 'y', 'i']), + }, + ]); + }); + }); +}); diff --git a/packages/kbn-esql-ast/src/mutate/commands/stats/index.ts b/packages/kbn-esql-ast/src/mutate/commands/stats/index.ts new file mode 100644 index 0000000000000..ae766bb2369d0 --- /dev/null +++ b/packages/kbn-esql-ast/src/mutate/commands/stats/index.ts @@ -0,0 +1,263 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +import { Walker } from '../../../walker'; +import { Visitor } from '../../../visitor'; +import { LeafPrinter } from '../../../pretty_print'; +import { Builder } from '../../../builder'; +import { singleItems } from '../../../visitor/utils'; +import type { + ESQLAstQueryExpression, + ESQLColumn, + ESQLCommand, + ESQLList, + ESQLLiteral, + ESQLParamLiteral, + ESQLProperNode, + ESQLTimeInterval, +} from '../../../types'; +import * as generic from '../../generic'; +import { isColumn, isFunctionExpression, isParamLiteral } from '../../../ast/helpers'; +import type { EsqlQuery } from '../../../query'; + +/** + * Lists all "LIMIT" commands in the query AST. + * + * @param ast The root AST node to search for "LIMIT" commands. + * @returns A collection of "LIMIT" commands. + */ +export const list = (ast: ESQLAstQueryExpression): IterableIterator => { + return generic.commands.list(ast, (cmd) => cmd.name === 'stats'); +}; + +/** + * Retrieves the "LIMIT" command at the specified index in order of appearance. + * + * @param ast The root AST node to search for "LIMIT" commands. + * @param index The index of the "LIMIT" command to retrieve. + * @returns The "LIMIT" command at the specified index, if any. + */ +export const byIndex = (ast: ESQLAstQueryExpression, index: number): ESQLCommand | undefined => { + return [...list(ast)][index]; +}; + +/** + * Summary of a STATS command. + */ +export interface StatsCommandSummary { + /** + * The "STATS" command AST node from which this summary was produced. + */ + command: ESQLCommand; + + /** + * Summary of the main arguments of the "STATS" command. + */ + aggregates: Record; + + /** + * Summary of the "BY" arguments of the "STATS" command. + */ + grouping: Record; + + /** + * A formatted list of field names which were newly created by the + * STATS command. + */ + newFields: Set; + + /** + * De-duplicated list all of field names, which were used to as-is or to + * construct new fields. The fields are correctly formatted according to + * ES|QL column formatting rules. + */ + usedFields: Set; +} + +/** + * Summary of STATS command "aggregates" section (main arguments). + * + * STATS [ BY ] + */ +export interface StatsFieldSummary { + /** + * STATS command argument AST node (as was parsed). + */ + arg: ESQLProperNode; + + /** + * The field name, correctly formatted, extracted from the AST. + */ + field: string; + + /** + * A `column` or param AST node, which represents the field name. If no column + * AST node was found, a new one "virtual" column node is created. + */ + column: ESQLColumn | ESQLParamLiteral; + + /** + * The definition of the field, which is the right-hand side of the `=` + * operator, or the argument itself if no `=` operator is present. + */ + definition: ESQLProperNode; + + /** + * A list of terminal nodes that were found in the definition. + */ + terminals: Array; + + /** + * A formatted list of field names which were used for new field + * construction. For example, in the below example, `x` and `y` are the + * existing "used" fields: + * + * ``` + * STATS foo = agg(x) BY y, bar = x + * ``` + */ + usedFields: Set; +} + +const summarizeArgParts = ( + query: EsqlQuery, + arg: ESQLProperNode +): [field: string, column: ESQLColumn | ESQLParamLiteral, definition: ESQLProperNode] => { + if (isParamLiteral(arg)) { + return [LeafPrinter.param(arg), arg, arg]; + } + + if (isColumn(arg)) { + return [LeafPrinter.column(arg), arg, arg]; + } + + if (isFunctionExpression(arg) && arg.name === '=' && isColumn(arg.args[0])) { + const [column, definition] = singleItems(arg.args); + + return [ + LeafPrinter.column(column as ESQLColumn), + column as ESQLColumn, + definition as ESQLProperNode, + ]; + } + + const name = [...query.src].slice(arg.location.min, arg.location.max + 1).join(''); + const args = [Builder.identifier({ name })]; + const column = Builder.expression.column({ args }); + + return [LeafPrinter.column(column), column, arg]; +}; + +const summarizeField = (query: EsqlQuery, arg: ESQLProperNode): StatsFieldSummary => { + const [field, column, definition] = summarizeArgParts(query, arg); + const terminals: StatsFieldSummary['terminals'] = []; + const usedFields: StatsFieldSummary['usedFields'] = new Set(); + + Walker.walk(definition, { + visitLiteral(node) { + terminals.push(node); + }, + visitColumn(node) { + terminals.push(node); + usedFields.add(LeafPrinter.column(node)); + }, + visitListLiteral(node) { + terminals.push(node); + }, + visitTimeIntervalLiteral(node) { + terminals.push(node); + }, + }); + + const summary: StatsFieldSummary = { + arg, + field, + column, + definition, + terminals, + usedFields, + }; + + return summary; +}; + +/** + * Returns a summary of the STATS command. + * + * @param query Query which contains the AST and source code. + * @param command The STATS command AST node to summarize. + * @returns Summary of the STATS command. + */ +export const summarizeCommand = (query: EsqlQuery, command: ESQLCommand): StatsCommandSummary => { + const aggregates: StatsCommandSummary['aggregates'] = {}; + const grouping: StatsCommandSummary['grouping'] = {}; + const newFields: StatsCommandSummary['newFields'] = new Set(); + const usedFields: StatsCommandSummary['usedFields'] = new Set(); + + // Process main arguments, the "aggregates" part of the command. + new Visitor() + .on('visitExpression', (ctx) => { + const summary = summarizeField(query, ctx.node); + aggregates[summary.field] = summary; + newFields.add(summary.field); + for (const field of summary.usedFields) usedFields.add(field); + }) + .on('visitCommand', () => {}) + .on('visitStatsCommand', (ctx) => { + for (const _ of ctx.visitArguments()); + }) + .visitCommand(command); + + // Process the "BY" arguments, the "grouping" part of the command. + new Visitor() + .on('visitExpression', (ctx) => { + const node = ctx.node; + const summary = summarizeField(query, node); + newFields.add(summary.field); + for (const field of summary.usedFields) usedFields.add(field); + grouping[summary.field] = summary; + }) + .on('visitCommandOption', (ctx) => { + if (ctx.node.name !== 'by') return; + for (const _ of ctx.visitArguments()); + }) + .on('visitCommand', () => {}) + .on('visitStatsCommand', (ctx) => { + for (const _ of ctx.visitOptions()); + }) + .visitCommand(command); + + const summary: StatsCommandSummary = { + command, + aggregates, + grouping, + newFields, + usedFields, + }; + + return summary; +}; + +/** + * Summarizes all STATS commands in the query. + * + * @param query Query to summarize. + * @returns Returns a list of summaries for all STATS commands in the query in + * order of appearance. + */ +export const summarize = (query: EsqlQuery): StatsCommandSummary[] => { + const summaries: StatsCommandSummary[] = []; + + for (const command of list(query.ast)) { + const summary = summarizeCommand(query, command); + summaries.push(summary); + } + + return summaries; +};