diff --git a/packages/kbn-esql-ast/src/ast/helpers.ts b/packages/kbn-esql-ast/src/ast/helpers.ts index 74a7b5c0991e8..2520b6ffabdb2 100644 --- a/packages/kbn-esql-ast/src/ast/helpers.ts +++ b/packages/kbn-esql-ast/src/ast/helpers.ts @@ -14,6 +14,7 @@ import type { ESQLFunction, ESQLIntegerLiteral, ESQLLiteral, + ESQLParamLiteral, ESQLProperNode, } from '../types'; import { BinaryExpressionGroup } from './constants'; @@ -48,6 +49,9 @@ export const isIntegerLiteral = (node: unknown): node is ESQLIntegerLiteral => export const isDoubleLiteral = (node: unknown): node is ESQLIntegerLiteral => isLiteral(node) && node.literalType === 'double'; +export const isParamLiteral = (node: unknown): node is ESQLParamLiteral => + isLiteral(node) && node.literalType === 'param'; + export const isColumn = (node: unknown): node is ESQLColumn => isProperNode(node) && node.type === 'column'; diff --git a/packages/kbn-esql-ast/src/mutate/README.md b/packages/kbn-esql-ast/src/mutate/README.md index 7dfd3d77a1395..546032d248cca 100644 --- a/packages/kbn-esql-ast/src/mutate/README.md +++ b/packages/kbn-esql-ast/src/mutate/README.md @@ -60,3 +60,21 @@ console.log(src); // FROM index METADATA _lang, _id - `.remove()` — Remove a `LIMIT` command by index. - `.set()` — Set the limit value of a specific `LIMIT` command. - `.upsert()` — Insert a `LIMIT` command, or update the limit value if it already exists. + - `.stats` + - `.list()` — List all `STATS` commands. + - `.byIndex()` — Find a `STATS` command by index. + - `.summarize()` — Summarize all `STATS` commands. + - `.summarizeCommand()` — Summarize a specific `STATS` command. 
+ + +## Examples + +Extract all "new" and "used" fields from a `STATS` command (`.summarize()` returns one summary per `STATS` command): + +```ts +const query = EsqlQuery.fromSrc('FROM index | STATS a = max(b), agg(c) BY d'); +const [summary] = mutate.commands.stats.summarize(query); + +console.log(summary.newFields); // Set { 'a', '`agg(c)`', 'd' } +console.log(summary.usedFields); // Set { 'b', 'c', 'd' } +``` diff --git a/packages/kbn-esql-ast/src/mutate/commands/stats/index.test.ts b/packages/kbn-esql-ast/src/mutate/commands/stats/index.test.ts index 6fdafa45e3831..950633c3a4448 100644 --- a/packages/kbn-esql-ast/src/mutate/commands/stats/index.test.ts +++ b/packages/kbn-esql-ast/src/mutate/commands/stats/index.test.ts @@ -111,7 +111,7 @@ describe('commands.stats', () => { name: 'bar', }, ], - fields: ['bar'], + usedFields: new Set(['bar']), }, }, }); @@ -155,7 +155,7 @@ describe('commands.stats', () => { name: 'baz', }, ], - fields: ['foo', 'bar', 'baz'], + usedFields: new Set(['foo', 'bar', 'baz']), }, }, }); @@ -172,15 +172,15 @@ describe('commands.stats', () => { aggregates: { foo: { field: 'foo', - fields: ['f1', 'f2'], + usedFields: new Set(['f1', 'f2']), }, 'a.b': { field: 'a.b', - fields: ['f3'], + usedFields: new Set(['f3']), }, }, }); - expect(summary.fields).toEqual(new Set(['f1', 'f2', 'f3'])); + expect(summary.usedFields).toEqual(new Set(['f1', 'f2', 'f3'])); }); it('can get de-duplicated list of used fields', () => { @@ -190,7 +190,7 @@ describe('commands.stats', () => { const command = commands.stats.byIndex(query.ast, 0)!; const summary = commands.stats.summarizeCommand(query, command); - expect(summary.fields).toEqual(new Set(['f1', 'f2'])); + expect(summary.usedFields).toEqual(new Set(['f1', 'f2'])); }); describe('params', () => { @@ -204,14 +204,14 @@ describe('commands.stats', () => { expect(summary).toMatchObject({ aggregates: { foo: { - fields: ['f1.?aha', '?nested.?param'], + usedFields: new Set(['f1.?aha', '?nested.?param']), }, 'a.b': { - fields: ['f1'], + usedFields: new Set(['f1']), }, }, }); - 
expect(summary.fields).toEqual(new Set(['f1.?aha', '?nested.?param', 'f1'])); + expect(summary.usedFields).toEqual(new Set(['f1.?aha', '?nested.?param', 'f1'])); }); it('can use params as destination field names', () => { @@ -224,11 +224,11 @@ describe('commands.stats', () => { expect(summary).toMatchObject({ aggregates: { '?dest': { - fields: ['asdf'], + usedFields: new Set(['asdf']), }, }, }); - expect(summary.fields).toEqual(new Set(['asdf'])); + expect(summary.usedFields).toEqual(new Set(['asdf'])); }); }); @@ -243,7 +243,7 @@ describe('commands.stats', () => { expect(summary.aggregates).toEqual({ '`max(1)`': expect.any(Object), }); - expect(summary.fields).toEqual(new Set(['abc'])); + expect(summary.usedFields).toEqual(new Set(['abc'])); }); it('returns all "grouping" fields', () => { @@ -257,12 +257,45 @@ describe('commands.stats', () => { '`max(1)`': expect.any(Object), }); expect(summary.grouping).toMatchObject({ - a: { type: 'column' }, - b: { type: 'column' }, - c: { type: 'column' }, + a: expect.any(Object), + b: expect.any(Object), + c: expect.any(Object), }); }); + it('returns grouping destination fields', () => { + const src = 'FROM index | STATS max(1) BY a, b, c'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary.aggregates).toEqual({ + '`max(1)`': expect.any(Object), + }); + expect(summary.grouping).toMatchObject({ + a: expect.any(Object), + b: expect.any(Object), + c: expect.any(Object), + }); + expect(summary.usedFields).toEqual(new Set(['a', 'b', 'c'])); + }); + + it('returns grouping "used" fields', () => { + const src = 'FROM index | STATS max(1) BY a, b, c'; + const query = EsqlQuery.fromSrc(src); + + const command = commands.stats.byIndex(query.ast, 0)!; + const summary = commands.stats.summarizeCommand(query, command); + + expect(summary.grouping).toMatchObject({ + a: expect.any(Object), + b: 
expect.any(Object), + c: expect.any(Object), + }); + expect(summary.usedFields).toEqual(new Set(['a', 'b', 'c'])); + }); + it('can have params and quoted fields in grouping', () => { const src = 'FROM index | STATS max(1) BY `a😎`, ?123, a.?b.?0.`😎`'; const query = EsqlQuery.fromSrc(src); @@ -274,9 +307,9 @@ describe('commands.stats', () => { '`max(1)`': expect.any(Object), }); expect(summary.grouping).toMatchObject({ - '`a😎`': { type: 'column' }, - // '?123': { type: 'column' }, - 'a.?b.?0.`😎`': { type: 'column' }, + '`a😎`': expect.any(Object), + // '?123': expect.any(Object), + 'a.?b.?0.`😎`': expect.any(Object), }); }); }); @@ -293,23 +326,67 @@ describe('commands.stats', () => { aggregates: { '`agg()`': { field: '`agg()`', - fields: [], + usedFields: new Set(), }, }, - fields: new Set([]), + usedFields: new Set([]), }, { aggregates: { '`max(a, b, c)`': { field: '`max(a, b, c)`', - fields: ['a', 'b', 'c'], + usedFields: new Set(['a', 'b', 'c']), }, '`max2(d.e)`': { field: '`max2(d.e)`', - fields: ['d.e'], + usedFields: new Set(['d.e']), }, }, - fields: new Set(['a', 'b', 'c', 'd.e']), + usedFields: new Set(['a', 'b', 'c', 'd.e']), + }, + ]); + }); + + it('return used fields from BY clause', () => { + const src = 'FROM index | STATS agg(1) BY x, y = z, i = max(agg(1, 2, 3, ttt))'; + const query = EsqlQuery.fromSrc(src); + const summary = commands.stats.summarize(query); + + expect(summary).toMatchObject([ + { + usedFields: new Set(['x', 'z', 'ttt']), + }, + ]); + }); + + it('correctly returns used fields', () => { + const src = + 'FROM index | LIMIT 1 | STATS agg(a, b), agg(c, a), d = agg(e) | LIMIT 2 | STATS max(a, b, c), max2(d.e) BY x, y = z, i = max(agg(1, 2, 3, ttt))'; + const query = EsqlQuery.fromSrc(src); + const summary = commands.stats.summarize(query); + + expect(summary).toMatchObject([ + { + usedFields: new Set(['a', 'b', 'c', 'e']), + }, + { + usedFields: new Set(['a', 'b', 'c', 'd.e', 'x', 'z', 'ttt']), + }, + ]); + }); + + it('correctly returns new 
fields', () => { + const src = + 'FROM index | LIMIT 1 | STATS agg(a, b), agg(c, a), d = agg(e) | LIMIT 2 | STATS max(a, b, c), max2(d.e) BY x, y = z, i = max(agg(1, 2, 3, ttt))'; + const query = EsqlQuery.fromSrc(src); + const summary = commands.stats.summarize(query); + + expect(summary).toMatchObject([ + { + newFields: new Set(['`agg(a, b)`', '`agg(c, a)`', 'd']), + }, + { + newFields: new Set(['`max(a, b, c)`', '`max2(d.e)`', 'x', 'y', 'i']), }, ]); }); diff --git a/packages/kbn-esql-ast/src/mutate/commands/stats/index.ts b/packages/kbn-esql-ast/src/mutate/commands/stats/index.ts index 9a07549c0e0ef..ae766bb2369d0 100644 --- a/packages/kbn-esql-ast/src/mutate/commands/stats/index.ts +++ b/packages/kbn-esql-ast/src/mutate/commands/stats/index.ts @@ -18,11 +18,12 @@ import type { ESQLCommand, ESQLList, ESQLLiteral, + ESQLParamLiteral, ESQLProperNode, ESQLTimeInterval, } from '../../../types'; import * as generic from '../../generic'; -import { isColumn, isFunctionExpression } from '../../../ast/helpers'; +import { isColumn, isFunctionExpression, isParamLiteral } from '../../../ast/helpers'; import type { EsqlQuery } from '../../../query'; /** @@ -58,18 +59,25 @@ export interface StatsCommandSummary { /** * Summary of the main arguments of the "STATS" command. */ - aggregates: Record; + aggregates: Record; /** * Summary of the "BY" arguments of the "STATS" command. */ - grouping: Record; + grouping: Record; /** - * De-duplicated list all of ES|QL-syntax formatted field names from the - * {@link aggregates} and {@link grouping} fields. + * A formatted list of field names which were newly created by the + * STATS command. */ - fields: Set; + newFields: Set; + + /** + * De-duplicated list of all field names which were used as-is or to + * construct new fields. The fields are correctly formatted according to + * ES|QL column formatting rules. 
+ */ + usedFields: Set; } /** @@ -77,7 +85,7 @@ export interface StatsCommandSummary { * * STATS <aggregates> [ BY <grouping> ] */ -export interface StatsAggregatesSummary { +export interface StatsFieldSummary { /** * STATS command argument AST node (as was parsed). */ @@ -89,10 +97,10 @@ export interface StatsAggregatesSummary { field: string; /** - * A `column` AST node, which represents the field name. If no column AST node - * was found, a new one "virtual" column node is created. + * A `column` or param AST node, which represents the field name. If no column + * AST node was found, a new "virtual" column node is created. */ - column: ESQLColumn; + column: ESQLColumn | ESQLParamLiteral; /** * The definition of the field, which is the right-hand side of the `=` @@ -106,32 +114,50 @@ export interface StatsAggregatesSummary { terminals: Array; /** - * Correctly formatted list of field names that were found in the {@link terminals}. + * A formatted list of field names which were used for new field + * construction. 
For example, in the below example, `x` and `y` are the + * existing "used" fields: + * + * ``` + * STATS foo = agg(x) BY y, bar = x + * ``` */ - fields: string[]; + usedFields: Set; } const summarizeArgParts = ( query: EsqlQuery, arg: ESQLProperNode -): [column: ESQLColumn, definition: ESQLProperNode] => { +): [field: string, column: ESQLColumn | ESQLParamLiteral, definition: ESQLProperNode] => { + if (isParamLiteral(arg)) { + return [LeafPrinter.param(arg), arg, arg]; + } + + if (isColumn(arg)) { + return [LeafPrinter.column(arg), arg, arg]; + } + if (isFunctionExpression(arg) && arg.name === '=' && isColumn(arg.args[0])) { const [column, definition] = singleItems(arg.args); - return [column as ESQLColumn, definition as ESQLProperNode]; + return [ + LeafPrinter.column(column as ESQLColumn), + column as ESQLColumn, + definition as ESQLProperNode, + ]; } const name = [...query.src].slice(arg.location.min, arg.location.max + 1).join(''); const args = [Builder.identifier({ name })]; const column = Builder.expression.column({ args }); - return [column, arg]; + return [LeafPrinter.column(column), column, arg]; }; -const summarizeArg = (query: EsqlQuery, arg: ESQLProperNode): StatsAggregatesSummary => { - const [column, definition] = summarizeArgParts(query, arg); - const terminals: StatsAggregatesSummary['terminals'] = []; - const fields: StatsAggregatesSummary['fields'] = []; +const summarizeField = (query: EsqlQuery, arg: ESQLProperNode): StatsFieldSummary => { + const [field, column, definition] = summarizeArgParts(query, arg); + const terminals: StatsFieldSummary['terminals'] = []; + const usedFields: StatsFieldSummary['usedFields'] = new Set(); Walker.walk(definition, { visitLiteral(node) { @@ -139,7 +165,7 @@ const summarizeArg = (query: EsqlQuery, arg: ESQLProperNode): StatsAggregatesSum }, visitColumn(node) { terminals.push(node); - fields.push(LeafPrinter.column(node)); + usedFields.add(LeafPrinter.column(node)); }, visitListLiteral(node) { 
terminals.push(node); @@ -149,13 +175,13 @@ const summarizeArg = (query: EsqlQuery, arg: ESQLProperNode): StatsAggregatesSum }, }); - const summary: StatsAggregatesSummary = { + const summary: StatsFieldSummary = { arg, - field: LeafPrinter.column(column), + field, column, definition, terminals, - fields, + usedFields, }; return summary; @@ -171,14 +197,16 @@ const summarizeArg = (query: EsqlQuery, arg: ESQLProperNode): StatsAggregatesSum export const summarizeCommand = (query: EsqlQuery, command: ESQLCommand): StatsCommandSummary => { const aggregates: StatsCommandSummary['aggregates'] = {}; const grouping: StatsCommandSummary['grouping'] = {}; - const fields: StatsCommandSummary['fields'] = new Set(); + const newFields: StatsCommandSummary['newFields'] = new Set(); + const usedFields: StatsCommandSummary['usedFields'] = new Set(); // Process main arguments, the "aggregates" part of the command. new Visitor() .on('visitExpression', (ctx) => { - const summary = summarizeArg(query, ctx.node); + const summary = summarizeField(query, ctx.node); aggregates[summary.field] = summary; - for (const field of summary.fields) fields.add(field); + newFields.add(summary.field); + for (const field of summary.usedFields) usedFields.add(field); }) .on('visitCommand', () => {}) .on('visitStatsCommand', (ctx) => { @@ -188,12 +216,12 @@ export const summarizeCommand = (query: EsqlQuery, command: ESQLCommand): StatsC // Process the "BY" arguments, the "grouping" part of the command. 
new Visitor() - .on('visitExpression', () => {}) - .on('visitColumnExpression', (ctx) => { - const column = ctx.node; - const formatted = LeafPrinter.column(column); - grouping[formatted] = column; - fields.add(formatted); + .on('visitExpression', (ctx) => { + const node = ctx.node; + const summary = summarizeField(query, node); + newFields.add(summary.field); + for (const field of summary.usedFields) usedFields.add(field); + grouping[summary.field] = summary; }) .on('visitCommandOption', (ctx) => { if (ctx.node.name !== 'by') return; @@ -209,7 +237,8 @@ export const summarizeCommand = (query: EsqlQuery, command: ESQLCommand): StatsC command, aggregates, grouping, - fields, + newFields, + usedFields, }; return summary;