Skip to content

Commit

Permalink
[8.x] [NL-to-ESQL] autocorrect bad LIKE wildcards (#202464) (#202631)
Browse files Browse the repository at this point in the history
# Backport

This will backport the following commits from `main` to `8.x`:
- [[NL-to-ESQL] autocorrect bad LIKE wildcards
(#202464)](#202464)

<!--- Backport version: 9.4.3 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Pierre
Gayvallet","email":"[email protected]"},"sourceCommit":{"committedDate":"2024-12-03T07:26:46Z","message":"[NL-to-ESQL]
autocorrect bad LIKE wildcards (#202464)\n\n## Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/198942\r\n\r\nAdd autocorrect
for wrong `LIKE` wildcard.\r\n\r\nThe LLM can make mistake and use SQL
wildcards for LIKE operators (`_`\r\ninstead of `?` and `%` instead of
`*`)\r\n\r\n\r\nExamples\r\n\r\n**generated**\r\n```\r\nFROM logs |
WHERE message LIKE \"a%\" AND TO_UPPER(level) LIKE \"err%\" | WHERE foo
LIKE \"ba_\"\r\n```\r\n**corrected**\r\n```\r\nFROM logs | WHERE message
LIKE \"a*\" AND TO_UPPER(level) LIKE \"err*\" | WHERE foo LIKE
\"ba?\"\r\n```\r\n\r\n---------\r\n\r\nCo-authored-by: kibanamachine
<[email protected]>","sha":"2ace6ffcedec826f7a6c3690fa4f99a5ea63c663","branchLabelMapping":{"^v9.0.0$":"main","^v8.18.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","backport:version","Team:AI
Infra","v8.18.0"],"title":"[NL-to-ESQL] autocorrect bad LIKE
wildcards","number":202464,"url":"https://github.com/elastic/kibana/pull/202464","mergeCommit":{"message":"[NL-to-ESQL]
autocorrect bad LIKE wildcards (#202464)\n\n## Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/198942\r\n\r\nAdd autocorrect
for wrong `LIKE` wildcard.\r\n\r\nThe LLM can make mistake and use SQL
wildcards for LIKE operators (`_`\r\ninstead of `?` and `%` instead of
`*`)\r\n\r\n\r\nExamples\r\n\r\n**generated**\r\n```\r\nFROM logs |
WHERE message LIKE \"a%\" AND TO_UPPER(level) LIKE \"err%\" | WHERE foo
LIKE \"ba_\"\r\n```\r\n**corrected**\r\n```\r\nFROM logs | WHERE message
LIKE \"a*\" AND TO_UPPER(level) LIKE \"err*\" | WHERE foo LIKE
\"ba?\"\r\n```\r\n\r\n---------\r\n\r\nCo-authored-by: kibanamachine
<[email protected]>","sha":"2ace6ffcedec826f7a6c3690fa4f99a5ea63c663"}},"sourceBranch":"main","suggestedTargetBranches":["8.x"],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/202464","number":202464,"mergeCommit":{"message":"[NL-to-ESQL]
autocorrect bad LIKE wildcards (#202464)\n\n## Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/198942\r\n\r\nAdd autocorrect
for wrong `LIKE` wildcard.\r\n\r\nThe LLM can make mistake and use SQL
wildcards for LIKE operators (`_`\r\ninstead of `?` and `%` instead of
`*`)\r\n\r\n\r\nExamples\r\n\r\n**generated**\r\n```\r\nFROM logs |
WHERE message LIKE \"a%\" AND TO_UPPER(level) LIKE \"err%\" | WHERE foo
LIKE \"ba_\"\r\n```\r\n**corrected**\r\n```\r\nFROM logs | WHERE message
LIKE \"a*\" AND TO_UPPER(level) LIKE \"err*\" | WHERE foo LIKE
\"ba?\"\r\n```\r\n\r\n---------\r\n\r\nCo-authored-by: kibanamachine
<[email protected]>","sha":"2ace6ffcedec826f7a6c3690fa4f99a5ea63c663"}},{"branch":"8.x","label":"v8.18.0","branchLabelMappingKey":"^v8.18.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Pierre Gayvallet <[email protected]>
  • Loading branch information
kibanamachine and pgayvallet authored Dec 3, 2024
1 parent 8ce1638 commit 06256a3
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@

import type { ESQLAstQueryExpression } from '@kbn/esql-ast';
import type { QueryCorrection } from './types';
import { applyTimespanLiteralsCorrections } from './timespan_literals';
import { correctTimespanLiterals } from './timespan_literals';
import { correctLikeWildcards } from './like';

export type { QueryCorrection } from './types';

/**
 * Runs every correction pass over the given query AST and returns all the
 * corrections they reported, in the order the passes were invoked.
 *
 * @param query - root expression of the parsed ES|QL query
 * @returns the concatenated list of corrections from each pass
 */
export const correctAll = (query: ESQLAstQueryExpression): QueryCorrection[] => {
const corrections: QueryCorrection[] = [];
// NOTE(review): `applyTimespanLiteralsCorrections` and `correctTimespanLiterals`
// look like the old and new names of the same pass — confirm only one call is intended.
corrections.push(...applyTimespanLiteralsCorrections(query));
corrections.push(...correctTimespanLiterals(query));
corrections.push(...correctLikeWildcards(query));
return corrections;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { parse, BasicPrettyPrinter } from '@kbn/esql-ast';
import { correctLikeWildcards } from './like';

describe('correctLikeWildcards', () => {
  // Parses the query, runs the LIKE wildcard correction on the resulting AST
  // and returns the query printed back from that AST.
  const printCorrected = (esql: string): string => {
    const { root } = parse(esql);
    correctLikeWildcards(root);
    return BasicPrettyPrinter.print(root);
  };

  it('replaces badly used "_" wildcard', () => {
    const corrected = printCorrected('FROM logs | WHERE message LIKE "ba_"');

    expect(corrected).toEqual('FROM logs | WHERE message LIKE "ba?"');
  });

  it('replaces badly used "%" wildcard', () => {
    const corrected = printCorrected('FROM logs | WHERE message LIKE "b%"');

    expect(corrected).toEqual('FROM logs | WHERE message LIKE "b*"');
  });

  it('replaces multiple bad wildcards', () => {
    const corrected = printCorrected('FROM logs | WHERE message LIKE "a__t%"');

    expect(corrected).toEqual('FROM logs | WHERE message LIKE "a??t*"');
  });

  it('replaces bad wildcards in multiple commands and functions', () => {
    const corrected = printCorrected(
      'FROM logs | WHERE message LIKE "a%" AND TO_UPPER(level) LIKE "err%" | WHERE foo LIKE "ba_"'
    );

    expect(corrected).toEqual(
      'FROM logs | WHERE message LIKE "a*" AND TO_UPPER(level) LIKE "err*" | WHERE foo LIKE "ba?"'
    );
  });

  it('does not replace escaped characters', () => {
    // The wildcard is preceded by a backslash in the raw literal, so it must stay untouched.
    const corrected = printCorrected('FROM logs | WHERE message LIKE "ba\\\\_"');

    expect(corrected).toEqual('FROM logs | WHERE message LIKE "ba\\\\_"');
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { Walker, type ESQLAstQueryExpression } from '@kbn/esql-ast';
import { isLikeOperatorNode, isStringLiteralNode } from '../typeguards';
import type { ESQLLikeOperator, ESQLStringLiteral } from '../types';
import type { QueryCorrection } from './types';

/**
 * Fixes SQL-style wildcards mistakenly used in LIKE patterns.
 *
 * The LLM sometimes makes the mistake of using SQL wildcards (`_`, `%`) with
 * the LIKE operator instead of `?` and `*`.
 *
 * E.g.
 * `column LIKE "ba_"` => `column LIKE "ba?"`
 * `column LIKE "ba%"` => `column LIKE "ba*"`
 *
 * Every function node of the query is visited; LIKE operator nodes are handed
 * to `checkLikeNode`, which rewrites the pattern literal in place.
 *
 * @param query - root expression of the query to inspect (mutated when a fix applies)
 * @returns one correction per LIKE operator whose pattern was rewritten
 */
export const correctLikeWildcards = (query: ESQLAstQueryExpression): QueryCorrection[] => {
  const found: QueryCorrection[] = [];

  Walker.walk(query, {
    visitFunction(fnNode) {
      // Only LIKE operators are of interest; every other function is ignored.
      if (!isLikeOperatorNode(fnNode)) {
        return;
      }
      for (const correction of checkLikeNode(fnNode)) {
        found.push(correction);
      }
    },
  });

  return found;
};

/**
 * Checks a single LIKE operator node and rewrites SQL-style wildcards found in
 * its pattern literal.
 *
 * Only nodes with exactly two arguments whose second argument is a string
 * literal are handled; any other shape yields no correction. In the literal,
 * `%` becomes `*` and `_` becomes `?`, except when the character is directly
 * preceded by a backslash. When something changed, the literal's `value` and
 * `name` are both updated in place.
 *
 * @param node - the LIKE operator node to check
 * @returns a one-element list describing the correction when the pattern was
 * rewritten, an empty list otherwise
 */
function checkLikeNode(node: ESQLLikeOperator): QueryCorrection[] {
  const hasStringPattern = node.args.length === 2 && isStringLiteralNode(node.args[1]);
  if (!hasStringPattern) {
    return [];
  }

  const pattern = node.args[1] as ESQLStringLiteral;
  const before = pattern.value;

  // The negative lookbehind skips wildcards that directly follow a backslash.
  const withStars = before.replace(/(?<!\\)%/g, '*');
  const after = withStars.replace(/(?<!\\)_/g, '?');

  if (after === before) {
    return [];
  }

  // Keep `value` and `name` in sync on the literal node.
  pattern.value = after;
  pattern.name = after;

  return [
    {
      type: 'wrong_like_wildcard',
      node,
      description: `Replaced wrong like wildcard in LIKE operator at position ${node.location.min}`,
    },
  ];
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
*/

import { parse, BasicPrettyPrinter } from '@kbn/esql-ast';
import { applyTimespanLiteralsCorrections } from './timespan_literals';
import { correctTimespanLiterals } from './timespan_literals';

describe('getTimespanLiteralsCorrections', () => {
describe('correctTimespanLiterals', () => {
describe('with DATE_TRUNC', () => {
it('replaces a timespan with a proper timespan literal', () => {
const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 year", date)';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);
correctTimespanLiterals(root);

const output = BasicPrettyPrinter.print(root);

Expand All @@ -27,7 +27,7 @@ describe('getTimespanLiteralsCorrections', () => {
const query = 'FROM logs | EVAL truncated = DATE_TRUNC("month", date)';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);
correctTimespanLiterals(root);

const output = BasicPrettyPrinter.print(root);

Expand All @@ -40,7 +40,7 @@ describe('getTimespanLiteralsCorrections', () => {
const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 YEAR", date)';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);
correctTimespanLiterals(root);

const output = BasicPrettyPrinter.print(root);

Expand All @@ -53,7 +53,7 @@ describe('getTimespanLiteralsCorrections', () => {
const query = 'FROM logs | EVAL truncated = DATE_TRUNC("1 year", date)';
const { root } = parse(query);

const corrections = applyTimespanLiteralsCorrections(root);
const corrections = correctTimespanLiterals(root);

expect(corrections).toHaveLength(1);
expect(corrections[0]).toEqual({
Expand All @@ -70,7 +70,7 @@ describe('getTimespanLiteralsCorrections', () => {
const query = 'FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, "1 week")';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);
correctTimespanLiterals(root);

const output = BasicPrettyPrinter.print(root);

Expand All @@ -83,7 +83,7 @@ describe('getTimespanLiteralsCorrections', () => {
const query = 'FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "hour")';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);
correctTimespanLiterals(root);

const output = BasicPrettyPrinter.print(root);

Expand All @@ -96,7 +96,7 @@ describe('getTimespanLiteralsCorrections', () => {
const query = 'FROM logs | STATS hires = COUNT(*) BY week = BUCKET(hire_date, "1 WEEK")';
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);
correctTimespanLiterals(root);

const output = BasicPrettyPrinter.print(root);

Expand All @@ -109,7 +109,7 @@ describe('getTimespanLiteralsCorrections', () => {
const query = 'FROM logs | STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "hour")';
const { root } = parse(query);

const corrections = applyTimespanLiteralsCorrections(root);
const corrections = correctTimespanLiterals(root);

expect(corrections).toHaveLength(1);
expect(corrections[0]).toEqual({
Expand All @@ -129,7 +129,7 @@ describe('getTimespanLiteralsCorrections', () => {
| STATS hires = COUNT(*) BY hour = BUCKET(hire_date, "3 hour")`;
const { root } = parse(query);

applyTimespanLiteralsCorrections(root);
correctTimespanLiterals(root);

const output = BasicPrettyPrinter.print(root, { multiline: true, pipeTab: '' });

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ import { QueryCorrection } from './types';
* `BUCKET(@timestamp, "1 week")` => `BUCKET(@timestamp, 1 week)`
*
*/
export const applyTimespanLiteralsCorrections = (
query: ESQLAstQueryExpression
): QueryCorrection[] => {
export const correctTimespanLiterals = (query: ESQLAstQueryExpression): QueryCorrection[] => {
const corrections: QueryCorrection[] = [];

Walker.walk(query, {
Expand Down
23 changes: 21 additions & 2 deletions x-pack/plugins/inference/common/tasks/nl_to_esql/ast/typeguards.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,19 @@
* 2.0.
*/

import type { ESQLSingleAstItem, ESQLAstItem, ESQLFunction, ESQLLiteral } from '@kbn/esql-ast';
import type { ESQLStringLiteral, ESQLDateTruncFunction, ESQLBucketFunction } from './types';
import type {
ESQLSingleAstItem,
ESQLAstItem,
ESQLFunction,
ESQLLiteral,
ESQLColumn,
} from '@kbn/esql-ast';
import type {
ESQLStringLiteral,
ESQLDateTruncFunction,
ESQLBucketFunction,
ESQLLikeOperator,
} from './types';

export function isSingleItem(item: ESQLAstItem): item is ESQLSingleAstItem {
return Object.hasOwn(item, 'type');
Expand All @@ -16,6 +27,10 @@ export function isFunctionNode(node: ESQLAstItem): node is ESQLFunction {
return isSingleItem(node) && node.type === 'function';
}

/**
 * Type guard: true when `node` is a single AST item whose `type` is `column`.
 */
export function isColumnNode(node: ESQLAstItem): node is ESQLColumn {
  if (!isSingleItem(node)) {
    return false;
  }
  return node.type === 'column';
}

/**
 * Type guard: true when `node` is a single AST item whose `type` is `literal`.
 */
export function isLiteralNode(node: ESQLAstItem): node is ESQLLiteral {
  if (!isSingleItem(node)) {
    return false;
  }
  return node.type === 'literal';
}
Expand All @@ -31,3 +46,7 @@ export function isDateTruncFunctionNode(node: ESQLAstItem): node is ESQLDateTrun
/**
 * Type guard: true when `node` is a function node with the `variadic-call`
 * subtype and the name `bucket`.
 */
export function isBucketFunctionNode(node: ESQLAstItem): node is ESQLBucketFunction {
  if (!isFunctionNode(node)) {
    return false;
  }
  if (node.subtype !== 'variadic-call') {
    return false;
  }
  return node.name === 'bucket';
}

/**
 * Type guard: true when `node` is a function node with the `binary-expression`
 * subtype and the name `like`.
 */
export function isLikeOperatorNode(node: ESQLAstItem): node is ESQLLikeOperator {
  if (!isFunctionNode(node)) {
    return false;
  }
  if (node.subtype !== 'binary-expression') {
    return false;
  }
  return node.name === 'like';
}
5 changes: 5 additions & 0 deletions x-pack/plugins/inference/common/tasks/nl_to_esql/ast/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ import { ESQLFunction, ESQLLiteral } from '@kbn/esql-ast';
*/
export type ESQLDateTruncFunction = ESQLFunction<'variadic-call', 'date_trunc'>;

/**
 * Represents a LIKE operator node: a function node with the
 * `binary-expression` subtype and the name `like`.
 */
export type ESQLLikeOperator = ESQLFunction<'binary-expression', 'like'>;

/**
* represents a BUCKET function node.
*/
Expand Down

0 comments on commit 06256a3

Please sign in to comment.