Skip to content

Commit

Permalink
[Obs AI assistant] Replace with double quotes (elastic#187327)
Browse files Browse the repository at this point in the history
## Summary

Follow up of elastic/elasticsearch#108395

### Checklist

- [ ] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios

---------

Co-authored-by: Søren Louv-Jansen <[email protected]>
(cherry picked from commit 7b635f3)
  • Loading branch information
stratoula committed Jul 5, 2024
1 parent fbe21c3 commit 9a591f3
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 116 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,147 +11,165 @@ describe('correctCommonEsqlMistakes', () => {
return input.replaceAll(/[\t|\s]*\n[\t|\s]*/gms, '\n');
}

function expectQuery(input: string, expectedOutput: string) {
function expectQuery({ input, expectedOutput }: { input: string; expectedOutput: string }) {
expect(normalize(correctCommonEsqlMistakes(input).output)).toEqual(normalize(expectedOutput));
}

it('replaces aliasing via the AS keyword with the = operator', () => {
expectQuery(`FROM logs-* | STATS COUNT() AS count`, 'FROM logs-*\n| STATS count = COUNT()');

expectQuery(`FROM logs-* | STATS COUNT() as count`, 'FROM logs-*\n| STATS count = COUNT()');

expectQuery(
`FROM logs-* | STATS AVG(transaction.duration.histogram) AS avg_request_latency, PERCENTILE(transaction.duration.histogram, 95) AS p95`,
`FROM logs-*
| STATS avg_request_latency = AVG(transaction.duration.histogram), p95 = PERCENTILE(transaction.duration.histogram, 95)`
);

expectQuery(
`FROM traces-apm*
expectQuery({
input: `FROM logs-* | STATS COUNT() AS count`,
expectedOutput: 'FROM logs-*\n| STATS count = COUNT()',
});

expectQuery({
input: `FROM logs-* | STATS COUNT() as count`,
expectedOutput: 'FROM logs-*\n| STATS count = COUNT()',
});

expectQuery({
input: `FROM logs-* | STATS AVG(transaction.duration.histogram) AS avg_request_latency, PERCENTILE(transaction.duration.histogram, 95) AS p95`,
expectedOutput: `FROM logs-*
| STATS avg_request_latency = AVG(transaction.duration.histogram), p95 = PERCENTILE(transaction.duration.histogram, 95)`,
});

expectQuery({
input: `FROM traces-apm*
| WHERE @timestamp >= NOW() - 24 hours
| STATS AVG(transaction.duration.us) AS avg_duration, SUM(success) AS total_successes, COUNT(*) AS total_requests BY service.name`,
`FROM traces-apm*
expectedOutput: `FROM traces-apm*
| WHERE @timestamp >= NOW() - 24 hours
| STATS avg_duration = AVG(transaction.duration.us), total_successes = SUM(success), total_requests = COUNT(*) BY service.name`
);
| STATS avg_duration = AVG(transaction.duration.us), total_successes = SUM(success), total_requests = COUNT(*) BY service.name`,
});
});

it(`replaces " or ' escaping in FROM statements with backticks`, () => {
expectQuery(`FROM "logs-*" | LIMIT 10`, 'FROM logs-*\n| LIMIT 10');
expectQuery(`FROM 'logs-*' | LIMIT 10`, 'FROM logs-*\n| LIMIT 10');
expectQuery(`FROM logs-* | LIMIT 10`, 'FROM logs-*\n| LIMIT 10');
it("replaces ` or ' escaping in FROM statements with double quotes", () => {
expectQuery({ input: `FROM "logs-*" | LIMIT 10`, expectedOutput: 'FROM "logs-*"\n| LIMIT 10' });
expectQuery({ input: `FROM 'logs-*' | LIMIT 10`, expectedOutput: 'FROM "logs-*"\n| LIMIT 10' });
expectQuery({ input: 'FROM `logs-*` | LIMIT 10', expectedOutput: 'FROM "logs-*"\n| LIMIT 10' });
expectQuery({
input: `FROM 'logs-2024-07-01','logs-2024-07-02' | LIMIT 10`,
expectedOutput: 'FROM "logs-2024-07-01","logs-2024-07-02"\n| LIMIT 10',
});
expectQuery({
input: 'FROM `logs-2024-07-01`,`logs-2024-07-02` | LIMIT 10',
expectedOutput: 'FROM "logs-2024-07-01","logs-2024-07-02"\n| LIMIT 10',
});
expectQuery({ input: `FROM logs-* | LIMIT 10`, expectedOutput: 'FROM logs-*\n| LIMIT 10' });
});

it('replaces = as equal operator with ==', () => {
expectQuery(
`FROM logs-*\n| WHERE service.name = "foo"`,
`FROM logs-*\n| WHERE service.name == "foo"`
);

expectQuery(
`FROM logs-*\n| WHERE service.name = "foo" AND service.environment = "bar"`,
`FROM logs-*\n| WHERE service.name == "foo" AND service.environment == "bar"`
);

expectQuery(
`FROM logs-*\n| WHERE (service.name = "foo" AND service.environment = "bar") OR agent.name = "baz"`,
`FROM logs-*\n| WHERE (service.name == "foo" AND service.environment == "bar") OR agent.name == "baz"`
);

expectQuery(
`FROM logs-*\n| WHERE \`what=ever\` = "foo=bar"`,
`FROM logs-*\n| WHERE \`what=ever\` == "foo=bar"`
);
expectQuery({
input: `FROM logs-*\n| WHERE service.name = "foo"`,
expectedOutput: `FROM logs-*\n| WHERE service.name == "foo"`,
});

expectQuery({
input: `FROM logs-*\n| WHERE service.name = "foo" AND service.environment = "bar"`,
expectedOutput: `FROM logs-*\n| WHERE service.name == "foo" AND service.environment == "bar"`,
});

expectQuery({
input: `FROM logs-*\n| WHERE (service.name = "foo" AND service.environment = "bar") OR agent.name = "baz"`,
expectedOutput: `FROM logs-*\n| WHERE (service.name == "foo" AND service.environment == "bar") OR agent.name == "baz"`,
});

expectQuery({
input: `FROM logs-*\n| WHERE \`what=ever\` = "foo=bar"`,
expectedOutput: `FROM logs-*\n| WHERE \`what=ever\` == "foo=bar"`,
});
});

it('replaces single-quote escaped strings with double-quote escaped strings', () => {
expectQuery(
`FROM nyc_taxis
expectQuery({
input: `FROM nyc_taxis
| WHERE DATE_EXTRACT('hour', dropoff_datetime) >= 6 AND DATE_EXTRACT('hour', dropoff_datetime) < 10
| LIMIT 10`,
`FROM nyc_taxis
expectedOutput: `FROM nyc_taxis
| WHERE DATE_EXTRACT("hour", dropoff_datetime) >= 6 AND DATE_EXTRACT("hour", dropoff_datetime) < 10
| LIMIT 10`
);
expectQuery(
`FROM nyc_taxis
| LIMIT 10`,
});
expectQuery({
input: `FROM nyc_taxis
| WHERE DATE_EXTRACT('hour', "hh:mm a, 'of' d MMMM yyyy") >= 6 AND DATE_EXTRACT('hour', dropoff_datetime) < 10
| LIMIT 10`,
`FROM nyc_taxis
expectedOutput: `FROM nyc_taxis
| WHERE DATE_EXTRACT("hour", "hh:mm a, 'of' d MMMM yyyy") >= 6 AND DATE_EXTRACT("hour", dropoff_datetime) < 10
| LIMIT 10`
);
| LIMIT 10`,
});
});

it(`verifies if the SORT key is in KEEP, and if it's not, it will include it`, () => {
expectQuery(
'FROM logs-* \n| KEEP date \n| SORT @timestamp DESC',
'FROM logs-*\n| KEEP date, @timestamp\n| SORT @timestamp DESC'
);

expectQuery(
`FROM logs-* | KEEP date, whatever | EVAL my_truncated_date_field = DATE_TRUNC(1 year, date) | SORT @timestamp, my_truncated_date_field DESC`,
'FROM logs-*\n| KEEP date, whatever, @timestamp\n| EVAL my_truncated_date_field = DATE_TRUNC(1 year, date)\n| SORT @timestamp, my_truncated_date_field DESC'
);

expectQuery(
`FROM logs-*\n| STATS COUNT(*) BY BUCKET(@timestamp, 1m)\n| SORT \`BUCKET(@timestamp, 1m)\` DESC`,
`FROM logs-*\n| STATS COUNT(*) BY BUCKET(@timestamp, 1m)\n| SORT \`BUCKET(@timestamp, 1m)\` DESC`
);

expectQuery(
`FROM logs-* | KEEP date, whatever | RENAME whatever AS forever | SORT forever DESC`,
`FROM logs-*\n| KEEP date, whatever\n| RENAME whatever AS forever\n| SORT forever DESC`
);

expectQuery(
'FROM employees\n| KEEP first_name, last_name\n| RENAME first_name AS fn, last_name AS ln',
'FROM employees\n| KEEP first_name, last_name\n| RENAME first_name AS fn, last_name AS ln'
);
expectQuery({
input: 'FROM logs-* \n| KEEP date \n| SORT @timestamp DESC',
expectedOutput: 'FROM logs-*\n| KEEP date, @timestamp\n| SORT @timestamp DESC',
});

expectQuery({
input: `FROM logs-* | KEEP date, whatever | EVAL my_truncated_date_field = DATE_TRUNC(1 year, date) | SORT @timestamp, my_truncated_date_field DESC`,
expectedOutput:
'FROM logs-*\n| KEEP date, whatever, @timestamp\n| EVAL my_truncated_date_field = DATE_TRUNC(1 year, date)\n| SORT @timestamp, my_truncated_date_field DESC',
});

expectQuery({
input: `FROM logs-*\n| STATS COUNT(*) BY BUCKET(@timestamp, 1m)\n| SORT \`BUCKET(@timestamp, 1m)\` DESC`,
expectedOutput: `FROM logs-*\n| STATS COUNT(*) BY BUCKET(@timestamp, 1m)\n| SORT \`BUCKET(@timestamp, 1m)\` DESC`,
});

expectQuery({
input: `FROM logs-* | KEEP date, whatever | RENAME whatever AS forever | SORT forever DESC`,
expectedOutput: `FROM logs-*\n| KEEP date, whatever\n| RENAME whatever AS forever\n| SORT forever DESC`,
});

expectQuery({
input:
'FROM employees\n| KEEP first_name, last_name\n| RENAME first_name AS fn, last_name AS ln',
expectedOutput:
'FROM employees\n| KEEP first_name, last_name\n| RENAME first_name AS fn, last_name AS ln',
});
});

it(`escapes the column name if SORT uses an expression`, () => {
expectQuery(
'FROM logs-* \n| STATS COUNT(*) by service.name\n| SORT COUNT(*) DESC',
'FROM logs-*\n| STATS COUNT(*) BY service.name\n| SORT `COUNT(*)` DESC'
);

expectQuery(
'FROM logs-* \n| STATS COUNT(*) by service.name\n| SORT COUNT(*) DESC, @timestamp ASC',
'FROM logs-*\n| STATS COUNT(*) BY service.name\n| SORT `COUNT(*)` DESC, @timestamp ASC'
);

expectQuery(
`FROM employees\n| KEEP first_name, last_name, height\n| SORT first_name ASC NULLS FIRST`,
`FROM employees\n| KEEP first_name, last_name, height\n| SORT first_name ASC NULLS FIRST`
);

expectQuery(
`FROM employees
expectQuery({
input: 'FROM logs-* \n| STATS COUNT(*) by service.name\n| SORT COUNT(*) DESC',
expectedOutput: 'FROM logs-*\n| STATS COUNT(*) BY service.name\n| SORT `COUNT(*)` DESC',
});

expectQuery({
input: 'FROM logs-* \n| STATS COUNT(*) by service.name\n| SORT COUNT(*) DESC, @timestamp ASC',
expectedOutput:
'FROM logs-*\n| STATS COUNT(*) BY service.name\n| SORT `COUNT(*)` DESC, @timestamp ASC',
});

expectQuery({
input: `FROM employees\n| KEEP first_name, last_name, height\n| SORT first_name ASC NULLS FIRST`,
expectedOutput: `FROM employees\n| KEEP first_name, last_name, height\n| SORT first_name ASC NULLS FIRST`,
});

expectQuery({
input: `FROM employees
| STATS my_count = COUNT() BY LEFT(last_name, 1)
| SORT \`LEFT(last_name, 1)\``,
`FROM employees
expectedOutput: `FROM employees
| STATS my_count = COUNT() BY LEFT(last_name, 1)
| SORT \`LEFT(last_name, 1)\``
);
| SORT \`LEFT(last_name, 1)\``,
});
});

it(`handles complicated queries correctly`, () => {
expectQuery(
`FROM "postgres-logs*"
expectQuery({
input: `FROM "postgres-logs*"
| GROK message "%{TIMESTAMP_ISO8601:timestamp} %{TZ} \[%{NUMBER:process_id}\]: \[%{NUMBER:log_line}\] user=%{USER:user},db=%{USER:database},app=\[%{DATA:application}\],client=%{IP:client_ip} LOG: duration: %{NUMBER:duration:float} ms statement: %{GREEDYDATA:statement}"
| EVAL "@timestamp" = TO_DATETIME(timestamp)
| WHERE statement LIKE 'SELECT%'
| STATS avg_duration = AVG(duration)`,
`FROM postgres-logs*
expectedOutput: `FROM "postgres-logs*"
| GROK message "%{TIMESTAMP_ISO8601:timestamp} %{TZ} \[%{NUMBER:process_id}\]: \[%{NUMBER:log_line}\] user=%{USER:user},db=%{USER:database},app=\[%{DATA:application}\],client=%{IP:client_ip} LOG: duration: %{NUMBER:duration:float} ms statement: %{GREEDYDATA:statement}"
| EVAL @timestamp = TO_DATETIME(timestamp)
| WHERE statement LIKE "SELECT%"
| STATS avg_duration = AVG(duration)`
);
| STATS avg_duration = AVG(duration)`,
});

expectQuery(
`FROM metrics-apm*
expectQuery({
input: `FROM metrics-apm*
| WHERE metricset.name == "service_destination" AND @timestamp > NOW() - 24 hours
| EVAL total_events = span.destination.service.response_time.count
| EVAL total_latency = span.destination.service.response_time.sum.us
Expand All @@ -161,27 +179,27 @@ describe('correctCommonEsqlMistakes', () => {
avg_latency_per_request = AVG(total_latency / total_events),
failure_rate = AVG(is_failure)
BY span.destination.service.resource`,
`FROM metrics-apm*
expectedOutput: `FROM metrics-apm*
| WHERE metricset.name == "service_destination" AND @timestamp > NOW() - 24 hours
| EVAL total_events = span.destination.service.response_time.count
| EVAL total_latency = span.destination.service.response_time.sum.us
| EVAL is_failure = CASE(event.outcome == "failure", 1, 0)
| STATS avg_throughput = AVG(total_events), avg_latency_per_request = AVG(total_latency / total_events), failure_rate = AVG(is_failure) BY span.destination.service.resource`
);
| STATS avg_throughput = AVG(total_events), avg_latency_per_request = AVG(total_latency / total_events), failure_rate = AVG(is_failure) BY span.destination.service.resource`,
});

expectQuery(
`FROM sample_data
expectQuery({
input: `FROM sample_data
| EVAL successful = CASE(
STARTS_WITH(message, "Connected to"), 1,
message == "Connection error", 0
)
| STATS success_rate = AVG(successful)`,
`FROM sample_data
expectedOutput: `FROM sample_data
| EVAL successful = CASE(
STARTS_WITH(message, "Connected to"), 1,
message == "Connection error", 0
)
| STATS success_rate = AVG(successful)`
);
| STATS success_rate = AVG(successful)`,
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -234,15 +234,13 @@ export function correctCommonEsqlMistakes(query: string): {

const formattedCommands: string[] = commands.map(({ name, command }, index) => {
let formattedCommand = command;

switch (name) {
case 'FROM':
formattedCommand = formattedCommand
.replaceAll(/FROM "(.*)"/g, 'FROM $1')
.replaceAll(/FROM '(.*)'/g, 'FROM $1')
.replaceAll(/FROM `(.*)`/g, 'FROM $1');
case 'FROM': {
formattedCommand = split(formattedCommand, ',')
.map((singlePattern) => singlePattern.replaceAll(/`/g, '"').replaceAll(/'/g, '"'))
.join(',');
break;

}
case 'WHERE':
formattedCommand = replaceSingleQuotesWithDoubleQuotes(formattedCommand);
formattedCommand = ensureEqualityOperators(formattedCommand);
Expand Down

0 comments on commit 9a591f3

Please sign in to comment.