elastic · stratoula · Jul 5, 2024 · Jul 2, 2024 · Jul 4, 2024 · Jul 4, 2024
diff --git a/...bservability_ai_assistant_app/server/functions/query/correct_common_esql_mistakes.test.ts b/...bservability_ai_assistant_app/server/functions/query/correct_common_esql_mistakes.test.ts
@@ -11,147 +11,165 @@ describe('correctCommonEsqlMistakes', () => {
     return input.replaceAll(/[\t|\s]*\n[\t|\s]*/gms, '\n');
   }
 
-  function expectQuery(input: string, expectedOutput: string) {
+  function expectQuery({ input, expectedOutput }: { input: string; expectedOutput: string }) {
     expect(normalize(correctCommonEsqlMistakes(input).output)).toEqual(normalize(expectedOutput));
   }
 
   it('replaces aliasing via the AS keyword with the = operator', () => {
-    expectQuery(`FROM logs-* | STATS COUNT() AS count`, 'FROM logs-*\n| STATS count = COUNT()');
-
-    expectQuery(`FROM logs-* | STATS COUNT() as count`, 'FROM logs-*\n| STATS count = COUNT()');
-
-    expectQuery(
-      `FROM logs-* | STATS AVG(transaction.duration.histogram) AS avg_request_latency, PERCENTILE(transaction.duration.histogram, 95) AS p95`,
-      `FROM logs-*
-      | STATS avg_request_latency = AVG(transaction.duration.histogram), p95 = PERCENTILE(transaction.duration.histogram, 95)`
-    );
-
-    expectQuery(
-      `FROM traces-apm*
+    expectQuery({
+      input: `FROM logs-* | STATS COUNT() AS count`,
+      expectedOutput: 'FROM logs-*\n| STATS count = COUNT()',
+    });
+
+    expectQuery({
+      input: `FROM logs-* | STATS COUNT() as count`,
+      expectedOutput: 'FROM logs-*\n| STATS count = COUNT()',
+    });
+
+    expectQuery({
+      input: `FROM logs-* | STATS AVG(transaction.duration.histogram) AS avg_request_latency, PERCENTILE(transaction.duration.histogram, 95) AS p95`,
+      expectedOutput: `FROM logs-*
+      | STATS avg_request_latency = AVG(transaction.duration.histogram), p95 = PERCENTILE(transaction.duration.histogram, 95)`,
+    });
+
+    expectQuery({
+      input: `FROM traces-apm*
       | WHERE @timestamp >= NOW() - 24 hours
       | STATS AVG(transaction.duration.us) AS avg_duration, SUM(success) AS total_successes, COUNT(*) AS total_requests BY service.name`,
-      `FROM traces-apm*
+      expectedOutput: `FROM traces-apm*
       | WHERE @timestamp >= NOW() - 24 hours
-      | STATS avg_duration = AVG(transaction.duration.us), total_successes = SUM(success), total_requests = COUNT(*) BY service.name`
-    );
+      | STATS avg_duration = AVG(transaction.duration.us), total_successes = SUM(success), total_requests = COUNT(*) BY service.name`,
+    });
   });
-
-  it(`replaces " or ' escaping in FROM statements with backticks`, () => {
-    expectQuery(`FROM "logs-*" | LIMIT 10`, 'FROM logs-*\n| LIMIT 10');
-    expectQuery(`FROM 'logs-*' | LIMIT 10`, 'FROM logs-*\n| LIMIT 10');
-    expectQuery(`FROM logs-* | LIMIT 10`, 'FROM logs-*\n| LIMIT 10');
+  it("replaces ` or ' escaping in FROM statements with double quotes", () => {
+    expectQuery({ input: `FROM "logs-*" | LIMIT 10`, expectedOutput: 'FROM "logs-*"\n| LIMIT 10' });
+    expectQuery({ input: `FROM 'logs-*' | LIMIT 10`, expectedOutput: 'FROM "logs-*"\n| LIMIT 10' });
+    expectQuery({ input: 'FROM `logs-*` | LIMIT 10', expectedOutput: 'FROM "logs-*"\n| LIMIT 10' });
+    expectQuery({
+      input: `FROM 'logs-2024-07-01','logs-2024-07-02' | LIMIT 10`,
+      expectedOutput: 'FROM "logs-2024-07-01","logs-2024-07-02"\n| LIMIT 10',
+    });
+    expectQuery({
+      input: 'FROM `logs-2024-07-01`,`logs-2024-07-02` | LIMIT 10',
+      expectedOutput: 'FROM "logs-2024-07-01","logs-2024-07-02"\n| LIMIT 10',
+    });
+    expectQuery({ input: `FROM logs-* | LIMIT 10`, expectedOutput: 'FROM logs-*\n| LIMIT 10' });
   });
 
   it('replaces = as equal operator with ==', () => {
-    expectQuery(
-      `FROM logs-*\n| WHERE service.name = "foo"`,
-      `FROM logs-*\n| WHERE service.name == "foo"`
-    );
-
-    expectQuery(
-      `FROM logs-*\n| WHERE service.name = "foo" AND service.environment = "bar"`,
-      `FROM logs-*\n| WHERE service.name == "foo" AND service.environment == "bar"`
-    );
-
-    expectQuery(
-      `FROM logs-*\n| WHERE (service.name = "foo" AND service.environment = "bar") OR agent.name = "baz"`,
-      `FROM logs-*\n| WHERE (service.name == "foo" AND service.environment == "bar") OR agent.name == "baz"`
-    );
-
-    expectQuery(
-      `FROM logs-*\n| WHERE \`what=ever\` = "foo=bar"`,
-      `FROM logs-*\n| WHERE \`what=ever\` == "foo=bar"`
-    );
+    expectQuery({
+      input: `FROM logs-*\n| WHERE service.name = "foo"`,
+      expectedOutput: `FROM logs-*\n| WHERE service.name == "foo"`,
+    });
+
+    expectQuery({
+      input: `FROM logs-*\n| WHERE service.name = "foo" AND service.environment = "bar"`,
+      expectedOutput: `FROM logs-*\n| WHERE service.name == "foo" AND service.environment == "bar"`,
+    });
+
+    expectQuery({
+      input: `FROM logs-*\n| WHERE (service.name = "foo" AND service.environment = "bar") OR agent.name = "baz"`,
+      expectedOutput: `FROM logs-*\n| WHERE (service.name == "foo" AND service.environment == "bar") OR agent.name == "baz"`,
+    });
+
+    expectQuery({
+      input: `FROM logs-*\n| WHERE \`what=ever\` = "foo=bar"`,
+      expectedOutput: `FROM logs-*\n| WHERE \`what=ever\` == "foo=bar"`,
+    });
   });
 
   it('replaces single-quote escaped strings with double-quote escaped strings', () => {
-    expectQuery(
-      `FROM nyc_taxis
+    expectQuery({
+      input: `FROM nyc_taxis
     | WHERE DATE_EXTRACT('hour', dropoff_datetime) >= 6 AND DATE_EXTRACT('hour', dropoff_datetime) < 10
     | LIMIT 10`,
-      `FROM nyc_taxis
+      expectedOutput: `FROM nyc_taxis
     | WHERE DATE_EXTRACT("hour", dropoff_datetime) >= 6 AND DATE_EXTRACT("hour", dropoff_datetime) < 10
-    | LIMIT 10`
-    );
-    expectQuery(
-      `FROM nyc_taxis
+    | LIMIT 10`,
+    });
+    expectQuery({
+      input: `FROM nyc_taxis
     | WHERE DATE_EXTRACT('hour', "hh:mm a, 'of' d MMMM yyyy") >= 6 AND DATE_EXTRACT('hour', dropoff_datetime) < 10
     | LIMIT 10`,
-      `FROM nyc_taxis
+      expectedOutput: `FROM nyc_taxis
     | WHERE DATE_EXTRACT("hour", "hh:mm a, 'of' d MMMM yyyy") >= 6 AND DATE_EXTRACT("hour", dropoff_datetime) < 10
-    | LIMIT 10`
-    );
+    | LIMIT 10`,
+    });
   });
 
   it(`verifies if the SORT key is in KEEP, and if it's not, it will include it`, () => {
-    expectQuery(
-      'FROM logs-* \n| KEEP date \n| SORT @timestamp DESC',
-      'FROM logs-*\n| KEEP date, @timestamp\n| SORT @timestamp DESC'
-    );
-
-    expectQuery(
-      `FROM logs-* | KEEP date, whatever | EVAL my_truncated_date_field = DATE_TRUNC(1 year, date) | SORT @timestamp, my_truncated_date_field DESC`,
-      'FROM logs-*\n| KEEP date, whatever, @timestamp\n| EVAL my_truncated_date_field = DATE_TRUNC(1 year, date)\n| SORT @timestamp, my_truncated_date_field DESC'
-    );
-
-    expectQuery(
-      `FROM logs-*\n| STATS COUNT(*) BY BUCKET(@timestamp, 1m)\n| SORT \`BUCKET(@timestamp, 1m)\` DESC`,
-      `FROM logs-*\n| STATS COUNT(*) BY BUCKET(@timestamp, 1m)\n| SORT \`BUCKET(@timestamp, 1m)\` DESC`
-    );
-
-    expectQuery(
-      `FROM logs-* | KEEP date, whatever | RENAME whatever AS forever | SORT forever DESC`,
-      `FROM logs-*\n| KEEP date, whatever\n| RENAME whatever AS forever\n| SORT forever DESC`
-    );
-
-    expectQuery(
-      'FROM employees\n| KEEP first_name, last_name\n| RENAME first_name AS fn, last_name AS ln',
-      'FROM employees\n| KEEP first_name, last_name\n| RENAME first_name AS fn, last_name AS ln'
-    );
+    expectQuery({
+      input: 'FROM logs-* \n| KEEP date \n| SORT @timestamp DESC',
+      expectedOutput: 'FROM logs-*\n| KEEP date, @timestamp\n| SORT @timestamp DESC',
+    });
+
+    expectQuery({
+      input: `FROM logs-* | KEEP date, whatever | EVAL my_truncated_date_field = DATE_TRUNC(1 year, date) | SORT @timestamp, my_truncated_date_field DESC`,
+      expectedOutput:
+        'FROM logs-*\n| KEEP date, whatever, @timestamp\n| EVAL my_truncated_date_field = DATE_TRUNC(1 year, date)\n| SORT @timestamp, my_truncated_date_field DESC',
+    });
+
+    expectQuery({
+      input: `FROM logs-*\n| STATS COUNT(*) BY BUCKET(@timestamp, 1m)\n| SORT \`BUCKET(@timestamp, 1m)\` DESC`,
+      expectedOutput: `FROM logs-*\n| STATS COUNT(*) BY BUCKET(@timestamp, 1m)\n| SORT \`BUCKET(@timestamp, 1m)\` DESC`,
+    });
+
+    expectQuery({
+      input: `FROM logs-* | KEEP date, whatever | RENAME whatever AS forever | SORT forever DESC`,
+      expectedOutput: `FROM logs-*\n| KEEP date, whatever\n| RENAME whatever AS forever\n| SORT forever DESC`,
+    });
+
+    expectQuery({
+      input:
+        'FROM employees\n| KEEP first_name, last_name\n| RENAME first_name AS fn, last_name AS ln',
+      expectedOutput:
+        'FROM employees\n| KEEP first_name, last_name\n| RENAME first_name AS fn, last_name AS ln',
+    });
   });
 
   it(`escapes the column name if SORT uses an expression`, () => {
-    expectQuery(
-      'FROM logs-* \n| STATS COUNT(*) by service.name\n| SORT COUNT(*) DESC',
-      'FROM logs-*\n| STATS COUNT(*) BY service.name\n| SORT `COUNT(*)` DESC'
-    );
-
-    expectQuery(
-      'FROM logs-* \n| STATS COUNT(*) by service.name\n| SORT COUNT(*) DESC, @timestamp ASC',
-      'FROM logs-*\n| STATS COUNT(*) BY service.name\n| SORT `COUNT(*)` DESC, @timestamp ASC'
-    );
-
-    expectQuery(
-      `FROM employees\n| KEEP first_name, last_name, height\n| SORT first_name ASC NULLS FIRST`,
-      `FROM employees\n| KEEP first_name, last_name, height\n| SORT first_name ASC NULLS FIRST`
-    );
-
-    expectQuery(
-      `FROM employees
+    expectQuery({
+      input: 'FROM logs-* \n| STATS COUNT(*) by service.name\n| SORT COUNT(*) DESC',
+      expectedOutput: 'FROM logs-*\n| STATS COUNT(*) BY service.name\n| SORT `COUNT(*)` DESC',
+    });
+
+    expectQuery({
+      input: 'FROM logs-* \n| STATS COUNT(*) by service.name\n| SORT COUNT(*) DESC, @timestamp ASC',
+      expectedOutput:
+        'FROM logs-*\n| STATS COUNT(*) BY service.name\n| SORT `COUNT(*)` DESC, @timestamp ASC',
+    });
+
+    expectQuery({
+      input: `FROM employees\n| KEEP first_name, last_name, height\n| SORT first_name ASC NULLS FIRST`,
+      expectedOutput: `FROM employees\n| KEEP first_name, last_name, height\n| SORT first_name ASC NULLS FIRST`,
+    });
+
+    expectQuery({
+      input: `FROM employees
       | STATS my_count = COUNT() BY LEFT(last_name, 1)
       | SORT \`LEFT(last_name, 1)\``,
-      `FROM employees
+      expectedOutput: `FROM employees
       | STATS my_count = COUNT() BY LEFT(last_name, 1)
-      | SORT \`LEFT(last_name, 1)\``
-    );
+      | SORT \`LEFT(last_name, 1)\``,
+    });
   });
 
   it(`handles complicated queries correctly`, () => {
-    expectQuery(
-      `FROM "postgres-logs*"
+    expectQuery({
+      input: `FROM "postgres-logs*"
       | GROK message "%{TIMESTAMP_ISO8601:timestamp} %{TZ} \[%{NUMBER:process_id}\]: \[%{NUMBER:log_line}\] user=%{USER:user},db=%{USER:database},app=\[%{DATA:application}\],client=%{IP:client_ip} LOG:  duration: %{NUMBER:duration:float} ms  statement: %{GREEDYDATA:statement}"
       | EVAL "@timestamp" = TO_DATETIME(timestamp)
       | WHERE statement LIKE 'SELECT%'
       | STATS avg_duration = AVG(duration)`,
-      `FROM postgres-logs*
+      expectedOutput: `FROM "postgres-logs*"
     | GROK message "%{TIMESTAMP_ISO8601:timestamp} %{TZ} \[%{NUMBER:process_id}\]: \[%{NUMBER:log_line}\] user=%{USER:user},db=%{USER:database},app=\[%{DATA:application}\],client=%{IP:client_ip} LOG:  duration: %{NUMBER:duration:float} ms  statement: %{GREEDYDATA:statement}"
     | EVAL @timestamp = TO_DATETIME(timestamp)
     | WHERE statement LIKE "SELECT%"
-    | STATS avg_duration = AVG(duration)`
-    );
+    | STATS avg_duration = AVG(duration)`,
+    });
 
-    expectQuery(
-      `FROM metrics-apm*
+    expectQuery({
+      input: `FROM metrics-apm*
       | WHERE metricset.name == "service_destination" AND @timestamp > NOW() - 24 hours
       | EVAL total_events = span.destination.service.response_time.count
       | EVAL total_latency = span.destination.service.response_time.sum.us
@@ -161,27 +179,27 @@ describe('correctCommonEsqlMistakes', () => {
           avg_latency_per_request = AVG(total_latency / total_events),
           failure_rate = AVG(is_failure)
         BY span.destination.service.resource`,
-      `FROM metrics-apm*
+      expectedOutput: `FROM metrics-apm*
       | WHERE metricset.name == "service_destination" AND @timestamp > NOW() - 24 hours
       | EVAL total_events = span.destination.service.response_time.count
       | EVAL total_latency = span.destination.service.response_time.sum.us
       | EVAL is_failure = CASE(event.outcome == "failure", 1, 0)
-      | STATS avg_throughput = AVG(total_events), avg_latency_per_request = AVG(total_latency / total_events), failure_rate = AVG(is_failure) BY span.destination.service.resource`
-    );
+      | STATS avg_throughput = AVG(total_events), avg_latency_per_request = AVG(total_latency / total_events), failure_rate = AVG(is_failure) BY span.destination.service.resource`,
+    });
 
-    expectQuery(
-      `FROM sample_data
+    expectQuery({
+      input: `FROM sample_data
       | EVAL successful = CASE(
           STARTS_WITH(message, "Connected to"), 1,
           message == "Connection error", 0
         )
       | STATS success_rate = AVG(successful)`,
-      `FROM sample_data
+      expectedOutput: `FROM sample_data
       | EVAL successful = CASE(
           STARTS_WITH(message, "Connected to"), 1,
           message == "Connection error", 0
         )
-      | STATS success_rate = AVG(successful)`
-    );
+      | STATS success_rate = AVG(successful)`,
+    });
   });
 });
diff --git a/...ion/observability_ai_assistant_app/server/functions/query/correct_common_esql_mistakes.ts b/...ion/observability_ai_assistant_app/server/functions/query/correct_common_esql_mistakes.ts
@@ -234,15 +234,13 @@ export function correctCommonEsqlMistakes(query: string): {
 
   const formattedCommands: string[] = commands.map(({ name, command }, index) => {
     let formattedCommand = command;
-
     switch (name) {
-      case 'FROM':
-        formattedCommand = formattedCommand
-          .replaceAll(/FROM "(.*)"/g, 'FROM $1')
-          .replaceAll(/FROM '(.*)'/g, 'FROM $1')
-          .replaceAll(/FROM `(.*)`/g, 'FROM $1');
+      case 'FROM': {
+        formattedCommand = split(formattedCommand, ',')
+          .map((singlePattern) => singlePattern.replaceAll(/`/g, '"').replaceAll(/'/g, '"'))
+          .join(',');
         break;
-
+      }
       case 'WHERE':
         formattedCommand = replaceSingleQuotesWithDoubleQuotes(formattedCommand);
         formattedCommand = ensureEqualityOperators(formattedCommand);