[8.12] [Obs AI Assistant] ES|QL improvements (#173303) (#173486)

# Backport

This will backport the following commits from `main` to `8.12`:

- [[Obs AI Assistant] ES|QL improvements (#173303)](#173303)

### Questions?

Please refer to the [Backport tool documentation](https://github.com/sqren/backport)

Co-authored-by: Dario Gieselaar <[email protected]>
elastic · Dec 17, 2023 · dab8881 · dab8881
1 parent bd38940
commit dab8881
Show file tree

Hide file tree

Showing 103 changed files with 2,432 additions and 575 deletions.
diff --git a/package.json b/package.json
@@ -893,7 +893,7 @@
     "canvg": "^3.0.9",
     "cbor-x": "^1.3.3",
     "chalk": "^4.1.0",
-    "cheerio": "^1.0.0-rc.10",
+    "cheerio": "^1.0.0-rc.12",
     "chroma-js": "^2.1.0",
     "classnames": "2.2.6",
     "color": "^4.2.3",
@@ -1334,6 +1334,7 @@
     "@types/chroma-js": "^2.1.0",
     "@types/chromedriver": "^81.0.5",
     "@types/classnames": "^2.2.9",
+    "@types/cli-progress": "^3.11.5",
     "@types/color": "^3.0.3",
     "@types/cytoscape": "^3.14.0",
     "@types/d3": "^3.5.43",
@@ -1496,6 +1497,7 @@
     "chance": "1.0.18",
     "chromedriver": "^119.0.1",
     "clean-webpack-plugin": "^3.0.0",
+    "cli-progress": "^3.12.0",
     "cli-table3": "^0.6.1",
     "copy-webpack-plugin": "^6.0.2",
     "cpy": "^8.1.1",

diff --git a/x-pack/plugins/observability_ai_assistant/public/application.tsx b/x-pack/plugins/observability_ai_assistant/public/application.tsx
@@ -42,6 +42,7 @@ export function Application({
         <KibanaContextProvider
           services={{
             ...coreStart,
+            ...pluginsStart,
             plugins: {
               start: pluginsStart,
             },

diff --git a/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/extract_sections.ts b/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/extract_sections.ts
@@ -0,0 +1,41 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+import $, { AnyNode, Cheerio } from 'cheerio';
+
+/**
+ * Splits a rendered documentation page into one entry per `h3` heading found
+ * inside a `.section` element.
+ *
+ * For each heading, the content is the text of the elements that follow it up
+ * to the next `h3`. When the heading itself has no such siblings, the siblings
+ * of its enclosing `.titlepage` element are used instead. `table` and `svg`
+ * descendants of those elements are removed before the text is read, which
+ * also removes them from the document backing `cheerio`.
+ *
+ * @param cheerio Selection to search for `.section h3` headings.
+ * @returns One `{ title, content }` pair per heading, in document order. The
+ *   content starts with the heading text followed by a blank line. The title
+ *   `STATS ... BY` is reported as `STATS`.
+ */
+export function extractSections(
+  cheerio: Cheerio<AnyNode>
+): Array<{ title: string; content: string }> {
+  const sections: Array<{ title: string; content: string }> = [];
+
+  cheerio.find('.section h3').each((_index, element) => {
+    const heading = $(element);
+
+    // Prefer the heading's own following siblings; fall back to the siblings of
+    // the wrapping `.titlepage` element when the heading has none.
+    let untilNextHeader = heading.nextUntil('h3');
+
+    if (untilNextHeader.length === 0) {
+      untilNextHeader = heading.parents('.titlepage').nextUntil('h3');
+    }
+
+    if (untilNextHeader.length === 0) {
+      untilNextHeader = heading.parents('.titlepage').nextAll();
+    }
+
+    // The heading text presumably ends with the label of the docs "edit" link.
+    // Strip only that trailing label (not an "edit" occurring inside the title)
+    // and trim again so no whitespace is left behind where the label was.
+    const title = heading.text().trim().replace(/edit$/, '').trim();
+
+    untilNextHeader.find('table').remove();
+    untilNextHeader.find('svg').remove();
+
+    // Collapse runs of blank / whitespace-only lines into a single newline.
+    const content = untilNextHeader.text().replace(/([\n]\s*){2,}/g, '\n');
+
+    sections.push({
+      title: title === 'STATS ... BY' ? 'STATS' : title,
+      content: `${title}\n\n${content}`,
+    });
+  });
+
+  return sections;
+}
diff --git a/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/index.js b/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/index.js
@@ -0,0 +1,10 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+require('@kbn/babel-register').install(); // presumably hooks Babel into require() so the TypeScript module below can be loaded directly; confirm against @kbn/babel-register
+
+require('./load_esql_docs'); // loading the module runs the script: it parses process.argv at module top level
diff --git a/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/load_esql_docs.ts b/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/load_esql_docs.ts
@@ -0,0 +1,233 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+import { run } from '@kbn/dev-cli-runner';
+import $, { load } from 'cheerio';
+import { SingleBar } from 'cli-progress';
+import FastGlob from 'fast-glob';
+import Fs from 'fs/promises';
+import { once, partition } from 'lodash';
+import pLimit from 'p-limit';
+import Path from 'path';
+import git, { SimpleGitProgressEvent } from 'simple-git';
+import yargs, { Argv } from 'yargs';
+import { extractSections } from './extract_sections';
+
+// Script entry point. Registers a single catch-all yargs command that:
+//   1. makes sure a checkout of elastic/built-docs exists next to the repository and is up to date,
+//   2. extracts text from the ES|QL reference pages (esql*.html) found in it,
+//   3. writes one `esql-<title>.txt` file per extracted document to server/functions/esql/docs.
+yargs(process.argv.slice(2))
+  .command(
+    '*',
+    'Extract ES|QL documentation for the Observability AI Assistant',
+    (y: Argv) =>
+      y.option('logLevel', {
+        describe: 'Log level',
+        string: true,
+        default: process.env.LOG_LEVEL || 'info',
+        choices: ['info', 'debug', 'silent', 'verbose'],
+      }),
+    (argv) => {
+      run(
+        async ({ log }) => {
+          const builtDocsDir = Path.join(__dirname, '../../../../../../built-docs');
+
+          log.debug(`Looking in ${builtDocsDir} for built-docs repository`);
+
+          // `Fs.stat` rejects when the path does not exist, so a missing checkout must be
+          // detected from the ENOENT error instead of from a falsy return value. Any other
+          // failure (e.g. permissions) is still surfaced.
+          let dirExists = true;
+          try {
+            await Fs.stat(builtDocsDir);
+          } catch (error) {
+            if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
+              throw error;
+            }
+            dirExists = false;
+          }
+
+          // Creates a progress bar driven by simple-git progress events. The bar is only
+          // started once the first event arrives.
+          const getProgressHandler = () => {
+            const loader = new SingleBar({
+              barsize: 25,
+              format: `{phase} {bar} {percentage}%`,
+            });
+
+            const start = once(() => {
+              loader.start(100, 0, { phase: 'initializing' });
+            });
+
+            return {
+              progress: (event: SimpleGitProgressEvent) => {
+                start();
+                loader.update(event.progress, { phase: event.method + '/' + event.stage });
+              },
+              stop: () => loader.stop(),
+            };
+          };
+
+          if (!dirExists) {
+            log.info('Cloning built-docs repo. This will take a while.');
+
+            const { progress, stop } = getProgressHandler();
+            try {
+              await git(Path.join(builtDocsDir, '..'), {
+                progress,
+              }).clone(`https://github.com/elastic/built-docs`, builtDocsDir, ['--depth', '1']);
+            } finally {
+              // Always release the progress bar, also when the clone fails.
+              stop();
+            }
+          }
+
+          const { progress, stop } = getProgressHandler();
+
+          const builtDocsGit = git(builtDocsDir, { progress });
+
+          try {
+            log.debug('Initializing simple-git');
+            await builtDocsGit.init();
+
+            log.info('Making sure built-docs is up to date');
+            await builtDocsGit.pull();
+          } finally {
+            stop();
+          }
+
+          const files = FastGlob.sync(
+            `${builtDocsDir}/html/en/elasticsearch/reference/master/esql*.html`
+          );
+
+          // `FastGlob.sync` returns an array, so "nothing matched" is an empty array.
+          if (files.length === 0) {
+            throw new Error('No files found');
+          }
+
+          // Shared by the read and write phases to cap concurrent file operations.
+          const limiter = pLimit(10);
+
+          log.info(`Processing ${files.length} files`);
+
+          const documents: Array<Array<{ title: string; content: string }>> = await Promise.all(
+            files.map((file) =>
+              limiter(async () => {
+                const fileContents = await Fs.readFile(file);
+                const $element = load(fileContents.toString())('*');
+
+                // Returns the text of the last `.section`/`section`/`.part` element, with
+                // `.navfooter` and `#sticky_content` removed, inline code wrapped in backticks
+                // and runs of blank lines collapsed to a single newline.
+                function getSimpleText() {
+                  $element.remove('.navfooter');
+                  $element.remove('#sticky_content');
+                  $element.find('code').each(function () {
+                    $(this).replaceWith('`' + $(this).text() + '`');
+                  });
+                  return $element
+                    .find('.section,section,.part')
+                    .last()
+                    .text()
+                    .replace(/([\n]\s*){2,}/g, '\n');
+                }
+
+                switch (Path.basename(file)) {
+                  case 'esql-commands.html':
+                    return extractSections($element);
+
+                  case 'esql-limitations.html':
+                    return [
+                      {
+                        title: 'Limitations',
+                        content: getSimpleText(),
+                      },
+                    ];
+
+                  case 'esql-syntax.html':
+                    return [
+                      {
+                        title: 'Syntax',
+                        content: getSimpleText(),
+                      },
+                    ];
+
+                  case 'esql.html':
+                    return [
+                      {
+                        title: 'Overview',
+                        // Drop everything from this sentence to the end of the text.
+                        // `[\s\S]*` is used because `.` does not match the newlines that
+                        // remain in the text.
+                        content: getSimpleText().replace(
+                          /The ES\|QL documentation is organized in these sections[\s\S]*$/,
+                          ''
+                        ),
+                      },
+                    ];
+
+                  case 'esql-functions-operators.html': {
+                    const sections = extractSections($element);
+
+                    // Section titles are matched against these by case-insensitive prefix.
+                    const searches = [
+                      'Binary operators',
+                      'Equality',
+                      'Inequality',
+                      'Less than',
+                      'Greater than',
+                      'Add +',
+                      'Subtract -',
+                      'Multiply *',
+                      'Divide /',
+                      'Modulus %',
+                      'Unary operators',
+                      'Logical operators',
+                      'IS NULL',
+                    ];
+
+                    // Section titles are matched against these exactly.
+                    const matches = [
+                      'CIDR_MATCH',
+                      'ENDS_WITH',
+                      'IN',
+                      'IS_FINITE',
+                      'IS_INFINITE',
+                      'IS_NAN',
+                      'LIKE',
+                      'RLIKE',
+                      'STARTS_WITH',
+                    ];
+
+                    const [operatorSections, allOtherSections] = partition(sections, (section) => {
+                      return (
+                        matches.includes(section.title) ||
+                        searches.some((search) =>
+                          section.title.toLowerCase().startsWith(search.toLowerCase())
+                        )
+                      );
+                    });
+
+                    // All matching sections are merged into a single "Operators" document;
+                    // every other section stays a document of its own.
+                    return allOtherSections.concat({
+                      title: 'Operators',
+                      content: operatorSections
+                        .map(({ title, content }) => `${title}\n${content}`)
+                        .join('\n'),
+                    });
+                  }
+
+                  default:
+                    break;
+                }
+                // Any other esql*.html page contributes no documents.
+                return [];
+              })
+            )
+          );
+
+          const flattened = documents.flat().filter((doc) => {
+            return !doc.title.startsWith('ES|QL');
+          });
+
+          const outDir = Path.join(__dirname, '../../server/functions/esql/docs');
+
+          log.info(`Writing ${flattened.length} documents to disk to ${outDir}`);
+
+          log.debug(`Clearing ${outDir}`);
+
+          // `force` keeps a first run (no output directory yet) from failing, and
+          // `recursive` on mkdir creates any missing parent directories.
+          await Fs.rm(outDir, { recursive: true, force: true });
+
+          await Fs.mkdir(outDir, { recursive: true });
+
+          await Promise.all(
+            flattened.map((doc) =>
+              limiter(async () => {
+                const fileName = Path.join(
+                  outDir,
+                  `esql-${doc.title.replace(/ /g, '-').toLowerCase()}.txt`
+                );
+                await Fs.writeFile(fileName, doc.content);
+              })
+            )
+          );
+        },
+        { log: { defaultLevel: argv.logLevel as any }, flags: { allowUnexpected: true } }
+      );
+    }
+  )
+  .parse();