Skip to content

Commit

Permalink
[8.12] [Obs AI Assistant] ES|QL improvements (#173303) (#173486)
Browse files Browse the repository at this point in the history
# Backport

This will backport the following commits from `main` to `8.12`:
- [[Obs AI Assistant] ES|QL improvements
(#173303)](#173303)

<!--- Backport version: 8.9.7 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Dario
Gieselaar","email":"[email protected]"},"sourceCommit":{"committedDate":"2023-12-17T21:23:50Z","message":"[Obs
AI Assistant] ES|QL improvements (#173303)\n\nUse a combination of
system prompt + recall for ES|QL to allow for:\r\n\r\n- faster
responses\r\n- wider support of functions (e.g. any documented function
is now\r\nsupported, in theory)\r\n\r\nPerformance for the current ES|QL
scenarios is similar.\r\n\r\nSome things I've noticed:\r\n- ~for
whatever reason it is much harder to get the LLM to not make\r\ncertain
mistakes. I've tried mentioning some mistakes close to the end\r\nof the
conversation, but will explore other options.~ I am again opting\r\nfor
repeating common mistakes at the start of the message, but this
time\r\nI'm allowing the LLM to pick them.\r\n- if we can prevent it
from making \"known\" mistakes, we can figure out a\r\nway to have the
LLM store known mistakes. This is also useful for
other\r\nfunctions\r\n- another thing I want to figure out is if more
examples improves for\r\ncommands and custom documentation improves
performance. but later.\r\n\r\n---------\r\n\r\nCo-authored-by:
kibanamachine
<[email protected]>","sha":"d54109d8f8243cae00f6871c73d3bdc19f4f9905","branchLabelMapping":{"^v8.13.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:fix","v8.12.0","v8.12.1","v8.13.0"],"number":173303,"url":"https://github.com/elastic/kibana/pull/173303","mergeCommit":{"message":"[Obs
AI Assistant] ES|QL improvements (#173303)\n\nUse a combination of
system prompt + recall for ES|QL to allow for:\r\n\r\n- faster
responses\r\n- wider support of functions (e.g. any documented function
is now\r\nsupported, in theory)\r\n\r\nPerformance for the current ES|QL
scenarios is similar.\r\n\r\nSome things I've noticed:\r\n- ~for
whatever reason it is much harder to get the LLM to not make\r\ncertain
mistakes. I've tried mentioning some mistakes close to the end\r\nof the
conversation, but will explore other options.~ I am again opting\r\nfor
repeating common mistakes at the start of the message, but this
time\r\nI'm allowing the LLM to pick them.\r\n- if we can prevent it
from making \"known\" mistakes, we can figure out a\r\nway to have the
LLM store known mistakes. This is also useful for
other\r\nfunctions\r\n- another thing I want to figure out is if more
examples improves for\r\ncommands and custom documentation improves
performance. but later.\r\n\r\n---------\r\n\r\nCo-authored-by:
kibanamachine
<[email protected]>","sha":"d54109d8f8243cae00f6871c73d3bdc19f4f9905"}},"sourceBranch":"main","suggestedTargetBranches":["8.12"],"targetPullRequestStates":[{"branch":"8.12","label":"v8.12.0","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v8.13.0","labelRegex":"^v8.13.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/173303","number":173303,"mergeCommit":{"message":"[Obs
AI Assistant] ES|QL improvements (#173303)\n\nUse a combination of
system prompt + recall for ES|QL to allow for:\r\n\r\n- faster
responses\r\n- wider support of functions (e.g. any documented function
is now\r\nsupported, in theory)\r\n\r\nPerformance for the current ES|QL
scenarios is similar.\r\n\r\nSome things I've noticed:\r\n- ~for
whatever reason it is much harder to get the LLM to not make\r\ncertain
mistakes. I've tried mentioning some mistakes close to the end\r\nof the
conversation, but will explore other options.~ I am again opting\r\nfor
repeating common mistakes at the start of the message, but this
time\r\nI'm allowing the LLM to pick them.\r\n- if we can prevent it
from making \"known\" mistakes, we can figure out a\r\nway to have the
LLM store known mistakes. This is also useful for
other\r\nfunctions\r\n- another thing I want to figure out is if more
examples improves for\r\ncommands and custom documentation improves
performance. but later.\r\n\r\n---------\r\n\r\nCo-authored-by:
kibanamachine
<[email protected]>","sha":"d54109d8f8243cae00f6871c73d3bdc19f4f9905"}}]}]
BACKPORT-->

Co-authored-by: Dario Gieselaar <[email protected]>
  • Loading branch information
kibanamachine and dgieselaar authored Dec 17, 2023
1 parent bd38940 commit dab8881
Show file tree
Hide file tree
Showing 103 changed files with 2,432 additions and 575 deletions.
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -893,7 +893,7 @@
"canvg": "^3.0.9",
"cbor-x": "^1.3.3",
"chalk": "^4.1.0",
"cheerio": "^1.0.0-rc.10",
"cheerio": "^1.0.0-rc.12",
"chroma-js": "^2.1.0",
"classnames": "2.2.6",
"color": "^4.2.3",
Expand Down Expand Up @@ -1334,6 +1334,7 @@
"@types/chroma-js": "^2.1.0",
"@types/chromedriver": "^81.0.5",
"@types/classnames": "^2.2.9",
"@types/cli-progress": "^3.11.5",
"@types/color": "^3.0.3",
"@types/cytoscape": "^3.14.0",
"@types/d3": "^3.5.43",
Expand Down Expand Up @@ -1496,6 +1497,7 @@
"chance": "1.0.18",
"chromedriver": "^119.0.1",
"clean-webpack-plugin": "^3.0.0",
"cli-progress": "^3.12.0",
"cli-table3": "^0.6.1",
"copy-webpack-plugin": "^6.0.2",
"cpy": "^8.1.1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export function Application({
<KibanaContextProvider
services={{
...coreStart,
...pluginsStart,
plugins: {
start: pluginsStart,
},
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import $, { AnyNode, Cheerio } from 'cheerio';

/**
 * Splits a parsed documentation page into one entry per `h3` heading found
 * inside a `.section` element.
 *
 * For each heading the content is the text of the elements that follow it up
 * to the next `h3`, with `table` and `svg` descendants removed and runs of
 * blank lines collapsed into a single newline.
 *
 * @param cheerio Cheerio selection to search for `.section h3` headings.
 * @returns One `{ title, content }` object per heading, in document order.
 *          `content` is prefixed with the heading text followed by a blank line.
 */
export function extractSections(cheerio: Cheerio<AnyNode>) {
  const sections: Array<{
    title: string;
    content: string;
  }> = [];

  cheerio.find('.section h3').each((_index, element) => {
    const $heading = $(element);

    let untilNextHeader = $heading.nextUntil('h3');

    // Fallbacks for headings without following siblings of their own: look at
    // the siblings of the enclosing `.titlepage` element instead.
    if (untilNextHeader.length === 0) {
      untilNextHeader = $heading.parents('.titlepage').nextUntil('h3');
    }

    if (untilNextHeader.length === 0) {
      untilNextHeader = $heading.parents('.titlepage').nextAll();
    }

    // Strip only a trailing "edit" (presumably the label of an "edit" link
    // rendered at the end of the heading — confirm against the docs markup)
    // and trim again, so that a title containing "edit" elsewhere is left
    // intact and no whitespace is left behind where the suffix was.
    const title = $heading.text().trim().replace(/edit$/, '').trim();

    // Work on a copy: removing tables and images from the live nodes would edit
    // the shared document, and a later heading's content (or whatever the
    // caller does with the selection afterwards) would silently lose them.
    const sectionBody = untilNextHeader.clone();

    // Tables and inline images do not translate into useful plain text.
    sectionBody.find('table').remove();
    sectionBody.find('svg').remove();

    const text = sectionBody.text();

    // Collapse consecutive blank lines into a single newline.
    const content = text.replaceAll(/([\n]\s*){2,}/g, '\n');

    sections.push({
      // "STATS ... BY" is published under the shorter "STATS" title; the
      // content keeps the original heading text.
      title: title === 'STATS ... BY' ? 'STATS' : title,
      content: `${title}\n\n${content}`,
    });
  });

  return sections;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

// Entry point: install @kbn/babel-register before requiring the script below.
// NOTE(review): presumably this registers the transpilation hook that lets the
// TypeScript module be loaded through plain `require` — confirm.
require('@kbn/babel-register').install();

// Requiring the module is done purely for its side effects (presumably it runs
// the ES|QL documentation extraction CLI — verify against load_esql_docs).
require('./load_esql_docs');
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { run } from '@kbn/dev-cli-runner';
import $, { load } from 'cheerio';
import { SingleBar } from 'cli-progress';
import FastGlob from 'fast-glob';
import Fs from 'fs/promises';
import { once, partition } from 'lodash';
import pLimit from 'p-limit';
import Path from 'path';
import git, { SimpleGitProgressEvent } from 'simple-git';
import yargs, { Argv } from 'yargs';
import { extractSections } from './extract_sections';

/**
 * CLI entry point: extracts ES|QL documentation from a local checkout of the
 * `elastic/built-docs` repository and writes one text file per document into
 * `server/functions/esql/docs`.
 *
 * Steps:
 *  1. Clone `built-docs` (shallow) next to this repository if it is missing.
 *  2. Pull the latest changes.
 *  3. Parse every `esql*.html` reference page and turn it into
 *     `{ title, content }` documents.
 *  4. Clear the output directory and write `esql-<title>.txt` files.
 */
yargs(process.argv.slice(2))
  .command(
    '*',
    'Extract ES|QL documentation for the Observability AI Assistant',
    (y: Argv) =>
      y.option('logLevel', {
        describe: 'Log level',
        string: true,
        default: process.env.LOG_LEVEL || 'info',
        choices: ['info', 'debug', 'silent', 'verbose'],
      }),
    (argv) => {
      run(
        async ({ log }) => {
          const builtDocsDir = Path.join(__dirname, '../../../../../../built-docs');

          log.debug(`Looking in ${builtDocsDir} for built-docs repository`);

          // `Fs.stat` rejects with ENOENT when the path is missing, so the
          // result has to be derived from the rejection instead of from a
          // falsy return value; otherwise the clone below is never reached.
          let dirExists = false;
          try {
            dirExists = (await Fs.stat(builtDocsDir)).isDirectory();
          } catch (error) {
            if ((error as NodeJS.ErrnoException)?.code !== 'ENOENT') {
              throw error;
            }
          }

          // Creates a progress bar that is started lazily on the first git
          // progress event and relabelled with the current method/stage.
          const getProgressHandler = () => {
            const loader: SingleBar = new SingleBar({
              barsize: 25,
              format: `{phase} {bar} {percentage}%`,
            });

            const start = once(() => {
              loader.start(100, 0, { phase: 'initializing' });
            });

            return {
              progress: (event: SimpleGitProgressEvent) => {
                start();
                loader.update(event.progress, { phase: `${event.method}/${event.stage}` });
              },
              stop: () => loader.stop(),
            };
          };

          if (!dirExists) {
            log.info('Cloning built-docs repo. This will take a while.');

            const { progress: cloneProgress, stop: stopCloneProgress } = getProgressHandler();
            try {
              await git(Path.join(builtDocsDir, '..'), {
                progress: cloneProgress,
              }).clone(`https://github.com/elastic/built-docs`, builtDocsDir, ['--depth', '1']);
            } finally {
              // Always release the progress bar, even when the clone fails.
              stopCloneProgress();
            }
          }

          const { progress, stop } = getProgressHandler();

          let files: string[];
          try {
            const builtDocsGit = git(builtDocsDir, { progress });

            log.debug('Initializing simple-git');
            await builtDocsGit.init();

            log.info('Making sure built-docs is up to date');
            await builtDocsGit.pull();

            files = FastGlob.sync(
              `${builtDocsDir}/html/en/elasticsearch/reference/master/esql*.html`
            );
          } finally {
            stop();
          }

          // FastGlob returns an (always truthy) array, so emptiness has to be
          // checked through its length.
          if (files.length === 0) {
            throw new Error('No files found');
          }

          // Caps the number of concurrent file reads/writes.
          const limiter = pLimit(10);

          log.info(`Processing ${files.length} files`);

          const documents: Array<Array<{ title: string; content: string }>> = await Promise.all(
            files.map((file) =>
              limiter(async () => {
                const fileContents = await Fs.readFile(file);
                const $element = load(fileContents.toString())('*');

                // Plain-text rendition of the last section-like element of the
                // page: navigation chrome removed, inline code wrapped in
                // backticks, consecutive blank lines collapsed.
                function getSimpleText() {
                  $element.remove('.navfooter');
                  $element.remove('#sticky_content');
                  $element.find('code').each((_index, codeElement) => {
                    const $code = $(codeElement);
                    $code.replaceWith(`\`${$code.text()}\``);
                  });
                  return $element
                    .find('.section,section,.part')
                    .last()
                    .text()
                    .replaceAll(/([\n]\s*){2,}/g, '\n');
                }

                switch (Path.basename(file)) {
                  case 'esql-commands.html':
                    return extractSections($element);

                  case 'esql-limitations.html':
                    return [
                      {
                        title: 'Limitations',
                        content: getSimpleText(),
                      },
                    ];

                  case 'esql-syntax.html':
                    return [
                      {
                        title: 'Syntax',
                        content: getSimpleText(),
                      },
                    ];

                  case 'esql.html':
                    return [
                      {
                        title: 'Overview',
                        content: getSimpleText().replace(
                          /The ES\|QL documentation is organized in these sections(.*)$/,
                          ''
                        ),
                      },
                    ];

                  case 'esql-functions-operators.html': {
                    const sections = extractSections($element);

                    // Title prefixes (matched case-insensitively) of sections
                    // that describe operators.
                    const searches = [
                      'Binary operators',
                      'Equality',
                      'Inequality',
                      'Less than',
                      'Greater than',
                      'Add +',
                      'Subtract -',
                      'Multiply *',
                      'Divide /',
                      'Modulus %',
                      'Unary operators',
                      'Logical operators',
                      'IS NULL',
                    ];

                    // Exact titles that are also grouped with the operators.
                    const matches = [
                      'CIDR_MATCH',
                      'ENDS_WITH',
                      'IN',
                      'IS_FINITE',
                      'IS_INFINITE',
                      'IS_NAN',
                      'LIKE',
                      'RLIKE',
                      'STARTS_WITH',
                    ];

                    const [operatorSections, allOtherSections] = partition(
                      sections,
                      (section) =>
                        matches.includes(section.title) ||
                        searches.some((search) =>
                          section.title.toLowerCase().startsWith(search.toLowerCase())
                        )
                    );

                    // All operator sections are merged into a single
                    // "Operators" document; everything else stays separate.
                    return [
                      ...allOtherSections,
                      {
                        title: 'Operators',
                        content: operatorSections
                          .map(({ title, content }) => `${title}\n${content}`)
                          .join('\n'),
                      },
                    ];
                  }

                  default:
                    // Any other esql*.html page contributes no documents.
                    return [];
                }
              })
            )
          );

          // Skip documents whose title starts with "ES|QL".
          const flattened = documents.flat().filter((doc) => !doc.title.startsWith('ES|QL'));

          const outDir = Path.join(__dirname, '../../server/functions/esql/docs');

          log.info(`Writing ${flattened.length} documents to disk to ${outDir}`);

          log.debug(`Clearing ${outDir}`);

          // `force` keeps the first run (no output directory yet) from failing.
          await Fs.rm(outDir, { recursive: true, force: true });

          await Fs.mkdir(outDir, { recursive: true });

          await Promise.all(
            flattened.map((doc) =>
              limiter(async () => {
                const fileName = Path.join(
                  outDir,
                  `esql-${doc.title.replaceAll(' ', '-').toLowerCase()}.txt`
                );
                await Fs.writeFile(fileName, doc.content);
              })
            )
          );
        },
        { log: { defaultLevel: argv.logLevel as any }, flags: { allowUnexpected: true } }
      );
    }
  )
  .parse();
Loading

0 comments on commit dab8881

Please sign in to comment.