diff --git a/package.json b/package.json index acc2350484030..0d5469e2b0cc5 100644 --- a/package.json +++ b/package.json @@ -893,7 +893,7 @@ "canvg": "^3.0.9", "cbor-x": "^1.3.3", "chalk": "^4.1.0", - "cheerio": "^1.0.0-rc.10", + "cheerio": "^1.0.0-rc.12", "chroma-js": "^2.1.0", "classnames": "2.2.6", "color": "^4.2.3", @@ -1334,6 +1334,7 @@ "@types/chroma-js": "^2.1.0", "@types/chromedriver": "^81.0.5", "@types/classnames": "^2.2.9", + "@types/cli-progress": "^3.11.5", "@types/color": "^3.0.3", "@types/cytoscape": "^3.14.0", "@types/d3": "^3.5.43", @@ -1496,6 +1497,7 @@ "chance": "1.0.18", "chromedriver": "^119.0.1", "clean-webpack-plugin": "^3.0.0", + "cli-progress": "^3.12.0", "cli-table3": "^0.6.1", "copy-webpack-plugin": "^6.0.2", "cpy": "^8.1.1", diff --git a/x-pack/plugins/observability_ai_assistant/public/application.tsx b/x-pack/plugins/observability_ai_assistant/public/application.tsx index 9ae03a3d72f6a..15ed0243bd921 100644 --- a/x-pack/plugins/observability_ai_assistant/public/application.tsx +++ b/x-pack/plugins/observability_ai_assistant/public/application.tsx @@ -42,6 +42,7 @@ export function Application({ ) { + const sections: Array<{ + title: string; + content: string; + }> = []; + cheerio.find('.section h3').each((index, element) => { + let untilNextHeader = $(element).nextUntil('h3'); + + if (untilNextHeader.length === 0) { + untilNextHeader = $(element).parents('.titlepage').nextUntil('h3'); + } + + if (untilNextHeader.length === 0) { + untilNextHeader = $(element).parents('.titlepage').nextAll(); + } + + const title = $(element).text().trim().replace('edit', ''); + + untilNextHeader.find('table').remove(); + untilNextHeader.find('svg').remove(); + + const text = untilNextHeader.text(); + + const content = text.replaceAll(/([\n]\s*){2,}/g, '\n'); + + sections.push({ + title: title === 'STATS ... BY' ? 
'STATS' : title, + content: `${title}\n\n${content}`, + }); + }); + + return sections; +} diff --git a/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/index.js b/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/index.js new file mode 100644 index 0000000000000..b9f96574c6e61 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/index.js @@ -0,0 +1,10 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +require('@kbn/babel-register').install(); + +require('./load_esql_docs'); diff --git a/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/load_esql_docs.ts b/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/load_esql_docs.ts new file mode 100644 index 0000000000000..20bb43cd49d11 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/scripts/load_esql_docs/load_esql_docs.ts @@ -0,0 +1,233 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ +import { run } from '@kbn/dev-cli-runner'; +import $, { load } from 'cheerio'; +import { SingleBar } from 'cli-progress'; +import FastGlob from 'fast-glob'; +import Fs from 'fs/promises'; +import { once, partition } from 'lodash'; +import pLimit from 'p-limit'; +import Path from 'path'; +import git, { SimpleGitProgressEvent } from 'simple-git'; +import yargs, { Argv } from 'yargs'; +import { extractSections } from './extract_sections'; + +yargs(process.argv.slice(2)) /* CLI entry point: clones or updates a local elastic/built-docs checkout, extracts the ES|QL reference pages into { title, content } sections and writes one esql-<title>.txt file per section */ + .command( + '*', + 'Extract ES|QL documentation for the Observability AI Assistant', + (y: Argv) => + y.option('logLevel', { + describe: 'Log level', + string: true, + default: process.env.LOG_LEVEL || 'info', + choices: ['info', 'debug', 'silent', 'verbose'], + }), + (argv) => { + run( + async ({ log }) => { + const builtDocsDir = Path.join(__dirname, '../../../../../../built-docs'); + + log.debug(`Looking in ${builtDocsDir} for built-docs repository`); + + const dirExists = await Fs.stat(builtDocsDir).then((stats) => stats.isDirectory(), () => false); /* Fs.stat rejects for a missing path instead of resolving to a falsy value, so map the rejection to false; otherwise the clone branch below can never run */ + + const getProgressHandler = () => { + let stage: string = ''; + let method: string = ''; + const loader: SingleBar = new SingleBar({ + barsize: 25, + format: `{phase} {bar} {percentage}%`, + }); + + const start = once(() => { + loader.start(100, 0, { phase: 'initializing' }); + }); + + return { + progress: (event: SimpleGitProgressEvent) => { + start(); + if (event.stage !== stage || event.method !== method) { + stage = event.stage; + method = event.method; + } + loader.update(event.progress, { phase: event.method + '/' + event.stage }); + }, + stop: () => loader.stop(), + }; + }; + + if (!dirExists) { + log.info('Cloning built-docs repo. 
This will take a while.'); + + const { progress, stop } = getProgressHandler(); + await git(Path.join(builtDocsDir, '..'), { + progress, + }).clone(`https://github.com/elastic/built-docs`, builtDocsDir, ['--depth', '1']); + + stop(); + } + + const { progress, stop } = getProgressHandler(); + + const builtDocsGit = git(builtDocsDir, { progress }); + + log.debug('Initializing simple-git'); + await builtDocsGit.init(); + + log.info('Making sure built-docs is up to date'); + await builtDocsGit.pull(); + + const files = FastGlob.sync( + `${builtDocsDir}/html/en/elasticsearch/reference/master/esql*.html` + ); + + if (files.length === 0) { /* FastGlob.sync always returns an array (possibly empty), so test its length rather than its truthiness */ + throw new Error('No files found'); + } + + const limiter = pLimit(10); + + stop(); + + log.info(`Processing ${files.length} files`); + + const documents: Array<Array<{ title: string; content: string }>> = await Promise.all( + files.map((file) => + limiter(async () => { + const fileContents = await Fs.readFile(file); + const $element = load(fileContents.toString())('*'); + + function getSimpleText() { + $element.remove('.navfooter'); + $element.remove('#sticky_content'); + $element.find('code').each(function () { + $(this).replaceWith('`' + $(this).text() + '`'); + }); + return $element + .find('.section,section,.part') + .last() + .text() + .replaceAll(/([\n]\s*){2,}/g, '\n'); + } + + switch (Path.basename(file)) { + case 'esql-commands.html': + return extractSections($element); + + case 'esql-limitations.html': + return [ + { + title: 'Limitations', + content: getSimpleText(), + }, + ]; + + case 'esql-syntax.html': + return [ + { + title: 'Syntax', + content: getSimpleText(), + }, + ]; + case 'esql.html': + return [ + { + title: 'Overview', + content: getSimpleText().replace( + /The ES\|QL documentation is organized in these sections(.*)$/s, /* s flag: the section list that follows spans several lines, which "." does not cross by default */ + '' + ), + }, + ]; + + case 'esql-functions-operators.html': + const sections = extractSections($element); + + const searches = [ + 'Binary operators', + 'Equality', + 'Inequality', + 'Less than', + 'Greater than', + 'Add +', + 'Subtract -', + 
'Multiply *', + 'Divide /', + 'Modulus %', + 'Unary operators', + 'Logical operators', + 'IS NULL', + ]; + + const matches = [ + 'CIDR_MATCH', + 'ENDS_WITH', + 'IN', + 'IS_FINITE', + 'IS_INFINITE', + 'IS_NAN', + 'LIKE', + 'RLIKE', + 'STARTS_WITH', + ]; + + const [operatorSections, allOtherSections] = partition(sections, (section) => { + return ( + matches.includes(section.title) || + searches.some((search) => + section.title.toLowerCase().startsWith(search.toLowerCase()) + ) + ); + }); + + return allOtherSections.concat({ + title: 'Operators', + content: operatorSections + .map(({ title, content }) => `${title}\n${content}`) + .join('\n'), + }); + + default: + break; + } + return []; + }) + ) + ); + + const flattened = documents.flat().filter((doc) => { + return !doc.title.startsWith('ES|QL'); + }); + + const outDir = Path.join(__dirname, '../../server/functions/esql/docs'); + + log.info(`Writing ${flattened.length} documents to disk to ${outDir}`); + + log.debug(`Clearing ${outDir}`); + + await Fs.rm(outDir, { recursive: true, force: true }); /* force: a missing outDir (first run) is not an error */ + + await Fs.mkdir(outDir, { recursive: true }); + + await Promise.all( + flattened.map((doc) => + limiter(async () => { + const fileName = Path.join( + outDir, + `esql-${doc.title.replaceAll(' ', '-').toLowerCase()}.txt` + ); + await Fs.writeFile(fileName, doc.content); + }) + ) + ); + }, + { log: { defaultLevel: argv.logLevel as any }, flags: { allowUnexpected: true } } + ); + } + ) + .parse(); diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql.ts b/x-pack/plugins/observability_ai_assistant/server/functions/esql.ts deleted file mode 100644 index 88997452c0ad8..0000000000000 --- a/x-pack/plugins/observability_ai_assistant/server/functions/esql.ts +++ /dev/null @@ -1,562 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -import dedent from 'dedent'; -import { Observable } from 'rxjs'; -import type { FunctionRegistrationParameters } from '.'; -import { - type CreateChatCompletionResponseChunk, - FunctionVisibility, - MessageRole, -} from '../../common/types'; -import { processOpenAiStream } from '../../common/utils/process_openai_stream'; -import { streamIntoObservable } from '../service/util/stream_into_observable'; - -export function registerEsqlFunction({ - client, - registerFunction, - resources, -}: FunctionRegistrationParameters) { - registerFunction( - { - name: 'execute_query', - contexts: ['core'], - visibility: FunctionVisibility.User, - description: 'Execute an ES|QL query', - parameters: { - type: 'object', - additionalProperties: false, - properties: { - query: { - type: 'string', - }, - }, - required: ['query'], - } as const, - }, - async ({ arguments: { query } }) => { - const response = await ( - await resources.context.core - ).elasticsearch.client.asCurrentUser.transport.request({ - method: 'POST', - path: '_query', - body: { - query, - }, - }); - - return { content: response }; - } - ); - - registerFunction( - { - name: 'esql', - contexts: ['core'], - description: `This function answers ES|QL related questions including query generation and syntax/command questions.`, - visibility: FunctionVisibility.System, - parameters: { - type: 'object', - additionalProperties: false, - properties: { - switch: { - type: 'boolean', - }, - }, - } as const, - }, - async ({ messages, connectorId }, signal) => { - const systemMessage = dedent(`You are a helpful assistant for Elastic ES|QL. - Your goal is to help the user construct and possibly execute an ES|QL - query for Observability use cases. - - ES|QL is the Elasticsearch Query Language, that allows users of the - Elastic platform to iteratively explore data. An ES|QL query consists - of a series of commands, separated by pipes. 
Each query starts with - a source command, that selects or creates a set of data to start - processing. This source command is then followed by one or more - processing commands, which can transform the data returned by the - previous command. - - ES|QL is not Elasticsearch SQL, nor is it anything like SQL. SQL - commands are not available in ES|QL. Its close equivalent is SPL - (Search Processing Language). Make sure you reply using only - the context of this conversation. - - # Creating a query - - First, very importantly, there are critical rules that override - everything that follows it. Always repeat these rules, verbatim. - - 1. ES|QL is not Elasticsearch SQL. Do not apply Elasticsearch SQL - commands, functions and concepts. Only use information available - in the context of this conversation. - 2. Use a WHERE clause as early and often as possible, because - it limits the number of documents that need to be evaluated. - 3. Use EVAL to create new columns that require mathemetical - operations or non-aggregation functions like CASE, ROUND or - DATE_EXTRACT. YOU MUST DO THIS before using these operations - in a STATS command. - 4. DO NOT UNDER ANY CIRCUMSTANCES: - - wrap a data source in single or double quotes when using FROM - - use COUNT(*) or COUNT(). A single argument (field name) is - required, like COUNT(my.field.name). - - use the AS keyword. Create a new column by using the = operator. - this is wrong: STATS SUM(field) AS sum_field. - - When constructing a query, break it down into the following steps. - Ask these questions out loud so the user can see your reasoning. - Remember, these rules are for you, not for the user. - - - What are the critical rules I need to think of? - - What data source is the user requesting? What command should I - select for this data source? Don't use any quotes to wrap the - source. - - Does the data set need to be filtered? Use the WHERE clause for - this, as it improves performance. 
- - Do I need to add columns that use math or other non-aggregation - functions like CASE using the EVAL command before I run the STATS - BY command with aggregation functions? - - If I run a STATS command, what columns are available after the - command? - - What are the steps needed to get the result that the user needs? - Break each operation down into its own step. Reason about what data - is the outcome of each command or function. - - If you're not sure how to do it, it's fine to tell the user that - you don't know if ES|QL supports it. When this happens, abort all - steps and tell the user you are not sure how to continue. - - Format ALL of your responses as follows, including the dashes. - ALWAYS start your message with two dashes and then the rules: - - \`\`\` - -- - Sure, let's remember the critical rules: - - -- - Let's break down the query step-by-step: - - - \`\`\`esql - - \`\`\` - \`\`\` - - Always format a complete query as follows: - \`\`\`esql - ... - \`\`\` - - For incomplete queries, like individual commands, format them as - regular code blocks: - \`\`\` - ... - \`\`\` - - # Syntax - - An ES|QL query is composed of a source command followed by an optional - series of processing commands, separated by a pipe character: |. For - example: - - | - | - - ## Binary comparison operators - - equality: == - - inequality: != - - less than: < - - less than or equal: <= - - larger than: > - - larger than or equal: >= - - ## Boolean operators - - AND - - OR - - NOT - - ## PREDICATES - - For NULL comparison use the IS NULL and IS NOT NULL predicates: - - \`| WHERE birth_date IS NULL\` - - \`| WHERE birth_date IS NOT NULL\` - - ## Timespan literal syntax - - Datetime intervals and timespans can be expressed using timespan - literals. Timespan literals are a combination of a number and a - qualifier. 
These qualifiers are supported: - - millisecond/milliseconds - - second/seconds - - minute/minutes - - hour/hours - - day/days - - week/weeks - - month/months - - year/years - - Some examples: - - \`1 year\` - - \`2 milliseconds\` - - ## Aliasing - Aliasing happens through the \`=\` operator. Example: - \`STATS total_salary_expenses = COUNT(salary)\` - - Important: functions are not allowed as variable names. - - # Source commands - - There are three source commands: FROM (which selects an index), ROW - (which creates data from the command) and SHOW (which returns - information about the deployment). You do not support SHOW for now. - - ### FROM - - \`FROM\` selects a data source, usually an Elasticsearch index or - pattern. You can also specify multiple indices. DO NOT UNDER ANY - CIRCUMSTANCES wrap an index or pattern in single or double quotes - as such: \`FROM "my_index.pattern-*"\`. - Some examples: - - - \`FROM employees\` - - \`FROM employees.annual_salaries-*\` - - \`FROM employees*,my-alias,my-index.with-a-dot*\` - - # Processing commands - - Note that the following processing commands are available in ES|QL, - but not supported in this context: - - ENRICH,GROK,MV_EXPAND,RENAME - - ### DISSECT - - \`DISSECT\` enables you to extract structured data out of a string. - It matches the string against a delimiter-based pattern, and extracts - the specified keys as columns. It uses the same syntax as the - Elasticsearch Dissect Processor. DO NOT UNDER ANY CIRCUMSTANCES use - single quotes instead of double quotes. Some examples: - - - \`ROW a = "foo bar" | DISSECT a "%{b} %{c}";\` - - \`ROW a = "foo bar baz" | DISSECT a "%{b} %{?c} %{d}";\` - - ### DROP - - \`DROP\` removes columns. Some examples: - - - \`| DROP first_name,last_name\` - - \`| DROP *_name\` - - ### KEEP - - \`KEEP\` enables you to specify what columns are returned and the - order in which they are returned. 
Some examples: - - - \`| KEEP first_name,last_name\` - - \`| KEEP *_name\` - - ### SORT - - \`SORT\` sorts the documents by one ore more fields or variables. - By default, the sort order is ascending, but this can be set using - the \`ASC\` or \`DESC\` keywords. Some examples: - - - \`| SORT my_field\` - - \`| SORT height DESC\` - - DO NOT UNDER ANY CIRCUMSTANCES use functions or math as part of the - sort statement. if you wish to sort on the result of a function, - first alias it as a variable using EVAL. - This is wrong: \`| SORT AVG(cpu)\`. - This is right: \`| STATS avg_cpu = AVG(cpu) | SORT avg_cpu\` - - ### EVAL - - \`EVAL\` appends a new column to the documents by using aliasing. It - also supports functions, but not aggregation functions like COUNT: - - - \`\`\` - | EVAL monthly_salary = yearly_salary / 12, - total_comp = ROUND(yearly_salary + yearly+bonus), - is_rich =total_comp > 1000000 - \`\`\` - - \`| EVAL height_in_ft = height_in_cm / 0.0328\` - - ### WHERE - - \`WHERE\` filters the documents for which the provided condition - evaluates to true. Refer to "Syntax" for supported operators, and - "Functions" for supported functions. When using WHERE, make sure - that the columns in your statement are still available. Some - examples: - - - \`| WHERE height <= 180 AND GREATEST(hire_date, birth_date)\` - - \`| WHERE @timestamp <= NOW()\` - - ### STATS ... BY - - \`STATS ... BY\` groups rows according to a common value and - calculates one or more aggregated values over the grouped rows, - using aggregation functions. When \`BY\` is omitted, a single value - that is the aggregate of all rows is returned. Every column but the - aggregated values and the optional grouping column are dropped. - Mention the retained columns when explaining the STATS command. - - DO NOT UNDER ANY CIRCUMSTANCES use non-aggregation functions (like - CASE or DATE_EXTRACT) or mathemetical operators in the STATS - command. 
YOU MUST USE an EVAL command before the STATS command - to append the new calculated column. - - Some examples: - - - \`| STATS count = COUNT(emp_no) BY languages\` - - \`| STATS salary = AVG(salary)\` - - \`| EVAL monthly_salary = salary / 12 | STATS avg_monthly_salary = AVG(monthly_salary) BY emp_country\` - - ### LIMIT - - Limits the rows returned. Only supports a number as input. Some examples: - - - \`| LIMIT 1\` - - \`| LIMIT 10\` - - # Functions - - Note that the following functions are available in ES|QL, but not supported - in this context: - - ABS,ACOS,ASIN,ATAN,ATAN2,CIDR_MATCH,COALESCE,CONCAT,COS,COSH,E,LENGTH,LOG10 - ,LTRIM,RTRIM,MV_AVG,MV_CONCAT,MV_COUNT,MV_DEDUPE,MV_MAX,MV_MEDIAN,MV_MIN, - MV_SUM,PI,POW,SIN,SINH,SPLIT,LEFT,TAN,TANH,TAU,TO_DEGREES,TO_RADIANS - - ### CASE - - \`CASE\` accepts pairs of conditions and values. The function returns - the value that belongs to the first condition that evaluates to true. If - the number of arguments is odd, the last argument is the default value which - is returned when no condition matches. Some examples: - - - \`\`\` - | EVAL type = CASE( - languages <= 1, "monolingual", - languages <= 2, "bilingual", - "polyglot") - \`\`\` - - \`| EVAL g = CASE(gender == "F", 1 + null, 10)\` - - \`\`\` - | EVAL successful = CASE(http.response.status_code == 200, 1, 0), failed = CASE(http.response.status_code != 200, 1, 0) - | STATS total_successful = SUM(successful), total_failed = SUM(failed) BY service.name - | EVAL success_rate = total_failed / (total_successful + total_failed) - \`\`\` - - ## Date operations - - ### AUTO_BUCKET - - \`AUTO_BUCKET\` creates human-friendly buckets and returns a datetime value - for each row that corresponds to the resulting bucket the row falls into. - Combine AUTO_BUCKET with STATS ... BY to create a date histogram. - You provide a target number of buckets, a start date, and an end date, - and it picks an appropriate bucket size to generate the target number of - buckets or fewer. 
If you don't have a start and end date, provide placeholder - values. Some examples: - - - \`| EVAL bucket=AUTO_BUCKET(@timestamp), 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z")\` - - \`| EVAL bucket=AUTO_BUCKET(my_date_field), 100, , )\` - - \`| EVAL bucket=AUTO_BUCKET(@timestamp), 100, NOW() - 15 minutes, NOW())\` - - ### DATE_EXTRACT - - \`DATE_EXTRACT\` parts of a date, like year, month, day, hour. The supported - field types are those provided by java.time.temporal.ChronoField. - Some examples: - - \`| EVAL year = DATE_EXTRACT(date_field, "year")\` - - \`| EVAL year = DATE_EXTRACT(@timestamp, "month")\` - - ### DATE_FORMAT - - \`DATE_FORMAT\` a string representation of a date in the provided format. - Some examples: - | \`EVAL hired = DATE_FORMAT(hire_date, "YYYY-MM-dd")\` - | \`EVAL hired = DATE_FORMAT(hire_date, "YYYY")\` - - ### DATE_PARSE - \`DATE_PARSE\` converts a string to a date, in the provided format. - - \`| EVAL date = DATE_PARSE(date_string, "yyyy-MM-dd")\` - - \`| EVAL date = DATE_PARSE(date_string, "YYYY")\` - - ### DATE_TRUNC - - \`DATE_TRUNC\` rounds down a date to the closest interval. Intervals - can be expressed using the timespan literal syntax. Use this together - with STATS ... BY to group data into time buckets with a fixed interval. - Some examples: - - - \`| EVAL year_hired = DATE_TRUNC(1 year, hire_date)\` - - \`| EVAL month_logged = DATE_TRUNC(1 month, @timestamp)\` - - \`| EVAL bucket = DATE_TRUNC(1 minute, @timestamp) | STATS avg_salary = AVG(salary) BY bucket\` - - \`| EVAL bucket = DATE_TRUNC(4 hours, @timestamp) | STATS max_salary MAX(salary) BY bucket\` - - ### NOW - - \`NOW\` returns current date and time. Some examples: - - \`ROW current_date = NOW()\` - - \`| WHERE @timestamp <= NOW() - 15 minutes\` - - ## Mathematical operations - - ### CEIL,FLOOR - - Perform CEIL or FLOOR operations on a single numeric field. 
- Some examples: - - \`| EVAL ceiled = CEIL(my.number)\` - - \`| EVAL floored = FLOOR(my.other.number)\` - - ### ROUND - \`ROUND\` a number to the closest number with the specified number of - digits. Defaults to 0 digits if no number of digits is provided. If the - specified number of digits is negative, rounds to the number of digits - left of the decimal point. Some examples: - - - \`| EVAL height_ft = ROUND(height * 3.281, 1)\` - - \`| EVAL percent = ROUND(0.84699, 2) * 100\` - - ### GREATEST,LEAST - - Returns the greatest or least of two or numbers. Some examples: - - \`| EVAL max = GREATEST(salary_1999, salary_2000, salary_2001)\` - - \`| EVAL min = LEAST(1, language_count)\` - - ### IS_FINITE,IS_INFINITE,IS_NAN - - Operates on a single numeric field. Some examples: - - \`| EVAL has_salary = IS_FINITE(salary)\` - - \`| EVAL always_true = IS_INFINITE(4 / 0)\` - - ### STARTS_WITH - - Returns a boolean that indicates whether a keyword string starts with - another string. Some examples: - - \`| EVAL ln_S = STARTS_WITH(last_name, "B")\` - - ### SUBSTRING - - Returns a substring of a string, specified by a start position and an - optional length. Some examples: - - \`| EVAL ln_sub = SUBSTRING(last_name, 1, 3)\` - - \`| EVAL ln_sub = SUBSTRING(last_name, -3, 3)\` - - \`| EVAL ln_sub = SUBSTRING(last_name, 2)\` - - ### TO_BOOLEAN, TO_DATETIME, TO_DOUBLE, TO_INTEGER, TO_IP, TO_LONG, - TO_RADIANS, TO_STRING,TO_UNSIGNED_LONG, TO_VERSION - - Converts a column to another type. Some examples: - - \`| EVAL version = TO_VERSION("1.2.3")\` - - \`| EVAL as_bool = TO_BOOLEAN(my_boolean_string)\` - - \`| EVAL percent = TO_DOUBLE(part) / TO_DOUBLE(total)\` - - ### TRIM - - Trims leading and trailing whitespace. Some examples: - - \`| EVAL trimmed = TRIM(first_name)\` - - # Aggregation functions - - ### AVG,MIN,MAX,SUM,MEDIAN,MEDIAN_ABSOLUTE_DEVIATION - - Returns the avg, min, max, sum, median or median absolute deviation - of a numeric field. 
Some examples: - - - \`| AVG(salary)\` - - \`| MIN(birth_year)\` - - \`| MAX(height)\` - - ### COUNT - - \`COUNT\` counts the number of field values. It requires a single - argument, and does not support wildcards. One single argument is - required. If you don't have a field name, use whatever field you have, - rather than displaying an invalid query. - - Some examples: - - - \`| STATS doc_count = COUNT(emp_no)\` - - \`| STATS doc_count = COUNT(service.name) BY service.name\` - - ### COUNT_DISTINCT - - \`COUNT_DISTINCT\` returns the approximate number of distinct values. - Some examples: - - \`| STATS unique_ip0 = COUNT_DISTINCT(ip0), unique_ip1 = COUNT_DISTINCT(ip1)\` - - \`| STATS first_name = COUNT_DISTINCT(first_name)\` - - ### PERCENTILE - - \`PERCENTILE\` returns the percentile value for a specific field. - Some examples: - - \`| STATS p50 = PERCENTILE(salary, 50)\` - - \`| STATS p99 = PERCENTILE(salary, 99)\` - - `); - - const source$ = streamIntoObservable( - await client.chat({ - connectorId, - messages: [ - { - '@timestamp': new Date().toISOString(), - message: { role: MessageRole.System, content: systemMessage }, - }, - ...messages.slice(1), - ], - signal, - stream: true, - }) - ).pipe(processOpenAiStream()); - - return new Observable((subscriber) => { - let cachedContent: string = ''; - - function includesDivider() { - const firstDividerIndex = cachedContent.indexOf('--'); - return firstDividerIndex !== -1 && cachedContent.lastIndexOf('--') !== firstDividerIndex; - } - - source$.subscribe({ - next: (message) => { - if (includesDivider()) { - subscriber.next(message); - } - cachedContent += message.choices[0].delta.content || ''; - }, - complete: () => { - if (!includesDivider()) { - subscriber.next({ - created: 0, - id: '', - model: '', - object: 'chat.completion.chunk', - choices: [ - { - delta: { - content: cachedContent, - }, - }, - ], - }); - } - subscriber.complete(); - }, - error: (error) => { - subscriber.error(error); - }, - }); - }); - } - ); 
-} diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-abs.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-abs.txt new file mode 100644 index 0000000000000..f27b33d6d1473 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-abs.txt @@ -0,0 +1,8 @@ +ABS + + +Returns the absolute value. +FROM employees +| KEEP first_name, last_name, height +| EVAL abs_height = ABS(0.0 - height) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-acos.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-acos.txt new file mode 100644 index 0000000000000..0274cdfac205a --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-acos.txt @@ -0,0 +1,11 @@ +ACOS + +Syntax +Parameters +n +Numeric expression. If null, the function returns null. +DescriptionReturns the arccosine of n as an +angle, expressed in radians.Supported types +Example +ROW a=.9 +| EVAL acos=ACOS(a) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-asin.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-asin.txt new file mode 100644 index 0000000000000..6e36b7d776cd0 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-asin.txt @@ -0,0 +1,7 @@ +ASIN + + +Inverse sine trigonometric function. +ROW a=.9 +| EVAL asin=ASIN(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-atan.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-atan.txt new file mode 100644 index 0000000000000..78a6c0b4a4f71 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-atan.txt @@ -0,0 +1,7 @@ +ATAN + + +Inverse tangent trigonometric function. 
+ROW a=12.9 +| EVAL atan=ATAN(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-atan2.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-atan2.txt new file mode 100644 index 0000000000000..c194e8ee9f0a7 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-atan2.txt @@ -0,0 +1,8 @@ +ATAN2 + + +The angle between the positive x-axis and the +ray from the origin to the point (x , y) in the Cartesian plane. +ROW y=12.9, x=.6 +| EVAL atan2=ATAN2(y, x) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-auto_bucket.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-auto_bucket.txt new file mode 100644 index 0000000000000..1263cccd6d519 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-auto_bucket.txt @@ -0,0 +1,27 @@ +AUTO_BUCKET + +Creates human-friendly buckets and returns a datetime value for each row that +corresponds to the resulting bucket the row falls into. Combine AUTO_BUCKET +with STATS ... BY to create a date histogram.You provide a target number of buckets, a start date, and an end date, and it +picks an appropriate bucket size to generate the target number of buckets or +fewer. For example, this asks for at most 20 buckets over a whole year, which +picks monthly buckets: +ROW date=TO_DATETIME("1985-07-09T00:00:00.000Z") +| EVAL bucket=AUTO_BUCKET(date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z") +The goal isn’t to provide exactly the target number of buckets, it’s to pick a +range that people are comfortable with that provides at most the target number of +buckets.If you ask for more buckets then AUTO_BUCKET can pick a smaller range. 
For example, +asking for at most 100 buckets in a year will get you week long buckets: +ROW date=TO_DATETIME("1985-07-09T00:00:00.000Z") +| EVAL bucket=AUTO_BUCKET(date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z") +AUTO_BUCKET does not filter any rows. It only uses the provided time range to +pick a good bucket size. For rows with a date outside of the range, it returns a +datetime that corresponds to a bucket outside the range. Combine AUTO_BUCKET +with WHERE to filter rows.A more complete example might look like: +FROM employees +| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" +| EVAL bucket = AUTO_BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z") +| STATS AVG(salary) BY bucket +| SORT bucket +AUTO_BUCKET does not create buckets that don’t match any documents. That’s +why the example above is missing 1985-03-01 and other dates. diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-avg.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-avg.txt new file mode 100644 index 0000000000000..40667a199cc1f --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-avg.txt @@ -0,0 +1,6 @@ +AVG + +The average of a numeric field. +FROM employees +| STATS AVG(height) +The result is always a double no matter the input type. \ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-case.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-case.txt new file mode 100644 index 0000000000000..35c9278ba2fd5 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-case.txt @@ -0,0 +1,22 @@ +CASE + +Syntax +CASE(condition1, value1[, ..., conditionN, valueN][, default_value]) +Parameters +conditionX +A condition. 
+valueX +The value that’s returned when the corresponding condition is the first to +evaluate to true. +default_value +The default value that’s returned when no condition matches. +DescriptionAccepts pairs of conditions and values. The function returns the value that +belongs to the first condition that evaluates to true.If the number of arguments is odd, the last argument is the default value which +is returned when no condition matches. If the number of arguments is even, and +no condition matches, the function returns null.Example +FROM employees +| EVAL type = CASE( + languages <= 1, "monolingual", + languages <= 2, "bilingual", + "polyglot") +| KEEP emp_no, languages, type diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-ceil.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-ceil.txt new file mode 100644 index 0000000000000..685ec6690772d --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-ceil.txt @@ -0,0 +1,10 @@ +CEIL + + +Round a number up to the nearest integer. +ROW a=1.8 +| EVAL a=CEIL(a) +This is a noop for long (including unsigned) and integer. + For double this picks the closest double value to the integer ala + Math.ceil. +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-coalesce.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-coalesce.txt new file mode 100644 index 0000000000000..568f045de8d64 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-coalesce.txt @@ -0,0 +1,5 @@ +COALESCE + +Returns the first non-null value. 
+ROW a=null, b="b" +| EVAL COALESCE(a, b) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-concat.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-concat.txt new file mode 100644 index 0000000000000..e92c691a7e520 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-concat.txt @@ -0,0 +1,6 @@ +CONCAT + +Concatenates two or more strings. +FROM employees +| KEEP first_name, last_name, height +| EVAL fullname = CONCAT(first_name, " ", last_name) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-cos.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-cos.txt new file mode 100644 index 0000000000000..db7252d5966c4 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-cos.txt @@ -0,0 +1,7 @@ +COS + + +Cosine trigonometric function. Input expected in radians. +ROW a=1.8 +| EVAL cos=COS(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-cosh.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-cosh.txt new file mode 100644 index 0000000000000..22f221bbe18bb --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-cosh.txt @@ -0,0 +1,7 @@ +COSH + + +Cosine hyperbolic function. +ROW a=1.8 +| EVAL cosh=COSH(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-count.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-count.txt new file mode 100644 index 0000000000000..555b80f2d532d --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-count.txt @@ -0,0 +1,10 @@ +COUNT + +Counts field values. 
+FROM employees +| STATS COUNT(height) +Can take any field type as input and the result is always a long no matter +the input type. To count the number of rows, use COUNT(*): +FROM employees +| STATS count = COUNT(*) BY languages +| SORT languages DESC diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-count_distinct.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-count_distinct.txt new file mode 100644 index 0000000000000..dbe422e6703b5 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-count_distinct.txt @@ -0,0 +1,28 @@ +COUNT_DISTINCT + +The approximate number of distinct values. +FROM hosts +| STATS COUNT_DISTINCT(ip0), COUNT_DISTINCT(ip1) +Can take any field type as input and the result is always a long no matter +the input type. Counts are approximate. Computing exact counts requires loading values into a set and returning its +size. This doesn’t scale when working on high-cardinality sets and/or large +values as the required memory usage and the need to communicate those +per-shard sets between nodes would utilize too many resources of the cluster. This COUNT_DISTINCT function is based on the +HyperLogLog++ +algorithm, which counts based on the hashes of the values with some interesting +properties: +configurable precision, which decides on how to trade memory for accuracy, +excellent accuracy on low-cardinality sets, +fixed memory usage: no matter if there are tens or billions of unique values, +memory usage only depends on the configured precision. +For a precision threshold of c, the implementation that we are using requires +about c * 8 bytes. The following chart shows how the error varies before and after the threshold: For all 3 thresholds, counts have been accurate up to the configured threshold. +Although not guaranteed, this is likely to be the case. Accuracy in practice depends +on the dataset in question.
In general, most datasets show consistently good +accuracy. Also note that even with a threshold as low as 100, the error +remains very low (1-6% as seen in the above graph) even when counting millions of items. The HyperLogLog++ algorithm depends on the leading zeros of hashed +values, the exact distributions of hashes in a dataset can affect the +accuracy of the cardinality. Precision is configurable. The COUNT_DISTINCT function takes an optional second parameter to configure the +precision discussed previously. +FROM hosts +| STATS COUNT_DISTINCT(ip0, 80000), COUNT_DISTINCT(ip1, 5) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_extract.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_extract.txt new file mode 100644 index 0000000000000..442c9e6c9d719 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_extract.txt @@ -0,0 +1,6 @@ +DATE_EXTRACT + +Extracts parts of a date, like year, month, day, hour. +The supported field types are those provided by java.time.temporal.ChronoField. +ROW date = DATE_PARSE("yyyy-MM-dd", "2022-05-06") +| EVAL year = DATE_EXTRACT("year", date) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_format.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_format.txt new file mode 100644 index 0000000000000..b772cac90a0ee --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_format.txt @@ -0,0 +1,7 @@ +DATE_FORMAT + +Returns a string representation of a date in the provided format. If no format +is specified, the yyyy-MM-dd'T'HH:mm:ss.SSSZ format is used.
+FROM employees +| KEEP first_name, last_name, hire_date +| EVAL hired = DATE_FORMAT("YYYY-MM-dd", hire_date) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_parse.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_parse.txt new file mode 100644 index 0000000000000..f03321af7eb93 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_parse.txt @@ -0,0 +1,16 @@ +DATE_PARSE + +Syntax +DATE_PARSE([format,] date_string) +Parameters +format +The date format. Refer to the +DateTimeFormatter +documentation for the syntax. If null, the function returns null. +date_string +Date expression as a string. If null or an empty string, the function returns +null. +DescriptionReturns a date by parsing the second argument using the format specified in the +first argument.Example +ROW date_string = "2022-05-06" +| EVAL date = DATE_PARSE("yyyy-MM-dd", date_string) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_trunc.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_trunc.txt new file mode 100644 index 0000000000000..773a2a9ce9513 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-date_trunc.txt @@ -0,0 +1,8 @@ +DATE_TRUNC + +Rounds down a date to the closest interval. Intervals can be expressed using the +timespan literal syntax. 
+FROM employees +| EVAL year_hired = DATE_TRUNC(1 year, hire_date) +| STATS COUNT(emp_no) BY year_hired +| SORT year_hired diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-dissect.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-dissect.txt new file mode 100644 index 0000000000000..c4f99757f0b57 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-dissect.txt @@ -0,0 +1,25 @@ +DISSECT + +Syntax +DISSECT input "pattern" [APPEND_SEPARATOR=""] +Parameters +input +The column that contains the string you want to structure. If the column has +multiple values, DISSECT will process each value. +pattern +A dissect pattern. + +A string used as the separator between appended values, when using the append modifier. +DescriptionDISSECT enables you to extract +structured data out of a string. DISSECT matches the string against a +delimiter-based pattern, and extracts the specified keys as columns.Refer to Process data with DISSECT for the syntax of dissect patterns.ExamplesThe following example parses a string that contains a timestamp, some text, and +an IP address: +ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" +| DISSECT a "%{date} - %{msg} - %{ip}" +| KEEP date, msg, ip +By default, DISSECT outputs keyword string columns. 
To convert to another +type, use Type conversion functions: +ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" +| DISSECT a "%{date} - %{msg} - %{ip}" +| KEEP date, msg, ip +| EVAL date = TO_DATETIME(date) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-drop.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-drop.txt new file mode 100644 index 0000000000000..88d5af4315d9f --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-drop.txt @@ -0,0 +1,14 @@ +DROP + +Syntax +DROP columns +Parameters +columns +A comma-separated list of columns to remove. Supports wildcards. +DescriptionThe DROP processing command removes one or more columns.Examples +FROM employees +| DROP height +Rather than specify each column by name, you can use wildcards to drop all +columns with a name that matches a pattern: +FROM employees +| DROP height* diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-e.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-e.txt new file mode 100644 index 0000000000000..7b56bebff6dce --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-e.txt @@ -0,0 +1,5 @@ +E + + +Euler’s number. +ROW E() diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-enrich.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-enrich.txt new file mode 100644 index 0000000000000..94b18473228cb --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-enrich.txt @@ -0,0 +1,47 @@ +ENRICH + +Syntax +ENRICH policy [ON match_field] [WITH [new_name1 = ]field1, [new_name2 = ]field2, ...] +Parameters +policy +The name of the enrich policy. You need to create +and execute the enrich policy first. +match_field +The match field. 
ENRICH uses its value to look for records in the enrich +index. If not specified, the match will be performed on the column with the same +name as the match_field defined in the enrich policy. +fieldX +The enrich fields from the enrich index that are added to the result as new +columns. If a column with the same name as the enrich field already exists, the +existing column will be replaced by the new column. If not specified, each of +the enrich fields defined in the policy is added +new_nameX +Enables you to change the name of the column that’s added for each of the enrich +fields. Defaults to the enrich field name. +DescriptionENRICH enables you to add data from existing indices as new columns using an +enrich policy. Refer to Data enrichment for information about setting up a +policy. +Before you can use ENRICH, you need to create +and execute an enrich policy. +ExamplesThe following example uses the languages_policy enrich policy to add a new +column for each enrich field defined in the policy. The match is performed using +the match_field defined in the enrich policy and +requires that the input table has a column with the same name (language_code +in this example). ENRICH will look for records in the +enrich index based on the match field value. +ROW language_code = "1" +| ENRICH languages_policy +To use a column with a different name than the match_field defined in the +policy as the match field, use ON : +ROW a = "1" +| ENRICH languages_policy ON a +By default, each of the enrich fields defined in the policy is added as a +column. To explicitly select the enrich fields that are added, use +WITH , , ...: +ROW a = "1" +| ENRICH languages_policy ON a WITH language_name +You can rename the columns that are added using WITH new_name=: +ROW a = "1" +| ENRICH languages_policy ON a WITH name = language_name +In case of name collisions, the newly created columns will override existing +columns. 
\ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-eval.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-eval.txt new file mode 100644 index 0000000000000..23aa334bdbb71 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-eval.txt @@ -0,0 +1,23 @@ +EVAL + +Syntax +EVAL column1 = value1[, ..., columnN = valueN] +Parameters +columnX +The column name. +valueX +The value for the column. Can be a literal, an expression, or a +function. +DescriptionThe EVAL processing command enables you to append new columns with calculated +values. EVAL supports various functions for calculating values. Refer to +Functions for more information.Examples +FROM employees +| SORT emp_no +| KEEP first_name, last_name, height +| EVAL height_feet = height * 3.281, height_cm = height * 100 +If the specified column already exists, the existing column will be dropped, and +the new column will be appended to the table: +FROM employees +| SORT emp_no +| KEEP first_name, last_name, height +| EVAL height = height * 3.281 diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-floor.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-floor.txt new file mode 100644 index 0000000000000..b27997edd8bff --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-floor.txt @@ -0,0 +1,10 @@ +FLOOR + + +Round a number down to the nearest integer. +ROW a=1.8 +| EVAL a=FLOOR(a) +This is a noop for long (including unsigned) and integer. + For double this picks the closest double value to the integer ala + Math.floor.
+Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-from.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-from.txt new file mode 100644 index 0000000000000..15a471cb44ead --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-from.txt @@ -0,0 +1,29 @@ +FROM + +Syntax +FROM index_pattern [METADATA fields] +Parameters +index_pattern +A list of indices, data streams or aliases. Supports wildcards and date math. +fields +A comma-separated list of metadata fields to retrieve. +DescriptionThe FROM source command returns a table with data from a data stream, index, +or alias. Each row in the resulting table represents a document. Each column +corresponds to a field, and can be accessed by the name of that field. +By default, an ES|QL query without an explicit LIMIT uses an implicit +limit of 500. This applies to FROM too. A FROM command without LIMIT: +FROM employees +is executed as: +FROM employees +| LIMIT 500 +Examples +FROM employees +You can use date math to refer to indices, aliases +and data streams. This can be useful for time series data, for example to access +today’s index: +FROM +Use comma-separated lists or wildcards to query multiple data streams, indices, +or aliases: +FROM employees-00001,other-employees-* +Use the METADATA directive to enable metadata fields: +FROM employees [METADATA _id] diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-greatest.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-greatest.txt new file mode 100644 index 0000000000000..119ecd48edf4c --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-greatest.txt @@ -0,0 +1,11 @@ +GREATEST + + +Returns the maximum value from many columns. This is similar to MV_MAX +except it’s intended to run on multiple columns at once. 
+ROW a = 10, b = 20 +| EVAL g = GREATEST(a, b) +When run on keyword or text fields, this’ll return the last string + in alphabetical order. When run on boolean columns this will return + true if any values are true. +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-grok.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-grok.txt new file mode 100644 index 0000000000000..57d3cb32b7270 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-grok.txt @@ -0,0 +1,28 @@ +GROK + +Syntax +GROK input "pattern" +Parameters +input +The column that contains the string you want to structure. If the column has +multiple values, GROK will process each value. +pattern +A grok pattern. +DescriptionGROK enables you to extract +structured data out of a string. GROK matches the string against patterns, +based on regular expressions, and extracts the specified patterns as columns.Refer to Process data with GROK for the syntax of grok patterns.ExamplesThe following example parses a string that contains a timestamp, an IP address, +an email address, and a number: +ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" +| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}" +| KEEP date, ip, email, num +By default, GROK outputs keyword string columns. int and float types can +be converted by appending :type to the semantics in the pattern. 
For example +{NUMBER:num:int}: +ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" +| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" +| KEEP date, ip, email, num +For other type conversions, use Type conversion functions: +ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" +| GROK a "%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}" +| KEEP date, ip, email, num +| EVAL date = TO_DATETIME(date) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-keep.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-keep.txt new file mode 100644 index 0000000000000..466a421cb247a --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-keep.txt @@ -0,0 +1,19 @@ +KEEP + +Syntax +KEEP columns +Parameters +columns:: +A comma-separated list of columns to keep. Supports wildcards.DescriptionThe KEEP processing command enables you to specify what columns are returned +and the order in which they are returned.ExamplesThe columns are returned in the specified order: +FROM employees +| KEEP emp_no, first_name, last_name, height +Rather than specify each column by name, you can use wildcards to return all +columns with a name that matches a pattern: +FROM employees +| KEEP h* +The asterisk wildcard (*) by itself translates to all columns that do not +match the other arguments. 
This query will first return all columns with a name +that starts with h, followed by all other columns: +FROM employees +| KEEP h*, * diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-least.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-least.txt new file mode 100644 index 0000000000000..d598679f84f57 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-least.txt @@ -0,0 +1,11 @@ +LEAST + + +Returns the minimum value from many columns. This is similar to MV_MIN +except it’s intended to run on multiple columns at once. +ROW a = 10, b = 20 +| EVAL l = LEAST(a, b) +When run on keyword or text fields, this’ll return the first string + in alphabetical order. When run on boolean columns this will return + false if any values are false. +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-left.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-left.txt new file mode 100644 index 0000000000000..2edc0378aa312 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-left.txt @@ -0,0 +1,10 @@ +LEFT + + +Return the substring that extracts length chars from the string starting from the left. +FROM employees +| KEEP last_name +| EVAL left = LEFT(last_name, 3) +| SORT last_name ASC +| LIMIT 5 +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-length.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-length.txt new file mode 100644 index 0000000000000..29e22edcb4176 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-length.txt @@ -0,0 +1,6 @@ +LENGTH + +Returns the character length of a string. 
+FROM employees +| KEEP first_name, last_name, height +| EVAL fn_length = LENGTH(first_name) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-limit.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-limit.txt new file mode 100644 index 0000000000000..520b8e5547e9f --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-limit.txt @@ -0,0 +1,24 @@ +LIMIT + +Syntax +LIMIT max_number_of_rows +Parameters +max_number_of_rows +The maximum number of rows to return. +DescriptionThe LIMIT processing command enables you to limit the number of rows that are +returned. +Queries do not return more than 10,000 rows, regardless of the LIMIT command’s +value.This limit only applies to the number of rows that are retrieved by the query. +Queries and aggregations run on the full data set.To overcome this limitation: +Reduce the result set size by modifying the query to only return relevant +data. Use WHERE to select a smaller subset of the data. +Shift any post-query processing to the query itself. You can use the ES|QL +STATS ... BY command to aggregate data in the query. +The default and maximum limits can be changed using these dynamic cluster +settings: +esql.query.result_truncation_default_size +esql.query.result_truncation_max_size +Example +FROM employees +| SORT emp_no ASC +| LIMIT 5 diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-limitations.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-limitations.txt new file mode 100644 index 0000000000000..c5ae151eab121 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-limitations.txt @@ -0,0 +1,149 @@ + +You are looking at preliminary documentation for a future release. +Not what you want? See the +current release documentation. 
+Elastic Docs +›Elasticsearch Guide [master] +›ES|QL +« ES|QL task management +ES|QL examples » +ES|QL limitationsedit +Result set size limitedit +By default, an ES|QL query returns up to 500 rows. You can increase the number +of rows up to 10,000 using the `LIMIT` command. +Queries do not return more than 10,000 rows, regardless of the `LIMIT` command’s +value. +This limit only applies to the number of rows that are retrieved by the query. +Queries and aggregations run on the full data set. +To overcome this limitation: +Reduce the result set size by modifying the query to only return relevant +data. Use `WHERE` to select a smaller subset of the data. +Shift any post-query processing to the query itself. You can use the ES|QL +`STATS ... BY` command to aggregate data in the query. +The default and maximum limits can be changed using these dynamic cluster +settings: +`esql.query.result_truncation_default_size` +`esql.query.result_truncation_max_size` +Field typesedit +Supported typesedit +ES|QL currently supports the following field types: +`alias` +`boolean` +`date` +`double` (`float`, `half_float`, `scaled_float` are represented as `double`) +`ip` +`keyword` family including `keyword`, `constant_keyword`, and `wildcard` +`int` (`short` and `byte` are represented as `int`) +`long` +`null` +`text` +`unsigned_long` +`version` +Spatial types +`geo_point` +`point` +Unsupported typesedit +ES|QL does not yet support the following field types: +TSDB metrics +`counter` +`position` +`aggregate_metric_double` +Spatial types +`geo_shape` +`shape` +Date/time +`date_nanos` +`date_range` +Other types +`binary` +`completion` +`dense_vector` +`double_range` +`flattened` +`float_range` +`histogram` +`integer_range` +`ip_range` +`long_range` +`nested` +`rank_feature` +`rank_features` +`search_as_you_type` +Querying a column with an unsupported type returns an error. 
If a column with an +unsupported type is not explicitly used in a query, it is returned with `null` +values, with the exception of nested fields. Nested fields are not returned at +all. +Full-text search is not supportededit +Because of the way ES|QL treats `text` values, +full-text search is not yet supported. Queries on `text` fields are like queries +on `keyword` fields: they are case-sensitive and need to match the full string. +For example, after indexing a field of type `text` with the value `Elasticsearch +query language`, the following `WHERE` clause does not match because the `LIKE` +operator is case-sensitive: +| WHERE field LIKE "elasticsearch query language" +The following `WHERE` clause does not match either, because the `LIKE` operator +tries to match the whole string: +| WHERE field LIKE "Elasticsearch" +As a workaround, use wildcards and regular expressions. For example: +| WHERE field RLIKE "[Ee]lasticsearch.*" +`text` fields behave like `keyword` fieldsedit +While ES|QL supports `text` fields, ES|QL does not treat these fields +like the Search API does. ES|QL queries do not query or aggregate the +analyzed string. Instead, an ES|QL query will try to get a `text` +field’s subfield of the keyword family type and query/aggregate +that. If it’s not possible to retrieve a `keyword` subfield, ES|QL will get the +string from a document’s `_source`. If the `_source` cannot be retrieved, for +example when using synthetic source, `null` is returned. +Note that ES|QL’s retrieval of `keyword` subfields may have unexpected +consequences. An ES|QL query on a `text` field is case-sensitive. Furthermore, +a subfield may have been mapped with a normalizer, which can +transform the original string. Or it may have been mapped with `ignore_above`, +which can truncate the string. None of these mapping operations are applied to +an ES|QL query, which may lead to false positives or negatives. 
+To avoid these issues, a best practice is to be explicit about the field that +you query, and query `keyword` sub-fields instead of `text` fields. +Time series data streams are not supportededit +ES|QL does not support querying time series data streams (TSDS). +Cross-cluster search is not supportededit +ES|QL does not support cross-cluster search. +Date math limitationsedit +Date math expressions work well when the leftmost expression is a datetime, for +example: +now() + 1 year - 2hour + ... +But using parentheses or putting the datetime to the right is not always supported yet. For example, the following expressions fail: +1year + 2hour + now() +now() + (1year + 2hour) +Date math does not allow subtracting two datetimes, for example: +now() - 2023-10-26 +Enrich limitationsedit +The ES|QL `ENRICH` command only supports enrich policies of type `match`. +Furthermore, `ENRICH` only supports enriching on a column of type `keyword`. +Dissect limitationsedit +The `DISSECT` command does not support reference keys. +Grok limitationsedit +The `GROK` command does not support configuring custom +patterns, or multiple patterns. The `GROK` command is not +subject to Grok watchdog settings. +Multivalue limitationsedit +ES|QL supports multivalued fields, but functions +return `null` when applied to a multivalued field, unless documented otherwise. +Work around this limitation by converting the field to single value with one of +the multivalue functions. +Timezone supportedit +ES|QL only supports the UTC timezone. +Kibana limitationsedit +The user interface to filter data is not enabled when Discover is in ES|QL +mode. To filter data, write a query that uses the `WHERE` command +instead. +In ES|QL mode, clicking a field in the field list in Discover does not show +quick statistics for that field. +Discover shows no more than 10,000 rows. This limit only applies to the number +of rows that are retrieved by the query and displayed in Discover. 
Queries and +aggregations run on the full data set. +Discover shows no more than 50 columns. If a query returns +more than 50 columns, Discover only shows the first 50. +Querying many many indices at once without any filters can cause an error in +kibana which looks like `[esql] > Unexpected error from Elasticsearch: The +content length (536885793) is bigger than the maximum allowed string +(536870888)`. The response from ES|QL is too long. Use `DROP` or +`KEEP` to limit the number of fields returned. diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-log10.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-log10.txt new file mode 100644 index 0000000000000..dabc5bc7c05c7 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-log10.txt @@ -0,0 +1,8 @@ +LOG10 + + +Returns the log base 10. The input can be any numeric value, the return value +is always a double.Logs of negative numbers are NaN. Logs of infinites are infinite, as is the log of 0. +ROW d = 1000.0 +| EVAL s = LOG10(d) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-ltrim.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-ltrim.txt new file mode 100644 index 0000000000000..350b0b1e369ba --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-ltrim.txt @@ -0,0 +1,10 @@ +LTRIM + + +Removes leading whitespaces from strings. 
+ROW message = " some text ", color = " red " +| EVAL message = LTRIM(message) +| EVAL color = LTRIM(color) +| EVAL message = CONCAT("'", message, "'") +| EVAL color = CONCAT("'", color, "'") +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-max.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-max.txt new file mode 100644 index 0000000000000..22b23e89c7438 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-max.txt @@ -0,0 +1,5 @@ +MAX + +The maximum value of a numeric field. +FROM employees +| STATS MAX(languages) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-median.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-median.txt new file mode 100644 index 0000000000000..df0aa45577b2e --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-median.txt @@ -0,0 +1,9 @@ +MEDIAN + +The value that is greater than half of all values and less than half of +all values, also known as the 50% PERCENTILE. +FROM employees +| STATS MEDIAN(salary), PERCENTILE(salary, 50) +Like PERCENTILE, MEDIAN is usually approximate. +MEDIAN is also non-deterministic. +This means you can get slightly different results using the same data. diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-median_absolute_deviation.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-median_absolute_deviation.txt new file mode 100644 index 0000000000000..bf7329c62fb3b --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-median_absolute_deviation.txt @@ -0,0 +1,14 @@ +MEDIAN_ABSOLUTE_DEVIATION + +The median absolute deviation, a measure of variability. 
It is a robust +statistic, meaning that it is useful for describing data that may have outliers, +or may not be normally distributed. For such data it can be more descriptive than +standard deviation.It is calculated as the median of each data point’s deviation from the median of +the entire sample. That is, for a random variable X, the median absolute deviation +is median(|median(X) - Xi|). +FROM employees +| STATS MEDIAN(salary), MEDIAN_ABSOLUTE_DEVIATION(salary) +Like PERCENTILE, MEDIAN_ABSOLUTE_DEVIATION is + usually approximate. +MEDIAN_ABSOLUTE_DEVIATION is also non-deterministic. +This means you can get slightly different results using the same data. diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-min.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-min.txt new file mode 100644 index 0000000000000..97e021e3153ab --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-min.txt @@ -0,0 +1,5 @@ +MIN + +The minimum value of a numeric field. +FROM employees +| STATS MIN(languages) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_avg.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_avg.txt new file mode 100644 index 0000000000000..ee144a77697df --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_avg.txt @@ -0,0 +1,7 @@ +MV_AVG + +Converts a multivalued field into a single valued field containing the average +of all of the values. For example: +ROW a=[3, 5, 1, 6] +| EVAL avg_a = MV_AVG(a) +The output type is always a double and the input type can be any number. 
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_concat.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_concat.txt new file mode 100644 index 0000000000000..1d579bee82a43 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_concat.txt @@ -0,0 +1,11 @@ +MV_CONCAT + + +Converts a multivalued string field into a single valued field containing the +concatenation of all values separated by a delimiter: +ROW a=["foo", "zoo", "bar"] +| EVAL j = MV_CONCAT(a, ", ") +If you want to concat non-string fields call TO_STRING on them first: +ROW a=[10, 9, 8] +| EVAL j = MV_CONCAT(TO_STRING(a), ", ") +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_count.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_count.txt new file mode 100644 index 0000000000000..e6149e02c9fd9 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_count.txt @@ -0,0 +1,8 @@ +MV_COUNT + + +Converts a multivalued field into a single valued field containing a count of the number +of values: +ROW a=["foo", "zoo", "bar"] +| EVAL count_a = MV_COUNT(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_dedupe.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_dedupe.txt new file mode 100644 index 0000000000000..e844e01bdc626 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_dedupe.txt @@ -0,0 +1,8 @@ +MV_DEDUPE + + +Removes duplicates from a multivalued field. For example: +ROW a=["foo", "foo", "bar", "foo"] +| EVAL dedupe_a = MV_DEDUPE(a) +Supported types: +MV_DEDUPE may, but won’t always, sort the values in the field. 
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_expand.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_expand.txt new file mode 100644 index 0000000000000..b8e757914b0b0 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_expand.txt @@ -0,0 +1,11 @@ +MV_EXPAND + +Syntax +MV_EXPAND column +Parameters +column +The multivalued column to expand. +DescriptionThe MV_EXPAND processing command expands multivalued columns into one row per +value, duplicating other columns.Example +ROW a=[1,2,3], b="b", j=["a","b"] +| MV_EXPAND a diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_max.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_max.txt new file mode 100644 index 0000000000000..91ff991dfcb42 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_max.txt @@ -0,0 +1,11 @@ +MV_MAX + + +Converts a multivalued field into a single valued field containing the maximum value. For example: +ROW a=[3, 5, 1] +| EVAL max_a = MV_MAX(a) +It can be used by any field type, including keyword fields. In that case picks the +last string, comparing their utf-8 representation byte by byte: +ROW a=["foo", "zoo", "bar"] +| EVAL max_a = MV_MAX(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_median.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_median.txt new file mode 100644 index 0000000000000..293e86b022c5d --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_median.txt @@ -0,0 +1,10 @@ +MV_MEDIAN + +Converts a multivalued field into a single valued field containing the median value. 
For example: +ROW a=[3, 5, 1] +| EVAL median_a = MV_MEDIAN(a) +It can be used by any numeric field type and returns a value of the same type. If the +row has an even number of values for a column the result will be the average of the +middle two entries. If the field is not floating point then the average rounds down: +ROW a=[3, 7, 1, 6] +| EVAL median_a = MV_MEDIAN(a) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_min.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_min.txt new file mode 100644 index 0000000000000..577464825cd72 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_min.txt @@ -0,0 +1,11 @@ +MV_MIN + + +Converts a multivalued field into a single valued field containing the minimum value. For example: +ROW a=[2, 1] +| EVAL min_a = MV_MIN(a) +It can be used by any field type, including keyword fields. In that case picks the +first string, comparing their utf-8 representation byte by byte: +ROW a=["foo", "bar"] +| EVAL min_a = MV_MIN(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_sum.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_sum.txt new file mode 100644 index 0000000000000..34973728b2710 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-mv_sum.txt @@ -0,0 +1,7 @@ +MV_SUM + +Converts a multivalued field into a single valued field containing the sum +of all of the values. For example: +ROW a=[3, 5, 6] +| EVAL sum_a = MV_SUM(a) +The input type can be any number and the output type is the same as the input type. 
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-now.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-now.txt new file mode 100644 index 0000000000000..2145d6b17add7 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-now.txt @@ -0,0 +1,4 @@ +NOW + +Returns current date and time. +ROW current_date = NOW() diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-numeric-fields.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-numeric-fields.txt new file mode 100644 index 0000000000000..f08b10515aa55 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-numeric-fields.txt @@ -0,0 +1,12 @@ +Numeric fields + +auto_bucket can also operate on numeric fields like this: +FROM employees +| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" +| EVAL bs = AUTO_BUCKET(salary, 20, 25324, 74999) +| SORT hire_date, salary +| KEEP hire_date, salary, bs +Unlike the example above where you are intentionally filtering on a date range, +you rarely want to filter on a numeric range. So you have to find the min and max +separately. We don’t yet have an easy way to do that automatically. Improvements +coming!
\ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-operators.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-operators.txt new file mode 100644 index 0000000000000..29204aad6a3f6 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-operators.txt @@ -0,0 +1,181 @@ +Binary operators +Binary operators + + +Equality == +Equality == + + +Supported types: + +Inequality != +Inequality != + + +Supported types: + +Less than < +Less than < + + +Supported types: + +Less than or equal to <= +Less than or equal to <= + + +Supported types: + +Greater than > +Greater than > + + +Supported types: + +Greater than or equal to >= +Greater than or equal to >= + + +Supported types: + +Add + +Add + + + +Supported types: + +Subtract - +Subtract - + + +Supported types: + +Multiply * +Multiply * + + +Supported types: + +Divide / +Divide / + + +Supported types: + +Modulus % +Modulus % + + +Supported types: + +Unary operators +Unary operators + +The only unary operator is negation (-): +Supported types: + +Logical operators +Logical operators + +The following logical operators are supported: +AND +OR +NOT + +IS NULL and IS NOT NULL predicates +IS NULL and IS NOT NULL predicates + +For NULL comparison, use the IS NULL and IS NOT NULL predicates: +FROM employees +| WHERE birth_date IS NULL +| KEEP first_name, last_name +| SORT first_name +| LIMIT 3 +FROM employees +| WHERE is_rehired IS NOT NULL +| STATS COUNT(emp_no) + +CIDR_MATCH +CIDR_MATCH + +Returns true if the provided IP is contained in one of the provided CIDR +blocks. CIDR_MATCH accepts two or more arguments. The first argument is the IP +address of type ip (both IPv4 and IPv6 are supported). Subsequent arguments +are the CIDR blocks to test the IP against.
+FROM hosts +| WHERE CIDR_MATCH(ip, "127.0.0.2/32", "127.0.0.3/32") + +ENDS_WITH +ENDS_WITH + + +Returns a boolean that indicates whether a keyword string ends with another +string: +FROM employees +| KEEP last_name +| EVAL ln_E = ENDS_WITH(last_name, "d") +Supported types: + +IN +IN + +The IN operator allows testing whether a field or expression equals +an element in a list of literals, fields or expressions: +ROW a = 1, b = 4, c = 3 +| WHERE c-a IN (3, b / 2, a) + +IS_FINITE +IS_FINITE + +Returns a boolean that indicates whether its input is a finite number. +ROW d = 1.0 +| EVAL s = IS_FINITE(d/0) + +IS_INFINITE +IS_INFINITE + +Returns a boolean that indicates whether its input is infinite. +ROW d = 1.0 +| EVAL s = IS_INFINITE(d/0) + +IS_NAN +IS_NAN + +Returns a boolean that indicates whether its input is not a number. +ROW d = 1.0 +| EVAL s = IS_NAN(d) + +LIKE +LIKE + +Use LIKE to filter data based on string patterns using wildcards. LIKE +usually acts on a field placed on the left-hand side of the operator, but it can +also act on a constant (literal) expression. The right-hand side of the operator +represents the pattern. The following wildcard characters are supported: +* matches zero or more characters. +? matches one character. +FROM employees +| WHERE first_name LIKE "?b*" +| KEEP first_name, last_name + +RLIKE +RLIKE + +Use RLIKE to filter data based on string patterns using +regular expressions. RLIKE usually acts on a field placed on +the left-hand side of the operator, but it can also act on a constant (literal) +expression. The right-hand side of the operator represents the pattern.
+FROM employees +| WHERE first_name RLIKE ".leja.*" +| KEEP first_name, last_name + +STARTS_WITH +STARTS_WITH + + +Returns a boolean that indicates whether a keyword string starts with another +string: +FROM employees +| KEEP last_name +| EVAL ln_S = STARTS_WITH(last_name, "B") +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-overview.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-overview.txt new file mode 100644 index 0000000000000..ea9b24ba48464 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-overview.txt @@ -0,0 +1,45 @@ + +ES|QLedit +This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. +The Elasticsearch Query Language (ES|QL) provides a powerful way to filter, transform, +and analyze data stored in Elasticsearch, and in the future in other runtimes. It is +designed to be easy to learn and use, by end users, SRE teams, application +developers, and administrators. +Users can author ES|QL queries to find specific events, perform statistical +analysis, and generate visualizations. It supports a wide range of commands and +functions that enable users to perform various data operations, such as +filtering, aggregation, time-series analysis, and more. +The Elasticsearch Query Language (ES|QL) makes use of "pipes" (|) to manipulate and +transform data in a step-by-step fashion. This approach allows users to compose +a series of operations, where the output of one operation becomes the input for +the next, enabling complex data transformations and analysis. +The ES|QL Compute Engineedit +ES|QL is more than a language: it represents a significant investment in new +compute capabilities within Elasticsearch. 
To achieve both the functional and performance +requirements for ES|QL, it was necessary to build an entirely new compute +architecture. ES|QL search, aggregation, and transformation functions are +directly executed within Elasticsearch itself. Query expressions are not +transpiled to Query DSL for execution. This approach allows ES|QL to be +extremely performant and versatile. +The new ES|QL execution engine was designed with performance in mind — it +operates on blocks at a time instead of per row, targets vectorization and cache +locality, and embraces specialization and multi-threading. It is a separate +component from the existing Elasticsearch aggregation framework with different +performance characteristics. +The ES|QL documentation is organized in these sections: +Getting started +A tutorial to help you get started with ES|QL. +Learning ES|QL +Reference documentation for the ES|QL syntax, +commands, and functions and +operators. Information about working with metadata +fields and multivalued fields. And guidance for +data processing with DISSECT and +GROK and data enrichment with ENRICH. +Using ES|QL +An overview of using the REST API, Using ES|QL in Kibana, +Using ES|QL in Elastic Security, and Task management. +Limitations +The current limitations of ES|QL. +Examples +A few examples of what you can do with ES|QL. diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-percentile.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-percentile.txt new file mode 100644 index 0000000000000..432990415fc94 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-percentile.txt @@ -0,0 +1,32 @@ +PERCENTILE + +The value at which a certain percentage of observed values occur. For example, +the 95th percentile is the value which is greater than 95% of the observed values and +the 50th percentile is the MEDIAN. 
+FROM employees +| STATS p0 = PERCENTILE(salary, 0) + , p50 = PERCENTILE(salary, 50) + , p99 = PERCENTILE(salary, 99) +PERCENTILE is (usually) approximateeditThere are many different algorithms to calculate percentiles. The naive +implementation simply stores all the values in a sorted array. To find the 50th +percentile, you simply find the value that is at my_array[count(my_array) * 0.5].Clearly, the naive implementation does not scale — the sorted array grows +linearly with the number of values in your dataset. To calculate percentiles +across potentially billions of values in an Elasticsearch cluster, approximate +percentiles are calculated.The algorithm used by the percentile metric is called TDigest (introduced by +Ted Dunning in +Computing Accurate Quantiles using T-Digests).When using this metric, there are a few guidelines to keep in mind: +Accuracy is proportional to q(1-q). This means that extreme percentiles (e.g. 99%) +are more accurate than less extreme percentiles, such as the median +For small sets of values, percentiles are highly accurate (and potentially +100% accurate if the data is small enough). +As the quantity of values in a bucket grows, the algorithm begins to approximate +the percentiles. It is effectively trading accuracy for memory savings. The +exact level of inaccuracy is difficult to generalize, since it depends on your +data distribution and volume of data being aggregated +The following chart shows the relative error on a uniform distribution depending +on the number of collected values and the requested percentile:It shows how precision is better for extreme percentiles. The reason why error diminishes +for large number of values is that the law of large numbers makes the distribution of +values more and more uniform and the t-digest tree can do a better job at summarizing +it. It would not be the case on more skewed distributions. +PERCENTILE is also non-deterministic. 
+This means you can get slightly different results using the same data. diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-pi.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-pi.txt new file mode 100644 index 0000000000000..169af57566903 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-pi.txt @@ -0,0 +1,5 @@ +PI + + +The ratio of a circle’s circumference to its diameter. +ROW PI() diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-pow.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-pow.txt new file mode 100644 index 0000000000000..d74c58b7e0b6a --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-pow.txt @@ -0,0 +1,13 @@ +POW + + +Returns the value of a base (first argument) raised to the power of an exponent (second argument). +Both arguments must be numeric. The output is always a double. Note that it is still possible to overflow +a double result here; in that case, null will be returned. +ROW base = 2.0, exponent = 2 +| EVAL result = POW(base, exponent) +Fractional exponents: The exponent can be a fraction, which is similar to performing a root.
+For example, the exponent of 0.5 will give the square root of the base: +ROW base = 4, exponent = 0.5 +| EVAL s = POW(base, exponent) +Table of supported input and output typeseditFor clarity, the following table describes the output result type for all combinations of numeric input types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-processing-commands.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-processing-commands.txt new file mode 100644 index 0000000000000..125e55bfb2996 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-processing-commands.txt @@ -0,0 +1,17 @@ +Processing commands + +ES|QL processing commands change an input table by adding, removing, or changing +rows and columns. +ES|QL supports these processing commands: +DISSECT +DROP +ENRICH +EVAL +GROK +KEEP +LIMIT +MV_EXPAND +RENAME +SORT +STATS ... BY +WHERE diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-rename.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-rename.txt new file mode 100644 index 0000000000000..27ca77b68d0fd --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-rename.txt @@ -0,0 +1,18 @@ +RENAME + +Syntax +RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN] +Parameters +old_nameX +The name of a column you want to rename. +new_nameX +The new name of the column. +DescriptionThe RENAME processing command renames one or more columns. 
If a column with +the new name already exists, it will be replaced by the new column.Examples +FROM employees +| KEEP first_name, last_name, still_hired +| RENAME still_hired AS employed +Multiple columns can be renamed with a single RENAME command: +FROM employees +| KEEP first_name, last_name +| RENAME first_name AS fn, last_name AS ln diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-replace.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-replace.txt new file mode 100644 index 0000000000000..cf9102ab9471e --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-replace.txt @@ -0,0 +1,7 @@ +REPLACE + +The function substitutes in the string (1st argument) any match of the regular expression (2nd argument) with the replacement string (3rd argument).If any of the arguments are NULL, the result is NULL. +This example replaces an occurrence of the word "World" with the word "Universe": +ROW str = "Hello World" +| EVAL str = REPLACE(str, "World", "Universe") +| KEEP str diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-right.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-right.txt new file mode 100644 index 0000000000000..2cd30d32cd7c1 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-right.txt @@ -0,0 +1,10 @@ +RIGHT + + +Return the substring that extracts length chars from the string starting from the right. 
+FROM employees +| KEEP last_name +| EVAL right = RIGHT(last_name, 3) +| SORT last_name ASC +| LIMIT 5 +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-round.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-round.txt new file mode 100644 index 0000000000000..8ecb92ecc81be --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-round.txt @@ -0,0 +1,8 @@ +ROUND + +Rounds a number to the closest number with the specified number of digits. +Defaults to 0 digits if no number of digits is provided. If the specified number +of digits is negative, rounds to the number of digits left of the decimal point. +FROM employees +| KEEP first_name, last_name, height +| EVAL height_ft = ROUND(height * 3.281, 1) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-row.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-row.txt new file mode 100644 index 0000000000000..fa57a1bcc5660 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-row.txt @@ -0,0 +1,17 @@ +ROW + +Syntax +ROW column1 = value1[, ..., columnN = valueN] +Parameters +columnX +The column name. +valueX +The value for the column. Can be a literal, an expression, or a +function. +DescriptionThe ROW source command produces a row with one or more columns with values +that you specify. 
This can be useful for testing.Examples +ROW a = 1, b = "two", c = null +Use square brackets to create multi-value columns: +ROW a = [2, 1] +ROW supports the use of functions: +ROW a = ROUND(1.23, 0) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-rtrim.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-rtrim.txt new file mode 100644 index 0000000000000..63b6c428cadf2 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-rtrim.txt @@ -0,0 +1,10 @@ +RTRIM + + +Removes trailing whitespaces from strings. +ROW message = " some text ", color = " red " +| EVAL message = RTRIM(message) +| EVAL color = RTRIM(color) +| EVAL message = CONCAT("'", message, "'") +| EVAL color = CONCAT("'", color, "'") +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-show.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-show.txt new file mode 100644 index 0000000000000..cb7fab0300435 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-show.txt @@ -0,0 +1,15 @@ +SHOW + +Syntax +SHOW item +Parameters +item +Can be INFO or FUNCTIONS. +DescriptionThe SHOW source command returns information about the deployment and +its capabilities: +Use SHOW INFO to return the deployment’s version, build date and hash. +Use SHOW FUNCTIONS to return a list of all supported functions and a +synopsis of each function. +Examples +SHOW functions +| WHERE STARTS_WITH(name, "is_") diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sin.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sin.txt new file mode 100644 index 0000000000000..1c64c76791ea8 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sin.txt @@ -0,0 +1,7 @@ +SIN + + +Sine trigonometric function. 
Input expected in radians. +ROW a=1.8 +| EVAL sin=SIN(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sinh.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sinh.txt new file mode 100644 index 0000000000000..7a9d8a0bbcab7 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sinh.txt @@ -0,0 +1,7 @@ +SINH + + +Sine hyperbolic function. +ROW a=1.8 +| EVAL sinh=SINH(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sort.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sort.txt new file mode 100644 index 0000000000000..e1838ca567241 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sort.txt @@ -0,0 +1,29 @@ +SORT + +Syntax +SORT column1 [ASC/DESC][NULLS FIRST/NULLS LAST][, ..., columnN [ASC/DESC][NULLS FIRST/NULLS LAST]] +Parameters +columnX +The column to sort on. +DescriptionThe SORT processing command sorts a table on one or more columns.The default sort order is ascending. Use ASC or DESC to specify an explicit +sort order.Two rows with the same sort key are considered equal. You can provide additional +sort expressions to act as tie breakers.Sorting on multivalued columns uses the lowest value when sorting ascending and +the highest value when sorting descending.By default, null values are treated as being larger than any other value. With +an ascending sort order, null values are sorted last, and with a descending +sort order, null values are sorted first. 
You can change that by providing +NULLS FIRST or NULLS LAST.Examples +FROM employees +| KEEP first_name, last_name, height +| SORT height +Explicitly sorting in ascending order with ASC: +FROM employees +| KEEP first_name, last_name, height +| SORT height DESC +Providing additional sort expressions to act as tie breakers: +FROM employees +| KEEP first_name, last_name, height +| SORT height DESC, first_name ASC +Sorting null values first using NULLS FIRST: +FROM employees +| KEEP first_name, last_name, height +| SORT first_name ASC NULLS FIRST diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-source-commands.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-source-commands.txt new file mode 100644 index 0000000000000..8065c0e0bbfb0 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-source-commands.txt @@ -0,0 +1,7 @@ +Source commands + +An ES|QL source command produces a table, typically with data from Elasticsearch. An ES|QL query must start with a source command. +ES|QL supports these source commands: +FROM +ROW +SHOW diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-split.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-split.txt new file mode 100644 index 0000000000000..8dbd7e74731e1 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-split.txt @@ -0,0 +1,7 @@ +SPLIT + +Split a single valued string into multiple strings. For example: +ROW words="foo;bar;baz;qux;quux;corge" +| EVAL word = SPLIT(words, ";") +Which splits "foo;bar;baz;qux;quux;corge" on ; and returns an array: +Only single byte delimiters are currently supported. 
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sqrt.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sqrt.txt new file mode 100644 index 0000000000000..6b5c8a56a0d43 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sqrt.txt @@ -0,0 +1,8 @@ +SQRT + + +Returns the square root of a number. The input can be any numeric value, the return value +is always a double.Square roots of negative numbers are NaN. Square roots of infinites are infinite. +ROW d = 100.0 +| EVAL s = SQRT(d) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-stats.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-stats.txt new file mode 100644 index 0000000000000..2a1edb4966d15 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-stats.txt @@ -0,0 +1,50 @@ +STATS ... BY + +Syntax +STATS [column1 =] expression1[, ..., [columnN =] expressionN] [BY grouping_column1[, ..., grouping_columnN]] +Parameters +columnX +The name by which the aggregated value is returned. If omitted, the name is +equal to the corresponding expression (expressionX). +expressionX +An expression that computes an aggregated value. +grouping_columnX +The column containing the values to group by. +DescriptionThe STATS ... BY processing command groups rows according to a common value +and calculate one or more aggregated values over the grouped rows. If BY is +omitted, the output table contains exactly one row with the aggregations applied +over the entire dataset.The following aggregation functions are supported: +AVG +COUNT +COUNT_DISTINCT +MAX +MEDIAN +MEDIAN_ABSOLUTE_DEVIATION +MIN +PERCENTILE +SUM +STATS without any groups is much much faster than adding a group. +Grouping on a single column is currently much more optimized than grouping + on many columns. 
In some tests we have seen grouping on a single keyword + column to be five times faster than grouping on two keyword columns. Do + not try to work around this by combining the two columns together with + something like CONCAT and then grouping - that is not going to be + faster. +Examples: Calculating a statistic and grouping by the values of another column: +FROM employees +| STATS count = COUNT(emp_no) BY languages +| SORT languages +Omitting BY returns one row with the aggregations applied over the entire +dataset: +FROM employees +| STATS avg_lang = AVG(languages) +It’s possible to calculate multiple values: +FROM employees +| STATS avg_lang = AVG(languages), max_lang = MAX(languages) +It’s also possible to group by multiple values (only supported for long and +keyword family fields): +FROM employees +| EVAL hired = DATE_FORMAT("YYYY", hire_date) +| STATS avg_salary = AVG(salary) BY hired, languages.long +| EVAL avg_salary = ROUND(avg_salary) +| SORT hired, languages.long diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-substring.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-substring.txt new file mode 100644 index 0000000000000..a113ad838cc8a --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-substring.txt @@ -0,0 +1,17 @@ +SUBSTRING + +Returns a substring of a string, specified by a start position and an optional +length. This example returns the first three characters of every last name: +FROM employees +| KEEP last_name +| EVAL ln_sub = SUBSTRING(last_name, 1, 3) +A negative start position is interpreted as being relative to the end of the +string. This example returns the last three characters of every last name: +FROM employees +| KEEP last_name +| EVAL ln_sub = SUBSTRING(last_name, -3, 3) +If length is omitted, substring returns the remainder of the string.
This +example returns all characters except for the first: +FROM employees +| KEEP last_name +| EVAL ln_sub = SUBSTRING(last_name, 2) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sum.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sum.txt new file mode 100644 index 0000000000000..bc24875fce219 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-sum.txt @@ -0,0 +1,5 @@ +SUM + +The sum of a numeric field. +FROM employees +| STATS SUM(languages) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-syntax.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-syntax.txt new file mode 100644 index 0000000000000..8656c63f308f3 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-syntax.txt @@ -0,0 +1,94 @@ + +ES|QL syntax reference +Basic syntax +An ES|QL query is composed of a source command followed +by an optional series of processing commands, +separated by a pipe character: `|`. For example: +source-command +| processing-command1 +| processing-command2 +The result of a query is the table produced by the final processing command. +For an overview of all supported commands, functions, and operators, refer to Commands and Functions and operators. +For readability, this documentation puts each processing command on a new +line. However, you can write an ES|QL query as a single line. The following +query is identical to the previous one: +source-command | processing-command1 | processing-command2 +Identifiers +The identifiers can be used as they are and don’t require quoting, unless +containing special characters, in which case they must be quoted with +backticks (```). What "special characters" means is command dependent.
+For FROM, KEEP, DROP, +RENAME, MV_EXPAND and +ENRICH these are: `=`, ```, `,`, ` ` (space), `|` , +`[`, `]`, `\t` (TAB), `\r` (CR), `\n` (LF); one `/` is allowed unquoted, but +a sequence of two or more requires quoting. +The rest of the commands - those allowing for identifiers to be used in +expressions - require quoting if the identifier contains characters other than +letters, numbers and `_` and doesn’t start with a letter, `_` or `@`. +For instance: +// Retain just one field +FROM index +| KEEP 1.field +is legal. However, if the same field is to be used with an EVAL, +it’d have to be quoted: +// Copy one field +FROM index +| EVAL my_field = `1.field` +Literals +ES|QL currently supports numeric and string literals. +String literals +A string literal is a sequence of unicode characters delimited by double +quotes (`"`). +// Filter by a string value +FROM index +| WHERE first_name == "Georgi" +If the literal string itself contains quotes, these need to be escaped (`\\"`). +ES|QL also supports the triple-quotes (`"""`) delimiter, for convenience: +ROW name = """Indiana "Indy" Jones""" +The special characters CR, LF and TAB can be provided with the usual escaping: +`\r`, `\n`, `\t`, respectively. +Numerical literals +The numeric literals are accepted in decimal and in the scientific notation +with the exponent marker (`e` or `E`), starting either with a digit, decimal +point `.` or the negative sign `-`: +1969 -- integer notation +3.14 -- decimal notation +.1234 -- decimal notation starting with decimal point +4E5 -- scientific notation (with exponent marker) +1.2e-3 -- scientific notation with decimal point +-.1e2 -- scientific notation starting with the negative sign +The integer numeric literals are implicitly converted to the `integer`, `long` +or the `double` type, whichever can first accommodate the literal’s value. +The floating point literals are implicitly converted to the `double` type.
+To obtain constant values of different types, use one of the numeric +conversion functions. +Commentsedit +ES|QL uses C++ style comments: +double slash `//` for single line comments +`/*` and `*/` for block comments +// Query the employees index +FROM employees +| WHERE height > 2 +FROM /* Query the employees index */ employees +| WHERE height > 2 +FROM employees +/* Query the + * employees + * index */ +| WHERE height > 2 +Timespan literalsedit +Datetime intervals and timespans can be expressed using timespan literals. +Timespan literals are a combination of a number and a qualifier. These +qualifiers are supported: +`millisecond`/`milliseconds` +`second`/`seconds` +`minute`/`minutes` +`hour`/`hours` +`day`/`days` +`week`/`weeks` +`month`/`months` +`year`/`years` +Timespan literals are not whitespace sensitive. These expressions are all valid: +`1day` +`1 day` +`1 day` diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-tan.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-tan.txt new file mode 100644 index 0000000000000..63b752c8c6248 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-tan.txt @@ -0,0 +1,7 @@ +TAN + + +Tangent trigonometric function. Input expected in radians. +ROW a=1.8 +| EVAL tan=TAN(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-tanh.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-tanh.txt new file mode 100644 index 0000000000000..83493cec93984 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-tanh.txt @@ -0,0 +1,7 @@ +TANH + + +Tangent hyperbolic function. 
+ROW a=1.8 +| EVAL tanh=TANH(a) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-tau.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-tau.txt new file mode 100644 index 0000000000000..b60a1bde563d2 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-tau.txt @@ -0,0 +1,5 @@ +TAU + + +The ratio of a circle’s circumference to its radius. +ROW TAU() diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_boolean.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_boolean.txt new file mode 100644 index 0000000000000..a6259f436f076 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_boolean.txt @@ -0,0 +1,10 @@ +TO_BOOLEAN + +Converts an input value to a boolean value.The input can be a single- or multi-valued field or an expression. The input +type must be of a string or numeric type.A string value of "true" will be case-insensitive converted to the Boolean +true. For anything else, including the empty string, the function will +return false. For example: +ROW str = ["true", "TRuE", "false", "", "yes", "1"] +| EVAL bool = TO_BOOLEAN(str) +The numerical value of 0 will be converted to false, anything else will be +converted to true.Alias: TO_BOOL \ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_cartesianpoint.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_cartesianpoint.txt new file mode 100644 index 0000000000000..e50645f73fb45 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_cartesianpoint.txt @@ -0,0 +1,8 @@ +TO_CARTESIANPOINT + +Converts an input value to a point value.The input can be a single- or multi-valued field or an expression. 
+The input type must be a string or a cartesian point.A string will only be successfully converted if it respects the +WKT Point format: +row wkt = ["POINT(4297.11 -1475.53)", "POINT(7580.93 2272.77)"] +| mv_expand wkt +| eval pt = to_cartesianpoint(wkt) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_datetime.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_datetime.txt new file mode 100644 index 0000000000000..d985309c75c62 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_datetime.txt @@ -0,0 +1,17 @@ +TO_DATETIME + +Converts an input value to a date value.The input can be a single- or multi-valued field or an expression. The input +type must be of a string or numeric type.A string will only be successfully converted if it’s respecting the format +yyyy-MM-dd'T'HH:mm:ss.SSS'Z' (to convert dates in other formats, use DATE_PARSE). For example: +ROW string = ["1953-09-02T00:00:00.000Z", "1964-06-02T00:00:00.000Z", "1964-06-02 00:00:00"] +| EVAL datetime = TO_DATETIME(string) +Note that in this example, the last value in the source multi-valued +field has not been converted. The reason being that if the date format is not +respected, the conversion will result in a null value. When this happens a +Warning header is added to the response. The header will provide information +on the source of the failure:"Line 1:112: evaluation of [TO_DATETIME(string)] failed, treating result as null. Only first 20 failures recorded."A following header will contain the failure reason and the offending value:"java.lang.IllegalArgumentException: failed to parse date field [1964-06-02 00:00:00] with format [yyyy-MM-dd'T'HH:mm:ss.SSS'Z']"If the input parameter is of a numeric type, its value will be interpreted as +milliseconds since the Unix epoch. 
+For example: +ROW int = [0, 1] +| EVAL dt = TO_DATETIME(int) +Alias: TO_DT \ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_degrees.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_degrees.txt new file mode 100644 index 0000000000000..e6df993088477 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_degrees.txt @@ -0,0 +1,7 @@ +TO_DEGREES + +Converts a number in radians +to degrees.The input can be a single- or multi-valued field or an expression. The input +type must be of a numeric type and result is always double.Example: +ROW rad = [1.57, 3.14, 4.71] +| EVAL deg = TO_DEGREES(rad) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_double.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_double.txt new file mode 100644 index 0000000000000..2e1dd75654abe --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_double.txt @@ -0,0 +1,12 @@ +TO_DOUBLE + +Converts an input value to a double value.The input can be a single- or multi-valued field or an expression. The input +type must be of a boolean, date, string or numeric type.Example: +ROW str1 = "5.20128E11", str2 = "foo" +| EVAL dbl = TO_DOUBLE("520128000000"), dbl1 = TO_DOUBLE(str1), dbl2 = TO_DOUBLE(str2) +Note that in this example, the last conversion of the string isn’t +possible. When this happens, the result is a null value. In this case a +Warning header is added to the response. The header will provide information +on the source of the failure:"Line 1:115: evaluation of [TO_DOUBLE(str2)] failed, treating result as null. 
Only first 20 failures recorded."A following header will contain the failure reason and the offending value:"java.lang.NumberFormatException: For input string: \"foo\""If the input parameter is of a date type, its value will be interpreted as +milliseconds since the Unix epoch, +converted to double.Boolean true will be converted to double 1.0, false to 0.0.Alias: TO_DBL \ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_geopoint.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_geopoint.txt new file mode 100644 index 0000000000000..4aabac29f8c6d --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_geopoint.txt @@ -0,0 +1,7 @@ +TO_GEOPOINT + +Converts an input value to a geo_point value.The input can be a single- or multi-valued field or an expression. +The input type must be a string or a geo_point.A string will only be successfully converted if it respects the +WKT Point format: +row wkt = "POINT(42.97109630194 14.7552534413725)" +| eval pt = to_geopoint(wkt) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_integer.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_integer.txt new file mode 100644 index 0000000000000..eab5669791922 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_integer.txt @@ -0,0 +1,12 @@ +TO_INTEGER + +Converts an input value to an integer value.The input can be a single- or multi-valued field or an expression. The input +type must be of a boolean, date, string or numeric type.Example: +ROW long = [5013792, 2147483647, 501379200000] +| EVAL int = TO_INTEGER(long) +Note that in this example, the last value of the multi-valued field cannot +be converted as an integer. When this happens, the result is a null value. +In this case a Warning header is added to the response. 
The header will +provide information on the source of the failure:"Line 1:61: evaluation of [TO_INTEGER(long)] failed, treating result as null. Only first 20 failures recorded."A following header will contain the failure reason and the offending value:"org.elasticsearch.xpack.ql.QlIllegalArgumentException: [501379200000] out of [integer] range"If the input parameter is of a date type, its value will be interpreted as +milliseconds since the Unix epoch, +converted to integer.Boolean true will be converted to integer 1, false to 0.Alias: TO_INT \ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_ip.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_ip.txt new file mode 100644 index 0000000000000..0e8f2fd1274a1 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_ip.txt @@ -0,0 +1,10 @@ +TO_IP + +Converts an input string to an IP value.The input can be a single- or multi-valued field or an expression.Example: +ROW str1 = "1.1.1.1", str2 = "foo" +| EVAL ip1 = TO_IP(str1), ip2 = TO_IP(str2) +| WHERE CIDR_MATCH(ip1, "1.0.0.0/8") +Note that in the example above the last conversion of the string isn’t +possible. When this happens, the result is a null value. In this case a +Warning header is added to the response. The header will provide information +on the source of the failure:"Line 1:68: evaluation of [TO_IP(str2)] failed, treating result as null. Only first 20 failures recorded."A following header will contain the failure reason and the offending value:"java.lang.IllegalArgumentException: 'foo' is not an IP string literal." 
\ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_long.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_long.txt new file mode 100644 index 0000000000000..37a73209c7043 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_long.txt @@ -0,0 +1,12 @@ +TO_LONG + +Converts an input value to a long value.The input can be a single- or multi-valued field or an expression. The input +type must be of a boolean, date, string or numeric type.Example: +ROW str1 = "2147483648", str2 = "2147483648.2", str3 = "foo" +| EVAL long1 = TO_LONG(str1), long2 = TO_LONG(str2), long3 = TO_LONG(str3) +Note that in this example, the last conversion of the string isn’t +possible. When this happens, the result is a null value. In this case a +Warning header is added to the response. The header will provide information +on the source of the failure:"Line 1:113: evaluation of [TO_LONG(str3)] failed, treating result as null. Only first 20 failures recorded."A following header will contain the failure reason and the offending value:"java.lang.NumberFormatException: For input string: \"foo\""If the input parameter is of a date type, its value will be interpreted as +milliseconds since the Unix epoch, +converted to long.Boolean true will be converted to long 1, false to 0. \ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_radians.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_radians.txt new file mode 100644 index 0000000000000..d0118e1fa0271 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_radians.txt @@ -0,0 +1,7 @@ +TO_RADIANS + +Converts a number in degrees to +radians.The input can be a single- or multi-valued field or an expression. 
The input +type must be of a numeric type and result is always double.Example: +ROW deg = [90.0, 180.0, 270.0] +| EVAL rad = TO_RADIANS(deg) diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_string.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_string.txt new file mode 100644 index 0000000000000..4cb34acc5e054 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_string.txt @@ -0,0 +1,10 @@ +TO_STRING + + +Converts a field into a string. For example: +ROW a=10 +| EVAL j = TO_STRING(a) +It also works fine on multivalued fields: +ROW a=[10, 9, 8] +| EVAL j = TO_STRING(a) +Alias: TO_STRSupported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_unsigned_long.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_unsigned_long.txt new file mode 100644 index 0000000000000..ae42a582079b3 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_unsigned_long.txt @@ -0,0 +1,12 @@ +TO_UNSIGNED_LONG + +Converts an input value to an unsigned long value.The input can be a single- or multi-valued field or an expression. The input +type must be of a boolean, date, string or numeric type.Example: +ROW str1 = "2147483648", str2 = "2147483648.2", str3 = "foo" +| EVAL long1 = TO_UNSIGNED_LONG(str1), long2 = TO_ULONG(str2), long3 = TO_UL(str3) +Note that in this example, the last conversion of the string isn’t +possible. When this happens, the result is a null value. In this case a +Warning header is added to the response. The header will provide information +on the source of the failure:"Line 1:133: evaluation of [TO_UL(str3)] failed, treating result as null. 
Only first 20 failures recorded."A following header will contain the failure reason and the offending value:"java.lang.NumberFormatException: Character f is neither a decimal digit number, decimal point, nor \"e\" notation exponential mark."If the input parameter is of a date type, its value will be interpreted as +milliseconds since the Unix epoch, +converted to unsigned long.Boolean true will be converted to unsigned long 1, false to 0.Alias: TO_ULONG, TO_UL \ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_version.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_version.txt new file mode 100644 index 0000000000000..84ec4214c6279 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-to_version.txt @@ -0,0 +1,6 @@ +TO_VERSION + + +Converts an input string to a version value. For example: +ROW v = TO_VERSION("1.2.3") +The input can be a single- or multi-valued field or an expression.Alias: TO_VERSupported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-trim.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-trim.txt new file mode 100644 index 0000000000000..5a7e51b0aa2ff --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-trim.txt @@ -0,0 +1,8 @@ +TRIM + + +Removes leading and trailing whitespaces from strings. 
+ROW message = " some text ", color = " red " +| EVAL message = TRIM(message) +| EVAL color = TRIM(color) +Supported types: diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-where.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-where.txt new file mode 100644 index 0000000000000..f03e351e933f8 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/docs/esql-where.txt @@ -0,0 +1,51 @@ +WHERE + +Syntax +WHERE expression +Parameters +expression +A boolean expression. +DescriptionThe WHERE processing command produces a table that contains all the rows from +the input table for which the provided condition evaluates to true.Examples +FROM employees +| KEEP first_name, last_name, still_hired +| WHERE still_hired == true +Which, if still_hired is a boolean field, can be simplified to: +FROM employees +| KEEP first_name, last_name, still_hired +| WHERE still_hired +WHERE supports various functions. For example the +LENGTH function: +FROM employees +| KEEP first_name, last_name, height +| WHERE LENGTH(first_name) < 4 +For a complete list of all functions, refer to Functions and operators.For NULL comparison, use the IS NULL and IS NOT NULL predicates: +FROM employees +| WHERE birth_date IS NULL +| KEEP first_name, last_name +| SORT first_name +| LIMIT 3 +FROM employees +| WHERE is_rehired IS NOT NULL +| STATS COUNT(emp_no) +Use LIKE to filter data based on string patterns using wildcards. LIKE +usually acts on a field placed on the left-hand side of the operator, but it can +also act on a constant (literal) expression. The right-hand side of the operator +represents the pattern.The following wildcard characters are supported: +* matches zero or more characters. +? matches one character. +FROM employees +| WHERE first_name LIKE "?b*" +| KEEP first_name, last_name +Use RLIKE to filter data based on string patterns using +regular expressions.
RLIKE usually acts on a field placed on +the left-hand side of the operator, but it can also act on a constant (literal) +expression. The right-hand side of the operator represents the pattern. +FROM employees +| WHERE first_name RLIKE ".leja.*" +| KEEP first_name, last_name +The IN operator allows testing whether a field or expression equals +an element in a list of literals, fields or expressions: +ROW a = 1, b = 4, c = 3 +| WHERE c-a IN (3, b / 2, a) +For a complete list of all operators, refer to Operators. \ No newline at end of file diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/index.ts b/x-pack/plugins/observability_ai_assistant/server/functions/esql/index.ts new file mode 100644 index 0000000000000..dcc26adc52e43 --- /dev/null +++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/index.ts @@ -0,0 +1,305 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */
+
+import Fs from 'fs';
+import { keyBy, mapValues, once, pick } from 'lodash';
+import pLimit from 'p-limit';
+import Path from 'path';
+import { lastValueFrom, Observable } from 'rxjs';
+import { promisify } from 'util';
+import type { FunctionRegistrationParameters } from '..';
+import {
+  CreateChatCompletionResponseChunk,
+  FunctionVisibility,
+  MessageRole,
+} from '../../../common/types';
+import { concatenateOpenAiChunks } from '../../../common/utils/concatenate_openai_chunks';
+import { processOpenAiStream } from '../../../common/utils/process_openai_stream';
+import { streamIntoObservable } from '../../service/util/stream_into_observable';
+
+const readFile = promisify(Fs.readFile);
+const readdir = promisify(Fs.readdir);
+
+/**
+ * Delimiter the model is instructed (in the prompt below) to emit between the
+ * repeated directives and the actual answer.
+ */
+const PREAMBLE_DIVIDER = '--';
+
+/**
+ * Normalises a command/function name to the key format used by the docs
+ * lookup: trimmed, upper-cased, with both spellings of `STATS ... BY` folded
+ * into `STATS`.
+ */
+function normalizeKeyword(name: string): string {
+  const keyword = name.trim().toUpperCase();
+  return keyword === 'STATS_BY' || keyword === 'STATS ... BY' ? 'STATS' : keyword;
+}
+
+/** Returns the string entries of `value` when it is an array, otherwise an empty array. */
+function toStringArray(value: unknown): string[] {
+  return Array.isArray(value)
+    ? value.filter((item): item is string => typeof item === 'string')
+    : [];
+}
+
+/**
+ * Reads `system_message.txt` (located next to this module) as UTF-8.
+ * Wrapped in `once`, so the file is read a single time and the same promise
+ * is handed to every caller.
+ */
+const loadSystemMessage = once(async () => {
+  const data = await readFile(Path.join(__dirname, './system_message.txt'));
+  return data.toString('utf-8');
+});
+
+/**
+ * Reads every `.txt` file in `./docs` (at most 10 concurrently) and returns
+ * the contents keyed by keyword. The keyword is derived from the file name:
+ * `esql-` and `agg-` are removed, dashes become underscores and the result is
+ * upper-cased (`STATS_BY` is stored as `STATS`).
+ * Wrapped in `once`, so the directory is scanned a single time.
+ */
+const loadEsqlDocs = once(async () => {
+  const dir = Path.join(__dirname, './docs');
+  const files = (await readdir(dir)).filter((file) => Path.extname(file) === '.txt');
+
+  if (!files.length) {
+    return {};
+  }
+
+  const limiter = pLimit(10);
+  return keyBy(
+    await Promise.all(
+      files.map((file) =>
+        limiter(async () => {
+          const data = (await readFile(Path.join(dir, file))).toString('utf-8');
+          const filename = Path.basename(file, '.txt');
+
+          return {
+            keyword: normalizeKeyword(
+              filename.replace('esql-', '').replace('agg-', '').replaceAll('-', '_')
+            ),
+            data,
+          };
+        })
+      )
+    ),
+    'keyword'
+  );
+});
+
+/**
+ * Operator that hides everything the model streams up to and including the
+ * first `--` divider, then forwards the rest of the stream.
+ *
+ * - Text that follows the divider inside the same chunk is forwarded (it used
+ *   to be dropped together with the divider).
+ * - If the stream completes without a divider, the whole buffered content is
+ *   emitted as one synthetic chunk so the answer is not lost.
+ * - The upstream subscription is returned as teardown, so unsubscribing from
+ *   the result also unsubscribes from `source`.
+ */
+function stripPreamble(
+  source: Observable<CreateChatCompletionResponseChunk>
+): Observable<CreateChatCompletionResponseChunk> {
+  return new Observable<CreateChatCompletionResponseChunk>((subscriber) => {
+    let cachedContent = '';
+    let dividerSeen = false;
+
+    return source.subscribe({
+      next: (message) => {
+        if (dividerSeen) {
+          subscriber.next(message);
+          return;
+        }
+
+        cachedContent += message.choices[0].delta.content || '';
+
+        const dividerIndex = cachedContent.indexOf(PREAMBLE_DIVIDER);
+        if (dividerIndex === -1) {
+          return;
+        }
+
+        dividerSeen = true;
+
+        // Also swallow extra dashes directly after the divider (e.g. a
+        // Markdown `---` rule) so they do not leak into the answer.
+        const remainder = cachedContent
+          .slice(dividerIndex + PREAMBLE_DIVIDER.length)
+          .replace(/^-+/, '');
+
+        if (remainder) {
+          subscriber.next({
+            ...message,
+            choices: [
+              {
+                delta: {
+                  content: remainder,
+                },
+              },
+            ],
+          });
+        }
+      },
+      complete: () => {
+        if (!dividerSeen) {
+          subscriber.next({
+            created: 0,
+            id: '',
+            model: '',
+            object: 'chat.completion.chunk',
+            choices: [
+              {
+                delta: {
+                  content: cachedContent,
+                },
+              },
+            ],
+          });
+        }
+        subscriber.complete();
+      },
+      error: (error) => {
+        subscriber.error(error);
+      },
+    });
+  });
+}
+
+/**
+ * Registers the two ES|QL functions:
+ *
+ * - `execute_query` (user-visible): POSTs the given query to `_query` using
+ *   the current user's Elasticsearch client and returns the raw response.
+ * - `esql` (system): a two-step chat. The model is first forced to call
+ *   `get_esql_info` to name the commands/functions it needs; the matching
+ *   docs are then supplied in a second chat call whose streamed answer is
+ *   returned with the directive preamble stripped.
+ */
+export function registerEsqlFunction({
+  client,
+  registerFunction,
+  resources,
+}: FunctionRegistrationParameters) {
+  registerFunction(
+    {
+      name: 'execute_query',
+      contexts: ['core'],
+      visibility: FunctionVisibility.User,
+      description: 'Execute an ES|QL query',
+      parameters: {
+        type: 'object',
+        additionalProperties: false,
+        properties: {
+          query: {
+            type: 'string',
+          },
+        },
+        required: ['query'],
+      } as const,
+    },
+    async ({ arguments: { query } }) => {
+      const response = await (
+        await resources.context.core
+      ).elasticsearch.client.asCurrentUser.transport.request({
+        method: 'POST',
+        path: '_query',
+        body: {
+          query,
+        },
+      });
+
+      return { content: response };
+    }
+  );
+
+  registerFunction(
+    {
+      name: 'esql',
+      contexts: ['core'],
+      description: `This function answers ES|QL related questions including query generation and syntax/command questions.`,
+      visibility: FunctionVisibility.System,
+      parameters: {
+        type: 'object',
+        additionalProperties: false,
+        properties: {
+          switch: {
+            type: 'boolean',
+          },
+        },
+      } as const,
+    },
+    async ({ messages, connectorId }, signal) => {
+      const [systemMessage, esqlDocs] = await Promise.all([loadSystemMessage(), loadEsqlDocs()]);
+
+      // Replaces the first message of the conversation with the ES|QL system
+      // message (presumably the first message is the generic system message;
+      // verify against the caller).
+      const withEsqlSystemMessage = (message?: string) => [
+        {
+          '@timestamp': new Date().toISOString(),
+          message: { role: MessageRole.System, content: `${systemMessage}\n${message ?? ''}` },
+        },
+        ...messages.slice(1),
+      ];
+
+      // Step 1: force a `get_esql_info` call so the model names the docs it needs.
+      const source$ = streamIntoObservable(
+        await client.chat({
+          connectorId,
+          messages: withEsqlSystemMessage(),
+          signal,
+          stream: true,
+          functions: [
+            {
+              name: 'get_esql_info',
+              description:
+                'Use this function to get more information about syntax, commands and examples. Take a deep breath and reason about what commands and functions you expect to use. Do you need to group data? Request `STATS`. Extract data? Request `DISSECT` AND `GROK`. Convert a column based on a set of conditionals? Request `EVAL` and `CASE`.',
+              parameters: {
+                type: 'object',
+                properties: {
+                  commands: {
+                    type: 'array',
+                    items: {
+                      type: 'string',
+                    },
+                    description: 'A list of processing or source commands',
+                  },
+                  functions: {
+                    type: 'array',
+                    items: {
+                      type: 'string',
+                    },
+                    description: 'A list of functions.',
+                  },
+                },
+                required: ['commands', 'functions'],
+              },
+            },
+          ],
+          functionCall: 'get_esql_info',
+        })
+      ).pipe(processOpenAiStream(), concatenateOpenAiChunks());
+
+      const response = await lastValueFrom(source$);
+
+      // The arguments are model output: tolerate a missing or malformed list
+      // instead of throwing on `.concat` of `undefined`.
+      const parsedArgs = JSON.parse(response.message.function_call.arguments) as {
+        commands?: unknown;
+        functions?: unknown;
+      } | null;
+
+      const args = {
+        commands: toStringArray(parsedArgs?.commands),
+        functions: toStringArray(parsedArgs?.functions),
+      };
+
+      // Docs are keyed by normalised keyword, so normalise the request the
+      // same way; SYNTAX and OVERVIEW are always included.
+      const keywords = [...args.commands, ...args.functions, 'SYNTAX', 'OVERVIEW'].map(
+        normalizeKeyword
+      );
+
+      const messagesToInclude = mapValues(pick(esqlDocs, keywords), ({ data }) => data);
+
+      // Step 2: answer with the requested documentation attached.
+      // NOTE(review): in this view the fenced example and the `#1:`/`#2:`/`#3:`
+      // lines of the prompt are empty; they may have carried angle-bracket
+      // placeholders - confirm against the original file.
+      const esqlResponse$: Observable<CreateChatCompletionResponseChunk> = streamIntoObservable(
+        await client.chat({
+          messages: [
+            ...withEsqlSystemMessage(
+              `Format every ES|QL query as Markdown:
+              \`\`\`esql
+
+              \`\`\`
+
+              Prefer to use commands and functions for which you have documentation.
+
+              Pay special attention to these instructions. Not following these instructions to the tee
+              will lead to excruciating consequences for the user.
+
+              #1
+              Directive: ONLY use aggregation functions in STATS commands, and use ONLY aggregation functions in stats commands, NOT in SORT or EVAL.
+              Rationale: Only aggregation functions are supported in STATS commands, and aggregation functions are only supported in STATS commands.
+              Action: Create new columns using EVAL first and then aggregate over them in STATS commands. Do not use aggregation functions anywhere else, such as SORT or EVAL.
+              Example: EVAL is_failure_as_number = CASE(event.outcome == "failure", 1, 0) | STATS total_failures = SUM(is_failure_as_number) BY my_grouping_name
+
+              #2
+              Directive: Use the \`=\` operator to create new columns in STATS and EVAL, DO NOT UNDER ANY CIRCUMSTANCES use \`AS\`.
+              Rationale: The \`=\` operator is used for aliasing. Using \`AS\` leads to syntax errors.
+              Action: When creating a new column in a command, use the = operator.
+              Example: STATS total_requests = COUNT(*)
+
+              #3
+              Directive: Use placeholder values for information that is missing.
+              Rationale: It is critical to generate a syntactically valid query.
+              Action: When you don't know the arguments to a function because information is missing, use placeholder values.
+              Example: "Here's an ES|QL query that generates a timeseries of 50 buckets calculating the average duration. I've used
+              "2023-01-23T12:15:00.000Z" and "2023-01-23T12:30:00.000Z" as placeholder values. Replace them with the start
+              and end date that work for your use case."
+
+              #4
+              Directive: Wrap string literals in double quotes.
+              Rationale: It is critical to generate a syntactically valid query.
+              Action: When using string literals in function calls, wrap them in double quotes, not single quotes.
+              Example: DATE_EXTRACT("year", @timestamp)
+
+              At the start of every message, YOU MUST, for every instruction that is relevant to the query you want to construct,
+              repeat its directives, verbatim, at the start of every message. Exclude the rationales, actions, and examples. Follow
+              it up by using a delimiter: --
+
+              Example:
+
+              #1:
+              #2:
+              #3:
+
+              --
+
+              Here is an ES|QL query that you can use:
+
+              `
+            ),
+            {
+              '@timestamp': new Date().toISOString(),
+              message: {
+                role: MessageRole.Assistant,
+                content: '',
+                function_call: {
+                  name: 'get_esql_info',
+                  arguments: JSON.stringify(args),
+                  trigger: MessageRole.Assistant as const,
+                },
+              },
+            },
+            {
+              '@timestamp': new Date().toISOString(),
+              message: {
+                role: MessageRole.User,
+                name: 'get_esql_info',
+                content: JSON.stringify({
+                  documentation: messagesToInclude,
+                }),
+              },
+            },
+          ],
+          connectorId,
+          functions: [],
+          signal,
+          stream: true,
+        })
+      ).pipe(processOpenAiStream());
+
+      return esqlResponse$.pipe(stripPreamble);
+    }
+  );
+}
diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/esql/system_message.txt b/x-pack/plugins/observability_ai_assistant/server/functions/esql/system_message.txt
new file mode 100644
index 0000000000000..7dca13c490180
--- /dev/null
+++ b/x-pack/plugins/observability_ai_assistant/server/functions/esql/system_message.txt
@@ -0,0 +1,220 @@
+You are a helpful assistant for generating and executing ES|QL queries.
+Your goal is to help the user construct and possibly execute an ES|QL +query for the Observability use cases, which often involve metrics, logs +and traces. + +ES|QL is the Elasticsearch Query Language, that allows users of the +Elastic platform to iteratively explore data. An ES|QL query consists +of a series of commands, separated by pipes. Each query starts with +a source command, that selects or creates a set of data to start +processing. This source command is then followed by one or more +processing commands, which can transform the data returned by the +previous command. + +ES|QL is not Elasticsearch SQL, nor is it anything like SQL. SQL +commands are not available in ES|QL. Make sure you write a query +using ONLY commands specified in this conversation. + +# Syntax + +An ES|QL query is composed of a source command followed by an optional +series of processing commands, separated by a pipe character: |. For +example: + + | + | + +Binary operators: ==, !=, <, <=, >, >=. +Logical operators are supported: AND, OR, NOT +Predicates: IS NULL, IS NOT NULL +Timestamp literal syntax: NOW() - 15 days, 24 hours, 1 week + +## Source commands + +Source commands select a data source. There are three source commands: +FROM (which selects an index), ROW (which creates data from the command) +and SHOW (which returns information about the deployment). + +## Processing commands + +ES|QL processing commands change an input table by adding, removing, or +changing rows and columns. The following commands are available: + +- DISSECT: extracts structured data out of a string, using a dissect +pattern. +- DROP: drops one or more columns +- ENRICH: adds data from existing indices as new columns +- EVAL: adds a new column with calculated values. 
Supported functions for + EVAL are: + - Mathematical functions + - String functions + - Date-time functions + - Type conversion functions + - Conditional functions and expressions + - Multi-value functions +Aggregation functions are not supported for EVAL. +- GROK: extracts structured data out of a string, using a grok pattern +- KEEP: keeps one or more columns, drop the ones that are not kept +- LIMIT: returns the first n number of rows. The maximum value for this +is 10000. +- MV_EXPAND: expands multi-value columns into a single row per value +- RENAME: renames a column +- SORT: sorts the rows in a table +- STATS ... BY: groups rows according to a common value and calculates +one or more aggregated values over the grouped rows. This command only + supports aggregation functions, and no other functions or operators. +- WHERE: produces a table that contains all the rows from the input table + for which the provided condition returns true. WHERE supports the same + functions as EVAL. + +## Functions and operators + +### Aggregation functions +- AVG +- COUNT +- COUNT_DISTINCT +- MAX +- MEDIAN +- MEDIAN_ABSOLUTE_DEVIATION +- MIN +- PERCENTILE +- SUM + +### Mathematical functions + +- ABS +- ACOS +- ASIN +- ATAN +- ATAN2 +- CEIL +- COS +- COSH +- E +- FLOOR +- LOG10 +- PI +- POW +- ROUND +- SIN +- SINH +- SQRT +- TAN +- TANH +- TAU + +### String functions +- CONCAT +- LEFT +- LENGTH +- LTRIM +- REPLACE +- RIGHT +- RTRIM +- SPLIT +- SUBSTRING +- TRIM + +### Date-time functions +- AUTO_BUCKET +- DATE_EXTRACT +- DATE_FORMAT +- DATE_PARSE +- DATE_TRUNC +- NOW + +### Type conversion functions +- TO_BOOLEAN +- TO_DATETIME +- TO_DEGREES +- TO_DOUBLE +- TO_INTEGER +- TO_IP +- TO_LONG +- TO_RADIANS +- TO_STRING +- TO_UNSIGNED_LONG +- TO_VERSION + +### Conditional functions and expressions +- CASE +- COALESCE +- GREATEST +- LEAST + +### Multivalue functions +- MV_AVG +- MV_CONCAT +- MV_COUNT +- MV_DEDUPE +- MV_MAX +- MV_MEDIAN +- MV_MIN +- MV_SUM + +### Operators +- Binary
operators +- Logical operators +- IS NULL and IS NOT NULL predicates +- CIDR_MATCH +- ENDS_WITH +- IN +- IS_FINITE +- IS_INFINITE +- IS_NAN +- LIKE +- RLIKE +- STARTS_WITH + +Here are some example queries: + +FROM employees +| WHERE still_hired == true +| EVAL hired = DATE_FORMAT("YYYY", hire_date) +| STATS avg_salary = AVG(salary) BY languages +| EVAL avg_salary = ROUND(avg_salary) +| EVAL lang_code = TO_STRING(languages) +| ENRICH languages_policy ON lang_code WITH lang = language_name +| WHERE lang IS NOT NULL +| KEEP avg_salary, lang +| SORT avg_salary ASC +| LIMIT 3 + +FROM employees + | EVAL trunk_worked_seconds = avg_worked_seconds / 100000000 * 100000000 + | STATS c = count(languages.long) BY languages.long, trunk_worked_seconds + | SORT c desc, languages.long, trunk_worked_seconds + +ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" +| DISSECT a "%{date} - %{msg} - %{ip}" +| KEEP date, msg, ip +| EVAL date = TO_DATETIME(date) + +FROM employees +| WHERE first_name LIKE "?b*" +| KEEP first_name, last_name + +FROM employees +| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" +| EVAL bucket = AUTO_BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z") +| STATS AVG(salary) BY bucket +| SORT bucket + +ROW a = 1, b = "two", c = null + +FROM employees +| EVAL is_recent_hire = CASE(hire_date <= "2023-01-01T00:00:00Z", 1, 0) +| STATS total_recent_hires = SUM(is_recent_hire), total_hires = COUNT(*) BY country +| EVAL recent_hiring_rate = total_recent_hires / total_hires + +FROM logs-* +| WHERE @timestamp <= NOW() - 24 hours +// divide data in 1 hour buckets +| EVAL bucket = DATE_TRUNC(1 hour, @timestamp) +// convert a keyword field into a numeric field to aggregate over it +| EVAL is_5xx = CASE(http.response.status_code >= 500, 1, 0) +// count total events and failed events to calculate a rate +| STATS total_events = COUNT(*), total_failures = SUM(is_5xx) BY host.hostname, bucket +| EVAL failure_rate_per_host = 
total_failures / total_events +| DROP total_events, total_failures + diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/get_dataset_info.ts b/x-pack/plugins/observability_ai_assistant/server/functions/get_dataset_info.ts index bd48e1bda2f05..7cb35e09f78e4 100644 --- a/x-pack/plugins/observability_ai_assistant/server/functions/get_dataset_info.ts +++ b/x-pack/plugins/observability_ai_assistant/server/functions/get_dataset_info.ts @@ -63,15 +63,19 @@ export function registerGetDatasetInfoFunction({ if (index === '') { return { - indices, - fields: [], + content: { + indices, + fields: [], + }, }; } if (indices.length === 0) { return { - indices, - fields: [], + content: { + indices, + fields: [], + }, }; } diff --git a/x-pack/plugins/observability_ai_assistant/server/functions/index.ts b/x-pack/plugins/observability_ai_assistant/server/functions/index.ts index 6300852cbc064..b25e69c53689c 100644 --- a/x-pack/plugins/observability_ai_assistant/server/functions/index.ts +++ b/x-pack/plugins/observability_ai_assistant/server/functions/index.ts @@ -54,18 +54,22 @@ export const registerFunctions: ChatRegistrationFunction = async ({ If a function call fails, DO NOT UNDER ANY CIRCUMSTANCES execute it again. Ask the user for guidance and offer them options. Note that ES|QL (the Elasticsearch query language, which is NOT Elasticsearch SQL, but a new piped language) is the preferred query language. + + Use the "get_dataset_info" function if it is not clear what fields or indices the user means, or if you want to get more information about the mappings. - If the user asks about a query, or ES|QL, always call the "esql" function. DO NOT UNDER ANY CIRCUMSTANCES generate ES|QL queries yourself. Even if the "recall" function was used before that, follow it up with the "esql" function.` + If the user asks about a query, or ES|QL, always call the "esql" function. 
DO NOT UNDER ANY CIRCUMSTANCES generate ES|QL queries or explain anything about the ES|QL query language yourself. + Even if the "recall" function was used before that, follow it up with the "esql" function. If a query fails, do not attempt to correct it yourself. Again you should call the "esql" function, + even if it has been called before. + + If the "get_dataset_info" function returns no data, and the user asks for a query, generate a query anyway with the "esql" function, but be explicit about it potentially being incorrect. + ` ); if (isReady) { description += `You can use the "summarize" functions to store new information you have learned in a knowledge database. Once you have established that you did not know the answer to a question, and the user gave you this information, it's important that you create a summarisation of what you have learned and store it in the knowledge database. Don't create a new summarization if you see a similar summarization in the conversation, instead, update the existing one by re-using its ID. Additionally, you can use the "recall" function to retrieve relevant information from the knowledge database. - `; - description += `Here are principles you MUST adhere to, in order: - - DO NOT make any assumptions about where and how users have stored their data. ALWAYS first call get_dataset_info function with empty string to get information about available indices. Once you know about available indices you MUST use this function again to get a list of available fields for specific index. If user provides an index name make sure its a valid index first before using it to retrieve the field list by calling this function with an empty string! 
`; registerSummarizationFunction(registrationParameters); diff --git a/yarn.lock b/yarn.lock index 35bd058e4bc4b..ed4360edcf41d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8887,6 +8887,13 @@ resolved "https://registry.yarnpkg.com/@types/classnames/-/classnames-2.2.9.tgz#d868b6febb02666330410fe7f58f3c4b8258be7b" integrity sha512-MNl+rT5UmZeilaPxAVs6YaPC2m6aA8rofviZbhbxpPpl61uKodfdQVsBtgJGTqGizEf02oW3tsVe7FYB8kK14A== +"@types/cli-progress@^3.11.5": + version "3.11.5" + resolved "https://registry.yarnpkg.com/@types/cli-progress/-/cli-progress-3.11.5.tgz#9518c745e78557efda057e3f96a5990c717268c3" + integrity sha512-D4PbNRbviKyppS5ivBGyFO29POlySLmA2HyUFE4p5QGazAMM3CwkKWcvTl8gvElSuxRh6FPKL8XmidX873ou4g== + dependencies: + "@types/node" "*" + "@types/clone@~2.1.1": version "2.1.1" resolved "https://registry.yarnpkg.com/@types/clone/-/clone-2.1.1.tgz#9b880d0ce9b1f209b5e0bd6d9caa38209db34024" @@ -12887,7 +12894,7 @@ cheerio-select@^2.1.0: domhandler "^5.0.3" domutils "^3.0.1" -cheerio@^1.0.0-rc.10, cheerio@^1.0.0-rc.3: +cheerio@^1.0.0-rc.12, cheerio@^1.0.0-rc.3: version "1.0.0-rc.12" resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683" integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q== @@ -13046,10 +13053,17 @@ cli-cursor@^3.1.0: dependencies: restore-cursor "^3.1.0" +cli-progress@^3.12.0: + version "3.12.0" + resolved "https://registry.yarnpkg.com/cli-progress/-/cli-progress-3.12.0.tgz#807ee14b66bcc086258e444ad0f19e7d42577942" + integrity sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A== + dependencies: + string-width "^4.2.3" + cli-spinners@^2.2.0, cli-spinners@^2.5.0: - version "2.5.0" - resolved "https://registry.yarnpkg.com/cli-spinners/-/cli-spinners-2.5.0.tgz#12763e47251bf951cb75c201dfa58ff1bcb2d047" - integrity sha512-PC+AmIuK04E6aeSs/pUccSujsTzBhu4HzC2dL+CfJB/Jcc2qTRbEwZQDfIUpt2Xl8BodYBEq8w4fc0kU2I9DjQ== + 
version "2.9.2" + resolved "https://registry.yarnpkg.com/cli-spinners/-/cli-spinners-2.9.2.tgz#1773a8f4b9c4d6ac31563df53b3fc1d79462fe41" + integrity sha512-ywqV+5MmyL4E7ybXgKys4DugZbX0FC6LnwrhjuykIjnK9k8OQacQ7axGKnjDXWNhns0xot3bZI5h55H8yo9cJg== cli-table3@^0.6.1, cli-table3@~0.6.1: version "0.6.1"