diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/docs_from_directory_loader.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/docs_from_directory_loader.ts new file mode 100644 index 0000000000000..71100d68b73ad --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/docs_from_directory_loader.ts @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { Document } from 'langchain/document'; + +/** + * Mock LangChain `Document`s from `knowledge_base/esql/docs`, loaded from a LangChain `DirectoryLoader` + */ +export const mockEsqlDocsFromDirectoryLoader: Document[] = [ + { + pageContent: + '[[esql-agg-avg]]\n=== `AVG`\nThe average of a numeric field.\n\n[source.merge.styled,esql]\n----\ninclude::{esql-specs}/stats.csv-spec[tag=avg]\n----\n[%header.monospaced.styled,format=dsv,separator=|]\n|===\ninclude::{esql-specs}/stats.csv-spec[tag=avg-result]\n|===\n\nThe result is always a `double` not matter the input type.\n', + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/docs/aggregation_functions/avg.asciidoc', + }, + }, +]; + +/** + * Mock LangChain `Document`s from `knowledge_base/esql/language_definition`, loaded from a LangChain `DirectoryLoader` + */ +export const mockEsqlLanguageDocsFromDirectoryLoader: Document[] = [ + { + pageContent: + "lexer grammar EsqlBaseLexer;\n\nDISSECT : 'dissect' -> pushMode(EXPRESSION);\nDROP : 'drop' -> pushMode(SOURCE_IDENTIFIERS);\nENRICH : 'enrich' -> pushMode(SOURCE_IDENTIFIERS);\nEVAL : 'eval' -> pushMode(EXPRESSION);\nEXPLAIN : 'explain' -> pushMode(EXPLAIN_MODE);\nFROM : 'from' -> pushMode(SOURCE_IDENTIFIERS);\nGROK : 'grok' -> pushMode(EXPRESSION);\nINLINESTATS : 
'inlinestats' -> pushMode(EXPRESSION);\nKEEP : 'keep' -> pushMode(SOURCE_IDENTIFIERS);\nLIMIT : 'limit' -> pushMode(EXPRESSION);\nMV_EXPAND : 'mv_expand' -> pushMode(SOURCE_IDENTIFIERS);\nPROJECT : 'project' -> pushMode(SOURCE_IDENTIFIERS);\nRENAME : 'rename' -> pushMode(SOURCE_IDENTIFIERS);\nROW : 'row' -> pushMode(EXPRESSION);\nSHOW : 'show' -> pushMode(EXPRESSION);\nSORT : 'sort' -> pushMode(EXPRESSION);\nSTATS : 'stats' -> pushMode(EXPRESSION);\nWHERE : 'where' -> pushMode(EXPRESSION);\nUNKNOWN_CMD : ~[ \\r\\n\\t[\\]/]+ -> pushMode(EXPRESSION);\n\nLINE_COMMENT\n : '//' ~[\\r\\n]* '\\r'? '\\n'? -> channel(HIDDEN)\n ;\n\nMULTILINE_COMMENT\n : '/*' (MULTILINE_COMMENT|.)*? '*/' -> channel(HIDDEN)\n ;\n\nWS\n : [ \\r\\n\\t]+ -> channel(HIDDEN)\n ;\n\n\nmode EXPLAIN_MODE;\nEXPLAIN_OPENING_BRACKET : '[' -> type(OPENING_BRACKET), pushMode(DEFAULT_MODE);\nEXPLAIN_PIPE : '|' -> type(PIPE), popMode;\nEXPLAIN_WS : WS -> channel(HIDDEN);\nEXPLAIN_LINE_COMMENT : LINE_COMMENT -> channel(HIDDEN);\nEXPLAIN_MULTILINE_COMMENT : MULTILINE_COMMENT -> channel(HIDDEN);\n\nmode EXPRESSION;\n\nPIPE : '|' -> popMode;\n\nfragment DIGIT\n : [0-9]\n ;\n\nfragment LETTER\n : [A-Za-z]\n ;\n\nfragment ESCAPE_SEQUENCE\n : '\\\\' [tnr\"\\\\]\n ;\n\nfragment UNESCAPED_CHARS\n : ~[\\r\\n\"\\\\]\n ;\n\nfragment EXPONENT\n : [Ee] [+-]? DIGIT+\n ;\n\nSTRING\n : '\"' (ESCAPE_SEQUENCE | UNESCAPED_CHARS)* '\"'\n | '\"\"\"' (~[\\r\\n])*? '\"\"\"' '\"'? '\"'?\n ;\n\nINTEGER_LITERAL\n : DIGIT+\n ;\n\nDECIMAL_LITERAL\n : DIGIT+ DOT DIGIT*\n | DOT DIGIT+\n | DIGIT+ (DOT DIGIT*)? 
EXPONENT\n | DOT DIGIT+ EXPONENT\n ;\n\nBY : 'by';\n\nAND : 'and';\nASC : 'asc';\nASSIGN : '=';\nCOMMA : ',';\nDESC : 'desc';\nDOT : '.';\nFALSE : 'false';\nFIRST : 'first';\nLAST : 'last';\nLP : '(';\nIN: 'in';\nIS: 'is';\nLIKE: 'like';\nNOT : 'not';\nNULL : 'null';\nNULLS : 'nulls';\nOR : 'or';\nPARAM: '?';\nRLIKE: 'rlike';\nRP : ')';\nTRUE : 'true';\nINFO : 'info';\nFUNCTIONS : 'functions';\n\nEQ : '==';\nNEQ : '!=';\nLT : '<';\nLTE : '<=';\nGT : '>';\nGTE : '>=';\n\nPLUS : '+';\nMINUS : '-';\nASTERISK : '*';\nSLASH : '/';\nPERCENT : '%';\n\n// Brackets are funny. We can happen upon a CLOSING_BRACKET in two ways - one\n// way is to start in an explain command which then shifts us to expression\n// mode. Thus, the two popModes on CLOSING_BRACKET. The other way could as\n// the start of a multivalued field constant. To line up with the double pop\n// the explain mode needs, we double push when we see that.\nOPENING_BRACKET : '[' -> pushMode(EXPRESSION), pushMode(EXPRESSION);\nCLOSING_BRACKET : ']' -> popMode, popMode;\n\n\nUNQUOTED_IDENTIFIER\n : LETTER (LETTER | DIGIT | '_')*\n // only allow @ at beginning of identifier to keep the option to allow @ as infix operator in the future\n // also, single `_` and `@` characters are not valid identifiers\n | ('_' | '@') (LETTER | DIGIT | '_')+\n ;\n\nQUOTED_IDENTIFIER\n : '`' ( ~'`' | '``' )* '`'\n ;\n\nEXPR_LINE_COMMENT\n : LINE_COMMENT -> channel(HIDDEN)\n ;\n\nEXPR_MULTILINE_COMMENT\n : MULTILINE_COMMENT -> channel(HIDDEN)\n ;\n\nEXPR_WS\n : WS -> channel(HIDDEN)\n ;\n\n\n\nmode SOURCE_IDENTIFIERS;\n\nSRC_PIPE : '|' -> type(PIPE), popMode;\nSRC_OPENING_BRACKET : '[' -> type(OPENING_BRACKET), pushMode(SOURCE_IDENTIFIERS), pushMode(SOURCE_IDENTIFIERS);\nSRC_CLOSING_BRACKET : ']' -> popMode, popMode, type(CLOSING_BRACKET);\nSRC_COMMA : ',' -> type(COMMA);\nSRC_ASSIGN : '=' -> type(ASSIGN);\nAS : 'as';\nMETADATA: 'metadata';\nON : 'on';\nWITH : 'with';\n\nSRC_UNQUOTED_IDENTIFIER\n : SRC_UNQUOTED_IDENTIFIER_PART+\n 
;\n\nfragment SRC_UNQUOTED_IDENTIFIER_PART\n : ~[=`|,[\\]/ \\t\\r\\n]+\n | '/' ~[*/] // allow single / but not followed by another / or * which would start a comment\n ;\n\nSRC_QUOTED_IDENTIFIER\n : QUOTED_IDENTIFIER\n ;\n\nSRC_LINE_COMMENT\n : LINE_COMMENT -> channel(HIDDEN)\n ;\n\nSRC_MULTILINE_COMMENT\n : MULTILINE_COMMENT -> channel(HIDDEN)\n ;\n\nSRC_WS\n : WS -> channel(HIDDEN)\n ;\n", + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/language_definition/esql_base_lexer.g4', + }, + }, + { + pageContent: + "DISSECT=1\nDROP=2\nENRICH=3\nEVAL=4\nEXPLAIN=5\nFROM=6\nGROK=7\nINLINESTATS=8\nKEEP=9\nLIMIT=10\nMV_EXPAND=11\nPROJECT=12\nRENAME=13\nROW=14\nSHOW=15\nSORT=16\nSTATS=17\nWHERE=18\nUNKNOWN_CMD=19\nLINE_COMMENT=20\nMULTILINE_COMMENT=21\nWS=22\nEXPLAIN_WS=23\nEXPLAIN_LINE_COMMENT=24\nEXPLAIN_MULTILINE_COMMENT=25\nPIPE=26\nSTRING=27\nINTEGER_LITERAL=28\nDECIMAL_LITERAL=29\nBY=30\nAND=31\nASC=32\nASSIGN=33\nCOMMA=34\nDESC=35\nDOT=36\nFALSE=37\nFIRST=38\nLAST=39\nLP=40\nIN=41\nIS=42\nLIKE=43\nNOT=44\nNULL=45\nNULLS=46\nOR=47\nPARAM=48\nRLIKE=49\nRP=50\nTRUE=51\nINFO=52\nFUNCTIONS=53\nEQ=54\nNEQ=55\nLT=56\nLTE=57\nGT=58\nGTE=59\nPLUS=60\nMINUS=61\nASTERISK=62\nSLASH=63\nPERCENT=64\nOPENING_BRACKET=65\nCLOSING_BRACKET=66\nUNQUOTED_IDENTIFIER=67\nQUOTED_IDENTIFIER=68\nEXPR_LINE_COMMENT=69\nEXPR_MULTILINE_COMMENT=70\nEXPR_WS=71\nAS=72\nMETADATA=73\nON=74\nWITH=75\nSRC_UNQUOTED_IDENTIFIER=76\nSRC_QUOTED_IDENTIFIER=77\nSRC_LINE_COMMENT=78\nSRC_MULTILINE_COMMENT=79\nSRC_WS=80\nEXPLAIN_PIPE=81\n'dissect'=1\n'drop'=2\n'enrich'=3\n'eval'=4\n'explain'=5\n'from'=6\n'grok'=7\n'inlinestats'=8\n'keep'=9\n'limit'=10\n'mv_expand'=11\n'project'=12\n'rename'=13\n'row'=14\n'show'=15\n'sort'=16\n'stats'=17\n'where'=18\n'by'=30\n'and'=31\n'asc'=32\n'desc'=35\n'.'=36\n'false'=37\n'first'=38\n'last'=39\n'('=40\n'in'=41\n'is'=42\n'like'=43\n'not'=44\n'null'=45\n'nulls'=46\n'or'=47\n'?'=48\n
'rlike'=49\n')'=50\n'true'=51\n'info'=52\n'functions'=53\n'=='=54\n'!='=55\n'<'=56\n'<='=57\n'>'=58\n'>='=59\n'+'=60\n'-'=61\n'*'=62\n'/'=63\n'%'=64\n']'=66\n'as'=72\n'metadata'=73\n'on'=74\n'with'=75\n", + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/language_definition/esql_base_lexer.tokens', + }, + }, +]; + +/** + * Mock LangChain `Document`s from `knowledge_base/esql/example_queries`, loaded from a LangChain `DirectoryLoader` + */ +export const mockExampleQueryDocsFromDirectoryLoader: Document[] = [ + { + pageContent: + '[[esql-example-queries]]\n\nThe following is an example an ES|QL query:\n\n```\nFROM logs-*\n| WHERE NOT CIDR_MATCH(destination.ip, "10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")\n| STATS destcount = COUNT(destination.ip) by user.name, host.name\n| ENRICH ldap_lookup_new ON user.name\n| WHERE group.name IS NOT NULL\n| EVAL follow_up = CASE(\n destcount >= 100, "true",\n "false")\n| SORT destcount desc\n| KEEP destcount, host.name, user.name, group.name, follow_up\n```\n', + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0001.asciidoc', + }, + }, + { + pageContent: + '[[esql-example-queries]]\n\nThe following is an example an ES|QL query:\n\n```\nfrom logs-*\n| grok dns.question.name "%{DATA}\\\\.%{GREEDYDATA:dns.question.registered_domain:string}"\n| stats unique_queries = count_distinct(dns.question.name) by dns.question.registered_domain, process.name\n| where unique_queries > 5\n| sort unique_queries desc\n```\n', + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0002.asciidoc', + }, + }, + { + pageContent: + '[[esql-example-queries]]\n\nThe following is an example an 
ES|QL query:\n\n```\nfrom logs-*\n| where event.code is not null\n| stats event_code_count = count(event.code) by event.code,host.name\n| enrich win_events on event.code with EVENT_DESCRIPTION\n| where EVENT_DESCRIPTION is not null and host.name is not null\n| rename EVENT_DESCRIPTION as event.description\n| sort event_code_count desc\n| keep event_code_count,event.code,host.name,event.description\n```\n', + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0003.asciidoc', + }, + }, +]; diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts new file mode 100644 index 0000000000000..a52e9faed130c --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_query.ts @@ -0,0 +1,75 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { QueryDslTextExpansionQuery } from '@elastic/elasticsearch/lib/api/types'; + +import type { MsearchQueryBody } from '../lib/langchain/elasticsearch_store/helpers/get_msearch_query_body'; + +/** + * This mock Elasticsearch msearch request body contains two queries: + * - The first query is a similarity (vector) search + * - The second query is a required KB document (terms) search + */ +export const mSearchQueryBody: MsearchQueryBody = { + body: [ + { + index: '.kibana-elastic-ai-assistant-kb', + }, + { + query: { + bool: { + must_not: [ + { + term: { + 'metadata.kbResource': 'esql', + }, + }, + { + term: { + 'metadata.required': true, + }, + }, + ], + must: [ + { + text_expansion: { + 'vector.tokens': { + model_id: '.elser_model_2', + model_text: + 'Generate an ESQL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". The user names should also be enriched with their respective group names.', + }, + } as unknown as QueryDslTextExpansionQuery, + }, + ], + }, + }, + size: 1, + }, + { + index: '.kibana-elastic-ai-assistant-kb', + }, + { + query: { + bool: { + must: [ + { + term: { + 'metadata.kbResource': 'esql', + }, + }, + { + term: { + 'metadata.required': true, + }, + }, + ], + }, + }, + size: 1, + }, + ], +}; diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_response.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_response.ts new file mode 100644 index 0000000000000..f281140b34b38 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/msearch_response.ts @@ -0,0 +1,101 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { MsearchResponse } from '@elastic/elasticsearch/lib/api/types'; + +/** + * This mock response from an Elasticsearch msearch contains two hits, where + * the first hit is from a similarity (vector) search, and the second hit is a + * required KB document (terms) search. + */ +export const mockMsearchResponse: MsearchResponse = { + took: 142, + responses: [ + { + took: 142, + timed_out: false, + _shards: { + total: 1, + successful: 1, + skipped: 0, + failed: 0, + }, + hits: { + total: { + value: 129, + relation: 'eq', + }, + max_score: 21.658352, + hits: [ + { + _index: '.kibana-elastic-ai-assistant-kb', + _id: 'fa1c8ba1-25c9-4404-9736-09b7eb7124f8', + _score: 21.658352, + _ignored: ['text.keyword'], + _source: { + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/docs/source_commands/from.asciidoc', + }, + vector: { + tokens: { + wild: 1.2001507, + // truncated for mock + }, + model_id: '.elser_model_2', + }, + text: "[[esql-from]]\n=== `FROM`\n\nThe `FROM` source command returns a table with up to 10,000 documents from a\ndata stream, index, or alias. Each row in the resulting table represents a\ndocument. Each column corresponds to a field, and can be accessed by the name\nof that field.\n\n[source,esql]\n----\nFROM employees\n----\n\nYou can use <> to refer to indices, aliases\nand data streams. 
This can be useful for time series data, for example to access\ntoday's index:\n\n[source,esql]\n----\nFROM \n----\n\nUse comma-separated lists or wildcards to query multiple data streams, indices,\nor aliases:\n\n[source,esql]\n----\nFROM employees-00001,employees-*\n----\n", + }, + }, + ], + }, + status: 200, + }, + { + took: 3, + timed_out: false, + _shards: { + total: 1, + successful: 1, + skipped: 0, + failed: 0, + }, + hits: { + total: { + value: 14, + relation: 'eq', + }, + max_score: 0.034783483, + hits: [ + { + _index: '.kibana-elastic-ai-assistant-kb', + _id: '280d4882-0f64-4471-a268-669a3f8c958f', + _score: 0.034783483, + _ignored: ['text.keyword'], + _source: { + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0001.asciidoc', + required: true, + kbResource: 'esql', + }, + vector: { + tokens: { + user: 1.1084619, + // truncated for mock + }, + model_id: '.elser_model_2', + }, + text: '[[esql-example-queries]]\n\nThe following is an example an ES|QL query:\n\n```\nFROM logs-*\n| WHERE NOT CIDR_MATCH(destination.ip, "10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")\n| STATS destcount = COUNT(destination.ip) by user.name, host.name\n| ENRICH ldap_lookup_new ON user.name\n| WHERE group.name IS NOT NULL\n| EVAL follow_up = CASE(\n destcount >= 100, "true",\n "false")\n| SORT destcount desc\n| KEEP destcount, host.name, user.name, group.name, follow_up\n```\n', + }, + }, + ], + }, + status: 200, + }, + ], +}; diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/query_text.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/query_text.ts new file mode 100644 index 0000000000000..1ea69b786ad1f --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/query_text.ts @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +/** + * This mock query text is an example of a prompt that might be passed to + * the `ElasticSearchStore`'s `similaritySearch` function, as the `query` + * parameter. + * + * In the real world, an LLM extracted the `mockQueryText` from the + * following prompt, which includes a system prompt: + * + * ``` + * You are a helpful, expert assistant who answers questions about Elastic Security. Do not answer questions unrelated to Elastic Security. + * If you answer a question related to KQL, EQL, or ES|QL, it should be immediately usable within an Elastic Security timeline; please always format the output correctly with back ticks. Any answer provided for Query DSL should also be usable in a security timeline. This means you should only ever include the "filter" portion of the query. + * + * Use the following context to answer questions: + * + * Generate an ES|QL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". The user names should also be enriched with their respective group names. + * ``` + * + * In the example above, the LLM omitted the system prompt, such that only `mockQueryText` is passed to the `similaritySearch` function. + */ +export const mockQueryText = + 'Generate an ES|QL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called follow_up that contains a value of true, otherwise, it should contain false. 
The user names should also be enriched with their respective group names.'; diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/terms.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/terms.ts new file mode 100644 index 0000000000000..0606c905d6df3 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/terms.ts @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { Field, FieldValue, QueryDslTermQuery } from '@elastic/elasticsearch/lib/api/types'; + +/** + * These (mock) terms may be used in multiple queries. + * + * For example, it may be used in a vector search to exclude the required `esql` KB docs. + * + * It may also be used in a terms search to find all of the required `esql` KB docs. + */ +export const mockTerms: Array<Partial<Record<Field, QueryDslTermQuery | FieldValue>>> = [ + { + term: { + 'metadata.kbResource': 'esql', + }, + }, + { + term: { + 'metadata.required': true, + }, + }, +]; diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/terms_search_query.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/terms_search_query.ts new file mode 100644 index 0000000000000..c8af748516a1f --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/terms_search_query.ts @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types'; + +/** + * This Elasticsearch query DSL is a terms search for required `esql` KB docs + */ +export const mockTermsSearchQuery: QueryDslQueryContainer = { + bool: { + must: [ + { + term: { + 'metadata.kbResource': 'esql', + }, + }, + { + term: { + 'metadata.required': true, + }, + }, + ], + }, +}; diff --git a/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts b/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts new file mode 100644 index 0000000000000..30fbd0ad2c58f --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/__mocks__/vector_search_query.ts @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types'; + +/** + * A mock vector search query DSL + */ +export const mockVectorSearchQuery: QueryDslQueryContainer = { + bool: { + must_not: [ + { + term: { + 'metadata.kbResource': 'esql', + }, + }, + { + term: { + 'metadata.required': true, + }, + }, + ], + must: [ + { + text_expansion: { + 'vector.tokens': { + model_id: '.elser_model_2', + model_text: + 'Generate an ES|QL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called "follow_up" that contains a value of "true", otherwise, it should contain "false". 
The user names should also be enriched with their respective group names.', + }, + }, + }, + ], + }, +} as QueryDslQueryContainer; diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/README.md b/x-pack/plugins/elastic_assistant/server/knowledge_base/README.md index 9965a1624f798..54c9c885b5189 100644 --- a/x-pack/plugins/elastic_assistant/server/knowledge_base/README.md +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/README.md @@ -3,12 +3,13 @@ This directory contains assets for the Knowledge Base feature. The assets are used by the Elastic AI Assistant to answer questions about content that the underlying model may not have been trained on. Initial assets are provided for the following categories: * ES|QL - * General Documentation as from: https://github.com/elastic/elasticsearch/tree/main/docs/reference/esql + * General Documentation as from: <https://github.com/elastic/elasticsearch/tree/main/docs/reference/esql> * Excluding `functions/signature/*.svg` - * ANTLR Language Definitions as from: https://github.com/elastic/elasticsearch/tree/main/x-pack/plugin/esql/src/main/antlr + * ANTLR Language Definitions as from: <https://github.com/elastic/elasticsearch/tree/main/x-pack/plugin/esql/src/main/antlr> + * Sample queries that represent valid (and invalid) ES|QL queries, curated manually from a variety of sources The assets are stored in their original source format, so `.asciidoc` for documentation, and `.g4` and `.tokens` for the ANTLR language definitions. File names have been updated to be snake_case to satisfy Kibana linting rules. ### Future -Once asset format and chunking strategies are finalized, we may want to either move the assets to a shared package so they can be consumed by other plugins, or potentially ship the pre-packaged ELSER embeddings as part of a Fleet Integration. For now though, the assets will be included in their source format within the plugin, and can then be processed and embedded at runtime. 
\ No newline at end of file +Once asset format and chunking strategies are finalized, we may want to either move the assets to a shared package so they can be consumed by other plugins, or potentially ship the pre-packaged ELSER embeddings as part of a Fleet Integration. For now though, the assets will be included in their source format within the plugin, and can then be processed and embedded at runtime. diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0001.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0001.asciidoc new file mode 100644 index 0000000000000..a9373d4eec246 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0001.asciidoc @@ -0,0 +1,16 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +FROM logs-* +| WHERE NOT CIDR_MATCH(destination.ip, "10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16") +| STATS destcount = COUNT(destination.ip) by user.name, host.name +| ENRICH ldap_lookup_new ON user.name +| WHERE group.name IS NOT NULL +| EVAL follow_up = CASE( + destcount >= 100, "true", + "false") +| SORT destcount desc +| KEEP destcount, host.name, user.name, group.name, follow_up +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0002.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0002.asciidoc new file mode 100644 index 0000000000000..cb4e25ba9e0c3 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0002.asciidoc @@ -0,0 +1,11 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +from logs-* +| grok dns.question.name "%{DATA}\\.%{GREEDYDATA:dns.question.registered_domain:string}" +| stats unique_queries = count_distinct(dns.question.name) by 
dns.question.registered_domain, process.name +| where unique_queries > 5 +| sort unique_queries desc +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0003.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0003.asciidoc new file mode 100644 index 0000000000000..61d4c48a810c5 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0003.asciidoc @@ -0,0 +1,14 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +from logs-* +| where event.code is not null +| stats event_code_count = count(event.code) by event.code,host.name +| enrich win_events on event.code with EVENT_DESCRIPTION +| where EVENT_DESCRIPTION is not null and host.name is not null +| rename EVENT_DESCRIPTION as event.description +| sort event_code_count desc +| keep event_code_count,event.code,host.name,event.description +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0004.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0004.asciidoc new file mode 100644 index 0000000000000..3a66208bfa158 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0004.asciidoc @@ -0,0 +1,15 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +from logs-* +| where event.category == "file" and event.action == "creation" +| stats filecount = count(file.name) by process.name,host.name +| dissect process.name "%{process}.%{extension}" +| eval proclength = length(process.name) +| where proclength > 10 +| sort filecount,proclength desc +| limit 10 +| keep host.name,process.name,filecount,process,extension,fullproc,proclength +``` diff --git 
a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0005.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0005.asciidoc new file mode 100644 index 0000000000000..d5a42c7ce21fd --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0005.asciidoc @@ -0,0 +1,13 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +from logs-* +| where process.name == "curl.exe" +| stats bytes = sum(destination.bytes) by destination.address +| eval kb = bytes/1024 +| sort kb desc +| limit 10 +| keep kb,destination.address +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0006.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0006.asciidoc new file mode 100644 index 0000000000000..26009eccaf4a9 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0006.asciidoc @@ -0,0 +1,10 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +FROM metrics-apm* +| WHERE metricset.name == "transaction" AND metricset.interval == "1m" +| EVAL bucket = AUTO_BUCKET(transaction.duration.histogram, 50, <start-date>, <end-date>) +| STATS avg_duration = AVG(transaction.duration.histogram) BY bucket +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0007.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0007.asciidoc new file mode 100644 index 0000000000000..88e0b2260ff11 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0007.asciidoc @@ -0,0 +1,10 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +FROM packetbeat-* +| STATS doc_count = 
COUNT(destination.domain) BY destination.domain +| SORT doc_count DESC +| LIMIT 10 +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0008.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0008.asciidoc new file mode 100644 index 0000000000000..333ae968c020c --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0008.asciidoc @@ -0,0 +1,11 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +FROM employees +| EVAL hire_date_formatted = DATE_FORMAT(hire_date, "MMMM yyyy") +| SORT hire_date +| KEEP emp_no, hire_date_formatted +| LIMIT 5 +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0009.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0009.asciidoc new file mode 100644 index 0000000000000..f9762b0f695be --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0009.asciidoc @@ -0,0 +1,7 @@ +[[esql-example-queries]] + +The following is NOT an example of an ES|QL query: + +``` +Pagination is not supported +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0010.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0010.asciidoc new file mode 100644 index 0000000000000..ee361169eecf0 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0010.asciidoc @@ -0,0 +1,11 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +FROM logs-* +| WHERE @timestamp >= NOW() - 15 minutes +| EVAL bucket = DATE_TRUNC(1 minute, @timestamp) +| STATS avg_cpu = AVG(system.cpu.total.norm.pct) BY bucket, host.name +| 
LIMIT 10 +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0011.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0011.asciidoc new file mode 100644 index 0000000000000..0e3cf73139ff9 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0011.asciidoc @@ -0,0 +1,13 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +FROM traces-apm* +| WHERE @timestamp >= NOW() - 24 hours +| EVAL successful = CASE(event.outcome == "success", 1, 0), + failed = CASE(event.outcome == "failure", 1, 0) +| STATS success_rate = AVG(successful), + avg_duration = AVG(transaction.duration), + total_requests = COUNT(transaction.id) BY service.name +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0012.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0012.asciidoc new file mode 100644 index 0000000000000..e940b57368fbf --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0012.asciidoc @@ -0,0 +1,9 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +FROM metricbeat* +| EVAL cpu_pct_normalized = (system.cpu.user.pct + system.cpu.system.pct) / system.cpu.cores +| STATS AVG(cpu_pct_normalized) BY host.name +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0013.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0013.asciidoc new file mode 100644 index 0000000000000..657b5e7397ab5 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0013.asciidoc @@ -0,0 +1,10 @@ +[[esql-example-queries]] + +The following is an 
example ES|QL query: + +``` +FROM postgres-logs +| DISSECT message "%{} duration: %{query_duration} ms" +| EVAL query_duration_num = TO_DOUBLE(query_duration) +| STATS avg_duration = AVG(query_duration_num) +``` diff --git a/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0014.asciidoc b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0014.asciidoc new file mode 100644 index 0000000000000..d303642641aa3 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0014.asciidoc @@ -0,0 +1,9 @@ +[[esql-example-queries]] + +The following is an example ES|QL query: + +``` +FROM nyc_taxis +| WHERE DATE_EXTRACT(drop_off_time, "hour") >= 6 AND DATE_EXTRACT(drop_off_time, "hour") < 10 +| LIMIT 10 +``` diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/add_required_kb_resource_metadata.test.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/add_required_kb_resource_metadata.test.ts new file mode 100644 index 0000000000000..b77e20a1030ff --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/add_required_kb_resource_metadata.test.ts @@ -0,0 +1,51 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { addRequiredKbResourceMetadata } from './add_required_kb_resource_metadata'; +import { mockExampleQueryDocsFromDirectoryLoader } from '../../../__mocks__/docs_from_directory_loader'; + +describe('addRequiredKbResourceMetadata', () => { + const kbResource = 'esql'; + + test('it includes the original metadata properties', () => { + const EXPECTED_ADDITIONAL_KEYS_COUNT = 2; // two keys, `kbResource` and `required` + + const transformedDocs = addRequiredKbResourceMetadata({ + docs: mockExampleQueryDocsFromDirectoryLoader, + kbResource, + }); + + transformedDocs.forEach((doc, i) => { + expect(Object.keys(doc.metadata).length).toEqual( + Object.keys(mockExampleQueryDocsFromDirectoryLoader[i].metadata).length + + EXPECTED_ADDITIONAL_KEYS_COUNT + ); + }); + }); + + test('it adds the expected `kbResource` metadata to each document', () => { + const transformedDocs = addRequiredKbResourceMetadata({ + docs: mockExampleQueryDocsFromDirectoryLoader, + kbResource, + }); + + transformedDocs.forEach((doc) => { + expect(doc.metadata).toHaveProperty('kbResource', kbResource); + }); + }); + + test('it adds the expected `required` metadata to each document', () => { + const transformedDocs = addRequiredKbResourceMetadata({ + docs: mockExampleQueryDocsFromDirectoryLoader, + kbResource, + }); + + transformedDocs.forEach((doc) => { + expect(doc.metadata).toHaveProperty('required', true); + }); + }); +}); diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/add_required_kb_resource_metadata.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/add_required_kb_resource_metadata.ts new file mode 100644 index 0000000000000..43804cc344f5b --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/add_required_kb_resource_metadata.ts @@ -0,0 +1,36 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { Document } from 'langchain/document'; + +/** + * Transforms a set of documents by adding metadata that indicates those documents are required + * + * An additional property, `metadata`, is added to each document if it doesn't already exist. + * + * The `metadata` property is an object that contains the following properties: + * - the original metadata properties of the document (when they exist) + * - `kbResource`: The name of the Knowledge Base resource that the document belongs to + * - `required`: A boolean indicating whether the document is required for searches on the kbResource topic + * + * @returns A transformed set of documents, such that each document has the required metadata + */ +export const addRequiredKbResourceMetadata = ({ + docs, + kbResource, +}: { + docs: Array>>; + kbResource: string; +}): Array>> => + docs.map((doc) => ({ + ...doc, + metadata: { + ...doc.metadata, + kbResource, + required: true, // indicates that the document is required for searches on the kbResource topic + }, + })); diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/esql_loader.test.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/esql_loader.test.ts new file mode 100644 index 0000000000000..307f5fc3055bb --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/esql_loader.test.ts @@ -0,0 +1,112 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { Logger } from '@kbn/core/server'; + +import { addRequiredKbResourceMetadata } from './add_required_kb_resource_metadata'; +import { ElasticsearchStore } from '../elasticsearch_store/elasticsearch_store'; +import { loadESQL } from './esql_loader'; +import { + mockEsqlDocsFromDirectoryLoader, + mockEsqlLanguageDocsFromDirectoryLoader, + mockExampleQueryDocsFromDirectoryLoader, +} from '../../../__mocks__/docs_from_directory_loader'; +import { ESQL_RESOURCE } from '../../../routes/knowledge_base/constants'; + +let mockLoad = jest.fn(); + +jest.mock('langchain/document_loaders/fs/directory', () => ({ + DirectoryLoader: jest.fn().mockImplementation(() => ({ + load: mockLoad, + })), +})); + +jest.mock('langchain/document_loaders/fs/text', () => ({ + TextLoader: jest.fn().mockImplementation(() => ({})), +})); + +const esStore = { + addDocuments: jest.fn().mockResolvedValue(['1', '2', '3', '4', '5']), +} as unknown as ElasticsearchStore; + +const logger = { + info: jest.fn(), + error: jest.fn(), +} as unknown as Logger; + +describe('loadESQL', () => { + beforeEach(() => { + jest.clearAllMocks(); + + mockLoad = jest + .fn() + .mockReturnValueOnce(mockEsqlDocsFromDirectoryLoader) + .mockReturnValueOnce(mockEsqlLanguageDocsFromDirectoryLoader) + .mockReturnValueOnce(mockExampleQueryDocsFromDirectoryLoader); + }); + + describe('loadESQL', () => { + beforeEach(async () => { + await loadESQL(esStore, logger); + }); + + it('loads ES|QL docs, language files, and example queries into the Knowledge Base', async () => { + expect(esStore.addDocuments).toHaveBeenCalledWith([ + ...mockEsqlDocsFromDirectoryLoader, + ...mockEsqlLanguageDocsFromDirectoryLoader, + ...addRequiredKbResourceMetadata({ + docs: mockExampleQueryDocsFromDirectoryLoader, + kbResource: ESQL_RESOURCE, + }), + ]); + }); + + it('logs the expected (distinct) counts for each category of documents', async () => { + expect((logger.info as jest.Mock).mock.calls[0][0]).toEqual( + 'Loading 1 ES|QL docs, 2 
language docs, and 3 example queries into the Knowledge Base' + ); + }); + + it('logs the expected total of all documents loaded', async () => { + expect((logger.info as jest.Mock).mock.calls[1][0]).toEqual( + 'Loaded 5 ES|QL docs, language docs, and example queries into the Knowledge Base' + ); + }); + + it('does NOT log an error in the happy path', async () => { + expect(logger.error).not.toHaveBeenCalled(); + }); + }); + + it('returns true if documents were loaded', async () => { + (esStore.addDocuments as jest.Mock).mockResolvedValueOnce(['this is a response']); + + const result = await loadESQL(esStore, logger); + + expect(result).toBe(true); + }); + + it('returns false if documents were NOT loaded', async () => { + (esStore.addDocuments as jest.Mock).mockResolvedValueOnce([]); + + const result = await loadESQL(esStore, logger); + + expect(result).toBe(false); + }); + + it('logs the expected error if loading fails', async () => { + const error = new Error('Failed to load documents'); + (esStore.addDocuments as jest.Mock).mockRejectedValueOnce(error); + + await loadESQL(esStore, logger); + + expect(logger.error).toHaveBeenCalledWith( + 'Failed to load ES|QL docs, language docs, and example queries into the Knowledge Base', + error + ); + }); +}); diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/esql_loader.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/esql_loader.ts index 5b7388be2ca09..33310183e3d10 100644 --- a/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/esql_loader.ts +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/content_loaders/esql_loader.ts @@ -6,11 +6,13 @@ */ import { Logger } from '@kbn/core/server'; - import { DirectoryLoader } from 'langchain/document_loaders/fs/directory'; import { TextLoader } from 'langchain/document_loaders/fs/text'; import { resolve } from 'path'; + import { ElasticsearchStore } from 
'../elasticsearch_store/elasticsearch_store'; +import { addRequiredKbResourceMetadata } from './add_required_kb_resource_metadata'; +import { ESQL_RESOURCE } from '../../../routes/knowledge_base/constants'; /** * Loads the ESQL docs and language files into the Knowledge Base. @@ -37,22 +39,46 @@ export const loadESQL = async (esStore: ElasticsearchStore, logger: Logger): Pro true ); + const exampleQueriesLoader = new DirectoryLoader( + resolve(__dirname, '../../../knowledge_base/esql/example_queries'), + { + '.asciidoc': (path) => new TextLoader(path), + }, + true + ); + const docs = await docsLoader.load(); const languageDocs = await languageLoader.load(); + const rawExampleQueries = await exampleQueriesLoader.load(); + + // Add additional metadata to the example queries that indicates they are required KB documents: + const requiredExampleQueries = addRequiredKbResourceMetadata({ + docs: rawExampleQueries, + kbResource: ESQL_RESOURCE, + }); logger.info( - `Loading ${docs.length} ESQL docs and ${languageDocs.length} language docs into the Knowledge Base` + `Loading ${docs.length} ES|QL docs, ${languageDocs.length} language docs, and ${requiredExampleQueries.length} example queries into the Knowledge Base` ); - const response = await esStore.addDocuments([...docs, ...languageDocs]); + const response = await esStore.addDocuments([ + ...docs, + ...languageDocs, + ...requiredExampleQueries, + ]); logger.info( - `Loaded ${response?.length ?? 0} ESQL docs and language docs into the Knowledge Base` + `Loaded ${ + response?.length ?? 
0 + } ES|QL docs, language docs, and example queries into the Knowledge Base` ); return response.length > 0; } catch (e) { - logger.error(`Failed to load ESQL docs and language docs into the Knowledge Base`, e); + logger.error( + `Failed to load ES|QL docs, language docs, and example queries into the Knowledge Base`, + e + ); return false; } }; diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/elasticsearch_store.test.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/elasticsearch_store.test.ts index 94ac161d7abb1..9d563e240064a 100644 --- a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/elasticsearch_store.test.ts +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/elasticsearch_store.test.ts @@ -5,14 +5,21 @@ * 2.0. */ -import { Document } from 'langchain/document'; -import { ElasticsearchStore } from './elasticsearch_store'; import { elasticsearchServiceMock } from '@kbn/core-elasticsearch-server-mocks'; import { loggingSystemMock } from '@kbn/core-logging-server-mocks'; import { IndicesCreateResponse, MlGetTrainedModelsResponse, } from '@elastic/elasticsearch/lib/api/types'; +import { Document } from 'langchain/document'; + +import { + ElasticsearchStore, + FALLBACK_SIMILARITY_SEARCH_SIZE, + TERMS_QUERY_SIZE, +} from './elasticsearch_store'; +import { mockMsearchResponse } from '../../../__mocks__/msearch_response'; +import { mockQueryText } from '../../../__mocks__/query_text'; jest.mock('uuid', () => ({ v4: jest.fn(), @@ -63,7 +70,18 @@ describe('ElasticsearchStore', () => { expect(created).toBe(true); expect(mockEsClient.indices.create).toHaveBeenCalledWith({ index: KB_INDEX, - mappings: { properties: { vector: { properties: { tokens: { type: 'rank_features' } } } } }, + mappings: { + properties: { + metadata: { + properties: { + kbResource: { type: 'keyword' }, + required: { type: 'boolean' }, + source: { type: 'keyword' }, + }, + }, + 
vector: { properties: { tokens: { type: 'rank_features' } } }, + }, + }, settings: { default_pipeline: '.kibana-elastic-ai-assistant-kb-ingest-pipeline' }, }); }); @@ -197,59 +215,152 @@ describe('ElasticsearchStore', () => { describe('similaritySearch', () => { it('Checks if documents are found', async () => { - const query = 'find the docs!'; - mockEsClient.search.mockResolvedValue({ - took: 3, - timed_out: false, - _shards: { total: 1, successful: 1, skipped: 0, failed: 0 }, - hits: { - total: { value: 129, relation: 'eq' }, - max_score: 17.86367, - hits: [ - { - _index: '.kibana-elastic-ai-assistant-kb', - _id: 'b71ea007-8b46-4e02-81b4-485faad06e79', - _score: 9.308316, - _ignored: ['text.keyword'], - _source: { - metadata: { - source: '/found/in/test/land', - }, - vector: { - tokens: {}, - model_id: '.elser_model_2', - }, - text: 'documents', + mockEsClient.msearch.mockResolvedValue(mockMsearchResponse); + + const searchResults = await esStore.similaritySearch(mockQueryText); + + expect(searchResults).toStrictEqual([ + { + pageContent: + "[[esql-from]]\n=== `FROM`\n\nThe `FROM` source command returns a table with up to 10,000 documents from a\ndata stream, index, or alias. Each row in the resulting table represents a\ndocument. Each column corresponds to a field, and can be accessed by the name\nof that field.\n\n[source,esql]\n----\nFROM employees\n----\n\nYou can use <> to refer to indices, aliases\nand data streams. 
This can be useful for time series data, for example to access\ntoday's index:\n\n[source,esql]\n----\nFROM \n----\n\nUse comma-separated lists or wildcards to query multiple data streams, indices,\nor aliases:\n\n[source,esql]\n----\nFROM employees-00001,employees-*\n----\n", + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/docs/source_commands/from.asciidoc', + }, + }, + { + pageContent: + '[[esql-example-queries]]\n\nThe following is an example an ES|QL query:\n\n```\nFROM logs-*\n| WHERE NOT CIDR_MATCH(destination.ip, "10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")\n| STATS destcount = COUNT(destination.ip) by user.name, host.name\n| ENRICH ldap_lookup_new ON user.name\n| WHERE group.name IS NOT NULL\n| EVAL follow_up = CASE(\n destcount >= 100, "true",\n "false")\n| SORT destcount desc\n| KEEP destcount, host.name, user.name, group.name, follow_up\n```\n', + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/example_queries/esql_example_query_0001.asciidoc', + }, + }, + ]); + + expect(mockEsClient.msearch).toHaveBeenCalledWith({ + body: [ + { + index: '.elastic-assistant-kb', + }, + { + query: { + bool: { + must_not: [ + { + term: { + 'metadata.kbResource': 'esql', + }, + }, + { + term: { + 'metadata.required': true, + }, + }, + ], + must: [ + { + text_expansion: { + 'vector.tokens': { + model_id: '.elser_model_2', + model_text: mockQueryText, + }, + }, + }, + ], }, }, - ], - }, + size: FALLBACK_SIMILARITY_SEARCH_SIZE, // <-- `FALLBACK_SIMILARITY_SEARCH_SIZE` is used when `k` is not provided + }, + { + index: '.elastic-assistant-kb', + }, + { + query: { + bool: { + must: [ + { + term: { + 'metadata.kbResource': 'esql', + }, + }, + { + term: { + 'metadata.required': true, + }, + }, + ], + }, + }, + size: TERMS_QUERY_SIZE, + }, + ], }); + }); - const searchResults = 
await esStore.similaritySearch(query); + it('uses the value of `k` instead of the `FALLBACK_SIMILARITY_SEARCH_SIZE` when `k` is provided', async () => { + mockEsClient.msearch.mockResolvedValue(mockMsearchResponse); - expect(searchResults).toStrictEqual([ - new Document({ - pageContent: 'documents', - metadata: { source: '/found/in/test/land' }, - }), - ]); - expect(mockEsClient.search).toHaveBeenCalledWith({ - index: KB_INDEX, - query: { - bool: { - must: [ - { - text_expansion: { - 'vector.tokens': { - model_id: '.elser_model_2', - model_text: query, + const k = 4; + await esStore.similaritySearch(mockQueryText, k); + + expect(mockEsClient.msearch).toHaveBeenCalledWith({ + body: [ + { + index: '.elastic-assistant-kb', + }, + { + query: { + bool: { + must_not: [ + { + term: { + 'metadata.kbResource': 'esql', + }, + }, + { + term: { + 'metadata.required': true, + }, }, - }, + ], + must: [ + { + text_expansion: { + 'vector.tokens': { + model_id: '.elser_model_2', + model_text: mockQueryText, + }, + }, + }, + ], }, - ], + }, + size: k, // <-- `k` is used instead of `FALLBACK_SIMILARITY_SEARCH_SIZE` }, - }, + { + index: '.elastic-assistant-kb', + }, + { + query: { + bool: { + must: [ + { + term: { + 'metadata.kbResource': 'esql', + }, + }, + { + term: { + 'metadata.required': true, + }, + }, + ], + }, + }, + size: TERMS_QUERY_SIZE, + }, + ], }); }); }); diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/elasticsearch_store.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/elasticsearch_store.ts index 99ce5fd39439d..d135ffb734bd4 100644 --- a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/elasticsearch_store.ts +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/elasticsearch_store.ts @@ -5,23 +5,26 @@ * 2.0. 
*/ -import { Document } from 'langchain/document'; -import { Callbacks } from 'langchain/callbacks'; -import { VectorStore } from 'langchain/vectorstores/base'; import { ElasticsearchClient, Logger } from '@kbn/core/server'; - -import { - MappingTypeMapping, - QueryDslTextExpansionQuery, -} from '@elastic/elasticsearch/lib/api/types'; +import { MappingTypeMapping } from '@elastic/elasticsearch/lib/api/types'; import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; - +import { Callbacks } from 'langchain/callbacks'; +import { Document } from 'langchain/document'; +import { VectorStore } from 'langchain/vectorstores/base'; import * as uuid from 'uuid'; + import { ElasticsearchEmbeddings } from '../embeddings/elasticsearch_embeddings'; +import { FlattenedHit, getFlattenedHits } from './helpers/get_flattened_hits'; +import { getMsearchQueryBody } from './helpers/get_msearch_query_body'; +import { getTermsSearchQuery } from './helpers/get_terms_search_query'; +import { getVectorSearchQuery } from './helpers/get_vector_search_query'; +import type { MsearchResponse } from './helpers/types'; import { + ESQL_RESOURCE, KNOWLEDGE_BASE_INDEX_PATTERN, KNOWLEDGE_BASE_INGEST_PIPELINE, } from '../../../routes/knowledge_base/constants'; +import { getRequiredKbDocsTermsQueryDsl } from './helpers/get_required_kb_docs_terms_query_dsl'; interface CreatePipelineParams { id?: string; @@ -33,6 +36,19 @@ interface CreateIndexParams { pipeline?: string; } +/** + * A fallback for the query `size` that determines how many documents to + * return from Elasticsearch when performing a similarity search. + * + * The size is typically determined by the implementation of LangChain's + * `VectorStoreRetriever._getRelevantDocuments` function, so this fallback is + * only required when using the `ElasticsearchStore` directly.
+ */ +export const FALLBACK_SIMILARITY_SEARCH_SIZE = 10; + +/** The maximum number of hits to return from a `terms` query, via the `size` parameter */ +export const TERMS_QUERY_SIZE = 10000; + /** * Basic ElasticsearchStore implementation only leveraging ELSER for storage and retrieval. */ @@ -44,17 +60,25 @@ export class ElasticsearchStore extends VectorStore { private readonly index: string; private readonly logger: Logger; private readonly model: string; + private readonly kbResource: string; _vectorstoreType(): string { return 'elasticsearch'; } - constructor(esClient: ElasticsearchClient, index: string, logger: Logger, model?: string) { + constructor( + esClient: ElasticsearchClient, + index: string, + logger: Logger, + model?: string, + kbResource?: string | undefined + ) { super(new ElasticsearchEmbeddings(logger), { esClient, index }); this.esClient = esClient; this.index = index ?? KNOWLEDGE_BASE_INDEX_PATTERN; this.logger = logger; this.model = model ?? '.elser_model_2'; + this.kbResource = kbResource ?? 
ESQL_RESOURCE; } /** @@ -150,6 +174,9 @@ export class ElasticsearchStore extends VectorStore { * @param filter Optional filter to apply to the search * @param _callbacks Optional callbacks * + * Fun facts: + * - This function is called by LangChain's `VectorStoreRetriever._getRelevantDocuments` + * - The `k` parameter is typically determined by LangChain's `VectorStoreRetriever._getRelevantDocuments`, and has been observed to default to `4` in the wild (see langchain/dist/vectorstores/base.ts) * @returns Promise of similar documents */ similaritySearch = async ( @@ -158,42 +185,42 @@ export class ElasticsearchStore extends VectorStore { filter?: this['FilterType'] | undefined, _callbacks?: Callbacks | undefined ): Promise => { - const queryBody: QueryDslQueryContainer = { - bool: { - must: [ - { - text_expansion: { - 'vector.tokens': { - model_id: this.model, - model_text: query, - }, - } as unknown as QueryDslTextExpansionQuery, - }, - ], - filter, - }, - }; + // requiredDocs is an array of filters that can be used in a `bool` Elasticsearch DSL query to filter in/out required KB documents: + const requiredDocs = getRequiredKbDocsTermsQueryDsl(this.kbResource); + + // The `k` parameter is typically provided by LangChain's `VectorStoreRetriever._getRelevantDocuments`, which calls this function: + const vectorSearchQuerySize = k ?? 
FALLBACK_SIMILARITY_SEARCH_SIZE; + + // build a vector search query: + const vectorSearchQuery = getVectorSearchQuery({ + filter, + modelId: this.model, + mustNotTerms: requiredDocs, + query, + }); + + // build a (separate) terms search query: + const termsSearchQuery = getTermsSearchQuery(requiredDocs); + + // combine the vector search query and the terms search queries into a single multi-search query: + const mSearchQueryBody = getMsearchQueryBody({ + index: this.index, + termsSearchQuery, + termsSearchQuerySize: TERMS_QUERY_SIZE, + vectorSearchQuery, + vectorSearchQuerySize, + }); try { - const result = await this.esClient.search<{ - text: string; - metadata: Record; - }>({ - index: this.index, - size: k, - query: queryBody, - }); + // execute both queries via a single multi-search request: + const result = await this.esClient.msearch(mSearchQueryBody); - const results = result.hits.hits.map( - (hit) => - new Document({ - pageContent: hit?._source?.text ?? '', - metadata: hit?._source?.metadata, - }) - ); + // flatten the results of the combined queries into a single array of hits: + const results: FlattenedHit[] = result.responses.flatMap((response) => { + const maybeEsqlMsearchResponse: MsearchResponse = response as MsearchResponse; - this.logger.debug(`Similarity Search Query:\n ${JSON.stringify(queryBody)}`); - this.logger.debug(`Similarity Search Results:\n ${JSON.stringify(results)}`); + return getFlattenedHits(maybeEsqlMsearchResponse); + }); return results; } catch (e) { @@ -223,6 +250,16 @@ export class ElasticsearchStore extends VectorStore { createIndex = async ({ index, pipeline }: CreateIndexParams = {}): Promise => { const mappings: MappingTypeMapping = { properties: { + metadata: { + properties: { + /** the category of knowledge, e.g. 
`esql` */ + kbResource: { type: 'keyword' }, + /** when `true`, return this document in all searches for the `kbResource` */ + required: { type: 'boolean' }, + /** often a file path when the document was created via a LangChain `DirectoryLoader`, this metadata describes the origin of the document */ + source: { type: 'keyword' }, + }, + }, vector: { properties: { tokens: { type: 'rank_features' } }, }, diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_flattened_hits.test.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_flattened_hits.test.ts new file mode 100644 index 0000000000000..4ceda3904d420 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_flattened_hits.test.ts @@ -0,0 +1,81 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { getFlattenedHits } from './get_flattened_hits'; +import { mockMsearchResponse } from '../../../../__mocks__/msearch_response'; +import type { MsearchResponse } from './types'; + +describe('getFlattenedHits', () => { + it('returns an empty array when the response is undefined', () => { + const result = getFlattenedHits(undefined); + + expect(result).toEqual([]); + }); + + it('returns an empty array when hits > hits is empty', () => { + const result = getFlattenedHits({ hits: { hits: [] } }); + + expect(result).toEqual([]); + }); + + it('returns the expected flattened hits given a non-empty `MsearchResponse`', () => { + const expected = [ + { + pageContent: + "[[esql-from]]\n=== `FROM`\n\nThe `FROM` source command returns a table with up to 10,000 documents from a\ndata stream, index, or alias. Each row in the resulting table represents a\ndocument. 
Each column corresponds to a field, and can be accessed by the name\nof that field.\n\n[source,esql]\n----\nFROM employees\n----\n\nYou can use <> to refer to indices, aliases\nand data streams. This can be useful for time series data, for example to access\ntoday's index:\n\n[source,esql]\n----\nFROM \n----\n\nUse comma-separated lists or wildcards to query multiple data streams, indices,\nor aliases:\n\n[source,esql]\n----\nFROM employees-00001,employees-*\n----\n", + metadata: { + source: + '/Users/andrew.goldstein/Projects/forks/andrew-goldstein/kibana/x-pack/plugins/elastic_assistant/server/knowledge_base/esql/docs/source_commands/from.asciidoc', + }, + }, + ]; + + const result = getFlattenedHits(mockMsearchResponse.responses[0] as MsearchResponse); + + expect(result).toEqual(expected); + }); + + it('returns an array of FlattenedHits with empty strings when given an MsearchResponse with missing fields', () => { + const msearchResponse = { + hits: { + hits: [ + { + _source: { + metadata: { + source: '/source/1', + }, + }, + }, + { + _source: { + text: 'Source 2 text', + }, + }, + ], + }, + }; + + const expected = [ + { + pageContent: '', // <-- missing text field + metadata: { + source: '/source/1', + }, + }, + { + pageContent: 'Source 2 text', + metadata: { + source: '', // <-- missing source field + }, + }, + ]; + + const result = getFlattenedHits(msearchResponse); + + expect(result).toEqual(expected); + }); +}); diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_flattened_hits.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_flattened_hits.ts new file mode 100644 index 0000000000000..f6c3a3ef0e9fa --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_flattened_hits.ts @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { MsearchKbHit, MsearchResponse } from './types'; + +/** + * Represents a flattened hit from an Elasticsearch Msearch response + * + * It contains the page content and metadata source of a KB document + */ +export interface FlattenedHit { + pageContent: string; + metadata: { + source: string; + }; +} + +/** + * Returns an array of flattened hits from the specified Msearch response + * that contain the page content and metadata source of KB documents + * + * @param maybeMsearchResponse An Elasticsearch Msearch response, which returns the results of multiple searches in a single request + * @returns Returns an array of flattened hits from the specified Msearch response that contain the page content and metadata source of KB documents + */ +export const getFlattenedHits = ( + maybeMsearchResponse: MsearchResponse | undefined +): FlattenedHit[] => + maybeMsearchResponse?.hits?.hits?.flatMap((hit: MsearchKbHit) => ({ + pageContent: hit?._source?.text ?? '', + metadata: { + source: hit?._source?.metadata?.source ?? '', + }, + })) ?? []; diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_msearch_query_body.test.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_msearch_query_body.test.ts new file mode 100644 index 0000000000000..2697aaf76a085 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_msearch_query_body.test.ts @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { TERMS_QUERY_SIZE } from '../elasticsearch_store'; +import { getMsearchQueryBody } from './get_msearch_query_body'; +import { mockTermsSearchQuery } from '../../../../__mocks__/terms_search_query'; +import { mockVectorSearchQuery } from '../../../../__mocks__/vector_search_query'; + +describe('getMsearchQueryBody', () => { + it('returns the expected multi-search request body', () => { + const index = '.kibana-elastic-ai-assistant-kb'; + + const vectorSearchQuery = mockVectorSearchQuery; + const vectorSearchQuerySize = 4; + + const termsSearchQuery = mockTermsSearchQuery; + const termsSearchQuerySize = TERMS_QUERY_SIZE; + + const result = getMsearchQueryBody({ + index, + termsSearchQuery, + termsSearchQuerySize, + vectorSearchQuery, + vectorSearchQuerySize, + }); + + expect(result).toEqual({ + body: [ + { index }, + { + query: mockVectorSearchQuery, + size: vectorSearchQuerySize, + }, + { index }, + { + query: mockTermsSearchQuery, + size: TERMS_QUERY_SIZE, + }, + ], + }); + }); +}); diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_msearch_query_body.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_msearch_query_body.ts new file mode 100644 index 0000000000000..c93c3f2e30954 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_msearch_query_body.ts @@ -0,0 +1,67 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types'; + +/** + * Represents an entry in a multi-search request body that specifies the name of an index to search + */ +export interface MsearchQueryBodyIndexEntry { + index: string; +} + +/** + * Represents an entry in a multi-search request body that specifies a query to execute + */ +export interface MsearchQueryBodyQueryEntry { + query: QueryDslQueryContainer; + size: number; +} + +/** + * Represents a multi-search request body, which returns the results of multiple searches in a single request + */ +export interface MsearchQueryBody { + body: Array; +} + +/** + * Returns a multi-search request body, which returns the results of multiple searches in a single request + * + * @param index The KB index to search, e.g. `.kibana-elastic-ai-assistant-kb` + * @param termsSearchQuery An Elasticsearch DSL query that performs a terms search, typically used to search for required KB documents + * @param termsSearchQuerySize The maximum number of required KB documents to return + * @param vectorSearchQuery An Elasticsearch DSL query that performs a vector search, typically used to search for similar KB documents + * @param vectorSearchQuerySize The maximum number of similar KB documents to return + * @returns A multi-search request body, which returns the results of multiple searches in a single request + */ +export const getMsearchQueryBody = ({ + index, + termsSearchQuery, + termsSearchQuerySize, + vectorSearchQuery, + vectorSearchQuerySize, +}: { + index: string; + termsSearchQuery: QueryDslQueryContainer; + termsSearchQuerySize: number; + vectorSearchQuery: QueryDslQueryContainer; + vectorSearchQuerySize: number; +}): MsearchQueryBody => ({ + body: [ + { index }, + { + query: vectorSearchQuery, + size: vectorSearchQuerySize, + }, + { index }, + { + query: termsSearchQuery, + size: termsSearchQuerySize, + }, + ], +}); diff --git 
a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_required_kb_docs_terms_query_dsl.test.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_required_kb_docs_terms_query_dsl.test.ts
new file mode 100644
index 0000000000000..5c4f944e83178
--- /dev/null
+++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_required_kb_docs_terms_query_dsl.test.ts
@@ -0,0 +1,21 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { getRequiredKbDocsTermsQueryDsl } from './get_required_kb_docs_terms_query_dsl';
+
+const kbResource = 'esql'; // the resource passed to getRequiredKbDocsTermsQueryDsl below
+
+describe('getRequiredKbDocsTermsQueryDsl', () => {
+  it('returns the expected terms query DSL', () => {
+    const result = getRequiredKbDocsTermsQueryDsl(kbResource);
+
+    expect(result).toEqual([
+      { term: { 'metadata.kbResource': 'esql' } }, // carries the kbResource argument
+      { term: { 'metadata.required': true } },
+    ]);
+  });
+});
diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_required_kb_docs_terms_query_dsl.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_required_kb_docs_terms_query_dsl.ts
new file mode 100644
index 0000000000000..ba5af8c3bfef7
--- /dev/null
+++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_required_kb_docs_terms_query_dsl.ts
@@ -0,0 +1,35 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { Field, FieldValue, QueryDslTermQuery } from '@elastic/elasticsearch/lib/api/types';
+
+/**
+ * For the specified topic, returns an array of filters that can be used in a
+ * `bool` Elasticsearch DSL query to filter in/out required KB documents.
+ *
+ * The returned filters can be used in different types of queries to, for example:
+ * - To filter out required KB documents from a vector search
+ * - To filter in required KB documents in a terms query
+ *
+ * @param kbResource Search for required KB documents for this topic
+ *
+ * @returns An array of `term`s that may be used in a `bool` Elasticsearch DSL query to filter in/out required KB documents
+ */
+export const getRequiredKbDocsTermsQueryDsl = (
+  kbResource: string
+): Array<Partial<Record<Field, QueryDslTermQuery | FieldValue>>> => [
+  {
+    term: {
+      'metadata.kbResource': kbResource,
+    },
+  },
+  {
+    term: {
+      'metadata.required': true,
+    },
+  },
+];
diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_terms_search_query.test.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_terms_search_query.test.ts
new file mode 100644
index 0000000000000..98d3b2c5d36c2
--- /dev/null
+++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_terms_search_query.test.ts
@@ -0,0 +1,21 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { getTermsSearchQuery } from './get_terms_search_query';
+import { mockTerms } from '../../../../__mocks__/terms';
+
+describe('getTermsSearchQuery', () => {
+  it('returns the expected Elasticsearch query DSL', () => {
+    const query = getTermsSearchQuery(mockTerms);
+
+    expect(query).toEqual({
+      bool: {
+        must: mockTerms, // the terms passed in are expected under bool.must
+      },
+    });
+  });
+});
diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_terms_search_query.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_terms_search_query.ts
new file mode 100644
index 0000000000000..8fcc7b3b20851
--- /dev/null
+++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_terms_search_query.ts
@@ -0,0 +1,29 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type {
+  Field,
+  FieldValue,
+  QueryDslTermQuery,
+  QueryDslQueryContainer,
+} from '@elastic/elasticsearch/lib/api/types';
+
+/**
+ * Returns an Elasticsearch DSL query that performs a terms search,
+ * such that all of the specified terms must be present in the search results.
+ *
+ * @param mustTerms All of the specified terms must be present in the search results
+ *
+ * @returns An Elasticsearch DSL query that performs a terms search, such that all of the specified terms must be present in the search results
+ */
+export const getTermsSearchQuery = (
+  mustTerms: Array<Partial<Record<Field, QueryDslTermQuery | FieldValue>>>
+): QueryDslQueryContainer => ({
+  bool: {
+    must: [...mustTerms], // all of the specified terms must be present in the search results
+  },
+});
diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_vector_search_query.test.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_vector_search_query.test.ts
new file mode 100644
index 0000000000000..da6a7227953f2
--- /dev/null
+++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_vector_search_query.test.ts
@@ -0,0 +1,129 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types';
+
+import { getVectorSearchQuery } from './get_vector_search_query';
+import { mockTerms } from '../../../../__mocks__/terms';
+import { mockQueryText } from '../../../../__mocks__/query_text';
+
+describe('getVectorSearchQuery', () => {
+  it('returns the expected query when mustNotTerms is empty', () => {
+    const result = getVectorSearchQuery({
+      filter: undefined,
+      modelId: '.elser_model_2',
+      mustNotTerms: [], // <--- empty
+      query: mockQueryText,
+    });
+
+    expect(result).toEqual({
+      bool: {
+        filter: undefined,
+        must: [
+          {
+            text_expansion: {
+              'vector.tokens': {
+                model_id: '.elser_model_2',
+                model_text:
+                  'Generate an ES|QL query that will count the number of connections made to external IP addresses, broken down by user.
If the count is greater than 100 for a specific user, add a new field called follow_up that contains a value of true, otherwise, it should contain false. The user names should also be enriched with their respective group names.',
+              },
+            },
+          },
+        ],
+        must_not: [], // nothing is excluded when mustNotTerms is empty
+      },
+    });
+  });
+
+  it('returns the expected query when mustNotTerms are provided', () => {
+    const result = getVectorSearchQuery({
+      filter: undefined,
+      modelId: '.elser_model_2',
+      mustNotTerms: mockTerms, // <--- mock terms
+      query: mockQueryText,
+    });
+
+    expect(result).toEqual({
+      bool: {
+        filter: undefined,
+        must: [
+          {
+            text_expansion: {
+              'vector.tokens': {
+                model_id: '.elser_model_2',
+                model_text:
+                  'Generate an ES|QL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called follow_up that contains a value of true, otherwise, it should contain false. The user names should also be enriched with their respective group names.',
+              },
+            },
+          },
+        ],
+        must_not: [
+          {
+            term: {
+              'metadata.kbResource': 'esql',
+            },
+          },
+          {
+            term: {
+              'metadata.required': true,
+            },
+          },
+        ],
+      },
+    });
+  });
+
+  it('returns the expected results when a filter is provided', () => {
+    const filter: QueryDslQueryContainer = {
+      bool: {
+        must: [
+          {
+            term: {
+              'some.field': 'value',
+            },
+          },
+        ],
+      },
+    };
+
+    const result = getVectorSearchQuery({
+      filter,
+      modelId: '.elser_model_2',
+      mustNotTerms: mockTerms, // <--- mock terms
+      query: mockQueryText,
+    });
+
+    expect(result).toEqual({
+      bool: {
+        filter, // the filter passed in is expected back as-is
+        must: [
+          {
+            text_expansion: {
+              'vector.tokens': {
+                model_id: '.elser_model_2',
+                model_text:
+                  'Generate an ES|QL query that will count the number of connections made to external IP addresses, broken down by user. If the count is greater than 100 for a specific user, add a new field called follow_up that contains a value of true, otherwise, it should contain false. The user names should also be enriched with their respective group names.',
+              },
+            },
+          },
+        ],
+        must_not: [
+          {
+            term: {
+              'metadata.kbResource': 'esql',
+            },
+          },
+          {
+            term: {
+              'metadata.required': true,
+            },
+          },
+        ],
+      },
+    });
+  });
+});
diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_vector_search_query.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_vector_search_query.ts
new file mode 100644
index 0000000000000..b80038cea2a92
--- /dev/null
+++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/get_vector_search_query.ts
@@ -0,0 +1,51 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type {
+  Field,
+  FieldValue,
+  QueryDslQueryContainer,
+  QueryDslTermQuery,
+  QueryDslTextExpansionQuery,
+} from '@elastic/elasticsearch/lib/api/types';
+
+/**
+ * Returns an Elasticsearch query DSL that performs a vector search
+ * that excludes a set of documents from the search results.
+ *
+ * @param filter Optional filter to apply to the search
+ * @param modelId ID of the model to search with, e.g.
`.elser_model_2`
+ * @param mustNotTerms Array of objects that may be used in a `bool` Elasticsearch DSL query to, for example, exclude the required KB docs from the vector search, so there's no overlap
+ * @param query The search query provided by the user
+ * @returns A `bool` Elasticsearch DSL query with a `text_expansion` clause on `vector.tokens` under `must`, the `mustNotTerms` under `must_not`, and the `filter` passed through
+ */
+export const getVectorSearchQuery = ({
+  filter,
+  modelId,
+  mustNotTerms,
+  query,
+}: {
+  filter: QueryDslQueryContainer | undefined;
+  modelId: string;
+  mustNotTerms: Array<Partial<Record<Field, QueryDslTermQuery | FieldValue>>>;
+  query: string;
+}): QueryDslQueryContainer => ({
+  bool: {
+    must_not: [...mustNotTerms],
+    must: [
+      {
+        text_expansion: {
+          'vector.tokens': {
+            model_id: modelId,
+            model_text: query,
+          },
+        } as unknown as QueryDslTextExpansionQuery,
+      },
+    ],
+    filter,
+  },
+});
diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/types.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/types.ts
new file mode 100644
index 0000000000000..a0f549a00ab26
--- /dev/null
+++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/elasticsearch_store/helpers/types.ts
@@ -0,0 +1,48 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+/**
+ * A hit from the response to an Elasticsearch multi-search request,
+ * which returns the results of multiple searches in a single request.
+ *
+ * Search hits may contain the following properties that may be present in
+ * knowledge base documents:
+ *
+ * 1) the `metadata` property, an object that may have the following properties:
+ *    - `kbResource`: The name of the Knowledge Base resource that the document belongs to, e.g.
`esql`
+ *    - `required`: A boolean indicating whether the document is required for searches on the `kbResource` topic
+ *    - `source`: Describes the origin of the document, sometimes a file path via a LangChain DirectoryLoader
+ * 2) the `text` property, a string containing the text of the document
+ * 3) the `vector` property, containing the document's embeddings
+ */
+export interface MsearchKbHit {
+  _id?: string;
+  _ignored?: string[];
+  _index?: string;
+  _score?: number;
+  _source?: {
+    metadata?: {
+      kbResource?: string;
+      required?: boolean;
+      source?: string;
+    };
+    text?: string;
+    vector?: {
+      tokens?: Record<string, number>;
+    };
+  };
+}
+
+/**
+ * A Response from an Elasticsearch multi-search request, which returns the
+ * results of multiple searches in a single request.
+ */
+export interface MsearchResponse {
+  hits?: {
+    hits?: MsearchKbHit[];
+  };
+}
diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/execute_custom_llm_chain/index.test.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/execute_custom_llm_chain/index.test.ts
index d9e301d557190..e63da9257aa36 100644
--- a/x-pack/plugins/elastic_assistant/server/lib/langchain/execute_custom_llm_chain/index.test.ts
+++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/execute_custom_llm_chain/index.test.ts
@@ -5,16 +5,17 @@
  * 2.0.
*/ -import { KibanaRequest } from '@kbn/core/server'; import { PluginStartContract as ActionsPluginStart } from '@kbn/actions-plugin/server'; +import { elasticsearchServiceMock } from '@kbn/core-elasticsearch-server-mocks'; +import { KibanaRequest } from '@kbn/core/server'; +import { loggerMock } from '@kbn/logging-mocks'; -import { ResponseBody } from '../types'; import { ActionsClientLlm } from '../llm/actions_client_llm'; import { mockActionResponse } from '../../../__mocks__/action_result_data'; import { langChainMessages } from '../../../__mocks__/lang_chain_messages'; +import { ESQL_RESOURCE } from '../../../routes/knowledge_base/constants'; +import { ResponseBody } from '../types'; import { callAgentExecutor } from '.'; -import { loggerMock } from '@kbn/logging-mocks'; -import { elasticsearchServiceMock } from '@kbn/core-elasticsearch-server-mocks'; jest.mock('../llm/actions_client_llm'); @@ -66,6 +67,7 @@ describe('callAgentExecutor', () => { langChainMessages, logger: mockLogger, request: mockRequest, + kbResource: ESQL_RESOURCE, }); expect(ActionsClientLlm).toHaveBeenCalledWith({ @@ -84,6 +86,7 @@ describe('callAgentExecutor', () => { langChainMessages, logger: mockLogger, request: mockRequest, + kbResource: ESQL_RESOURCE, }); expect(mockCall).toHaveBeenCalledWith({ @@ -101,6 +104,7 @@ describe('callAgentExecutor', () => { langChainMessages: onlyOneMessage, logger: mockLogger, request: mockRequest, + kbResource: ESQL_RESOURCE, }); expect(mockCall).toHaveBeenCalledWith({ @@ -116,6 +120,7 @@ describe('callAgentExecutor', () => { langChainMessages, logger: mockLogger, request: mockRequest, + kbResource: ESQL_RESOURCE, }); expect(result).toEqual({ diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/execute_custom_llm_chain/index.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/execute_custom_llm_chain/index.ts index e1f0ed100e5ac..694bd44bfd471 100644 --- 
a/x-pack/plugins/elastic_assistant/server/lib/langchain/execute_custom_llm_chain/index.ts +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/execute_custom_llm_chain/index.ts @@ -24,6 +24,7 @@ export const callAgentExecutor = async ({ logger, request, elserId, + kbResource, }: AgentExecutorParams): AgentExecutorResponse => { const llm = new ActionsClientLlm({ actions, connectorId, request, llmType, logger }); @@ -39,7 +40,13 @@ export const callAgentExecutor = async ({ }); // ELSER backed ElasticsearchStore for Knowledge Base - const esStore = new ElasticsearchStore(esClient, KNOWLEDGE_BASE_INDEX_PATTERN, logger, elserId); + const esStore = new ElasticsearchStore( + esClient, + KNOWLEDGE_BASE_INDEX_PATTERN, + logger, + elserId, + kbResource + ); const chain = RetrievalQAChain.fromLLM(llm, esStore.asRetriever()); const tools: Tool[] = [ diff --git a/x-pack/plugins/elastic_assistant/server/lib/langchain/executors/types.ts b/x-pack/plugins/elastic_assistant/server/lib/langchain/executors/types.ts index e1e2817ee17a1..1c15ff8c97da4 100644 --- a/x-pack/plugins/elastic_assistant/server/lib/langchain/executors/types.ts +++ b/x-pack/plugins/elastic_assistant/server/lib/langchain/executors/types.ts @@ -16,6 +16,7 @@ export interface AgentExecutorParams { actions: ActionsPluginStart; connectorId: string; esClient: ElasticsearchClient; + kbResource: string | undefined; langChainMessages: BaseMessage[]; llmType?: string; logger: Logger; diff --git a/x-pack/plugins/elastic_assistant/server/routes/evaluate/post_evaluate.ts b/x-pack/plugins/elastic_assistant/server/routes/evaluate/post_evaluate.ts index ed3ff729b623e..b65822524f1cd 100644 --- a/x-pack/plugins/elastic_assistant/server/routes/evaluate/post_evaluate.ts +++ b/x-pack/plugins/elastic_assistant/server/routes/evaluate/post_evaluate.ts @@ -7,11 +7,11 @@ import { IRouter, KibanaRequest, Logger } from '@kbn/core/server'; import { transformError } from '@kbn/securitysolution-es-utils'; - import { v4 as uuidv4 } from 
'uuid'; + +import { ESQL_RESOURCE } from '../knowledge_base/constants'; import { buildResponse } from '../../lib/build_response'; import { buildRouteValidation } from '../../schemas/common'; - import { ElasticAssistantRequestHandlerContext, GetElser } from '../../types'; import { EVALUATE } from '../../../common/constants'; import { PostEvaluateBody, PostEvaluatePathQuery } from '../../schemas/evaluate/post_evaluate'; @@ -126,6 +126,7 @@ export const postEvaluateRoute = ( llmType, logger, request: skeletonRequest, + kbResource: ESQL_RESOURCE, }) ); }); diff --git a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_kb_resource.test.ts b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_kb_resource.test.ts new file mode 100644 index 0000000000000..7c4a9058e7df7 --- /dev/null +++ b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_kb_resource.test.ts @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */
+
+import { getKbResource } from './get_kb_resource';
+
+describe('getKbResource', () => {
+  it('returns undefined when the request is undefined', () => {
+    const result = getKbResource(undefined);
+
+    expect(result).toBeUndefined();
+  });
+
+  it('returns undefined when params is undefined', () => {
+    const request = { params: undefined };
+
+    const result = getKbResource(request);
+
+    expect(result).toBeUndefined();
+  });
+
+  it('returns undefined when resource is undefined', () => {
+    const request = { params: { resource: undefined } };
+
+    const result = getKbResource(request);
+
+    expect(result).toBeUndefined();
+  });
+
+  it('returns the decoded resource', () => {
+    const request = { params: { resource: 'esql%20query' } }; // %20 is a URI-encoded space
+
+    const result = getKbResource(request);
+
+    expect(result).toEqual('esql query');
+  });
+});
diff --git a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_kb_resource.ts b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_kb_resource.ts
new file mode 100644
index 0000000000000..a238a8f55d615
--- /dev/null
+++ b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_kb_resource.ts
@@ -0,0 +1,30 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+/**
+ * A knowledge base REST request
+ */
+interface KbRequest {
+  params?: {
+    resource?: string;
+  };
+}
+
+/**
+ * Returns the optional resource, e.g. `esql` from the request params, or undefined if it doesn't exist
+ *
+ * @param request A REST request
+ *
+ * @returns Returns the optional resource, e.g.
`esql` from the request params, or undefined if it doesn't exist
+ */
+export const getKbResource = (request: KbRequest | undefined): string | undefined => {
+  const resource = request?.params?.resource; // null/undefined when the request, params, or resource is absent
+  if (resource == null) {
+    return undefined;
+  }
+  return decodeURIComponent(resource);
+};
diff --git a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts
index 3bb1961bd0956..c2c616939677d 100644
--- a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts
+++ b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/get_knowledge_base_status.ts
@@ -8,6 +8,8 @@
 import { IRouter } from '@kbn/core/server';
 import { transformError } from '@kbn/securitysolution-es-utils';
 import type { GetKnowledgeBaseStatusResponse } from '@kbn/elastic-assistant';
+
+import { getKbResource } from './get_kb_resource';
 import { buildResponse } from '../../lib/build_response';
 import { buildRouteValidation } from '../../schemas/common';
 import { ElasticAssistantRequestHandlerContext, GetElser } from '../../types';
@@ -42,17 +44,16 @@ export const getKnowledgeBaseStatusRoute = (
       const logger = (await context.elasticAssistant).logger;
 
       try {
-        const kbResource =
-          request.params.resource != null ?
decodeURIComponent(request.params.resource) : undefined; - // Get a scoped esClient for finding the status of the Knowledge Base index, pipeline, and documents const esClient = (await context.core).elasticsearch.client.asCurrentUser; const elserId = await getElser(request, (await context.core).savedObjects.getClient()); + const kbResource = getKbResource(request); const esStore = new ElasticsearchStore( esClient, KNOWLEDGE_BASE_INDEX_PATTERN, logger, - elserId + elserId, + kbResource ); const indexExists = await esStore.indexExists(); diff --git a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/post_knowledge_base.ts b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/post_knowledge_base.ts index 580c03e23a0cc..2ac938d3db45a 100644 --- a/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/post_knowledge_base.ts +++ b/x-pack/plugins/elastic_assistant/server/routes/knowledge_base/post_knowledge_base.ts @@ -7,12 +7,14 @@ import { IRouter } from '@kbn/core/server'; import { transformError } from '@kbn/securitysolution-es-utils'; + import { buildResponse } from '../../lib/build_response'; import { buildRouteValidation } from '../../schemas/common'; import { ElasticAssistantRequestHandlerContext, GetElser } from '../../types'; import { KNOWLEDGE_BASE } from '../../../common/constants'; import { ElasticsearchStore } from '../../lib/langchain/elasticsearch_store/elasticsearch_store'; import { ESQL_DOCS_LOADED_QUERY, ESQL_RESOURCE, KNOWLEDGE_BASE_INDEX_PATTERN } from './constants'; +import { getKbResource } from './get_kb_resource'; import { PostKnowledgeBasePathParams } from '../../schemas/knowledge_base/post_knowledge_base'; import { loadESQL } from '../../lib/langchain/content_loaders/esql_loader'; @@ -41,17 +43,16 @@ export const postKnowledgeBaseRoute = ( const logger = (await context.elasticAssistant).logger; try { - const kbResource = - request.params.resource != null ? 
decodeURIComponent(request.params.resource) : undefined; - // Get a scoped esClient for creating the Knowledge Base index, pipeline, and documents const esClient = (await context.core).elasticsearch.client.asCurrentUser; const elserId = await getElser(request, (await context.core).savedObjects.getClient()); + const kbResource = getKbResource(request); const esStore = new ElasticsearchStore( esClient, KNOWLEDGE_BASE_INDEX_PATTERN, logger, - elserId + elserId, + kbResource ); // Pre-check on index/pipeline diff --git a/x-pack/plugins/elastic_assistant/server/routes/post_actions_connector_execute.ts b/x-pack/plugins/elastic_assistant/server/routes/post_actions_connector_execute.ts index 8f620dac06faa..5303796d1c983 100644 --- a/x-pack/plugins/elastic_assistant/server/routes/post_actions_connector_execute.ts +++ b/x-pack/plugins/elastic_assistant/server/routes/post_actions_connector_execute.ts @@ -16,6 +16,7 @@ import { PostActionsConnectorExecutePathParams, } from '../schemas/post_actions_connector_execute'; import { ElasticAssistantRequestHandlerContext, GetElser } from '../types'; +import { ESQL_RESOURCE } from './knowledge_base/constants'; import { callAgentExecutor } from '../lib/langchain/execute_custom_llm_chain'; export const postActionsConnectorExecuteRoute = ( @@ -58,6 +59,7 @@ export const postActionsConnectorExecuteRoute = ( logger, request, elserId, + kbResource: ESQL_RESOURCE, }); return response.ok({ diff --git a/x-pack/plugins/security_solution/public/assistant/content/prompts/system/translations.ts b/x-pack/plugins/security_solution/public/assistant/content/prompts/system/translations.ts index 90c75a7405526..63fcd9c9995a9 100644 --- a/x-pack/plugins/security_solution/public/assistant/content/prompts/system/translations.ts +++ b/x-pack/plugins/security_solution/public/assistant/content/prompts/system/translations.ts @@ -41,7 +41,7 @@ export const FORMAT_OUTPUT_CORRECTLY = i18n.translate( 
'xpack.securitySolution.assistant.content.prompts.system.outputFormatting', { defaultMessage: - 'If you answer a question related to KQL or EQL, it should be immediately usable within an Elastic Security timeline; please always format the output correctly with back ticks. Any answer provided for Query DSL should also be usable in a security timeline. This means you should only ever include the "filter" portion of the query.', + 'If you answer a question related to KQL, EQL, or ES|QL, it should be immediately usable within an Elastic Security timeline; please always format the output correctly with back ticks. Any answer provided for Query DSL should also be usable in a security timeline. This means you should only ever include the "filter" portion of the query.', } );