From fa1998ce92715f5c7b718ba6972ee8f8938bfa4e Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Wed, 11 Dec 2024 12:35:01 +0100 Subject: [PATCH] [RCA] AI-assisted root cause analysis (#197200) Implements an LLM-based root cause analysis process. At a high level, it works by investigating entities - which means pulling in alerts, SLOs, and log patterns. From there, it can inspect related entities to get to the root cause. The backend implementation lives in `x-pack/packages/observability_utils-*` (`service_rca`). It can be imported into any server-side plugin and executed from there. The UI changes are mostly contained in `x-pack/plugins/observability_solution/observability_ai_assistant_app`. This plugin now exports a `RootCauseAnalysisContainer` which takes a stream of data that is returned by the root cause analysis process. The current implementation lives in the Investigate app. There, it calls its own endpoint that kicks off the RCA process, and feeds it into the `RootCauseAnalysisContainer` exposed by the Observability AI Assistant app plugin. I've left it in a route there so the investigation itself can be updated as the process runs - this would allow the user to close the browser and come back later, and see a full investigation. 
> [!NOTE] > Notes for reviewing teams > > @kbn/es-types: > - support both types and typesWithBodyKey > - simplify KeysOfSources type > > @kbn/server-route-repository: > - abortable streamed responses > > @kbn/sse-utils*: > - abortable streamed responses > - serialize errors in specific format for more reliable re-hydration of errors > - keep connection open with SSE comments > > @kbn/inference-*: > - export *Of variants of types, for easier manual inference > - add automated retries for `output` API > - add `name` to tool responses for type inference (get type of tool response via tool name) > - add `data` to tool responses for transporting internal data (not sent to the LLM) > - simplify `chunksIntoMessage` > - allow consumers of nlToEsql task to add to `system` prompt > - add toolCallId to validation error message > > @kbn/aiops*: > - export `categorizationAnalyzer` for use in observability-ai* > > @kbn/observability-ai-assistant* > - configurable limit (tokens or doc count) for knowledge base recall > > @kbn/slo*: > - export client that returns summary indices --------- Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com> Co-authored-by: Maryam Saeidi Co-authored-by: Bena Kansara --- .eslintrc.js | 2 + .github/CODEOWNERS | 2 + package.json | 3 + packages/kbn-es-types/index.ts | 1 + packages/kbn-es-types/src/index.ts | 2 + packages/kbn-es-types/src/search.ts | 21 +- .../src/rest_specs/update.ts | 3 + .../src/schema/investigation.ts | 5 + .../src/register_routes.ts | 9 +- ...bservable_into_event_source_stream.test.ts | 198 + .../observable_into_event_source_stream.ts | 71 +- packages/kbn-sse-utils-server/tsconfig.json | 1 + packages/kbn-sse-utils/README.md | 3 +- tsconfig.base.json | 4 + .../observability_ai_common/jest.config.js | 15 + .../observability_ai_common/kibana.jsonc | 7 + .../observability_ai_common/package.json | 6 + .../root_cause_analysis/index.ts | 12 + .../root_cause_analysis/tool_names.ts | 10 + 
.../observability_ai_common/tsconfig.json | 20 + .../observability_ai_server/jest.config.js | 12 + .../observability_ai_server/kibana.jsonc | 7 + .../observability_ai_server/package.json | 6 + .../call_end_rca_process_tool.ts | 51 + .../call_investigate_entity_tool.ts | 80 + .../root_cause_analysis/call_observe_tool.ts | 91 + .../empty_assistant_message.ts | 15 + .../root_cause_analysis/index.ts | 20 + .../root_cause_analysis/prompts/index.ts | 345 + .../run_root_cause_analysis.ts | 305 + .../tasks/analyze_log_patterns/index.ts | 402 + .../tasks/describe_entity/index.ts | 74 + .../tasks/describe_log_patterns/index.ts | 189 + .../analyze_fetched_related_entities.ts | 438 + .../extract_related_entities.ts | 159 + .../tasks/find_related_entities/index.ts | 97 + ...e_keyword_searches_for_related_entities.ts | 199 + .../tasks/generate_timeline/index.ts | 96 + .../tasks/get_knowledge_base_entries/index.ts | 185 + .../tasks/investigate_entity/index.ts | 268 + .../tasks/investigate_entity/prompts.ts | 22 + .../tasks/investigate_entity/types.ts | 31 + .../observe_investigation_results/index.ts | 239 + .../index.ts | 84 + .../tasks/write_final_report/index.ts | 191 + .../root_cause_analysis/tools.ts | 77 + .../root_cause_analysis/types.ts | 101 + .../root_cause_analysis/util/call_tools.ts | 177 + .../util/chunk_output_calls.ts | 97 + .../root_cause_analysis/util/format_entity.ts | 12 + .../get_previously_investigated_entities.ts | 22 + .../util/serialize_knowledge_base_entries.ts | 34 + .../util/stringify_summaries.ts | 47 + .../root_cause_analysis/util/to_blockquote.ts | 13 + .../validate_investigate_entity_tool_call.ts | 124 + .../observability_ai_server/tsconfig.json | 29 + .../entities/get_entities_by_fuzzy_search.ts | 50 + .../entities/get_log_patterns.ts | 405 + .../observability_utils_server/tsconfig.json | 2 + .../shared/ai-infra/inference-common/index.ts | 3 + .../src/chat_complete/index.ts | 3 + .../src/chat_complete/messages.ts | 67 +- 
.../src/chat_complete/tools.ts | 23 +- .../inference-common/src/output/api.ts | 11 + .../ai-infra/product-doc-common/package.json | 2 +- .../categorization_analyzer.ts | 72 + .../create_category_request.ts | 70 +- .../translations/translations/fr-FR.json | 1 - .../translations/translations/ja-JP.json | 1 - .../translations/translations/zh-CN.json | 1 - .../common/output/create_output_api.test.ts | 77 + .../common/output/create_output_api.ts | 89 +- .../bedrock/bedrock_claude_adapter.test.ts | 1 + .../adapters/gemini/gemini_adapter.test.ts | 1 + .../adapters/openai/openai_adapter.test.ts | 1 + .../adapters/openai/openai_adapter.ts | 2 - .../inference/server/chat_complete/api.ts | 18 +- .../inference/server/chat_complete/errors.ts | 2 +- .../wrap_with_simulated_function_calling.ts | 2 +- .../utils/chunks_into_message.test.ts | 2 +- .../utils/chunks_into_message.ts | 62 +- .../inference/server/routes/chat_complete.ts | 2 + .../tasks/nl_to_esql/actions/generate_esql.ts | 5 +- .../inference/server/tasks/nl_to_esql/task.ts | 2 + .../server/tasks/nl_to_esql/types.ts | 1 + .../server/util/validate_tool_calls.test.ts | 11 +- .../server/util/validate_tool_calls.ts | 3 +- .../inventory/tsconfig.json | 3 + .../get_mock_investigate_app_services.tsx | 5 + .../investigate_app/kibana.jsonc | 12 +- .../investigate_app/public/api/index.ts | 27 +- .../assistant_hypothesis.tsx | 266 +- .../investigate_app/public/plugin.tsx | 6 + .../investigate_app/public/services/types.ts | 2 + .../investigate_app/public/types.ts | 6 + ...investigate_app_server_route_repository.ts | 20 +- .../server/routes/rca/route.ts | 163 + .../investigate_app/server/types.ts | 28 +- .../investigate_app/tsconfig.json | 88 +- .../common/convert_messages_for_inference.ts | 1 + .../components/message_panel/message_text.tsx | 9 +- .../server/index.ts | 5 + .../server/service/client/index.test.ts | 5 +- .../server/service/client/index.ts | 3 + .../service/knowledge_base_service/index.ts | 8 +- .../server/types.ts | 3 
+- .../observability_ai_assistant/tsconfig.json | 1 + .../.storybook/storybook_decorator.tsx | 11 + .../kibana.jsonc | 4 +- .../public/components/charts/spark_plot.tsx | 43 +- .../components/rca/entity_badge/index.tsx | 26 + .../mock/complete_root_cause_analysis.json | 19532 ++++++++++++++++ .../public/components/rca/mock/index.ts | 26 + .../rca/rca_callout/index.stories.tsx | 37 + .../components/rca/rca_callout/index.tsx | 106 + .../rca/rca_collapsible_panel/index.tsx | 31 + .../rca/rca_container/index.stories.tsx | 59 + .../components/rca/rca_container/index.tsx | 338 + .../index.stories.tsx | 31 + .../rca/rca_entity_investigation/index.tsx | 57 + .../index.stories.tsx | 30 + .../rca_entity_log_pattern_table/index.tsx | 299 + .../rca_observation_panel/index.stories.tsx | 36 + .../rca/rca_observation_panel/index.tsx | 64 + .../public/components/rca/rca_panel/index.tsx | 36 + .../rca/rca_report/index.stories.tsx | 31 + .../components/rca/rca_report/index.tsx | 42 + .../components/rca/rca_step/index.stories.tsx | 60 + .../public/components/rca/rca_step/index.tsx | 73 + .../components/rca/rca_stop_button/index.tsx | 26 + .../rca/rca_task_step/index.stories.tsx | 65 + .../components/rca/rca_task_step/index.tsx | 37 + .../public/index.ts | 2 + .../public/plugin.tsx | 30 +- .../public/types.ts | 8 +- ...bservability_ai_assistant_app_es_client.ts | 25 + .../server/types.ts | 6 + .../server/util/get_log_sources.ts | 13 + .../tsconfig.json | 10 +- .../slo/server/client/index.ts | 40 + .../slo/server/index.ts | 4 + .../slo/server/plugin.ts | 12 +- .../slo/server/types.ts | 7 +- yarn.lock | 13 + 144 files changed, 27293 insertions(+), 364 deletions(-) create mode 100644 packages/kbn-sse-utils-server/src/observable_into_event_source_stream.test.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_common/jest.config.js create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_common/kibana.jsonc create mode 100644 
x-pack/packages/observability/observability_ai/observability_ai_common/package.json create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_common/root_cause_analysis/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_common/root_cause_analysis/tool_names.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_common/tsconfig.json create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/jest.config.js create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/kibana.jsonc create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/package.json create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_end_rca_process_tool.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_investigate_entity_tool.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_observe_tool.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/empty_assistant_message.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/prompts/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/run_root_cause_analysis.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/analyze_log_patterns/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/describe_entity/index.ts create mode 100644 
x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/describe_log_patterns/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/find_related_entities/analyze_fetched_related_entities.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/find_related_entities/extract_related_entities.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/find_related_entities/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/find_related_entities/write_keyword_searches_for_related_entities.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/generate_timeline/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/get_knowledge_base_entries/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/investigate_entity/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/investigate_entity/prompts.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/investigate_entity/types.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/observe_investigation_results/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/write_entity_investigation_report/index.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/write_final_report/index.ts create mode 
100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tools.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/types.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/call_tools.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/chunk_output_calls.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/format_entity.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/get_previously_investigated_entities.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/serialize_knowledge_base_entries.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/stringify_summaries.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/to_blockquote.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/validate_investigate_entity_tool_call.ts create mode 100644 x-pack/packages/observability/observability_ai/observability_ai_server/tsconfig.json create mode 100644 x-pack/packages/observability/observability_utils/observability_utils_server/entities/get_entities_by_fuzzy_search.ts create mode 100644 x-pack/packages/observability/observability_utils/observability_utils_server/entities/get_log_patterns.ts create mode 100644 x-pack/platform/packages/shared/ml/aiops_log_pattern_analysis/categorization_analyzer.ts create mode 100644 x-pack/plugins/observability_solution/investigate_app/server/routes/rca/route.ts create mode 100644 
x-pack/plugins/observability_solution/observability_ai_assistant_app/.storybook/storybook_decorator.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/entity_badge/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/mock/complete_root_cause_analysis.json create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/mock/index.ts create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_callout/index.stories.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_callout/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_collapsible_panel/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_container/index.stories.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_container/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_investigation/index.stories.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_investigation/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_log_pattern_table/index.stories.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_log_pattern_table/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_observation_panel/index.stories.tsx create mode 100644 
x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_observation_panel/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_panel/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_report/index.stories.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_report/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_step/index.stories.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_step/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_stop_button/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_task_step/index.stories.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_task_step/index.tsx create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/server/clients/create_observability_ai_assistant_app_es_client.ts create mode 100644 x-pack/plugins/observability_solution/observability_ai_assistant_app/server/util/get_log_sources.ts create mode 100644 x-pack/plugins/observability_solution/slo/server/client/index.ts diff --git a/.eslintrc.js b/.eslintrc.js index 0fe79513a9948..2bdcaf9901474 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -919,6 +919,7 @@ module.exports = { 'x-pack/plugins/observability_solution/exploratory_view/**/*.{js,mjs,ts,tsx}', 'x-pack/plugins/observability_solution/ux/**/*.{js,mjs,ts,tsx}', 'x-pack/plugins/observability_solution/slo/**/*.{js,mjs,ts,tsx}', + 'x-pack/packages/observability/**/*.{js,mjs,ts,tsx}', ], rules: { 
'no-console': ['warn', { allow: ['error'] }], @@ -938,6 +939,7 @@ module.exports = { 'x-pack/plugins/observability_solution/observability/**/*.stories.*', 'x-pack/plugins/observability_solution/exploratory_view/**/*.stories.*', 'x-pack/plugins/observability_solution/slo/**/*.stories.*', + 'x-pack/packages/observability/**/*.{js,mjs,ts,tsx}', ], rules: { 'react/function-component-definition': [ diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 901fbc18cbbc0..087ffceddc04c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -800,6 +800,8 @@ x-pack/packages/observability/alerting_rule_utils @elastic/obs-ux-management-tea x-pack/packages/observability/alerting_test_data @elastic/obs-ux-management-team x-pack/packages/observability/get_padded_alert_time_range_util @elastic/obs-ux-management-team x-pack/packages/observability/logs_overview @elastic/obs-ux-logs-team +x-pack/packages/observability/observability_ai/observability_ai_common @elastic/obs-ai-assistant +x-pack/packages/observability/observability_ai/observability_ai_server @elastic/obs-ai-assistant x-pack/packages/observability/observability_utils/observability_utils_browser @elastic/observability-ui x-pack/packages/observability/observability_utils/observability_utils_common @elastic/observability-ui x-pack/packages/observability/observability_utils/observability_utils_server @elastic/observability-ui diff --git a/package.json b/package.json index a5687c0ae3729..cfa62b257ec44 100644 --- a/package.json +++ b/package.json @@ -695,6 +695,8 @@ "@kbn/observability-ai-assistant-app-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant_app", "@kbn/observability-ai-assistant-management-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant_management", "@kbn/observability-ai-assistant-plugin": "link:x-pack/plugins/observability_solution/observability_ai_assistant", + "@kbn/observability-ai-common": 
"link:x-pack/packages/observability/observability_ai/observability_ai_common", + "@kbn/observability-ai-server": "link:x-pack/packages/observability/observability_ai/observability_ai_server", "@kbn/observability-alert-details": "link:x-pack/packages/observability/alert_details", "@kbn/observability-alerting-rule-utils": "link:x-pack/packages/observability/alerting_rule_utils", "@kbn/observability-alerting-test-data": "link:x-pack/packages/observability/alerting_test_data", @@ -1145,6 +1147,7 @@ "fnv-plus": "^1.3.1", "formik": "^2.4.6", "fp-ts": "^2.3.1", + "fuse.js": "^7.0.0", "get-port": "^5.0.0", "getopts": "^2.2.5", "getos": "^3.1.0", diff --git a/packages/kbn-es-types/index.ts b/packages/kbn-es-types/index.ts index 683fddb541baf..d4ba23840e2a6 100644 --- a/packages/kbn-es-types/index.ts +++ b/packages/kbn-es-types/index.ts @@ -13,6 +13,7 @@ export type { SearchHit, ESSearchResponse, ESSearchRequest, + ESSearchRequestWithoutBody, ESSourceOptions, InferSearchResponseOf, AggregationResultOf, diff --git a/packages/kbn-es-types/src/index.ts b/packages/kbn-es-types/src/index.ts index 2ec37ba474789..77d02320b6f2d 100644 --- a/packages/kbn-es-types/src/index.ts +++ b/packages/kbn-es-types/src/index.ts @@ -8,6 +8,7 @@ */ import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; +import * as estypesWithoutBody from '@elastic/elasticsearch/lib/api/types'; import type { Field, QueryDslFieldAndFormat, @@ -26,6 +27,7 @@ import { export type ESFilter = estypes.QueryDslQueryContainer; export type ESSearchRequest = estypes.SearchRequest; +export type ESSearchRequestWithoutBody = estypesWithoutBody.SearchRequest; export type AggregationOptionsByType = Required; // Typings for Elasticsearch queries and aggregations. 
These are intended to be diff --git a/packages/kbn-es-types/src/search.ts b/packages/kbn-es-types/src/search.ts index d3675e04c2663..1c9a9e16fd4a7 100644 --- a/packages/kbn-es-types/src/search.ts +++ b/packages/kbn-es-types/src/search.ts @@ -23,20 +23,15 @@ type InvalidAggregationRequest = unknown; // Union keys are not included in keyof, but extends iterates over the types in a union. type ValidAggregationKeysOf> = T extends T ? keyof T : never; -type KeyOfSource = Record< - keyof T, - (T extends Record ? null : never) | string | number ->; +type KeyOfSource = { + [key in keyof T]: + | (T[key] extends Record ? null : never) + | string + | number; +}; -type KeysOfSources = T extends [any] - ? KeyOfSource - : T extends [any, any] - ? KeyOfSource & KeyOfSource - : T extends [any, any, any] - ? KeyOfSource & KeyOfSource & KeyOfSource - : T extends [any, any, any, any] - ? KeyOfSource & KeyOfSource & KeyOfSource & KeyOfSource - : Record; +// convert to intersection to be able to get all the keys +type KeysOfSources = UnionToIntersection>>>; type CompositeKeysOf = TAggregationContainer extends { diff --git a/packages/kbn-investigation-shared/src/rest_specs/update.ts b/packages/kbn-investigation-shared/src/rest_specs/update.ts index cab773d9549a2..42cf1539d2b4d 100644 --- a/packages/kbn-investigation-shared/src/rest_specs/update.ts +++ b/packages/kbn-investigation-shared/src/rest_specs/update.ts @@ -24,6 +24,9 @@ const updateInvestigationParamsSchema = z.object({ }), tags: z.array(z.string()), externalIncidentUrl: z.string().nullable(), + rootCauseAnalysis: z.object({ + events: z.array(z.any()), + }), }) .partial(), }); diff --git a/packages/kbn-investigation-shared/src/schema/investigation.ts b/packages/kbn-investigation-shared/src/schema/investigation.ts index 751f1a20048a5..23806c23e94a6 100644 --- a/packages/kbn-investigation-shared/src/schema/investigation.ts +++ b/packages/kbn-investigation-shared/src/schema/investigation.ts @@ -35,6 +35,11 @@ const 
investigationSchema = z.object({ notes: z.array(investigationNoteSchema), items: z.array(investigationItemSchema), externalIncidentUrl: z.string().nullable(), + rootCauseAnalysis: z + .object({ + events: z.array(z.any()), + }) + .optional(), }); type Status = z.infer; diff --git a/packages/kbn-server-route-repository/src/register_routes.ts b/packages/kbn-server-route-repository/src/register_routes.ts index 6201ffcd869ea..90c4f42b9ce44 100644 --- a/packages/kbn-server-route-repository/src/register_routes.ts +++ b/packages/kbn-server-route-repository/src/register_routes.ts @@ -98,8 +98,15 @@ export function registerRoutes>({ if (isKibanaResponse(result)) { return result; } else if (isObservable(result)) { + const controller = new AbortController(); + request.events.aborted$.subscribe(() => { + controller.abort(); + }); return response.ok({ - body: observableIntoEventSourceStream(result as Observable), + body: observableIntoEventSourceStream(result as Observable, { + logger, + signal: controller.signal, + }), }); } else { const body = result || {}; diff --git a/packages/kbn-sse-utils-server/src/observable_into_event_source_stream.test.ts b/packages/kbn-sse-utils-server/src/observable_into_event_source_stream.test.ts new file mode 100644 index 0000000000000..9f4f8ffa84284 --- /dev/null +++ b/packages/kbn-sse-utils-server/src/observable_into_event_source_stream.test.ts @@ -0,0 +1,198 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +import { Logger } from '@kbn/logging'; +import { observableIntoEventSourceStream } from './observable_into_event_source_stream'; +import { PassThrough } from 'node:stream'; +import { Subject } from 'rxjs'; +import { ServerSentEvent, ServerSentEventType } from '@kbn/sse-utils/src/events'; +import { + ServerSentEventErrorCode, + createSSEInternalError, + createSSERequestError, +} from '@kbn/sse-utils/src/errors'; + +describe('observableIntoEventSourceStream', () => { + let logger: jest.Mocked; + + let controller: AbortController; + + let stream: PassThrough; + let source$: Subject; + + let data: string[]; + + beforeEach(() => { + jest.useFakeTimers(); + logger = { + debug: jest.fn(), + error: jest.fn(), + } as unknown as jest.Mocked; + + controller = new AbortController(); + source$ = new Subject(); + data = []; + + stream = observableIntoEventSourceStream(source$, { logger, signal: controller.signal }); + stream.on('data', (chunk) => { + data.push(chunk.toString()); + }); + }); + + afterEach(() => { + jest.clearAllTimers(); + }); + + it('writes events into the stream in SSE format', () => { + source$.next({ type: ServerSentEventType.data, data: { foo: 'bar' } }); + source$.complete(); + + jest.runAllTimers(); + + expect(data).toEqual(['event: data\ndata: {"data":{"foo":"bar"}}\n\n']); + }); + + it('handles SSE errors', () => { + const sseError = createSSEInternalError('Invalid input'); + + source$.error(sseError); + + jest.runAllTimers(); + + expect(logger.error).toHaveBeenCalledWith(sseError); + expect(logger.debug).toHaveBeenCalled(); + const debugFn = logger.debug.mock.calls[0][0] as () => string; + const loggedError = JSON.parse(debugFn()); + expect(loggedError).toEqual({ + type: 'error', + error: { + code: ServerSentEventErrorCode.internalError, + message: 'Invalid input', + meta: {}, + }, + }); + + expect(data).toEqual([ + `event: error\ndata: ${JSON.stringify({ + error: { + code: ServerSentEventErrorCode.internalError, + message: 'Invalid input', + 
meta: {}, + }, + })}\n\n`, + ]); + }); + + it('handles SSE errors with metadata', () => { + const sseError = createSSERequestError('Invalid request', 400); + + source$.error(sseError); + + jest.runAllTimers(); + + expect(logger.error).toHaveBeenCalledWith(sseError); + expect(logger.debug).toHaveBeenCalled(); + const debugFn = logger.debug.mock.calls[0][0] as () => string; + const loggedError = JSON.parse(debugFn()); + expect(loggedError).toEqual({ + type: 'error', + error: { + code: ServerSentEventErrorCode.requestError, + message: 'Invalid request', + meta: { + status: 400, + }, + }, + }); + + expect(data).toEqual([ + `event: error\ndata: ${JSON.stringify({ + error: { + code: ServerSentEventErrorCode.requestError, + message: 'Invalid request', + meta: { + status: 400, + }, + }, + })}\n\n`, + ]); + }); + + it('handles non-SSE errors', () => { + const error = new Error('Non-SSE Error'); + + source$.error(error); + + jest.runAllTimers(); + + expect(logger.error).toHaveBeenCalledWith(error); + expect(data).toEqual([ + `event: error\ndata: ${JSON.stringify({ + error: { + code: ServerSentEventErrorCode.internalError, + message: 'Non-SSE Error', + }, + })}\n\n`, + ]); + }); + + it('should send keep-alive comments every 10 seconds', () => { + jest.advanceTimersByTime(10000); + expect(data).toContain(': keep-alive'); + + jest.advanceTimersByTime(10000); + expect(data.filter((d) => d === ': keep-alive')).toHaveLength(2); + }); + + describe('without fake timers', () => { + beforeEach(() => { + jest.useFakeTimers({ doNotFake: ['nextTick'] }); + }); + + it('should end the stream when the observable completes', async () => { + jest.useFakeTimers({ doNotFake: ['nextTick'] }); + + const endSpy = jest.fn(); + stream.on('end', endSpy); + + source$.complete(); + + await new Promise((resolve) => process.nextTick(resolve)); + + expect(endSpy).toHaveBeenCalled(); + }); + + it('should end stream when signal is aborted', async () => { + const endSpy = jest.fn(); + stream.on('end', 
endSpy); + + // Emit some data + source$.next({ type: ServerSentEventType.data, data: { initial: 'data' } }); + + // Abort the signal + controller.abort(); + + // Emit more data after abort + source$.next({ type: ServerSentEventType.data, data: { after: 'abort' } }); + + await new Promise((resolve) => process.nextTick(resolve)); + + expect(endSpy).toHaveBeenCalled(); + + // Data after abort should not be received + expect(data).toEqual([ + `event: data\ndata: ${JSON.stringify({ data: { initial: 'data' } })}\n\n`, + ]); + }); + + afterEach(() => { + jest.useFakeTimers(); + }); + }); +}); diff --git a/packages/kbn-sse-utils-server/src/observable_into_event_source_stream.ts b/packages/kbn-sse-utils-server/src/observable_into_event_source_stream.ts index e0d653e44dabc..0a71cd60192e6 100644 --- a/packages/kbn-sse-utils-server/src/observable_into_event_source_stream.ts +++ b/packages/kbn-sse-utils-server/src/observable_into_event_source_stream.ts @@ -7,12 +7,51 @@ * License v3.0 only", or the "Server Side Public License, v 1". 
*/ -import { map, Observable } from 'rxjs'; +import { Logger } from '@kbn/logging'; +import { + isSSEError, + ServerSentErrorEvent, + ServerSentEventErrorCode, +} from '@kbn/sse-utils/src/errors'; +import { ServerSentEvent, ServerSentEventType } from '@kbn/sse-utils/src/events'; +import { catchError, map, Observable, of } from 'rxjs'; import { PassThrough } from 'stream'; -import { ServerSentEvent } from '@kbn/sse-utils'; -export function observableIntoEventSourceStream(source$: Observable): PassThrough { - const withSerializedEvents$ = source$.pipe( +export function observableIntoEventSourceStream( + source$: Observable, + { + logger, + signal, + }: { + logger: Pick; + signal: AbortSignal; + } +) { + const withSerializedErrors$ = source$.pipe( + catchError((error): Observable => { + if (isSSEError(error)) { + logger.error(error); + logger.debug(() => JSON.stringify(error)); + return of({ + type: ServerSentEventType.error, + error: { + code: error.code, + message: error.message, + meta: error.meta, + }, + }); + } + + logger.error(error); + + return of({ + type: ServerSentEventType.error, + error: { + code: ServerSentEventErrorCode.internalError, + message: error.message as string, + }, + }); + }), map((event) => { const { type, ...rest } = event; return `event: ${type}\ndata: ${JSON.stringify(rest)}\n\n`; @@ -21,18 +60,39 @@ export function observableIntoEventSourceStream(source$: Observable { + // `:` denotes a comment - this is to keep the connection open + // it will be ignored by the SSE parser on the client (newline-terminated so it cannot swallow the next event line) + stream.write(': keep-alive\n'); + }, 10000); + + const subscription = withSerializedErrors$.subscribe({ next: (line) => { stream.write(line); }, complete: () => { stream.end(); + clearInterval(intervalId); }, error: (error) => { - stream.write(`event: error\ndata: ${JSON.stringify(error)}\n\n`); + clearInterval(intervalId); + stream.write( + `event:error\ndata: ${JSON.stringify({ + error: { + code: ServerSentEventErrorCode.internalError, + message: 
error.message, + }, + })}\n\n` + ); stream.end(); }, }); + signal.addEventListener('abort', () => { + clearInterval(intervalId); // stop keep-alives so nothing is written after end() + subscription.unsubscribe(); + stream.end(); + }); + return stream; } diff --git a/packages/kbn-sse-utils-server/tsconfig.json b/packages/kbn-sse-utils-server/tsconfig.json index 9053749c5898b..7f9b7b7e8f52f 100644 --- a/packages/kbn-sse-utils-server/tsconfig.json +++ b/packages/kbn-sse-utils-server/tsconfig.json @@ -15,5 +15,6 @@ ], "kbn_references": [ "@kbn/sse-utils", + "@kbn/logging", ] } diff --git a/packages/kbn-sse-utils/README.md b/packages/kbn-sse-utils/README.md index ad6dbf8b67c00..948376fb7e4e5 100644 --- a/packages/kbn-sse-utils/README.md +++ b/packages/kbn-sse-utils/README.md @@ -21,7 +21,8 @@ function myRequestHandler( data: { anyData: {}, }, - }) + }), + logger ), }); } diff --git a/tsconfig.base.json b/tsconfig.base.json index 6daea879e4151..1d7528bfd3e14 100644 --- a/tsconfig.base.json +++ b/tsconfig.base.json @@ -1322,6 +1322,10 @@ "@kbn/observability-ai-assistant-management-plugin/*": ["x-pack/plugins/observability_solution/observability_ai_assistant_management/*"], "@kbn/observability-ai-assistant-plugin": ["x-pack/plugins/observability_solution/observability_ai_assistant"], "@kbn/observability-ai-assistant-plugin/*": ["x-pack/plugins/observability_solution/observability_ai_assistant/*"], + "@kbn/observability-ai-common": ["x-pack/packages/observability/observability_ai/observability_ai_common"], + "@kbn/observability-ai-common/*": ["x-pack/packages/observability/observability_ai/observability_ai_common/*"], + "@kbn/observability-ai-server": ["x-pack/packages/observability/observability_ai/observability_ai_server"], + "@kbn/observability-ai-server/*": ["x-pack/packages/observability/observability_ai/observability_ai_server/*"], "@kbn/observability-alert-details": ["x-pack/packages/observability/alert_details"], "@kbn/observability-alert-details/*": ["x-pack/packages/observability/alert_details/*"], "@kbn/observability-alerting-rule-utils": 
["x-pack/packages/observability/alerting_rule_utils"], diff --git a/x-pack/packages/observability/observability_ai/observability_ai_common/jest.config.js b/x-pack/packages/observability/observability_ai/observability_ai_common/jest.config.js new file mode 100644 index 0000000000000..d99760c04c1c0 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_common/jest.config.js @@ -0,0 +1,15 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +module.exports = { + preset: '@kbn/test', + rootDir: '../../../../..', + roots: [ + '/x-pack/packages/observability/observability_ai/observability_ai_common', + '/x-pack/packages/observability/observability_ai/observability_ai_server', + ], +}; diff --git a/x-pack/packages/observability/observability_ai/observability_ai_common/kibana.jsonc b/x-pack/packages/observability/observability_ai/observability_ai_common/kibana.jsonc new file mode 100644 index 0000000000000..731f38d6bfe88 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_common/kibana.jsonc @@ -0,0 +1,7 @@ +{ + "type": "shared-common", + "id": "@kbn/observability-ai-common", + "owner": "@elastic/obs-ai-assistant", + "group": "observability", + "visibility": "private" +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_common/package.json b/x-pack/packages/observability/observability_ai/observability_ai_common/package.json new file mode 100644 index 0000000000000..0f5f1062937ba --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_common/package.json @@ -0,0 +1,6 @@ +{ + "name": "@kbn/observability-ai-common", + "private": true, + "version": "1.0.0", + "license": "Elastic License 2.0" +} diff --git 
a/x-pack/packages/observability/observability_ai/observability_ai_common/root_cause_analysis/index.ts b/x-pack/packages/observability/observability_ai/observability_ai_common/root_cause_analysis/index.ts new file mode 100644 index 0000000000000..8d052799aa583 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_common/root_cause_analysis/index.ts @@ -0,0 +1,12 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export { + RCA_END_PROCESS_TOOL_NAME, + RCA_INVESTIGATE_ENTITY_TOOL_NAME, + RCA_OBSERVE_TOOL_NAME, +} from './tool_names'; diff --git a/x-pack/packages/observability/observability_ai/observability_ai_common/root_cause_analysis/tool_names.ts b/x-pack/packages/observability/observability_ai/observability_ai_common/root_cause_analysis/tool_names.ts new file mode 100644 index 0000000000000..76fe9e377daed --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_common/root_cause_analysis/tool_names.ts @@ -0,0 +1,10 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +export const RCA_OBSERVE_TOOL_NAME = 'observe'; +export const RCA_END_PROCESS_TOOL_NAME = 'endProcessAndWriteReport'; +export const RCA_INVESTIGATE_ENTITY_TOOL_NAME = 'investigateEntity'; diff --git a/x-pack/packages/observability/observability_ai/observability_ai_common/tsconfig.json b/x-pack/packages/observability/observability_ai/observability_ai_common/tsconfig.json new file mode 100644 index 0000000000000..bb7b89b5671a0 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_common/tsconfig.json @@ -0,0 +1,20 @@ +{ + "extends": "../../../../../tsconfig.base.json", + "compilerOptions": { + "outDir": "target/types", + "types": [ + "jest", + "node", + "react" + ] + }, + "include": [ + "**/*.ts", + "**/*.tsx", + ], + "exclude": [ + "target/**/*" + ], + "kbn_references": [ + ] +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/jest.config.js b/x-pack/packages/observability/observability_ai/observability_ai_server/jest.config.js new file mode 100644 index 0000000000000..f395d4bf3bb03 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/jest.config.js @@ -0,0 +1,12 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +module.exports = { + preset: '@kbn/test', + rootDir: '../../../../..', + roots: ['/x-pack/packages/observability/observability_ai/observability_ai_server'], +}; diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/kibana.jsonc b/x-pack/packages/observability/observability_ai/observability_ai_server/kibana.jsonc new file mode 100644 index 0000000000000..6eb48a95a5624 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/kibana.jsonc @@ -0,0 +1,7 @@ +{ + "type": "shared-server", + "id": "@kbn/observability-ai-server", + "owner": "@elastic/obs-ai-assistant", + "group": "observability", + "visibility": "private" +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/package.json b/x-pack/packages/observability/observability_ai/observability_ai_server/package.json new file mode 100644 index 0000000000000..fc6fc310801dc --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/package.json @@ -0,0 +1,6 @@ +{ + "name": "@kbn/observability-ai-server", + "private": true, + "version": "1.0.0", + "license": "Elastic License 2.0" +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_end_rca_process_tool.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_end_rca_process_tool.ts new file mode 100644 index 0000000000000..a1b546d2629a4 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_end_rca_process_tool.ts @@ -0,0 +1,51 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { from, Observable, of, switchMap } from 'rxjs'; +import { RCA_END_PROCESS_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis'; +import { AssistantMessage, MessageRole } from '@kbn/inference-common'; +import { writeFinalReport } from './tasks/write_final_report'; +import { EndProcessToolMessage, RootCauseAnalysisContext } from './types'; +import { generateSignificantEventsTimeline } from './tasks/generate_timeline'; +import { EMPTY_ASSISTANT_MESSAGE } from './empty_assistant_message'; + +export function callEndRcaProcessTool({ + rcaContext, + toolCallId, +}: { + rcaContext: RootCauseAnalysisContext; + toolCallId: string; +}): Observable { + return from( + writeFinalReport({ + rcaContext, + }) + ).pipe( + switchMap((report) => { + return from( + generateSignificantEventsTimeline({ + rcaContext, + report, + }).then((timeline) => { + return { timeline, report }; + }) + ); + }), + switchMap(({ report, timeline }) => { + const toolMessage: EndProcessToolMessage = { + name: RCA_END_PROCESS_TOOL_NAME, + role: MessageRole.Tool, + toolCallId, + response: { + report, + timeline, + }, + }; + return of(toolMessage, EMPTY_ASSISTANT_MESSAGE); + }) + ); +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_investigate_entity_tool.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_investigate_entity_tool.ts new file mode 100644 index 0000000000000..c22d28d7389fb --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_investigate_entity_tool.ts @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { from, Observable, of, switchMap } from 'rxjs'; +import { MessageRole } from '@kbn/inference-common'; +import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis'; +import { InvestigateEntityToolMessage, RootCauseAnalysisContext, ToolErrorMessage } from './types'; +import { investigateEntity } from './tasks/investigate_entity'; +import { formatEntity } from './util/format_entity'; + +export function callInvestigateEntityTool({ + field, + value, + context, + toolCallId, + rcaContext, +}: { + field: string; + value: string; + context: string; + toolCallId: string; + rcaContext: RootCauseAnalysisContext; +}): Observable { + const nextEntity = { + [field]: value, + }; + + return from( + investigateEntity({ + rcaContext, + entity: nextEntity, + context, + }) + ).pipe( + switchMap((entityInvestigation) => { + if (!entityInvestigation) { + const entityNotFoundToolMessage: ToolErrorMessage = { + name: 'error', + role: MessageRole.Tool, + response: { + error: { + message: `Entity ${formatEntity(nextEntity)} not found, have + you verified it exists and if the field and value you are using + are correct?`, + }, + }, + toolCallId, + }; + + return of(entityNotFoundToolMessage); + } + + const { + attachments, + relatedEntities, + entity: investigatedEntity, + summary, + } = entityInvestigation; + const toolMessage: InvestigateEntityToolMessage = { + name: RCA_INVESTIGATE_ENTITY_TOOL_NAME, + role: MessageRole.Tool as const, + toolCallId, + response: { + entity: investigatedEntity, + relatedEntities, + summary, + }, + data: { + attachments, + }, + }; + + return of(toolMessage); + }) + ); +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_observe_tool.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_observe_tool.ts new file mode 100644 index 0000000000000..06676abd729f0 --- /dev/null +++ 
b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/call_observe_tool.ts @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { AssistantMessage, MessageRole } from '@kbn/inference-common'; +import { + RCA_INVESTIGATE_ENTITY_TOOL_NAME, + RCA_OBSERVE_TOOL_NAME, +} from '@kbn/observability-ai-common/root_cause_analysis'; +import { compact, findLast } from 'lodash'; +import { from, Observable, of, switchMap } from 'rxjs'; +import { observeInvestigationResults } from './tasks/observe_investigation_results'; +import { + InvestigateEntityToolMessage, + ObservationToolMessage, + RootCauseAnalysisContext, + RootCauseAnalysisEvent, +} from './types'; + +export function callObserveTool({ + rcaContext, + toolCallId, +}: { + rcaContext: RootCauseAnalysisContext; + toolCallId: string; +}): Observable { + const { events } = rcaContext; + + const lastAssistantMessage = findLast( + events.slice(0, -1), + (event): event is Extract => + event.role === MessageRole.Assistant + ); + + const toolMessagesByToolCallId = Object.fromEntries( + compact( + events.map((message) => + 'toolCallId' in message && + (message.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME || message.name === 'error') + ? [message.toolCallId, message] + : undefined + ) + ) + ); + + const investigationToolMessages = + lastAssistantMessage && lastAssistantMessage.toolCalls + ? 
compact( + lastAssistantMessage.toolCalls.map((investigateEntityToolCall) => { + if (investigateEntityToolCall.function.name !== RCA_INVESTIGATE_ENTITY_TOOL_NAME) { + return undefined; + } + return { + toolCall: investigateEntityToolCall, + toolResponse: toolMessagesByToolCallId[investigateEntityToolCall.toolCallId], + }; + }) + ) + : []; + + const investigations = investigationToolMessages + .map((toolMessage) => toolMessage.toolResponse) + .filter( + (toolResponse): toolResponse is InvestigateEntityToolMessage => + toolResponse.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME + ) + .map((toolResponse) => ({ ...toolResponse.data, ...toolResponse.response })); + + return from( + observeInvestigationResults({ + rcaContext, + investigations, + }) + ).pipe( + switchMap((summary) => { + const observationToolMessage: ObservationToolMessage = { + name: RCA_OBSERVE_TOOL_NAME, + response: { + content: summary.content, + }, + data: summary, + role: MessageRole.Tool, + toolCallId, + }; + return of(observationToolMessage); + }) + ); +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/empty_assistant_message.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/empty_assistant_message.ts new file mode 100644 index 0000000000000..08443d593a81a --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/empty_assistant_message.ts @@ -0,0 +1,15 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { AssistantMessage, MessageRole } from '@kbn/inference-common'; +import { RootCauseAnalysisEvent } from './types'; + +export const EMPTY_ASSISTANT_MESSAGE: Extract = { + content: '', + role: MessageRole.Assistant, + toolCalls: [], +}; diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/index.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/index.ts new file mode 100644 index 0000000000000..66307dc2ef9a4 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/index.ts @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export type { + RootCauseAnalysisEvent, + InvestigateEntityToolMessage, + EndProcessToolMessage, + ObservationToolMessage, + RootCauseAnalysisToolMessage, + ToolErrorMessage, + RootCauseAnalysisToolRequest, +} from './types'; +export type { SignificantEventsTimeline, SignificantEvent } from './tasks/generate_timeline'; +export type { EntityInvestigation } from './tasks/investigate_entity'; + +export { runRootCauseAnalysis } from './run_root_cause_analysis'; diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/prompts/index.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/prompts/index.ts new file mode 100644 index 0000000000000..8a0a6a9064700 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/prompts/index.ts @@ -0,0 +1,345 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export const RCA_SYSTEM_PROMPT_BASE = `You are a helpful assistant for Elastic Observability. +You are a distinguished SRE, who has an established career, working in both +small shops and FAANG-level companies. You have worked with Elasticsearch +since the beginning and expertly use it in your analysis of incidents. + +You use an evidence-based strategy to determine the root cause of +an incident. You thoroughly analyze Observability data. You use your +understanding of different architectures like microservices, monoliths, +event-driven systems, and environments like Kubernetes to discover +patterns and correlations in the data ingested into the user's system. + +Your sizable experience with monitoring software systems has taught +you how to investigate issues and correlate symptoms of the investigated +service with its dependencies. + +## Capabilities + +You are highly skilled at inspecting logs, traces, alerts, and SLOs to uncover +the root cause of incidents, with a special emphasis on detecting log patterns +that reveal system behavior. You can identify related entities, such as upstream +services or the specific pod a service is running on, by searching through logs +and traces for relationships using metadata like IP addresses, session IDs, or +distributed tracing data. While you can analyze alerts and SLO-derived metrics, +you do not directly analyze other system metrics, inspect files, or execute +commands that modify the system. + +## Non-capabilities + +You lack the capabilities to analyze metrics or connect to external systems.`; + +export const RCA_PROMPT_ENTITIES = `# Entities + +In an Observability system, entities are distinct components or resources within +the infrastructure, each representing points of interest for monitoring and +troubleshooting. 
These entities form the backbone of log-based analysis and +allow teams to track behavior, detect anomalies, and investigate issues across +different layers of the system. Here’s a breakdown of common entities in +observability: + +1. Services: Core units of functionality in an application ecosystem, +representing individual processes or applications (e.g., user-authentication, +payment processing). Services typically expose APIs or endpoints, and logs from +these entities often capture requests, responses, and error events, which are +critical for understanding application behavior. + +2. Kubernetes (K8s) Entities: + - Pods: The smallest deployable units in Kubernetes, usually containing one +or more containers. Logs from pods provide insight into container operations, +errors, and application states. + - Namespaces: Logical groupings within a cluster for organizing and isolating +resources, helping in filtering logs by domain or responsibility. + - Nodes: Worker machines (either physical or virtual) where pods run. Node +logs often cover hardware resource events, errors, and other system-level events +relevant to pod health and performance. + - Deployments and ReplicaSets: Define and manage the desired state of pod +replication and rolling updates. Logs from these components can reveal changes +in application versions, scaling events, and configuration updates. + +3. Virtual Machines (VMs): Virtualized computing resources that generate +operating system-level logs capturing events such as application crashes, +network issues, and OS-related errors. + +4. Applications: Software systems or packages running across the infrastructure, +which may encompass multiple services. Logs from applications track user flows, +application states, and error messages, providing context for user interactions +and system events. + +5. Serverless Functions (e.g., AWS Lambda): Code executions triggered by +specific events. 
Logs from serverless functions capture invocation details, +execution paths, and error traces, which are useful for understanding specific +function behaviors and pinpointing execution anomalies. + +6. Databases and Data Stores: Includes SQL/NoSQL databases, caches, and storage +solutions. Logs from these entities cover query executions, connection issues, +and transaction errors, essential for tracking data layer issues. + +7. Containers: Portable environments running individual services or processes. +Container logs capture application and system events within the containerized +environment, helping track process-level errors and status changes. + +8. Load Balancers and API Gateways: Components responsible for managing and +routing traffic. Logs from these entities include request paths, status codes, +and errors encountered, which can indicate connectivity issues or +misconfigurations. + +9. Networking Components: Entities like virtual private clouds (VPCs), +firewalls, VPNs, and network interfaces. Logs from these components track +traffic flows, connectivity issues, and security events, crucial for identifying +network-related anomalies. + +10. Clusters and Regions: Groupings of infrastructure either physically or +logically, such as across data centers or cloud regions. Cluster and region logs +help capture high-level events and error messages, useful for understanding +system-wide issues and region-specific disruptions. + +Each of these entities is typically identified by fields such as +\`service.name\`, \`kubernetes.pod.name\`, \`container.id\`, or similar fields +in log records. Observability systems use these identifiers to connect entities, +creating a map of relationships and dependencies that helps teams diagnose +issues, understand cross-entity impacts, and uncover root causes in distributed +architectures.`; + +export const RCA_PROMPT_DEPENDENCIES = `## Understanding the Flow: Upstream vs. 
Downstream + +- Upstream dependencies: These are the services that your service +depends on. They supply data, perform tasks, or provide resources that +your service consumes. +- Downstream dependencies: These are the services that depend on your +service. They consume the data or resources your service generates. + +When diagnosing issues, distinguishing the direction of dependency can +clarify whether a problem originates from your service’s reliance on an +external input or whether your service is causing issues for other systems. + +--- + +## When to Investigate Upstream Dependencies + +Upstream issues typically occur when your service is failing due to problems +with the responses it receives from external systems. + +1. Timeouts and Latency +- Symptoms: Slow response times, retries, or timeouts. +- Errors: HTTP 504, retrying connection, exceeded timeout threshold. +- Focus: Check the performance and availability of upstream services +(e.g., APIs, databases) and network latency. + +2. Data Integrity Issues +- Symptoms: Inconsistent or corrupted data. +- Errors: unexpected data format, deserialization errors. +- Focus: Verify data received from upstream services, and investigate +schema or data format changes. + +3. Connection Failures +- Symptoms: Your service cannot connect to upstream services. +- Errors: DNS lookup failed, connection refused, socket timeout. +- Focus: Check upstream service health, DNS, and networking components. + +4. Authentication/Authorization Failures +- Symptoms: Failed access to upstream resources. +- Errors: 401 Unauthorized, 403 Forbidden, token issues. +- Focus: Validate credentials or tokens and investigate upstream access +policies. + +--- + +## When to Investigate Downstream Dependencies + +Downstream issues occur when your service is functioning but its outputs cause +failures in other services that depend on it. + +1. Data or API Response Issues +- Symptoms: Downstream services receive bad or invalid data. 
+- Errors: data type mismatch, invalid JSON format. +- Focus: Ensure your service is returning correct data and check for API +changes. + +2. Rate-Limiting and Resource Exhaustion +- Symptoms: Downstream services are overwhelmed. +- Errors: 429 Too Many Requests, throttling or resource exhaustion. +- Focus: Check your service’s request rates and resource usage (e.g., memory, CPU). + +3. Unexpected Behavior or Regression +- Symptoms: Downstream failures after a recent deployment. +- Errors: New downstream errors after your service changes. +- Focus: Review recent updates, API contracts, or integration points. + +4. Eventual Consistency or Queue Backlogs +- Symptoms: Delayed processing in downstream systems. +- Errors: message queue full, backlog warnings. +- Focus: Check event production rates and queue statuses in downstream services.`; + +export const RCA_PROMPT_CHANGES = `## Reasoning about Correlating Changes in Incident Investigations + +In a root cause analysis, understanding the types and timing of changes is key +to linking symptoms with underlying causes. Changes can broadly be classified +into **symptomatic changes** (indicators of system issues like elevated error +rates or degraded throughput) and **system changes** (events that modify system +configuration or structure, such as scale-downs, new version rollouts, or +significant configuration adjustments). By correlating these changes, we can +assess whether observed symptoms are likely related to specific system +modifications. + +### Identifying Correlations Between Symptomatic and System Changes + +When investigating a sudden issue—such as a 5x increase in latency—it’s +essential to evaluate both the **timing** and **nature** of associated changes +in upstream dependencies, resource utilization, and configuration events. 
For +instance: + +- Consistent Symptomatic Behavior: If an upstream dependency exhibits a +similar, sustained latency spike around the same time and shows log entries +indicating CPU throttling, this would suggest a correlated, persistent issue +that may directly impact the observed symptom. A scale-down event preceding the +latency increase might indicate that reduced resources are stressing the +dependency. + +- Transient vs. Persistent Issues: Another upstream dependency that +experiences a brief latency increase but recovers quickly is less likely +related. Short-lived changes that self-correct without intervention typically +have different root causes or may be unrelated noise. + +### Types of Changes to Consider in Correlation + +1. Log Pattern Changes: A shift in log patterns, especially around error +levels, provides significant insight. If there’s an increase in critical or +warning log patterns for a dependency during the latency spike, it could +indicate that the issue stems from this entity. Compare these log patterns to +past behavior to assess whether they represent an anomaly that might warrant +further investigation. + +2. Event-Driven System Changes: + - Scale Events: Scale-up or scale-down events can directly impact +performance. If a latency increase aligns with a scale-down, it may suggest that +resource reduction is straining the system. + - Release or Deployment Events: A new version rollout or config change is +a frequent source of correlated issues. Compare the timing of the latency +increase to the deployment to see if the change directly impacts the system. +Correlate with alerts or SLO breaches on endpoints to understand the immediate +effects of the release. + +3. SLO and Alert-Based Changes: SLO breaches and alerts can provide concrete +timestamps for when symptoms begin. For instance, a breach on error rates for a +specific service endpoint following a dependency’s scale-down event suggests a +possible causal link. 
An alert indicating sustained latency increase in a +dependency that remains unresolved points to a high-priority area for deeper +investigation. + +4. Dependency Health and Behavior: + - Related vs. Unrelated Dependencies: Similar to the latency example, +observe if multiple dependencies experience symptomatic changes simultaneously. +Related dependencies should show consistent, similar issues, while unrelated +dependencies may exhibit brief, unrelated spikes. Persistent issues across key +dependencies likely indicate a systemic cause, while isolated changes are less +likely to be relevant. + +### Examples of Reasoning Through Changes + +Consider these scenarios: +- Increase in Error Rates and a Recent Deployment: Suppose error rates for +an endpoint increase sharply post-deployment. If related logs show new error +patterns, this aligns the symptom with a deployment change. Investigate specific +changes in the deployment (e.g., code changes or resource allocation). +- Throughput Decrease and Scaling Events: If throughput dips shortly after a +scale-down event, it might suggest resource constraints. Analyze CPU or memory +throttling logs from this period in upstream dependencies to confirm. +- Cross-Service Latency Spikes: If multiple services along a call path +experience latency spikes, with CPU throttling logs, this suggests a resource +bottleneck. Trace logs and alerts related to autoscaling decisions may provide +insights into whether the system configuration caused cascading delays. 
+ +By carefully mapping these changes and analyzing their timing, you can +distinguish between causally related events and incidental changes, allowing for +a targeted and effective investigation.`; + +export const RCA_PROMPT_CHANGE_POINTS = `## Change points + +Change points can be defined as the following type: + +- \`dip\`: a significant dip occurs at this change point +- \`distribution_change\`: the overall distribution of the values has changed +significantly +- \`non_stationary\`: there is no change point, but the values are not from a +stationary distribution +- \`spike\`: a significant spike occurs at this point +- \`stationary\`: no change point found +- \`step_change\`: the change indicates a statistically significant step up or +down in value distribution +- \`trend_change\`: there is an overall trend change occurring at this point + +For \`spike\`, and \`dip\`, this means: a short-lived spike or dip that then again +stabilizes. For persisted changes, you'd see a \`step_change\` (if the values +before and after the change point are stable), or a \`trend_change\` when the +values show an upward or downward trend after the change.`; + +export const RCA_PROMPT_SIGNIFICANT_EVENTS = `## Significant events + +Generate a timeline of significant events. These events should capture +significant observed changes in the system that can be extracted from the +analyzed data. This timeline is absolutely critical to the investigation, +and close attention has to be paid to the data, and the instructions. + +The timeline should focus on key events as captured in log patterns, including +both notable changes and unusual/critical messages. This data-driven timeline +should help establish a chain of causality, pinpointing when anomalies began, +what system behaviors were observed, and how these patterns relate to the overall incident. + +- Use ISO timestamps to ensure precision and clarity. +- Include alerts that are part of the investigation. 
For these, use the start +time of the alert, and mention critical information about the alert, such as +reason and grouping fields. +- Focus on log entries that signal significant system behavior (e.g., errors, +retries, anomalies). +- Highlight critical log messages or changes in patterns that may correlate +with the issue. +- Include notable anomalies, such as spikes in error rates, unexpected system +responses, or any log entries suggesting failure or degradation. + +Do not include: +- Events that are indicative of normal operations. +- Events that are unlikely to be related to the investigated issue. + +Key Elements to Include: + +- Log Patterns: Capture log messages that show unusual events or +abnormalities such as error codes, failed retries, or changes in log frequency. +- Timestamps: Ensure every entry in the timeline is time-stamped +with an accurate ISO 8601 timestamp. +- Event Description: Provide a clear, concise, and objective description of +what was observed in the logs. +- Corroborating Data: Link log anomalies to other relevant data points such +as traffic shifts, request patterns, or upstream/downstream service impacts.`; + +export const RCA_PROMPT_TIMELINE_GUIDE = ` +The timeline should focus on key events as +captured in log patterns, including both notable changes and unusual/critical +messages. This data-driven timeline should help establish a chain of causality, +pinpointing when anomalies began, what system behaviors were observed, and how +these patterns relate to the overall incident. + +- Use ISO timestamps to ensure precision and clarity. +- Focus on log entries that signal significant system behavior (e.g., +errors, retries, anomalies). +- Highlight critical log messages or changes in patterns that may correlate +with the issue. +- Include notable anomalies, such as spikes in error rates, unexpected +system responses, or any log entries suggesting failure or degradation. 
+ +Key Elements to Include: + +Log Patterns: Capture log messages that show unusual events or +abnormalities such as error codes, failed retries, or changes in log frequency. +Timestamps: Ensure every entry in the timeline is time-stamped +with an accurate ISO 8601 timestamp. +Event Description: Provide a clear, concise description of what was +observed in the logs. +Corroborating Data: Link log anomalies to other relevant data points such +as traffic shifts, request patterns, or upstream/downstream service impacts.`; diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/run_root_cause_analysis.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/run_root_cause_analysis.ts new file mode 100644 index 0000000000000..94b14b4f3d12f --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/run_root_cause_analysis.ts @@ -0,0 +1,305 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { RulesClient } from '@kbn/alerting-plugin/server'; +import { calculateAuto } from '@kbn/calculate-auto'; +import { MessageRole, AssistantMessage, ToolMessage, ToolChoiceType } from '@kbn/inference-common'; +import { InferenceClient } from '@kbn/inference-plugin/server'; +import { Logger } from '@kbn/logging'; +import { AlertsClient } from '@kbn/rule-registry-plugin/server'; +import { findLast, pick } from 'lodash'; +import moment from 'moment'; +import { catchError, filter, from, map, mergeMap, Observable, of, switchMap } from 'rxjs'; +import { ObservabilityAIAssistantClient } from '@kbn/observability-ai-assistant-plugin/server'; +import { ObservabilityElasticsearchClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client'; +import { + RCA_END_PROCESS_TOOL_NAME, + RCA_INVESTIGATE_ENTITY_TOOL_NAME, + RCA_OBSERVE_TOOL_NAME, +} from '@kbn/observability-ai-common/root_cause_analysis'; +import { callEndRcaProcessTool } from './call_end_rca_process_tool'; +import { callInvestigateEntityTool } from './call_investigate_entity_tool'; +import { callObserveTool } from './call_observe_tool'; +import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES, RCA_SYSTEM_PROMPT_BASE } from './prompts'; +import { RCA_TOOLS } from './tools'; +import { + EndProcessToolMessage, + InvestigateEntityToolMessage, + ObservationToolMessage, + RootCauseAnalysisContext, + RootCauseAnalysisEvent, + ToolErrorMessage, +} from './types'; +import { callTools } from './util/call_tools'; +import { formatEntity } from './util/format_entity'; +import { validateInvestigateEntityToolCalls } from './util/validate_investigate_entity_tool_call'; + +const SYSTEM_PROMPT_WITH_OBSERVE_INSTRUCTIONS = `${RCA_SYSTEM_PROMPT_BASE} + +Your next step is to request an observation from another agent based +on the initial context or the results of previous investigations.`; + +const SYSTEM_PROMPT_WITH_DECISION_INSTRUCTIONS = `${RCA_SYSTEM_PROMPT_BASE} + +${RCA_PROMPT_ENTITIES} + 
+${RCA_PROMPT_CHANGES} + + To determine whether to end the process or continue analyzing another entity, +follow the advice from the previous observation, and these tips: + + Continuing the process: + - Do not investigate an entity twice. This will result in a failure. + - Logs, traces, or observability data that suggest upstream or downstream +issues (such as connection failures, timeouts, or authentication errors) +indicate further investigation is required. + + Ending the process: + - No further entities to investigate: If there are no unexplored upstream or +downstream dependencies, and all related entities have been investigated without +discovering new anomalies, it may be appropriate to end the process. + - If all investigated entities (e.g., services, hosts, containers) are +functioning normally, with no relevant issues found, and there are no signs of +dependencies being affected, you may consider ending the process. + - Avoid concluding the investigation based solely on symptoms or the absence +of immediate errors in the data. 
Unless a system change has been connected to +the incident, it is important to continue investigating dependencies to ensure +the root cause has been accurately identified.`; + +export function runRootCauseAnalysis({ + serviceName, + start: requestedStart, + end: requestedEnd, + esClient, + alertsClient, + rulesClient, + observabilityAIAssistantClient, + spaceId, + indices, + connectorId, + inferenceClient, + context: initialContext, + logger: incomingLogger, + prevEvents, +}: { + context: string; + serviceName: string; + logger: Logger; + inferenceClient: InferenceClient; + start: number; + end: number; + alertsClient: AlertsClient; + rulesClient: RulesClient; + esClient: ObservabilityElasticsearchClient; + observabilityAIAssistantClient: ObservabilityAIAssistantClient; + indices: { + logs: string[]; + traces: string[]; + sloSummaries: string[]; + }; + connectorId: string; + spaceId: string; + prevEvents?: RootCauseAnalysisEvent[]; +}): Observable { + const logger = incomingLogger.get('rca'); + + const entity = { 'service.name': serviceName }; + + const bucketSize = calculateAuto + .atLeast(30, moment.duration(requestedEnd - requestedStart))! + .asMilliseconds(); + + const start = Math.floor(requestedStart / bucketSize) * bucketSize; + const end = Math.floor(requestedEnd / bucketSize) * bucketSize; + + const initialMessage = { + role: MessageRole.User as const, + content: `Investigate the health status of ${formatEntity(entity)}. + + The context given for this investigation is: + + ${initialContext}`, + }; + + const nextEvents = [initialMessage, ...(prevEvents ?? 
[])]; + + const initialRcaContext: RootCauseAnalysisContext = { + connectorId, + start, + end, + esClient, + events: nextEvents, + indices, + inferenceClient, + initialContext, + alertsClient, + observabilityAIAssistantClient, + logger, + rulesClient, + spaceId, + tokenLimit: 32_000, + }; + + const investigationTimeRangePrompt = `## Time range + + The time range of the investigation is ${new Date(start).toISOString()} until ${new Date( + end + ).toISOString()}`; + + initialContext = `${initialContext} + + ${investigationTimeRangePrompt} + `; + + const next$ = callTools( + { + system: RCA_SYSTEM_PROMPT_BASE, + connectorId, + inferenceClient, + messages: nextEvents, + logger, + }, + ({ messages }) => { + const lastSuccessfulToolResponse = findLast( + messages, + (message) => message.role === MessageRole.Tool && message.name !== 'error' + ) as Exclude | undefined; + + const shouldWriteObservationNext = + !lastSuccessfulToolResponse || lastSuccessfulToolResponse.name !== RCA_OBSERVE_TOOL_NAME; + + const nextTools = shouldWriteObservationNext + ? pick(RCA_TOOLS, RCA_OBSERVE_TOOL_NAME) + : pick(RCA_TOOLS, RCA_END_PROCESS_TOOL_NAME, RCA_INVESTIGATE_ENTITY_TOOL_NAME); + + const nextSystem = shouldWriteObservationNext + ? SYSTEM_PROMPT_WITH_OBSERVE_INSTRUCTIONS + : SYSTEM_PROMPT_WITH_DECISION_INSTRUCTIONS; + + return { + messages, + system: `${nextSystem} + + ${investigationTimeRangePrompt}`, + tools: nextTools, + toolChoice: shouldWriteObservationNext + ? 
{ function: RCA_OBSERVE_TOOL_NAME } + : ToolChoiceType.required, + }; + }, + ({ + toolCalls, + messages, + }): Observable< + | ObservationToolMessage + | ToolErrorMessage + | InvestigateEntityToolMessage + | EndProcessToolMessage + | AssistantMessage + > => { + const nextRcaContext = { + ...initialRcaContext, + events: messages as RootCauseAnalysisEvent[], + }; + + return of(undefined).pipe( + switchMap(() => { + return from( + validateInvestigateEntityToolCalls({ rcaContext: nextRcaContext, toolCalls }) + ); + }), + switchMap((errors) => { + if (errors.length) { + return of( + ...toolCalls.map((toolCall) => { + const toolCallErrorMessage: ToolErrorMessage = { + role: MessageRole.Tool, + name: 'error', + response: { + error: { + message: `Some ${RCA_INVESTIGATE_ENTITY_TOOL_NAME} calls were not valid: + ${errors.map((error) => `- ${error}`).join('\n')}`, + }, + }, + toolCallId: toolCall.toolCallId, + }; + return toolCallErrorMessage; + }) + ); + } + return of(...toolCalls).pipe( + mergeMap((toolCall) => { + function executeToolCall(): Observable< + | EndProcessToolMessage + | InvestigateEntityToolMessage + | ObservationToolMessage + | ToolErrorMessage + | AssistantMessage + > { + switch (toolCall.function.name) { + case RCA_END_PROCESS_TOOL_NAME: + return callEndRcaProcessTool({ + rcaContext: nextRcaContext, + toolCallId: toolCall.toolCallId, + }); + + case RCA_INVESTIGATE_ENTITY_TOOL_NAME: + return callInvestigateEntityTool({ + context: toolCall.function.arguments.context, + field: toolCall.function.arguments.entity.field, + value: toolCall.function.arguments.entity.value, + rcaContext: nextRcaContext, + toolCallId: toolCall.toolCallId, + }); + + case RCA_OBSERVE_TOOL_NAME: + return callObserveTool({ + rcaContext: nextRcaContext, + toolCallId: toolCall.toolCallId, + }); + } + } + + return executeToolCall().pipe( + catchError((error) => { + logger.error(`Failed executing task: ${error.message}`); + logger.error(error); + const toolErrorMessage: ToolErrorMessage = { 
+ name: 'error', + role: MessageRole.Tool, + response: { + error: { + ...('toJSON' in error && typeof error.toJSON === 'function' + ? error.toJSON() + : {}), + message: error.message, + }, + }, + toolCallId: toolCall.toolCallId, + }; + return of(toolErrorMessage); + }) + ); + }, 3) + ); + }) + ); + } + ); + + return next$.pipe( + filter((event) => + Boolean(event.role !== MessageRole.Assistant || event.content || event.toolCalls?.length) + ), + map((event) => { + if (event.role === MessageRole.Assistant) { + return event as Extract; + } + return event; + }) + ); +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/analyze_log_patterns/index.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/analyze_log_patterns/index.ts new file mode 100644 index 0000000000000..fe090b64c9728 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/analyze_log_patterns/index.ts @@ -0,0 +1,402 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ +import { getEntityKuery } from '@kbn/observability-utils-common/entities/get_entity_kuery'; +import { formatValueForKql } from '@kbn/observability-utils-common/es/format_value_for_kql'; +import type { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis'; +import { ShortIdTable } from '@kbn/observability-utils-common/llm/short_id_table'; +import { + P_VALUE_SIGNIFICANCE_HIGH, + P_VALUE_SIGNIFICANCE_MEDIUM, +} from '@kbn/observability-utils-common/ml/p_value_to_label'; +import { + FieldPatternResultWithChanges, + getLogPatterns, +} from '@kbn/observability-utils-server/entities/get_log_patterns'; +import { castArray, compact, groupBy, orderBy } from 'lodash'; +import { RCA_PROMPT_CHANGES, RCA_PROMPT_ENTITIES } from '../../prompts'; +import { RootCauseAnalysisContext } from '../../types'; +import { formatEntity } from '../../util/format_entity'; +import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries'; +import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries'; + +type LogPatternRelevance = 'normal' | 'unusual' | 'warning' | 'critical'; + +export type AnalyzedLogPattern = FieldPatternResultWithChanges & { + relevance: LogPatternRelevance; + interesting: boolean; +}; + +export interface AnalyzeLogPatternOutput { + ownPatterns: AnalyzedLogPattern[]; + patternsFromOtherEntities: AnalyzedLogPattern[]; +} + +const normalDescription = `normal operations, such as access logs`; +const unusualDescription = `something unusual and/or +appear rarely, such as startup or shutdown messages or +other rare events`; +const warningDescription = `something being in an unexpected state, +such as error messages, rate limiting or disk usage warnings`; +const criticalDescription = `something being in a critical state, +such as startup failure messages, out-of-memory errors or crashloopbackoff +events`; + +interface LogPatternCutOff { + significance?: 'high' | 'medium' | 'low'; + 
pValue?: number; +} + +export async function analyzeLogPatterns({ + entity, + allAnalysis, + system, + rcaContext: { logger: parentLogger, inferenceClient, connectorId, esClient, start, end, indices }, + cutoff, + kbEntries, +}: { + entity: Record; + allAnalysis: Array<{ index: string | string[]; analysis: TruncatedDocumentAnalysis }>; + system: string; + cutoff?: LogPatternCutOff; + kbEntries: ScoredKnowledgeBaseEntry[]; + rcaContext: Pick< + RootCauseAnalysisContext, + 'indices' | 'logger' | 'inferenceClient' | 'connectorId' | 'esClient' | 'start' | 'end' + >; +}): Promise { + const kuery = getEntityKuery(entity); + + const logger = parentLogger.get('analyzeLogPatterns'); + + const fields = ['message', 'error.exception.message']; + + logger.debug(() => `Analyzing log patterns for ${JSON.stringify(entity)}`); + + const systemPrompt = `You are a helpful assistant for Elastic Observability. + You are an expert in analyzing log messages for software + systems, and you use your extensive experience as an SRE + to thoroughly analyze log patterns for things that require + attention from the user. 
+ + ${RCA_PROMPT_CHANGES} + + ${RCA_PROMPT_ENTITIES} + + ## Entity + + The following entity is being analyzed: + + ${formatEntity(entity)} + + ${serializeKnowledgeBaseEntries(kbEntries)} + + ### Entity analysis + + ${allAnalysis.map(({ index: analyzedIndex, analysis }) => { + return `#### Indices: ${castArray(analyzedIndex).join(',')} + + ${JSON.stringify(analysis)}`; + })} + + ${system}`; + + const kueryForOtherEntities = `NOT (${kuery}) AND ${Object.values(entity) + .map( + (val) => + `(${fields.map((field) => `(${[field, formatValueForKql(val)].join(':')})`).join(' OR ')})` + ) + .join(' AND ')}`; + + const [logPatternsFromEntity, logPatternsFromElsewhere] = await Promise.all([ + getLogPatterns({ + esClient, + index: [...indices.logs, ...indices.traces], + start, + end, + kuery, + includeChanges: true, + fields, + metadata: [], + }), + getLogPatterns({ + esClient, + index: [...indices.logs], + start, + end, + kuery: kueryForOtherEntities, + metadata: Object.keys(entity), + includeChanges: true, + fields, + }), + ]); + const patternIdLookupTable = new ShortIdTable(); + + logger.debug( + () => + `Found ${logPatternsFromEntity.length} own log patterns and ${logPatternsFromElsewhere.length} from others` + ); + + logger.trace( + () => + `Found log patterns${JSON.stringify({ + entity, + logPatternsFromEntity, + logPatternsFromElsewhere, + })}` + ); + + const patternsWithIds = [...logPatternsFromEntity, ...logPatternsFromElsewhere].map((pattern) => { + return { + ...pattern, + shortId: patternIdLookupTable.take(pattern.regex), + }; + }); + + const patternsByRegex = new Map(patternsWithIds.map((pattern) => [pattern.regex, pattern])); + + const serializedOwnEntity = formatEntity(entity); + + const [ownPatterns, patternsFromOtherEntities] = await Promise.all([ + logPatternsFromEntity.length ? categorizeOwnPatterns() : [], + logPatternsFromElsewhere.length ? 
selectRelevantPatternsFromOtherEntities() : [], + ]); + + logger.trace( + () => + `Classified log patterns ${JSON.stringify([entity, ownPatterns, patternsFromOtherEntities])}` + ); + + const allPatterns = [...ownPatterns, ...patternsFromOtherEntities]; + + const sortedByPValueAsc = orderBy( + allPatterns.filter((pattern) => pattern.change && pattern.change.p_value), + (pattern) => { + return pattern.change.p_value; + }, + 'asc' + ); + + const pValueCutOff = getPValueCutoff({ cutoff, max: sortedByPValueAsc[0]?.change.p_value }); + + return { + ownPatterns: ownPatterns.map((pattern) => ({ + ...pattern, + interesting: isInterestingPattern(pattern, pValueCutOff), + })), + patternsFromOtherEntities: patternsFromOtherEntities.map((pattern) => ({ + ...pattern, + interesting: isInterestingPattern(pattern, pValueCutOff), + })), + }; + + function categorizeOwnPatterns() { + return inferenceClient + .output({ + id: 'analyze_log_patterns', + connectorId, + system: systemPrompt, + input: `Based on the following log patterns from + ${formatEntity(entity)}, group these patterns into + the following categories: + + - normal (patterns that are indicative of ${normalDescription}) + - unusual (patterns that are indicative of ${unusualDescription}) + - warning (patterns that are indicative of ${warningDescription}) + - critical (patterns that are indicative of ${criticalDescription}) + + ## Log patterns: + + ${preparePatternsForLlm(logPatternsFromEntity)} + `, + schema: { + type: 'object', + properties: { + categories: { + type: 'array', + items: { + type: 'object', + properties: { + relevance: { + type: 'string', + enum: ['normal', 'unusual', 'warning', 'critical'], + }, + shortIds: { + type: 'array', + description: + 'The pattern IDs you want to group here. 
Use the pattern short ID.', + items: { + type: 'string', + }, + }, + }, + required: ['relevance', 'shortIds'], + }, + }, + }, + required: ['categories'], + } as const, + }) + .then((outputEvent) => { + return outputEvent.output.categories.flatMap((category) => { + return mapIdsBackToPatterns(category.shortIds).map((pattern) => { + return { + ...pattern, + relevance: category.relevance, + }; + }); + }); + }); + } + + function selectRelevantPatternsFromOtherEntities() { + return inferenceClient + .output({ + id: 'select_relevant_patterns_from_other_entities', + connectorId, + system: systemPrompt, + input: `Based on the following log patterns that + are NOT from ${serializedOwnEntity}, group these + patterns into the following categories: + + - irrelevant (patterns that are not relevant for + ${serializedOwnEntity}) + - normal (patterns that relevant for + ${serializedOwnEntity} and are indicative of ${normalDescription}) + - unusual (patterns that are relevant for + ${serializedOwnEntity} and are indicative of ${unusualDescription}) + - warning (patterns that are relevant for + ${serializedOwnEntity} and are indicative of ${warningDescription}) + - critical (patterns that are relevant for + ${serializedOwnEntity} and are indicative of ${criticalDescription}) + + Relevant patterns are messages that mention the + investigated entity, or things that are indicative + of critical failures or changes in the entity + that owns the log pattern. + + ## Log patterns: + + ${preparePatternsForLlm(logPatternsFromElsewhere)} + `, + schema: { + type: 'object', + properties: { + categories: { + type: 'array', + items: { + type: 'object', + properties: { + relevance: { + type: 'string', + enum: ['irrelevant', 'normal', 'unusual', 'warning', 'critical'], + }, + shortIds: { + type: 'array', + description: + 'The pattern IDs you want to group here. 
Use the pattern short ID.', + items: { + type: 'string', + }, + }, + }, + required: ['relevance', 'shortIds'], + }, + }, + }, + required: ['categories'], + } as const, + }) + .then((outputEvent) => { + return outputEvent.output.categories.flatMap((category) => { + return mapIdsBackToPatterns(category.shortIds).flatMap((pattern) => { + if (category.relevance === 'irrelevant') { + return []; + } + return [ + { + ...pattern, + relevance: category.relevance, + }, + ]; + }); + }); + }); + } + + function preparePatternsForLlm(patterns: FieldPatternResultWithChanges[]): string { + const groupedByField = groupBy(patterns, (pattern) => pattern.field); + + return Object.entries(groupedByField) + .map(([field, patternsForField]) => { + return `### \`${field}\` + + #### Patterns + + ${JSON.stringify( + patternsForField.map((pattern) => { + return { + shortId: patternIdLookupTable.take(pattern.regex), + regex: pattern.regex, + sample: pattern.sample, + highlight: pattern.highlight, + change: pattern.change, + }; + }) + )} + `; + }) + .join('\n\n'); + } + + function mapIdsBackToPatterns(ids?: string[]) { + return compact( + ids?.map((shortId) => { + const lookupId = patternIdLookupTable.lookup(shortId); + if (!lookupId) { + return undefined; + } + const pattern = patternsByRegex.get(lookupId); + return pattern; + }) + ); + } +} + +function isInterestingPattern( + pattern: Omit, + pValueCutOff: number +) { + return (pattern.change.p_value ?? 
1) <= pValueCutOff || pattern.relevance !== 'normal'; +} + +function getPValueCutoff({ max, cutoff }: { max?: number; cutoff?: LogPatternCutOff }) { + if (cutoff?.pValue) { + return cutoff?.pValue; + } + + if (cutoff?.significance === 'high') { + return P_VALUE_SIGNIFICANCE_HIGH; + } + + if (cutoff?.significance === 'medium') { + return P_VALUE_SIGNIFICANCE_MEDIUM; + } + + if (max === undefined) { + return Number.MAX_VALUE; + } + + if (max <= P_VALUE_SIGNIFICANCE_HIGH) { + return P_VALUE_SIGNIFICANCE_HIGH; + } + + if (max <= P_VALUE_SIGNIFICANCE_MEDIUM) { + return P_VALUE_SIGNIFICANCE_MEDIUM; + } + + return Number.MAX_VALUE; +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/describe_entity/index.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/describe_entity/index.ts new file mode 100644 index 0000000000000..bd050c4c61dfd --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/describe_entity/index.ts @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { InferenceClient } from '@kbn/inference-plugin/server'; +import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis'; +import { FieldPatternResultWithChanges } from '@kbn/observability-utils-server/entities/get_log_patterns'; +import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts'; +import { formatEntity } from '../../util/format_entity'; +import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries'; +import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries'; +import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts'; + +export async function describeEntity({ + inferenceClient, + connectorId, + entity, + contextForEntityInvestigation, + analysis, + ownPatterns, + kbEntries, +}: { + inferenceClient: InferenceClient; + connectorId: string; + entity: Record; + analysis: TruncatedDocumentAnalysis; + contextForEntityInvestigation: string; + ownPatterns: FieldPatternResultWithChanges[]; + kbEntries: ScoredKnowledgeBaseEntry[]; +}) { + const system = RCA_SYSTEM_PROMPT_BASE; + + const input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })} + + ## Context for investigating ${formatEntity(entity)} + + ${contextForEntityInvestigation} + + ${serializeKnowledgeBaseEntries(kbEntries)} + + ## Data samples + + ${JSON.stringify(analysis)} + + ## Log patterns + + ${JSON.stringify(ownPatterns.map(({ regex, sample }) => ({ regex, sample })))} + + ## Current task + + Describe the entity characteristics based on the sample documents and log + patterns. Put it in context of the investigation process. Mention the reason + why it's being investigated, and how it is related to other entities that were + previously investigated. 
Mention these three things: + + - infrastructure & environment + - communication characteristics (protocols and endpoints) + - context of entity in investigation + + You shouldn't mention the log patterns, they will be analyzed elsewhere. + `; + + const response = await inferenceClient.output({ + id: 'describe_entity', + connectorId, + system, + input, + }); + + return response.content; +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/describe_log_patterns/index.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/describe_log_patterns/index.ts new file mode 100644 index 0000000000000..ea5cc75eea1de --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/describe_log_patterns/index.ts @@ -0,0 +1,189 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { InferenceClient } from '@kbn/inference-plugin/server'; +import { TruncatedDocumentAnalysis } from '@kbn/observability-utils-common/llm/log_analysis/document_analysis'; +import { omit, partition, sumBy } from 'lodash'; +import { RCA_SYSTEM_PROMPT_BASE } from '../../prompts'; +import { formatEntity } from '../../util/format_entity'; +import { serializeKnowledgeBaseEntries } from '../../util/serialize_knowledge_base_entries'; +import { AnalyzedLogPattern } from '../analyze_log_patterns'; +import { ScoredKnowledgeBaseEntry } from '../get_knowledge_base_entries'; +import { getInvestigateEntityTaskPrompt } from '../investigate_entity/prompts'; + +export interface LogPatternDescription { + content: string; + docCount: number; + interestingPatternCount: number; + ignoredPatternCount: number; + ignoredDocCount: number; +} + +export async function describeLogPatterns({ + inferenceClient, + connectorId, + entity, + contextForEntityInvestigation, + analysis, + ownPatterns: allOwnPatterns, + patternsFromOtherEntities, + kbEntries, +}: { + inferenceClient: InferenceClient; + connectorId: string; + entity: Record; + analysis: TruncatedDocumentAnalysis; + contextForEntityInvestigation: string; + ownPatterns: AnalyzedLogPattern[]; + patternsFromOtherEntities: AnalyzedLogPattern[]; + kbEntries: ScoredKnowledgeBaseEntry[]; +}): Promise { + const system = RCA_SYSTEM_PROMPT_BASE; + + const [ownInterestingPatterns, ignoredOwnPatterns] = partition( + allOwnPatterns, + (pattern) => pattern.interesting + ); + + const stats = { + docCount: sumBy(allOwnPatterns, (pattern) => pattern.count), + interestingPatternCount: ownInterestingPatterns.length, + otherInterestingPatternCount: patternsFromOtherEntities.length, + ignoredPatternCount: ignoredOwnPatterns.length, + ignoredDocCount: sumBy(ignoredOwnPatterns, (pattern) => pattern.count), + }; + + const header = `## Log analysis + + ### Stats for own log patterns: + - ${stats.docCount} documents analyzed + - 
${stats.interestingPatternCount} interesting patterns + - ${stats.ignoredPatternCount} ignored patterns, accounting for + ${stats.ignoredDocCount} out of ${stats.docCount} documents + - ${stats.otherInterestingPatternCount} relevant patterns from + other entities`; + + if (!stats.interestingPatternCount && !stats.otherInterestingPatternCount) { + return { + ...stats, + content: `${header}\n\nNo interesting log patterns`, + }; + } + + const ownLogPatternsPrompt = ownInterestingPatterns.length + ? JSON.stringify( + ownInterestingPatterns.map(({ regex, sample, change, count, timeseries }) => ({ + regex, + sample, + change, + count, + timeseries: timeseries.map(({ x, y }, index) => { + if (index === change.change_point) { + return `${change.type} at ${new Date(x).toISOString()}: ${y}`; + } + return `${new Date(x).toISOString()}: ${y}`; + }), + })) + ) + : 'No own log patterns found'; + + const otherLogPatternsPrompt = patternsFromOtherEntities.length + ? JSON.stringify( + patternsFromOtherEntities.map( + ({ regex, sample, change, count, timeseries, metadata, field, highlight }) => ({ + regex, + sample, + change, + count, + timeseries: timeseries.map(({ x, y }, index) => { + if (index === change.change_point) { + return `${change.type} at ${new Date(x).toISOString()}: ${y}`; + } + return `${new Date(x).toISOString()}: ${y}`; + }), + entity: omit(metadata, field), + highlight, + }) + ) + ) + : 'No relevant log patterns found from other entities'; + + const input = `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })} + + ## Context for investigating ${formatEntity(entity)} + + ${contextForEntityInvestigation} + + ${serializeKnowledgeBaseEntries(kbEntries)} + + ## Data samples + + ${JSON.stringify(analysis)} + + ## Log patterns from ${formatEntity(entity)} + + ${ownLogPatternsPrompt} + + ## Possibly relevant log patterns from other entities + + ${otherLogPatternsPrompt} + + ### Interpreting log patterns and samples + + The pattern itself is what is 
consistent across all messages. The values from these parts + are separately given in "constants". There's also a single (random) _sample_ included, with + the variable part being given as well. E.g., if the failure in the sample is not part of the pattern + itself, you should mention that in your analysis. + + ## Task + + Using only the log patterns, describe your observations about the entity. + + Group these patterns together based on topic. Some examples of these topics: + + - normal operations such as request logs + - connection issues to an upstream dependency + - startup messages + - garbage collection messages + + For patterns with change points, describe the trend before and after the change point based + on the data points. E.g.: + - A persisted drop to near-zero after 2020-01-01T05:00:00.000Z + - A spike from 10 to 100 at 2020-01-01T05:00:00.000Z, which went back down + to the average after 2020-01-01T05:02:00.000Z + - A trend change after 2020-01-01T05:00:00.000Z. The values ranged from 10 + to 20 before, but then after increased from 20 to 100 until + 2020-01-01T05:02:00.000Z. + + Do not: + - repeat the variables, instead, repeat the constants. + - repeat the timeseries as a whole, verbatim, in full. However, you can use individual data points + timestamps to illustrate the magnitude of the change, as in the example previously given. + - make up timestamps. + - separately list individual events if you have already mentioned + the pattern. 
+ + Statistics: + + - ${stats.interestingPatternCount} patterns from ${formatEntity(entity)} + were collected + - ${stats.docCount} logs were categorized + - ${stats.ignoredPatternCount} patterns were deemed uninteresting and accounted + for ${stats.ignoredDocCount} out of the total amount of logs + `; + + const response = await inferenceClient.output({ + id: 'describe_log_patterns', + connectorId, + system, + input, + }); + + return { + ...stats, + content: response.content, + }; +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/find_related_entities/analyze_fetched_related_entities.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/find_related_entities/analyze_fetched_related_entities.ts new file mode 100644 index 0000000000000..893533629ff06 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/tasks/find_related_entities/analyze_fetched_related_entities.ts @@ -0,0 +1,438 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
/**
 * A candidate related entity found via the keyword searches, with the search
 * highlights that matched and a truncated analysis of its documents.
 */
export interface RelatedEntityFromSearchResults {
  entity: { [x: string]: string };
  // NOTE(review): type arguments reconstructed (Elasticsearch highlight shape) — confirm.
  highlight: Record<string, string[]>;
  analysis: TruncatedDocumentAnalysis;
}

/**
 * Renders the prompt section for a single found entity: a heading with the
 * formatted entity, followed by its search highlights as a blockquote.
 */
function getPromptForFoundEntity({ entity, highlight }: RelatedEntityFromSearchResults) {
  const entityLabel = formatEntity(entity);

  const highlightsSection = toBlockquote(`### Search highlights for ${entityLabel}
  ${JSON.stringify(highlight)}`);

  return `## Entity: ${entityLabel}

  ${highlightsSection}
  `;
}

/**
 * Builds the shared, entity-independent part of the "describe related
 * entities" prompt: investigation context, knowledge base entries, the data
 * analysis, own/other log patterns and the keyword searches that were run.
 */
function getInputPromptBase({
  entity,
  analysis,
  ownPatterns,
  patternsFromOtherEntities,
  searches,
  context,
  kbEntries,
}: {
  entity: Record<string, string>;
  analysis: TruncatedDocumentAnalysis;
  ownPatterns: FieldPatternResultWithChanges[];
  patternsFromOtherEntities: FieldPatternResultWithChanges[];
  searches: RelatedEntityKeywordSearch[];
  context: string;
  kbEntries: ScoredKnowledgeBaseEntry[];
}) {
  // Only the sample and the regex of a pattern are sent to the LLM here.
  const serializePatterns = (patterns: FieldPatternResultWithChanges[]) =>
    JSON.stringify(patterns.map(({ sample, regex }) => ({ sample, regex })));

  let otherPatternsPrompt = 'No relevant log patterns from other entities found';
  if (patternsFromOtherEntities.length) {
    otherPatternsPrompt = serializePatterns(patternsFromOtherEntities);
  }

  let logPatternsPrompt = 'No log patterns found';
  if (ownPatterns.length) {
    logPatternsPrompt = serializePatterns(ownPatterns);
  }

  const searchSections = searches.map(({ fragments, appearsAs }) => {
    const fragmentList = fragments.map((fragment) => `- \`${fragment}\``).join('\n');
    return `## Appears as: ${appearsAs}

      ### Fragments:
      ${fragmentList}`;
  });

  return `Describe possible relationships to the investigated entity ${formatEntity(entity)}.

  ## Context

  ${toBlockquote(context)}

  ${serializeKnowledgeBaseEntries(kbEntries)}

  ## Data analysis
  ${JSON.stringify(analysis)}

  ## Log patterns for ${formatEntity(entity)}

  ${logPatternsPrompt}

  ## Patterns from other entities

  ${otherPatternsPrompt}

  ## Search keywords

  ${searchSections.join('\n')}`;
}
+ +Strong indicators provide definitive links between entities. Distributed tracing +IDs (trace, span, and parent) are among the strongest indicators, as they map +the complete request path across services, showing exact service interactions. +Session or user IDs are also strong indicators, capturing a user’s actions +across services or hosts and revealing issues specific to particular users. + +Average indicators give helpful context but may require supporting data to +clarify relationships. IP addresses, for instance, are moderately strong for +tracking inter-service calls within controlled environments but are weaker +across public or shared networks where IP reuse is common. URL paths also fall +in this category; they link entities to specific endpoints or service functions +and are moderately strong for tracking interactions between microservices with +known APIs. Port numbers are another average indicator. While they suggest the +service interaction type (HTTP, database), they generally need pairing with IP +addresses or URLs for more accuracy, as port numbers alone are often shared +across different services. + +Weak indicators are often too generic to imply a direct relationship but can +suggest possible correlations. Host names, for example, are broad and typically +cover a range of services or applications, especially in large clusters. +Time-based indicators, such as timestamps or TTL values, suggest possible timing +correlations but don’t establish a definitive link on their own. Status codes, +like HTTP 500 errors, indicate issues but don’t specify causality, often +requiring corroboration with stronger indicators like trace or session IDs. + +However, weak indicators can become strong when they appear together. For +instance, a combination of IP address, port, and timestamp can strongly suggest +a direct interaction between services, especially when the same combination is +seen repeatedly or in conjunction with related URLs. 
Similarly, a host name +combined with a unique URL path can strongly suggest that a specific service or +pod is generating particular request patterns, even if each alone is too +general. + +## Relevance to the investigation + +Given the context of the investigation, some entities might be very relevant +even if there is no strong evidence of them being a direct dependency of +${formatEntity(entity)}. For instance, the related entity might be an +orchestrating entity, or it might be involved in a specific operation related +to the ongoing issue. + +## Identifying entity relationships + +Your current task is to identify possible entity relationships for the +investigated entity ${formatEntity(entity)}. You will get some context, document +analysis for the investigated entity, and results from keyword searches that were +extracted from the entity. Based on this data, list entities that could possibly +be related to the given entity and/or the initial context. List the highly +relevant entities first. + +## Output + +For each possible relationship, describe the following things: +- The related entity (as a key-value pair) +- The indicators you have observed as evidence of the relationship. Include the +strength of the indicator, and the exact pieces of data that are related to it +(field names and values, in both the investigated entity, and the possibly +related entity). +- Reason how the related entity is related to both ${formatEntity(entity)} as a +dependency and the context. For instance, describe who is the caller and callee +or whether that is unclear, based on the data, or explain how it might be +related to the context. 
/**
 * Finds entities that are possibly related to the investigated entity and asks
 * the LLM to describe the relationships.
 *
 * For every grouping field, documents matching the keyword-search fragments
 * are collapsed per field value. Each resulting candidate is analyzed with
 * `analyzeDocuments`, its field values are reduced to those shared with the
 * investigated entity, and the candidates are then described by the LLM in
 * requests of at most 10 candidates each.
 *
 * @returns `summaries` - one LLM-written description per request, and
 * `foundEntities` - every candidate found through the keyword searches.
 */
export async function analyzeFetchedRelatedEntities({
  connectorId,
  inferenceClient,
  esClient,
  start,
  end,
  searches,
  groupingFields,
  index,
  entity,
  ownPatterns,
  analysis,
  patternsFromOtherEntities,
  logger: parentLogger,
  context,
  kbEntries,
}: {
  connectorId: string;
  inferenceClient: InferenceClient;
  esClient: ObservabilityElasticsearchClient;
  start: number;
  end: number;
  searches: RelatedEntityKeywordSearch[];
  groupingFields: string[];
  index: string | string[];
  entity: Record<string, string>;
  analysis: {
    truncated: TruncatedDocumentAnalysis;
    full: DocumentAnalysis;
  };
  ownPatterns: FieldPatternResultWithChanges[];
  patternsFromOtherEntities: FieldPatternResultWithChanges[];
  context: string;
  logger: Logger;
  kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<{
  summaries: string[];
  foundEntities: RelatedEntityFromSearchResults[];
}> {
  const entityFields = Object.keys(entity);

  const logger = parentLogger.get('findRelatedEntities');

  logger.debug(
    () => `Finding related entities: ${JSON.stringify({ entity, groupingFields, searches })}`
  );

  // Unique values seen on the investigated entity. The Set gives constant-time
  // exact matches; the string subset is used for the substring comparison.
  const entityValueSet = new Set(analysis.full.fields.flatMap((field) => field.values));
  const entityStringValues = Array.from(entityValueSet).filter(
    (value): value is string => typeof value === 'string'
  );

  const foundEntities = (
    await Promise.all(
      groupingFields.map((groupingField) => getResultsForGroupingField(groupingField))
    )
  ).flat();

  logger.debug(() => `Found ${foundEntities.length} entities via keyword searches`);

  const system = `${RCA_SYSTEM_PROMPT_BASE}

  ${RCA_PROMPT_ENTITIES}

  ${RCA_PROMPT_DEPENDENCIES}`;

  const inputPromptBase = getInputPromptBase({
    entity,
    analysis: analysis.truncated,
    ownPatterns,
    patternsFromOtherEntities,
    searches,
    context,
    kbEntries,
  });

  const foundEntityPrompts = foundEntities.map((foundEntity) => {
    return {
      text: getPromptForFoundEntity(foundEntity),
      id: formatEntity(foundEntity.entity),
    };
  });

  const inputPromptInstructions = getInputPromptInstructions({ entity });

  // don't do more than 10 entities in a response, we'll run out of
  // tokens
  const requests = chunk(foundEntityPrompts, 10).flatMap((texts) =>
    chunkOutputCalls({
      system,
      input: `${inputPromptBase} ${inputPromptInstructions}`,
      texts,
      tokenLimit: 32_000 - 6_000,
    })
  );

  const allRelevantEntityDescriptions = await Promise.all(
    requests.map(async (request) => {
      const outputCompleteEvent = await inferenceClient.output({
        id: 'describe_relevant_entities',
        connectorId,
        system: request.system,
        input: `${inputPromptBase}

        # Found entities

        ${request.texts.map((text) => text.text).join('\n\n')}

        ${inputPromptInstructions}`,
      });

      return outputCompleteEvent.content;
    })
  );

  return {
    summaries: allRelevantEntityDescriptions,
    foundEntities,
  };

  /**
   * Runs the keyword search collapsed on `groupingField` and analyzes the
   * documents of every distinct value that was found.
   */
  async function getResultsForGroupingField(
    groupingField: string
  ): Promise<RelatedEntityFromSearchResults[]> {
    // When the investigated entity is identified by exactly this field, leave
    // the entity itself out of the results.
    const excludeQuery = isEqual([groupingField], entityFields)
      ? `NOT (${groupingField}:"${entity[groupingField]}")`
      : ``;

    const fieldCaps = await esClient.fieldCaps('check_if_grouping_field_exists', {
      fields: [groupingField],
      index,
      index_filter: {
        bool: {
          filter: [...rangeQuery(start, end)],
        },
      },
    });

    if (isEmpty(fieldCaps.fields[groupingField])) {
      return [];
    }

    const keywordSearchResults = await esClient.search(
      'find_related_entities_via_keyword_searches',
      {
        track_total_hits: false,
        index,
        query: {
          bool: {
            must: [...rangeQuery(start, end), ...kqlQuery(excludeQuery)],
            should: [
              {
                multi_match: {
                  query: searches.flatMap((search) => search.fragments).join(' '),
                  fields: '*',
                },
              },
            ],
            minimum_should_match: 1,
          },
        },
        fields: [groupingField],
        collapse: {
          field: groupingField,
        },
        highlight: {
          fields: {
            '*': {},
          },
        },
        _source: false,
        size: 1_000,
      }
    );

    if (!keywordSearchResults.hits.hits.length) {
      logger.debug(() => `No hits: ${JSON.stringify({ entity, groupingField, searches })}`);
      return [];
    }

    logger.trace(
      () =>
        `Hits: ${JSON.stringify({
          entity,
          groupingField,
          searches,
          count: keywordSearchResults.hits.hits.length,
          hits: keywordSearchResults.hits.hits,
        })}`
    );

    const limiter = pLimit(20);

    const groupingFieldAnalysis = await Promise.all(
      keywordSearchResults.hits.hits.flatMap((hit) => {
        // The multi_match on `*` does not require the grouping field to exist
        // on the matching document, so the collapsed hit may carry no (or a
        // null) value for it. Such a hit does not identify an entity: skip it
        // instead of throwing or emitting a bogus entity.
        const rawGroupValue = hit.fields?.[groupingField]?.[0];
        if (rawGroupValue === undefined || rawGroupValue === null) {
          return [];
        }
        const groupValue = String(rawGroupValue);

        return [
          limiter(async () => {
            const analysisForGroupingField = await analyzeDocuments({
              esClient,
              start,
              end,
              index,
              kuery: getEntityKuery({
                [groupingField]: groupValue,
              }),
            });

            // Keep only the values that also occur on the investigated entity,
            // either exactly or as a substring in either direction.
            const analysisWithRelevantValues = {
              ...analysisForGroupingField,
              fields: analysisForGroupingField.fields
                .filter((field) => !field.empty)
                .map((field) => {
                  const valuesFoundInEntity = field.values.filter((value) => {
                    if (entityValueSet.has(value)) {
                      return true;
                    }
                    return (
                      typeof value === 'string' &&
                      entityStringValues.some(
                        (valueFromEntity) =>
                          value.includes(valueFromEntity) || valueFromEntity.includes(value)
                      )
                    );
                  });
                  return {
                    ...field,
                    values: valuesFoundInEntity,
                  };
                }),
            };

            return {
              groupingField,
              key: groupValue,
              // Highlights are absent when no highlightable field matched.
              highlight: hit.highlight ?? {},
              analysis: sortAndTruncateAnalyzedFields(analysisWithRelevantValues),
            };
          }),
        ];
      })
    );

    return groupingFieldAnalysis.map(({ key, highlight, analysis: analysisForGroupingField }) => {
      return {
        entity: {
          [groupingField]: key,
        },
        highlight,
        analysis: analysisForGroupingField,
      };
    });
  }
}
/** A related entity as extracted by the LLM from a related-entities report. */
export interface RelatedEntityDescription {
  entity: Record<string, string>;
  reason: string;
  confidence: string;
}

/**
 * Turns the free-text related-entity summaries into structured
 * `RelatedEntityDescription`s, one LLM call per summary (at most 5 at a
 * time). Only entities that were actually found by the keyword searches
 * (`foundEntities`) are returned; anything else the LLM lists is dropped.
 */
export async function extractRelatedEntities({
  entity,
  entityReport,
  summaries,
  foundEntities,
  context,
  rcaContext: { events, connectorId, inferenceClient },
}: {
  foundEntities: RelatedEntityFromSearchResults[];
  entity: Record<string, string>;
  entityReport: string;
  summaries: string[];
  context: string;
  // NOTE(review): type arguments reconstructed from the destructured keys — confirm.
  rcaContext: Pick<RootCauseAnalysisContext, 'events' | 'connectorId' | 'inferenceClient'>;
}): Promise<{ relatedEntities: RelatedEntityDescription[] }> {
  const system = `${RCA_SYSTEM_PROMPT_BASE}

  ${RCA_PROMPT_ENTITIES}

  ${RCA_PROMPT_DEPENDENCIES}`;

  const investigatedBefore = getPreviouslyInvestigatedEntities({ events });

  let previouslyInvestigatedEntitiesPrompt = '';
  if (investigatedBefore.length) {
    const bulletList = investigatedBefore
      .map((prevEntity) => `- ${formatEntity(prevEntity)}`)
      .join('\n');
    previouslyInvestigatedEntitiesPrompt = `## Previously investigated entities

      ${bulletList}`;
  }

  // Structured output: a list of { entity: { field, value }, reason, confidence }.
  const schema = {
    type: 'object',
    properties: {
      related_entities: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            entity: {
              type: 'object',
              properties: {
                field: {
                  type: 'string',
                },
                value: {
                  type: 'string',
                },
              },
              required: ['field', 'value'],
            },
            reason: {
              type: 'string',
              description: 'Describe why this entity might be relevant. Provide evidence.',
            },
            confidence: {
              type: 'string',
              description:
                'Describe how confident you are in your conclusion about this relationship: low, moderate, high',
            },
          },

          required: ['entity', 'reason', 'confidence'],
        },
      },
    },
    required: ['related_entities'],
  } as const;

  const limiter = pLimit(5);

  const outputs = await Promise.all(
    summaries.map((summary) => {
      const input = `
    # Investigated entity

    ${formatEntity(entity)}

    # Report

    ${toBlockquote(entityReport)}

    # Related entities report

    ${toBlockquote(summary)}

    ${previouslyInvestigatedEntitiesPrompt}

    # Context

    ${context}

    # Task

    Your current task is to extract relevant entities as a data structure from the
    related entities report. Order them by relevance to the investigation, put the
    most relevant ones first.
  `;

      return limiter(async () => {
        const completeEvent = await inferenceClient.output({
          id: 'get_entity_relationships',
          connectorId,
          system,
          input,
          schema,
        });
        return completeEvent.output;
      });
    })
  );

  // Stable-stringified ids of the entities that the searches really returned.
  const knownEntityIds = new Set(
    foundEntities.map(({ entity: foundEntity }) => stringify(foundEntity))
  );

  const relatedEntities: RelatedEntityDescription[] = [];
  for (const output of outputs) {
    for (const item of output.related_entities) {
      const candidate = { [item.entity.field]: item.entity.value };
      if (knownEntityIds.has(stringify(candidate))) {
        relatedEntities.push({
          entity: candidate,
          reason: item.reason,
          confidence: item.confidence,
        });
      }
    }
  }

  return {
    relatedEntities,
  };
}
/**
 * Finds entities related to the investigated entity in two steps: the LLM
 * first writes keyword searches (and picks grouping fields) for the entity,
 * then those searches are executed and the hits are analyzed and summarized.
 *
 * @returns the keyword searches that were used, the LLM summaries of the
 * candidates, and the candidate entities themselves.
 */
export async function findRelatedEntities({
  connectorId,
  inferenceClient,
  start,
  end,
  index,
  esClient,
  entity,
  analysis,
  logger,
  context,
  ownPatterns,
  patternsFromOtherEntities,
  kbEntries,
}: {
  connectorId: string;
  inferenceClient: InferenceClient;
  start: number;
  end: number;
  index: string | string[];
  esClient: ObservabilityElasticsearchClient;
  entity: Record<string, string>;
  analysis: {
    truncated: TruncatedDocumentAnalysis;
    full: DocumentAnalysis;
  };
  logger: Logger;
  context: string;
  ownPatterns: FieldPatternResultWithChanges[];
  patternsFromOtherEntities: FieldPatternResultWithChanges[];
  kbEntries: ScoredKnowledgeBaseEntry[];
}): Promise<{
  searches: RelatedEntityKeywordSearch[];
  summaries: string[];
  foundEntities: RelatedEntityFromSearchResults[];
}> {
  // Step 1: the keyword-search prompt only needs the truncated analysis.
  const keywordSearch = await writeKeywordSearchForRelatedEntities({
    connectorId,
    inferenceClient,
    entity,
    analysis: analysis.truncated,
    ownPatterns,
    context,
    kbEntries,
  });

  // Step 2: run the searches; this step gets both the full and the truncated analysis.
  const relatedEntityAnalysis = await analyzeFetchedRelatedEntities({
    entity,
    connectorId,
    start,
    end,
    esClient,
    index,
    inferenceClient,
    searches: keywordSearch.searches,
    groupingFields: keywordSearch.groupingFields,
    logger,
    analysis,
    ownPatterns,
    patternsFromOtherEntities,
    context,
    kbEntries,
  });

  return {
    searches: keywordSearch.searches,
    summaries: relatedEntityAnalysis.summaries,
    foundEntities: relatedEntityAnalysis.foundEntities,
  };
}
/**
 * Instructions appended to the keyword-search prompt. They explain how to
 * split metadata of the investigated entity into searchable fragments and
 * show the expected output shape.
 *
 * The JSON example uses the same keys as the output schema of the
 * `extract_keyword_searches` call (`groupingFields`, `searches`, `fragments`,
 * `appearsAs`) so that the example and the schema do not contradict each
 * other.
 *
 * NOTE(review): the `<...>` placeholder names below were reconstructed; the
 * placeholders were missing from the text this was restored from — confirm
 * the wording.
 */
const SYSTEM_PROMPT_ADDENDUM = `# Guide: Constructing Keyword Searches to Find Related Entities

When investigating issues like elevated failure rates for a
specific endpoint, you can use the metadata at hand (IP addresses,
URLs, session IDs, tracing IDs, etc.) to build targeted keyword searches.
By extracting meaningful fragments from the data, you can correlate
related services or hosts across distributed systems. Here’s how
you can break down the metadata and format your searches.

## Grouping fields

Define grouping fields for the entities you want to extract. For
instance, "service.name" if you are looking for services, or
"kubernetes.pod.name" if you are looking for pods. Focus
on services, unless you are looking for deployment or
configuration changes.

---

## Key Metadata and Search Format

### Example: Investigating a service failure for \`/api/products\`

You can break down various pieces of metadata into searchable
fragments. For each value, include a short description of its
relationship to the investigation. This value will be used
by the system to determine the relevance of a given entity
that matches the search request.

### 1. **IP Address and Port**
- **Fragments:**
  - \`"10.44.0.11:8080"\`: Full address.
  - \`"10.44.0.11"\`: IP address only.
  - \`"8080"\`: Port number.
- **Appears as:** This IP address and port are referenced as
<ip-field-name> and <port-field-name> in the investigated entity
<entity-name>.

### 2. **Outgoing Request URL**
- **Fragments:**
  - \`"http://called-service/api/product"\`: Full outgoing URL.
  - \`"/api/product*"\`: Endpoint path.
  - \`"called-service"\`: Service name of the upstream dependency.
- **Appears as:** These URL fragments appear as attributes.request.url
in the investigated entity <entity-name>. They could appear as referer
in the upstream dependency.

### 3. **Parent and Span IDs**
- **Fragments:**
  - \`"000aa"\`: Parent ID.
  - \`"000bbb"\`: Span ID.
- **Appears as:** These ids appear as span.id and parent.id in the
investigated entity <entity-name>. They could be referring to spans
found on upstream or downstream services.

---

## Example Search Format in JSON

To structure your keyword search, format the fragments and their
relationships in a JSON object like this:

\`\`\`json
{
  "groupingFields": [ "service.name" ],
  "searches": [
    {
      "fragments": [
        "10.44.0.11:8080",
        "10.44.0.11",
        "8080"
      ],
      "appearsAs": "This IP address and port are referenced as <ip-field-name> and <port-field-name> in the investigated entity <entity-name>."
    },
    {
      "fragments": [
        "http://<upstream-service>/api/product",
        "/api/product",
        "<upstream-service>"
      ],
      "appearsAs": "These URL fragments appear as attributes.request.url in the investigated entity <entity-name>."
    },
    {
      "fragments": [
        "000aa",
        "000bbb"
      ],
      "appearsAs": "These ids appear as span.id and parent.id in the investigated entity <entity-name>. They could be referring to spans found on upstream or downstream services"
    }
  ]
}
\`\`\``;

/**
 * One keyword search: the text fragments to look for, and a description of
 * where those values appear on the investigated entity.
 */
export interface RelatedEntityKeywordSearch {
  fragments: string[];
  appearsAs: string;
}
type SignificantEventSeverity = 'info' | 'unusual' | 'warning' | 'critical';

type SignificantEventType = 'alert' | 'slo' | 'event';

/** A single entry on the timeline that is visualized for the user. */
export interface SignificantEvent {
  severity: SignificantEventSeverity;
  '@timestamp'?: string;
  description: string;
  type: SignificantEventType;
}

/** The timeline of significant events extracted from an RCA report. */
export interface SignificantEventsTimeline {
  events: SignificantEvent[];
}

/**
 * Asks the LLM to extract a structured timeline of significant events from
 * the RCA report. The `timestamp` of each extracted event is returned as
 * `@timestamp`; events without a `type` are returned with type `'event'`.
 */
export async function generateSignificantEventsTimeline({
  report,
  rcaContext,
}: {
  report: string;
  rcaContext: RootCauseAnalysisContext;
}): Promise<SignificantEventsTimeline> {
  const { connectorId, inferenceClient } = rcaContext;

  const timelineCompleteEvent = await inferenceClient.output({
    id: 'generate_timeline',
    system: RCA_SYSTEM_PROMPT_BASE,
    connectorId,
    input: `Your current task is to generate a timeline
        of significant events, based on the given RCA report,
        according to a structured schema. This timeline will
        be presented to the user as a visualization.

        ${stringifySummaries(rcaContext)}

        # Report

        ${report}
        `,
    schema: {
      type: 'object',
      properties: {
        events: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              timestamp: {
                type: 'string',
                description: 'The ISO timestamp of when the event occurred',
              },
              severity: {
                type: 'string',
                enum: ['info', 'unusual', 'warning', 'critical'],
              },
              type: {
                type: 'string',
                enum: ['alert', 'slo', 'event'],
              },
              description: {
                type: 'string',
                description: 'A description of the event',
              },
            },
            // `timestamp` and `type` are optional in the LLM output.
            required: ['severity', 'description'],
          },
        },
      },
      required: ['events'],
    } as const,
  });

  const events = timelineCompleteEvent.output.events.map(
    ({ timestamp, severity, type, description }) => ({
      '@timestamp': timestamp,
      severity,
      type: type ?? 'event',
      description,
    })
  );

  return { events };
}
/**
 * A knowledge base entry that was scored for relevance and selected for an entity
 * investigation.
 */
export interface ScoredKnowledgeBaseEntry {
  /** Identifier of the entry as returned by the knowledge base recall. */
  id: string;
  /** Full, untruncated text of the entry. */
  text: string;
  /** Number of tokens in the full text. */
  tokens: number;
  /** Score returned by the LLM (requested range: 1-5); 0 when no score was returned. */
  score: number;
  /** Set only when the entry did not fit in the remaining token budget. */
  truncated?: {
    tokens: number;
    text: string;
  };
}

/**
 * Recalls knowledge base entries for an entity, asks the LLM to score them for relevance,
 * and returns the highest-scoring entries that fit within `maxTokens`.
 *
 * 1. Entries are recalled with one query per entity value (boost 3) plus the context.
 * 2. Entries are truncated proportionally so that together they fit in the scoring
 *    prompt, then scored by the `score_entries` output task.
 * 3. Entries are taken in descending score order while spending the `maxTokens` budget.
 *    The first entry that does not fit is returned with a `truncated` excerpt that uses
 *    up the rest of the budget; all later entries are dropped.
 *
 * @param entity Field/value pairs identifying the entity. NOTE(review): the type
 *   arguments were lost in the source; `Record<string, string>` is reconstructed from the
 *   use of the values as query text - confirm.
 * @param context Free-text context of the investigation, used for recall and scoring.
 * @param rcaContext Provides the assistant client, inference client, connector id and
 *   the model token limit.
 * @param maxTokens Upper bound on the total tokens of the returned entries (counting the
 *   truncated excerpt for a truncated entry).
 * @returns Selected entries, highest score first. Empty when nothing was recalled.
 */
export async function getKnowledgeBaseEntries({
  entity,
  context,
  rcaContext,
  maxTokens: maxTokensForEntries,
}: {
  entity: Record<string, string>;
  context: string;
  rcaContext: RootCauseAnalysisContext;
  maxTokens: number;
}): Promise<ScoredKnowledgeBaseEntry[]> {
  const response = await rcaContext.observabilityAIAssistantClient.recall({
    queries: [
      ...Object.values(entity).map((value) => ({ text: value, boost: 3 })),
      { text: context },
    ],
    limit: {
      tokenCount: Number.MAX_VALUE,
    },
  });

  // Nothing to score: avoid a pointless round-trip to the LLM.
  if (!response.length) {
    return [];
  }

  const { inferenceClient, connectorId } = rcaContext;

  const shortIdTable = new ShortIdTable();

  const system = RCA_SYSTEM_PROMPT_BASE;

  const input = `Re-order the attached documents, based on relevance to the context.
  Score them between 1 and 5, based on their relative relevance to each other. The
  most relevant doc should be scored 5, and the least relevant doc should be scored
  1.

  # Entity

  ${formatEntity(entity)}

  # Context

  ${toBlockquote(context)}
  `;

  // Tokens available for the documents in the scoring prompt: the model limit minus the
  // prompt itself and 1_000 tokens of headroom. Clamped so that a small token limit can
  // never produce a negative slice end (which would cut tokens off the tail instead).
  const maxTokensForScoring = Math.max(
    0,
    rcaContext.tokenLimit - encode(system + input).length - 1_000
  );

  const entriesWithTokens = response.map((entry) => {
    return {
      id: entry.id,
      text: entry.text,
      tokens: encode(entry.text),
    };
  });

  const totalTokenCount = sumBy(entriesWithTokens, (entry) => entry.tokens.length);

  // Only truncate when the documents do not fit as-is. This also keeps the proportional
  // share below free of a division by zero: the branch implies totalTokenCount > 0.
  const exceedsScoringBudget = totalTokenCount > maxTokensForScoring;

  const truncatedEntriesWithShortIds = entriesWithTokens.map((entry) => {
    const tokensForEntry = exceedsScoringBudget
      ? Math.floor((entry.tokens.length / totalTokenCount) * maxTokensForScoring)
      : entry.tokens.length;

    const truncatedText = decode(entry.tokens.slice(0, tokensForEntry));
    const isTruncated = tokensForEntry < entry.tokens.length;

    return {
      id: entry.id,
      tokens: entry.tokens,
      // The LLM sees (and echoes back) the short id instead of the real one.
      shortId: shortIdTable.take(entry.id),
      text: entry.text,
      truncatedText,
      isTruncated,
    };
  });

  const scoredEntries = await inferenceClient.output({
    id: 'score_entries',
    connectorId,
    system,
    input: `${input}

    ${truncatedEntriesWithShortIds
      .map((entry) => {
        return `# ID: ${entry.shortId}

        ## Text (${entry.isTruncated ? `truncated` : `not truncated `})

        ${toBlockquote(entry.truncatedText)}
      `;
      })
      .join('\n\n')}
    `,
    stream: false,
    schema: {
      type: 'object',
      properties: {
        docs: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              score: {
                type: 'number',
                description:
                  'A score between 1 and 5, with 5 being most relevant, and 1 being least relevant',
              },
              id: {
                type: 'string',
              },
            },
            required: ['score', 'id'],
          },
        },
      },
      required: ['docs'],
    },
  } as const);

  const scoresById = new Map(scoredEntries.output.docs.map((doc) => [doc.id, doc.score]));

  const entriesWithScore = truncatedEntriesWithShortIds.map((entry) => {
    // Entries the LLM did not mention sort last.
    const score = scoresById.get(entry.shortId) ?? 0;
    return {
      ...entry,
      score,
    };
  });

  const sortedEntries = orderBy(entriesWithScore, (entry) => entry.score, 'desc');

  const returnedEntries: ScoredKnowledgeBaseEntry[] = [];

  // Remaining budget. It has to shrink with every entry taken, otherwise every entry
  // that is individually smaller than `maxTokens` would be returned in full.
  let tokensLeft = maxTokensForEntries;

  for (const entry of sortedEntries) {
    if (tokensLeft <= 0) {
      break;
    }

    if (entry.tokens.length <= tokensLeft) {
      returnedEntries.push({
        id: entry.id,
        text: entry.text,
        tokens: entry.tokens.length,
        score: entry.score,
      });
      tokensLeft -= entry.tokens.length;
      continue;
    }

    // Does not fit completely: return an excerpt that spends what is left of the budget.
    const tookTokens = entry.tokens.slice(0, tokensLeft);
    returnedEntries.push({
      id: entry.id,
      text: entry.text,
      tokens: entry.tokens.length,
      score: entry.score,
      truncated: {
        text: decode(tookTokens),
        tokens: tookTokens.length,
      },
    });
    tokensLeft -= tookTokens.length;
  }

  return returnedEntries;
}
// ---------------------------------------------------------------------------------------
// tasks/investigate_entity/index.ts
// ---------------------------------------------------------------------------------------
// index.ts additionally re-exports the `EntityInvestigation` type declared in ./types
// (`export type { EntityInvestigation };`) so callers can import it from the task module.

/** Input for {@link investigateEntity}. */
export interface EntityInvestigationParameters {
  /**
   * Field/value pairs identifying the entity. NOTE(review): the type arguments were lost
   * in the source; `Record<string, string>` is reconstructed - confirm.
   */
  entity: Record<string, string>;
  rcaContext: RootCauseAnalysisContext;
  /** Free-text context for this investigation; passed to the describe/report tasks. */
  context: string;
}

/**
 * Investigates a single entity and produces a summary, related entities and attachments.
 *
 * The knowledge base lookup runs concurrently with signal collection and document
 * analysis; a failing lookup is logged and treated as "no entries". After log pattern
 * analysis, the entity report and the related-entity search run concurrently, and the
 * extraction of related entities waits for the report.
 *
 * @returns The investigation, or `undefined` when no data streams are found for the
 *   entity.
 */
export async function investigateEntity(
  parameters: EntityInvestigationParameters
): Promise<EntityInvestigation | undefined> {
  const {
    entity,
    rcaContext,
    rcaContext: {
      inferenceClient,
      connectorId,
      start,
      end,
      esClient,
      logger: parentLogger,
      indices,
    },
    context,
  } = parameters;
  const kuery = getEntityKuery(entity);

  const logger = parentLogger.get('investigateEntity');

  logger.debug(() => `Investigating entity: ${JSON.stringify(parameters.entity)}`);

  // Started eagerly so it overlaps with the Elasticsearch work below. The catch handler
  // is attached right away, so an early return never leaves an unhandled rejection.
  const kbPromise = getKnowledgeBaseEntries({
    entity,
    context,
    rcaContext,
    maxTokens: 4_000,
  }).catch((error) => {
    logger.error(`Could not fetch entries from knowledge base`);
    logger.error(error);
    return [];
  });

  const [{ dataStreams }, alerts, slos] = await getSignals({ ...parameters, kuery });

  logger.debug(
    () =>
      `Signals for entity ${JSON.stringify(entity)}: ${dataStreams.length} data streams, ${
        alerts.length
      } alerts, ${slos.length} slos`
  );

  if (!dataStreams.length) {
    return undefined;
  }

  const fullAnalysis = await analyzeDataStreamsForEntity({
    start,
    end,
    esClient,
    kuery,
    dataStreams,
  });

  const truncatedAnalysis = sortAndTruncateAnalyzedFields(fullAnalysis);

  const kbEntries = await kbPromise;

  const { ownPatterns, patternsFromOtherEntities } = await analyzeLogPatterns({
    allAnalysis: [{ index: dataStreams, analysis: truncatedAnalysis }],
    entity,
    system: stringifySummaries(rcaContext),
    cutoff: {
      significance: 'high',
    },
    rcaContext,
    kbEntries,
  });

  logger.trace(
    () => `Analyzed log patterns: ${JSON.stringify({ ownPatterns, patternsFromOtherEntities })}`
  );

  // Describes the entity and its log patterns concurrently, then writes the report.
  const writeEntityReport = async () => {
    const [entityDescription, logPatternDescription] = await Promise.all([
      describeEntity({
        inferenceClient,
        analysis: truncatedAnalysis,
        connectorId,
        contextForEntityInvestigation: context,
        entity,
        ownPatterns,
        kbEntries,
      }),
      describeLogPatterns({
        analysis: truncatedAnalysis,
        connectorId,
        contextForEntityInvestigation: context,
        entity,
        inferenceClient,
        ownPatterns,
        patternsFromOtherEntities,
        kbEntries,
      }),
    ]);

    const report = await writeEntityInvestigationReport({
      connectorId,
      inferenceClient,
      entityDescription,
      logPatternDescription,
      contextForEntityInvestigation: context,
      entity,
    });

    return {
      description: entityDescription,
      logPatternDescription,
      report,
    };
  };

  const entityReportPromise = writeEntityReport();

  // Searches for related entities while the report is being written; the extraction
  // step needs the finished report, so it awaits the shared promise.
  const findAndExtractRelatedEntities = async () => {
    const { searches, summaries, foundEntities } = await findRelatedEntities({
      connectorId,
      end,
      entity,
      esClient,
      index: indices.logs,
      inferenceClient,
      logger,
      start,
      context,
      analysis: {
        full: fullAnalysis,
        truncated: truncatedAnalysis,
      },
      ownPatterns,
      patternsFromOtherEntities,
      kbEntries,
    });

    const report = await entityReportPromise;

    const { relatedEntities } = await extractRelatedEntities({
      entityReport: report.report,
      summaries,
      entity,
      foundEntities,
      context,
      rcaContext,
    });

    return {
      relatedEntities,
      foundEntities,
      searches,
      summaries,
    };
  };

  const [entityReport, relatedEntitiesResults] = await Promise.all([
    entityReportPromise,
    findAndExtractRelatedEntities(),
  ]);

  return {
    entity,
    summary: [
      entityReport.description,
      entityReport.logPatternDescription.content,
      entityReport.report,
    ].join('\n\n'),
    relatedEntities: relatedEntitiesResults.relatedEntities,
    attachments: {
      alerts,
      slos,
      analysis: truncatedAnalysis,
      ownPatterns,
      patternsFromOtherEntities,
      searches: relatedEntitiesResults.searches,
      relatedEntitiesSummaries: relatedEntitiesResults.summaries,
      kbEntries,
    },
  };
}

/**
 * Fetches, concurrently, the data streams (from the logs and traces indices), up to 10
 * alerts and up to 1000 SLO summary documents for the entity.
 *
 * @returns A tuple `[{ dataStreams }, alerts, slos]` where alerts and slos are the
 *   `_source` documents of the respective search hits.
 */
async function getSignals({
  entity,
  kuery,
  rcaContext: { start, end, esClient, rulesClient, alertsClient, indices, spaceId },
}: {
  kuery: string;
  entity: Record<string, string>;
  rcaContext: Pick<
    RootCauseAnalysisContext,
    'start' | 'end' | 'esClient' | 'rulesClient' | 'alertsClient' | 'indices' | 'spaceId'
  >;
}) {
  return await Promise.all([
    getDataStreamsForEntity({
      esClient,
      kuery,
      index: indices.logs.concat(indices.traces),
    }),
    getAlertsForEntity({ entity, rulesClient, alertsClient, start, end, size: 10 }).then(
      (alertsResponse) => {
        return alertsResponse.hits.hits.map((hit) => hit._source!);
      }
    ),
    getSlosForEntity({
      entity,
      start,
      end,
      esClient,
      size: 1000,
      sloSummaryIndices: indices.sloSummaries,
      spaceId,
    }).then((slosResponse) => {
      return slosResponse.hits.hits.map((hit) => hit._source);
    }),
  ]);
}

/**
 * Runs the document analysis over the entity's data streams for the given time range
 * and drops fields that the analysis flags as `empty`.
 */
async function analyzeDataStreamsForEntity({
  start,
  end,
  dataStreams,
  esClient,
  kuery,
}: {
  start: number;
  end: number;
  kuery: string;
  dataStreams: string[];
  esClient: ObservabilityElasticsearchClient;
}) {
  const analysis = await analyzeDocuments({
    esClient,
    start,
    end,
    index: dataStreams,
    kuery,
  });

  return {
    ...analysis,
    fields: analysis.fields.filter((field) => !field.empty),
  };
}

// ---------------------------------------------------------------------------------------
// tasks/investigate_entity/prompts.ts
// ---------------------------------------------------------------------------------------

/**
 * Builds the task-guide preamble shared by the entity investigation prompts: names the
 * entity under investigation and quotes the context given for the investigation.
 */
export const getInvestigateEntityTaskPrompt = ({
  entity,
  contextForEntityInvestigation,
}: {
  entity: Record<string, string>;
  contextForEntityInvestigation: string;
}) => `## Entity-Based Investigation: Task Guide

In the investigation process, you are currently investigating the entity
${formatEntity(entity)}. The context given for this investigation is:

${toBlockquote(contextForEntityInvestigation)}`;

// ---------------------------------------------------------------------------------------
// tasks/investigate_entity/types.ts
// ---------------------------------------------------------------------------------------

/** Result of investigating a single entity, as returned by `investigateEntity`. */
export interface EntityInvestigation {
  entity: Record<string, string>;
  /** Entity description, log pattern description and report, joined by blank lines. */
  summary: string;
  relatedEntities: RelatedEntityDescription[];
  /** Raw material gathered during the investigation. */
  attachments: {
    analysis: TruncatedDocumentAnalysis;
    // NOTE(review): the type arguments of this Record were lost in the source;
    // `<string, any>` is a reconstruction - confirm against the SLO summary document type.
    slos: Array<
      Record<string, any> & {
        status: 'VIOLATED' | 'DEGRADED' | 'HEALTHY' | 'NO_DATA';
      }
    >;
    alerts: ParsedTechnicalFields[];
    searches: RelatedEntityKeywordSearch[];
    relatedEntitiesSummaries: string[];
    kbEntries: ScoredKnowledgeBaseEntry[];
  } & AnalyzeLogPatternOutput;
}
// NOTE(review): the prompt constants below contain a few typos ("prounounced",
// "contextualize the significant in", "allow you go investigate"). They are kept verbatim
// because prompt wording is runtime behavior; fix them deliberately, with an evaluation.

/** Task guide for the very first observation, when no investigation has run yet. */
const INITIAL_OBSERVATION_TASK_GUIDE = `Your current task is to write observations based on the initial context. You
should acknowledge the context briefly, and mention key observations from the
initial context.

Then, briefly describe what change you are looking for. Are the symptoms:

- rapid, or gradual onset?
- subtle or prounounced?

If possible, mention the time of the change.

When considering the initial context, reason about relevant changes to observe,
such as short-lived versus persistent changes or singular events, like scale
events, rollouts, or configuration changes.

After, taking into account the capabilities you have, plan for next steps.

Describe the next step, which is to investigate the entity found in the initial
context. Only mention the entity (as a field/value). Do not mention any
additional filters.

Be brief, accurate, and critical.`;

/** Task guide appended after the list of concluded investigations. */
const INVESTIGATION_ADDENDUM = `
**Task Guide: Observe the investigation results**

You will receive one or more investigations. These investigations mention:
- a general characterization of the entity based on its data
- relevant log patterns
- other signals, like SLOs or alerts
- possibly related entities, and investigation suggestions

First, you should briefly acknowledge the initial context of the investigation
and where it stands.

Next, you should note key observations from the investigations, and how they relate
to the ongoing investigation.

After, you should generate a timeline of significant events. For this timeline,
include events from previous observations. Additionally, include significant
events from the inspected investigations. Group events together in a topic
if needed. Significant events are things like: an increase in errors, deployment
events, a drop to zero for access logs, etc. In most cases, you do not want to
mention individual log messages, unless it is a particularly significant event
by itself.

For each event, mention:

- the timestamp of the event
- the nature of the change, if applicable
- data from the event, such as specific log patterns, alerts or slos
- the meaning of the event and how it is related to the initial context

Do not include:
- the time range from the investigation itself (start/end)
- other events that occurred during the investigation itself, like running
log analysis or other patterns

## Correlating significant events

When correlating significant events, pay close attention to the timestamp of
the mentioned change, and how it correlates to the timestamp of the change you
want to correlate it to, such as the start time of an alert. An alert might be
delayed, but if you see many changes around a specific timestamp, and some of
them being significantly earlier, or later, the latter group is likely not
relevant.

## Context and reasoning

Next, use the timeline of events and the new observations to revise your
analysis of the initial context and the ongoing investigation. Reason about
how changes could be related: are they close in time, or far removed, compared
to others? Is the type of change similar? Is the magnitude of the change similar?`;

/** Used while the process is still allowed to investigate more entities. */
const SUGGEST_NEXT_STEPS_PROMPT = `
Next, consider next steps. it's always important to contextualize the significant
in the initial context of the investigation. Focus on your strongest pieces of
evidence. Your observations should be related to finding out the cause of the
initial context of the investigation - you should not concern yourself with the
impact on _other_ entities.

Suggest to conclude the process when:

- there is a clear and obvious root cause
- you have investigated more than 10 entities
- OR you cannot find any unhealthy entities
- there are no more entities to investigate

If the conclusion is you need to continue your investigation, mention the entities
that should be investigated. Do this only if there is a significant change one of
the related entities will give you new insights into the root cause (instead of
just the impact). DO NOT investigate an entity more than once.`;

/** Used once the limits below are exceeded and the process has to wrap up. */
const CONCLUDE_PROCESS_PROMPT = `
You must suggest to conclude the process and write the final report, as your
capabilities do not allow you go investigate more entities.`;

/** The process may continue while at most this many observation summaries exist. */
const MAX_PREVIOUS_OBSERVATIONS = 5;

/** The process may continue while at most this many investigations are observed at once. */
const MAX_INVESTIGATIONS_PER_OBSERVATION = 10;

/** Builds the system/input prompts for the first observation of the initial context. */
function getInitialPrompts(initialContext: string) {
  return {
    system: `${RCA_SYSTEM_PROMPT_BASE}

    ${RCA_PROMPT_ENTITIES}

    ${RCA_PROMPT_CHANGES}`,
    input: `## Context

    ${initialContext}

    ${INITIAL_OBSERVATION_TASK_GUIDE}`,
  };
}

/**
 * Builds the system/input prompts for observing concluded investigations.
 *
 * The input lists every investigation (summary plus related entities, if any), followed
 * by the observation task guide and either the "suggest next steps" prompt (including
 * the list of previously investigated entities) or the "conclude" prompt, depending on
 * whether the limits above have been exceeded.
 */
function getObserveInvestigationsPrompts({
  investigations,
  summaries,
  rcaContext,
}: {
  investigations: EntityInvestigation[];
  summaries: ObservationStepSummary[];
  rcaContext: RootCauseAnalysisContext;
}) {
  const previouslyInvestigatedEntities = getPreviouslyInvestigatedEntities(rcaContext);

  // NOTE(review): the `'summary' in investigation` filter presumably only matters if
  // items without a summary can be passed in - confirm against callers.
  const canContinue =
    summaries.length <= MAX_PREVIOUS_OBSERVATIONS &&
    investigations.filter((investigation) => 'summary' in investigation).length <=
      MAX_INVESTIGATIONS_PER_OBSERVATION;

  const investigationsPrompt = `Observe the following investigations that recently concluded:
    ${investigations
      .map((investigation, index) => {
        return `## ${index + 1}: investigation of ${formatEntity(investigation.entity)}

      ${toBlockquote(investigation.summary)}

      ${
        investigation.relatedEntities.length
          ? `### Relationships to ${formatEntity(investigation.entity)}

      ${toBlockquote(JSON.stringify(investigation.relatedEntities))}

      `
          : ``
      }
      `;
      })
      .join('\n\n')}

    ${INVESTIGATION_ADDENDUM}

    ${
      canContinue
        ? `${SUGGEST_NEXT_STEPS_PROMPT}

      ${
        previouslyInvestigatedEntities.length
          ? `The following entities have been investigated previously.
              Do not investigate them again:

              ${previouslyInvestigatedEntities.map((entity) => `- ${JSON.stringify(entity)}`).join('\n')}`
          : ``
      }

      `
        : CONCLUDE_PROCESS_PROMPT
    }

  `;

  const systemPrompt = `${RCA_SYSTEM_PROMPT_BASE}

    ${RCA_PROMPT_ENTITIES}

    ${stringifySummaries(rcaContext)}`;

  return {
    system: systemPrompt,
    input: investigationsPrompt,
  };
}

/** Outcome of one observation step. */
export interface ObservationStepSummary {
  /** The investigations that were observed in this step (empty for the first step). */
  investigations: EntityInvestigation[];
  /** The observation text written by the LLM. */
  content: string;
}

/**
 * Asks the LLM (`observe` output task) to write an observation.
 *
 * When `investigations` is empty, the observation is about the initial context;
 * otherwise it covers the given investigations. Previous observation summaries are read
 * from the tool messages in `rcaContext.events` whose name is the observe tool name.
 *
 * @returns The LLM's observation together with the investigations it was based on.
 */
export function observeInvestigationResults({
  rcaContext,
  rcaContext: { logger, events, initialContext, inferenceClient, connectorId },
  investigations,
}: {
  rcaContext: RootCauseAnalysisContext;
  investigations: EntityInvestigation[];
}): Promise<ObservationStepSummary> {
  const summaries = events
    .filter((event): event is ObservationToolMessage => {
      return event.role === MessageRole.Tool && event.name === RCA_OBSERVE_TOOL_NAME;
    })
    .map((event) => event.data);

  logger.debug(
    () =>
      `Observing ${investigations.length} investigations (${summaries.length} previous summaries)`
  );

  const { system, input } = investigations.length
    ? getObserveInvestigationsPrompts({ summaries, investigations, rcaContext })
    : getInitialPrompts(initialContext);

  return inferenceClient
    .output({
      id: 'observe',
      system,
      input,
      connectorId,
    })
    .then((outputCompleteEvent) => {
      return {
        content: outputCompleteEvent.content,
        investigations,
      };
    });
}
// ---------------------------------------------------------------------------------------
// tasks/write_entity_investigation_report/index.ts
// ---------------------------------------------------------------------------------------

/**
 * Asks the LLM (`generate_entity_report` output task) to write the report for a single
 * entity investigation.
 *
 * The prompt consists of the investigation preamble with the entity description and log
 * pattern analysis, the significant-events guide (only when the log pattern description
 * reports at least one interesting pattern), the reasoning instructions and the expected
 * reply format. Sections are separated by a blank line so that every markdown header
 * starts on its own line.
 *
 * @returns The report text returned by the LLM.
 */
export async function writeEntityInvestigationReport({
  inferenceClient,
  connectorId,
  entity,
  contextForEntityInvestigation,
  entityDescription,
  logPatternDescription,
}: {
  inferenceClient: InferenceClient;
  connectorId: string;
  entity: Record<string, string>;
  contextForEntityInvestigation: string;
  entityDescription: string;
  logPatternDescription: LogPatternDescription;
}): Promise<string> {
  const system = RCA_SYSTEM_PROMPT_BASE;

  const shouldGenerateTimeline = logPatternDescription.interestingPatternCount > 0;

  const sections: string[] = [
    `${getInvestigateEntityTaskPrompt({ entity, contextForEntityInvestigation })}

  ## Entity description

  ${toBlockquote(entityDescription)}

  ## Log pattern analysis

  ${toBlockquote(logPatternDescription.content)}

  # Current task

  Your current task is to write a report the investigation into ${formatEntity(entity)}.
  The log pattern analysis and entity description will be added to your report (at the
  top), so you don't need to repeat anything in it.`,
  ];

  if (shouldGenerateTimeline) {
    sections.push(RCA_PROMPT_SIGNIFICANT_EVENTS);
  }

  sections.push(`## Context and reasoning

  Reason about the role that the entity plays in the investigation, given the context.
  mention evidence (hard pieces of data) when reasoning.

  Do not suggest next steps - this will happen in a follow-up task.`);

  sections.push(
    shouldGenerateTimeline
      ? `## Format

    Your reply should only contain two sections:

    - Timeline of significant events
    - Context and reasoning
    `
      : `## Format
    Your reply should only contain one section:
    - Context and reasoning
    `
  );

  // Previously the pieces were appended back to back, which glued headers to the end of
  // the preceding sentence ("...in it.## Context and reasoning").
  const input = sections.join('\n\n');

  const response = await inferenceClient.output({
    id: 'generate_entity_report',
    connectorId,
    input,
    system,
  });

  return response.content;
}

// ---------------------------------------------------------------------------------------
// tasks/write_final_report/index.ts
// ---------------------------------------------------------------------------------------

/** Guide for writing the final RCA report; appended to the base system prompt. */
const SYSTEM_PROMPT_ADDENDUM = `
# Guide: Writing a Root Cause Analysis (RCA) Report

A Root Cause Analysis (RCA) report is the final step in a thorough
investigation. Its purpose is to provide a clear, evidence-backed explanation of
the underlying cause of an issue, as well as the impact. Even if no definitive
root cause is identified, the report should reflect the findings, the hypotheses
considered, and why certain assumptions were rejected. This guide will help
structure an RCA that distinguishes between cause and effect, organizes
evidence, and presents a timeline of key events.

---

## 1. Introduction

Start by summarizing the reason for the investigation. Provide a brief overview
of the incident, the affected services or entities, and the initial alerts or
issues that triggered the investigation.

- **What prompted the investigation?**
- **Which entities were investigated?**
- **Was there a specific hypothesis proposed at the outset?**

### Example:
- **Overview:** This RCA report investigates the elevated error rates in
\`myservice\` and its downstream dependencies, first identified through an SLO
breach for the \`/api/submit\` endpoint. The investigation considered multiple
entities and possible causes, including resource exhaustion and upstream service
failures.

---

## 2. Investigation Summary

Summarize the key steps of the investigation, outlining:
- **What hypotheses were proposed and why.**
- **Which entities were investigated (e.g., \`myservice\`, \`myotherservice\`,
\`notification-service\`).**
- **Which hypotheses were discarded and why.**

For each hypothesis, present the supporting or contradicting evidence.

- **Strong Indicators:** Clear, repeated evidence pointing toward or against a
hypothesis.
- **Weak Indicators:** Inconsistent or ambiguous data that did not provide
conclusive answers.

#### Example Format:
- **Hypothesis 1:** Resource exhaustion in \`myservice\` caused elevated error
rates.
  - **Evidence:**
    - **Strong:** Memory usage exceeded 90% during the incident.
    - **Weak:** CPU usage remained stable, making resource exhaustion a partial
explanation.

- **Hypothesis 2:** Upstream latency from \`myotherservice\` caused delays.
  - **Evidence:**
    - **Strong:** API logs showed frequent retries and timeouts from
\`myotherservice\`.
    - **Weak:** No errors were observed in \`myotherservice\` logs, suggesting an
issue isolated to \`myservice\`.

---

## 3. Cause and Effect

Differentiate between the **cause** (what initiated the issue) and the
**effect** (the impact or symptoms seen across the system). The cause should
focus on the root, while the effect describes the wider system response or
failure.

- **Root Cause:** Identify the underlying problem, supported by strong evidence.
If no root cause is found, clearly state that the investigation did not lead to
a conclusive root cause.

- **Impact:** Describe the downstream effects on other services, performance
degradation, or SLO violations.

#### Example:
- **Cause:** The root cause of the elevated error rate was identified as a
memory leak in \`myservice\` that gradually led to resource exhaustion.
- **Effect:** This led to elevated latency and increased error rates at the
\`/api/submit\` endpoint, impacting downstream services like
\`notification-service\` that rely on responses from \`myservice\`.

---

## 4. Evidence for Root Cause

Present a structured section summarizing all the evidence that supports the
identified root cause. If no root cause is identified, outline the most
significant findings that guided or limited the investigation.

- **Log Patterns:** Describe any abnormal log patterns observed, including
notable change points.
- **Alerts and SLOs:** Mention any alerts or breached SLOs that were triggered,
including their relevance to the investigation.
- **Data Analysis:** Include any data trends or patterns that were analyzed
(e.g., resource usage spikes, network traffic).

#### Example:
- **Memory Usage:** Logs showed a steady increase in memory consumption starting
at 10:00 AM, peaking at 12:00 PM, where memory usage surpassed 90%, triggering
the alert.
- **Error Rate Logs:** Error rates for \`/api/submit\` began increasing around
11:30 AM, correlating with the memory pressure in \`myservice\`.
- **API Logs:** \`myotherservice\` API logs showed no internal errors, ruling out
an upstream dependency as the primary cause.

---

## 5. Proposed Impact

Even if the root cause is clear, it is important to mention the impact of the
issue on the system, users, and business operations. This includes:
- **Affected Services:** Identify the services impacted (e.g., downstream
dependencies).
- **Performance Degradation:** Describe any SLO breaches or performance
bottlenecks.
- **User Impact:** Explain how users or clients were affected (e.g., higher
latency, failed transactions).

#### Example:
- **Impact:** The memory leak in \`myservice\` caused service degradation over a
2-hour window. This affected \`/api/submit\`, causing delays and failed
requests, ultimately impacting user-facing services relying on that endpoint.

---

## 6. Timeline of Significant Events

${RCA_PROMPT_TIMELINE_GUIDE}

---

## 7. Conclusion and Next Steps

Summarize the conclusions of the investigation:
- If a root cause was identified, confirm it with the strongest supporting
evidence.
- If no root cause was found, state that clearly and suggest areas for further
investigation or monitoring.

Finally, outline the next steps:
- **Fixes or Mitigations:** Recommend any immediate actions (e.g., patch
deployment, configuration changes).
- **Monitoring Improvements:** Suggest new alerts or monitoring metrics based on
lessons learned.
- **Further Investigations:** If necessary, propose any follow-up investigations
to gather more evidence.

#### Example:
- **Conclusion:** The root cause of the incident was a memory leak in
\`myservice\`, leading to resource exhaustion and elevated error rates at
\`/api/submit\`. The leak has been patched, and monitoring has been improved to
detect memory spikes earlier.
- **Next Steps:** Monitor memory usage for the next 24 hours to ensure no
recurrence. Investigate adding a memory ceiling for \`myservice\` to prevent
future resource exhaustion.`;

/**
 * Asks the LLM (`write_final_report` output task) to write the final RCA report from
 * the stringified summaries of the process so far.
 *
 * @returns The report text returned by the LLM.
 */
export async function writeFinalReport({
  rcaContext,
}: {
  rcaContext: RootCauseAnalysisContext;
}): Promise<string> {
  const { inferenceClient, connectorId } = rcaContext;

  const event = await inferenceClient.output({
    id: 'write_final_report',
    connectorId,
    system: `${RCA_SYSTEM_PROMPT_BASE}

      ${SYSTEM_PROMPT_ADDENDUM}`,
    input: `Write the RCA report, based on the observations.

      ${stringifySummaries(rcaContext)}`,
  });

  return event.content;
}

// ---------------------------------------------------------------------------------------
// tools.ts
// ---------------------------------------------------------------------------------------

/**
 * Tool definitions offered to the LLM during the RCA process, keyed by tool name:
 * request an observation, end the process (final report), or investigate an entity.
 * Declared `as const` so the literal schemas are preserved in the type.
 */
export const RCA_TOOLS = {
  [RCA_OBSERVE_TOOL_NAME]: {
    description: `Request an observation from another agent on
    the results of the returned investigations. The title should
    cover key new observations from the initial context or
    completed investigations, not anything about next steps.`,
    schema: {
      type: 'object',
      properties: {
        title: {
          type: 'string',
          description: `A short title w/ the key new observations that will be displayed on top of a collapsible panel.`,
        },
      },
      required: ['title'],
    },
  },
  [RCA_END_PROCESS_TOOL_NAME]: {
    description: `End the RCA process by requesting a
    written report from another agent`,
    schema: {
      type: 'object',
      properties: {
        endProcess: {
          type: 'boolean',
        },
      },
      required: ['endProcess'],
    },
  },
  [RCA_INVESTIGATE_ENTITY_TOOL_NAME]: {
    description: `Investigate an entity`,
    schema: {
      type: 'object',
      properties: {
        context: {
          type: 'string',
          description: `Context that will be used in the investigation of the entity. Mention the initial context
          of the investigation, a very short summary of the last observation if applicable, and pieces
          of data that can be relevant for the investigation into the entity, such as timestamps or
          keywords`,
        },
        entity: {
          type: 'object',
          description: `The entity you want to investigate, such as a service. Use
          the Elasticsearch field names and values. For example, for services, use
          the following structure: ${JSON.stringify({
            entity: { field: 'service.name', value: 'opbeans-java' },
          })}`,
          properties: {
            field: {
              type: 'string',
            },
            value: {
              type: 'string',
            },
          },
          required: ['field', 'value'],
        },
      },
      required: ['context', 'entity'],
    },
  },
} as const;
+ +export type EndProcessToolMessage = ToolMessage< + typeof RCA_END_PROCESS_TOOL_NAME, + { + report: string; + timeline: SignificantEventsTimeline; + } +>; + +export type ObservationToolMessage = ToolMessage< + typeof RCA_OBSERVE_TOOL_NAME, + { + content: string; + }, + ObservationStepSummary +>; + +export type InvestigateEntityToolMessage = ToolMessage< + typeof RCA_INVESTIGATE_ENTITY_TOOL_NAME, + Pick, + { attachments: EntityInvestigation['attachments'] } +>; + +export type ToolErrorMessage = ToolMessage< + 'error', + { + error: { + message: string; + }; + } +>; + +export type RootCauseAnalysisEvent = + | RootCauseAnalysisToolMessage + | ToolErrorMessage + | UserMessage + | AssistantMessageOf<{ + tools: typeof RCA_TOOLS; + toolChoice?: ToolChoice; + }>; + +export type RootCauseAnalysisToolRequest< + TToolName extends keyof typeof RCA_TOOLS = keyof typeof RCA_TOOLS +> = ToolCallsOf<{ + tools: Pick; +}>['toolCalls'][number]; + +export type RootCauseAnalysisToolMessage = + | EndProcessToolMessage + | InvestigateEntityToolMessage + | ObservationToolMessage; + +export interface RootCauseAnalysisContext { + initialContext: string; + start: number; + end: number; + events: RootCauseAnalysisEvent[]; + indices: { + logs: string[]; + traces: string[]; + sloSummaries: string[]; + }; + inferenceClient: InferenceClient; + tokenLimit: number; + connectorId: string; + esClient: ObservabilityElasticsearchClient; + alertsClient: AlertsClient; + rulesClient: RulesClient; + logger: Logger; + spaceId: string; + observabilityAIAssistantClient: ObservabilityAIAssistantClient; +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/call_tools.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/call_tools.ts new file mode 100644 index 0000000000000..046d6d401c33a --- /dev/null +++ 
b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/call_tools.ts @@ -0,0 +1,177 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { + Message, + ToolDefinition, + ToolChoice, + ToolCallsOf, + withoutChunkEvents, + withoutTokenCountEvents, + ToolMessage, + MessageOf, + MessageRole, +} from '@kbn/inference-common'; +import { InferenceClient } from '@kbn/inference-plugin/server'; +import { Logger } from '@kbn/logging'; +import { + defer, + last, + merge, + Observable, + of, + OperatorFunction, + share, + switchMap, + toArray, +} from 'rxjs'; + +interface CallToolOptions extends CallToolTools { + system: string; + messages: Message[]; + inferenceClient: InferenceClient; + connectorId: string; + logger: Logger; +} + +interface CallToolTools { + tools: Record; + toolChoice?: ToolChoice; +} + +type CallbackOf< + TCallToolTools extends CallToolTools, + TEmittedMessage extends Message +> = (parameters: { + messages: Message[]; + toolCalls: ToolCallsOf['toolCalls']; +}) => Observable; + +type GetNextRequestCallback = ({ + messages, + system, +}: { + messages: Message[]; + system: string; +}) => { system: string; messages: Message[] } & TCallToolTools; + +export function callTools( + { system, messages, inferenceClient, connectorId, tools, toolChoice, logger }: TCallToolOptions, + callback: CallbackOf +): Observable>; + +export function callTools< + TCallToolOptions extends Omit = never, + TCallToolTools extends CallToolTools = never, + TEmittedMessage extends Message = never +>( + options: TCallToolOptions, + getNextRequest: GetNextRequestCallback, + callback: CallbackOf +): Observable; + +export function callTools( + { system, messages, inferenceClient, connectorId, tools, toolChoice, logger }: 
CallToolOptions, + ...callbacks: + | [GetNextRequestCallback, CallbackOf] + | [CallbackOf] +): Observable { + const callback = callbacks.length === 2 ? callbacks[1] : callbacks[0]; + + const getNextRequest = + callbacks.length === 2 + ? callbacks[0] + : (next: { messages: Message[]; system: string }) => { + return { + ...next, + tools, + toolChoice, + }; + }; + + const nextRequest = getNextRequest({ system, messages }); + + const chatComplete$ = defer(() => + inferenceClient.chatComplete({ + connectorId, + stream: true, + ...nextRequest, + }) + ); + + const asCompletedMessages$ = chatComplete$.pipe( + withoutChunkEvents(), + withoutTokenCountEvents(), + switchMap((event) => { + return of({ + role: MessageRole.Assistant as const, + content: event.content, + toolCalls: event.toolCalls, + }); + }) + ); + + const withToolResponses$ = asCompletedMessages$ + .pipe( + switchMap((message) => { + if (message.toolCalls.length) { + return merge( + of(message), + callback({ toolCalls: message.toolCalls, messages: messages.concat(message) }) + ); + } + return of(message); + }) + ) + .pipe(handleNext()); + + return withToolResponses$; + + function handleNext(): OperatorFunction { + return (source$) => { + const shared$ = source$.pipe(share()); + + const next$ = merge( + shared$, + shared$.pipe( + toArray(), + last(), + switchMap((nextMessages) => { + logger.debug(() => + JSON.stringify( + nextMessages.map((message) => { + return { + role: message.role, + toolCalls: 'toolCalls' in message ? message.toolCalls : undefined, + toolCallId: 'toolCallId' in message ? 
message.toolCallId : undefined, + }; + }) + ) + ); + + if (nextMessages[nextMessages.length - 1].role !== MessageRole.Assistant) { + const options: CallToolOptions = { + system, + connectorId, + inferenceClient, + messages: messages.concat(nextMessages), + tools, + toolChoice, + logger, + }; + const after$ = callTools(options, getNextRequest, callback); + return after$; + } + return of(); + }) + ) + ); + + return next$; + }; + } +} diff --git a/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/chunk_output_calls.ts b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/chunk_output_calls.ts new file mode 100644 index 0000000000000..0a174b6451604 --- /dev/null +++ b/x-pack/packages/observability/observability_ai/observability_ai_server/root_cause_analysis/util/chunk_output_calls.ts @@ -0,0 +1,97 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { encode } from 'gpt-tokenizer'; +import { uniqueId } from 'lodash'; + +interface TextWithId { + id: string; + text: string; +} + +interface Parameters { + system: string; + input: string; + tokenLimit: number; +} + +interface ChunkedOutputRequest { + input: string; + system: string; +} + +export function chunkOutputCalls({}: Parameters & { texts: string[] }): Array< + ChunkedOutputRequest & { + texts: string[]; + } +>; + +export function chunkOutputCalls({}: Parameters & { texts: TextWithId[] }): Array< + ChunkedOutputRequest & { + texts: TextWithId[]; + } +>; + +export function chunkOutputCalls({ + system, + input, + texts, + tokenLimit, +}: Parameters & { + texts: string[] | TextWithId[]; +}) { + const inputAndSystemPromptCount = encode(system).length + encode(input).length; + + if (!texts.length) { + return [{ system, input, texts: [] }]; + } + + const textWithIds = texts.map((text) => { + if (typeof text === 'string') { + return { + id: uniqueId(), + text, + }; + } + return text; + }); + + const textsWithCount = textWithIds.map(({ text, id }) => ({ + tokenCount: encode(text).length, + text, + id, + })); + + const chunks: Array<{ tokenCount: number; texts: TextWithId[] }> = []; + + textsWithCount.forEach(({ text, id, tokenCount }) => { + let chunkWithRoomLeft = chunks.find((chunk) => { + return chunk.tokenCount + tokenCount <= tokenLimit; + }); + + if (!chunkWithRoomLeft) { + chunkWithRoomLeft = { texts: [], tokenCount: inputAndSystemPromptCount }; + chunks.push(chunkWithRoomLeft); + } + chunkWithRoomLeft.texts.push({ text, id }); + chunkWithRoomLeft.tokenCount += tokenCount; + }); + + const hasTextWithIds = texts.some((text) => typeof text !== 'string'); + + return chunks.map((chunk) => { + const textsForChunk = hasTextWithIds + ? chunk.texts + : chunk.texts.map((text) => (typeof text === 'string' ? 
/**
 * Renders an entity as a compact, human-readable identifier.
 *
 * Every field/value pair becomes `field:value`; pairs are joined with `/` in
 * the object's own enumeration order. An entity without fields yields an
 * empty string.
 *
 * @param entity - Map of field name to field value.
 * @returns The `/`-separated list of `field:value` pairs.
 */
export function formatEntity(entity: Record<string, string>) {
  const pairs: string[] = [];

  for (const [fieldName, fieldValue] of Object.entries(entity)) {
    pairs.push(`${fieldName}:${fieldValue}`);
  }

  return pairs.join('/');
}
/**
 * Serializes scored knowledge base entries into a markdown section for a prompt.
 *
 * Only entries with a score of 3 or higher are considered relevant. When no
 * relevant entry remains (either because `entries` is empty or because every
 * entry scored below the threshold), a section stating that nothing was found
 * is returned, so the prompt never announces a list that turns out to be empty.
 *
 * Each relevant entry is rendered as a heading holding its id (as inline
 * code), its score and whether its text was truncated, followed by the entry
 * text as a blockquote.
 *
 * @param entries - Scored entries; `id`, `score`, `truncated` and `text` are read.
 * @returns The markdown text of the knowledge base section.
 */
export function serializeKnowledgeBaseEntries(entries: ScoredKnowledgeBaseEntry[]) {
  // Filter first: the "nothing found" message must also cover the case where
  // entries exist but none of them is relevant enough to be listed.
  const relevantEntries = entries.filter((entry) => entry.score >= 3);

  if (!relevantEntries.length) {
    return `## Knowledge base

    No relevant knowledge base entries were found.
    `;
  }

  const serializedEntries = relevantEntries.map(
    // The id is wrapped in a *closed* inline-code span; the closing backtick
    // was previously missing, which left the markdown heading malformed.
    (entry) => `## Entry \`${entry.id}\` (score: ${entry.score}, ${
      entry.truncated ? `truncated` : `not truncated`
    })

    ${toBlockquote(entry.text)}`
  );

  return `## Knowledge base

  The following relevant entries were found in the knowledge base

  ${serializedEntries.join('\n\n')}`;
}
/**
 * Turns a piece of text into a markdown blockquote.
 *
 * The input is split on `\n` and every resulting line, including empty ones,
 * is prefixed with `> `. The lines are then joined with `\n` again, so the
 * number of lines is unchanged.
 *
 * @param input - Text to quote.
 * @returns The quoted text.
 */
export function toBlockquote(input: string): string {
  const quotedLines: string[] = [];

  for (const line of input.split('\n')) {
    quotedLines.push(`> ${line}`);
  }

  return quotedLines.join('\n');
}
/** Lookup result for an entity that has data in the searched indices. */
interface EntityExistsResultExists {
  exists: true;
  entity: Record<string, string>;
}

/** Lookup result for an entity without data, plus similarly named alternatives. */
interface EntityExistsResultDoesNotExist {
  exists: false;
  entity: Record<string, string>;
  suggestions: string[];
}

type EntityExistsResult = EntityExistsResultExists | EntityExistsResultDoesNotExist;

/**
 * Validates the "investigate entity" tool calls requested by the LLM.
 *
 * Tool calls for other tools are ignored. For every requested entity two
 * checks are made:
 *
 * 1. Data for the entity must exist in the logs and traces indices. If not,
 *    fuzzy-matched alternatives are looked up and offered as suggestions.
 * 2. The entity must not have been investigated before, i.e. no earlier
 *    investigate-entity tool message in `events` has an equal entity.
 *
 * @param rcaContext - Supplies `events`, `esClient`, `indices`, `start` and `end`.
 * @param toolCalls - All tool calls of the assistant message being validated.
 * @returns One error message per failed check; an empty array when every
 *   requested investigation is valid or no investigation was requested.
 */
export async function validateInvestigateEntityToolCalls({
  rcaContext,
  toolCalls,
}: {
  rcaContext: Pick<RootCauseAnalysisContext, 'esClient' | 'indices' | 'start' | 'end' | 'events'>;
  toolCalls: RootCauseAnalysisToolRequest[];
}): Promise<string[]> {
  const { events, esClient, indices, start, end } = rcaContext;

  const previouslyInvestigatedEntities = events
    .filter(
      (event): event is InvestigateEntityToolMessage =>
        event.role === MessageRole.Tool && event.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
    )
    .map((toolResponse) => toolResponse.response.entity);

  const investigateEntityToolCalls = toolCalls.filter(
    (
      toolCall
    ): toolCall is ToolCallsOf<{
      tools: Pick<typeof RCA_TOOLS, typeof RCA_INVESTIGATE_ENTITY_TOOL_NAME>;
    }>['toolCalls'][number] => toolCall.function.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME
  );

  if (!investigateEntityToolCalls.length) {
    return [];
  }

  // The tool takes `{ field, value }`; the rest of the RCA code works with
  // `{ [field]: value }` records, so convert once up front.
  const entitiesToInvestigate = investigateEntityToolCalls.map((toolCall) => {
    const { entity: entityToInvestigate } = toolCall.function.arguments;
    return {
      [entityToInvestigate.field]: entityToInvestigate.value,
    };
  });

  const searchIndices = indices.logs.concat(indices.traces);

  const entityExistsResponses: EntityExistsResult[] = await Promise.all(
    entitiesToInvestigate.map(async (entity): Promise<EntityExistsResult> => {
      // Existence probe only: no hits are fetched and counting stops at one.
      // NOTE(review): with a 1ms timeout Elasticsearch may give up before any
      // document is seen, which reads as "does not exist" here - confirm that
      // this is intended.
      const response = await esClient.search('find_data_for_entity', {
        track_total_hits: 1,
        size: 0,
        timeout: '1ms',
        index: searchIndices,
        query: {
          bool: {
            filter: [...entityQuery(entity)],
          },
        },
      });

      if (response.hits.total.value > 0) {
        return { entity, exists: true };
      }

      const suggestions = await getEntitiesByFuzzySearch({
        start,
        end,
        esClient,
        index: searchIndices,
        entity,
      });

      return { entity, exists: false, suggestions };
    })
  );

  const alreadyInvestigatedEntities = entitiesToInvestigate.filter((entity) => {
    return previouslyInvestigatedEntities.some((prevEntity) => isEqual(entity, prevEntity));
  });

  const errors = [
    ...entityExistsResponses
      .filter(
        (entityExistsResult): entityExistsResult is EntityExistsResultDoesNotExist =>
          !entityExistsResult.exists
      )
      .map(({ suggestions, entity }) => {
        // Only offer alternatives when there are any; an empty list used to
        // produce the dangling question "Did you mean one of ?".
        const hint = suggestions.length ? ` Did you mean one of ${suggestions.join(', ')}?` : '';
        return `Entity ${formatEntity(entity)} does not exist.${hint}`;
      }),
    ...alreadyInvestigatedEntities.map((entity) => {
      return `Entity ${formatEntity(entity)} was already investigated before.`;
    }),
  ];

  return errors;
}
/**
 * Suggests existing values that resemble the value of a single-field entity.
 *
 * The terms of the entity's field are enumerated (up to 10,000) from indices
 * that hold data in the given time range, and then fuzzy-matched against the
 * requested value with Fuse.js. The five best matches are returned, best
 * match first.
 *
 * @param esClient - Observability Elasticsearch client; its underlying
 *   `client.termsEnum` API is used.
 * @param entity - Entity as a `{ [field]: value }` record. Only entities with
 *   exactly one field are supported.
 * @param start - Start of the time range, in epoch milliseconds.
 * @param end - End of the time range, in epoch milliseconds.
 * @param index - Index name(s) or pattern(s) to enumerate terms from.
 * @returns Up to five candidate values, or an empty array when the entity
 *   does not consist of exactly one field.
 */
export async function getEntitiesByFuzzySearch({
  esClient,
  entity,
  start,
  end,
  index,
}: {
  esClient: ObservabilityElasticsearchClient;
  entity: Record<string, string>;
  start: number;
  end: number;
  index: string | string[];
}): Promise<string[]> {
  const entityFields = Object.entries(entity);

  // Terms can only be enumerated for a single field. This also rejects an
  // empty entity, which previously fell through and threw when the first
  // (missing) field/value pair was destructured.
  if (entityFields.length !== 1) {
    return [];
  }

  const [[field, value]] = entityFields;

  const { terms } = await esClient.client.termsEnum({
    index: castArray(index).join(','),
    field,
    index_filter: {
      range: {
        '@timestamp': {
          gte: new Date(start).toISOString(),
          lte: new Date(end).toISOString(),
        },
      },
    },
    size: 10_000,
  });

  const results = new Fuse(terms, { includeScore: true, threshold: 0.75 }).search(value);

  // Ascending score order puts the closest matches first.
  return orderBy(results, (result) => result.score, 'asc')
    .slice(0, 5)
    .map((result) => result.item);
}
+ */ + +import { + AggregationsCategorizeTextAggregation, + AggregationsDateHistogramAggregation, + AggregationsMaxAggregation, + AggregationsMinAggregation, + AggregationsTopHitsAggregation, + QueryDslQueryContainer, +} from '@elastic/elasticsearch/lib/api/types'; +import { categorizationAnalyzer } from '@kbn/aiops-log-pattern-analysis/categorization_analyzer'; +import { ChangePointType } from '@kbn/es-types/src'; +import { pValueToLabel } from '@kbn/observability-utils-common/ml/p_value_to_label'; +import { calculateAuto } from '@kbn/calculate-auto'; +import { omit, orderBy, uniqBy } from 'lodash'; +import moment from 'moment'; +import { ObservabilityElasticsearchClient } from '../es/client/create_observability_es_client'; +import { kqlQuery } from '../es/queries/kql_query'; +import { rangeQuery } from '../es/queries/range_query'; + +interface FieldPatternResultBase { + field: string; + count: number; + pattern: string; + regex: string; + sample: string; + firstOccurrence: string; + lastOccurrence: string; + highlight: Record; + metadata: Record; +} + +interface FieldPatternResultChanges { + timeseries: Array<{ x: number; y: number }>; + change: { + timestamp?: string; + significance: 'high' | 'medium' | 'low' | null; + type: ChangePointType; + change_point?: number; + p_value?: number; + }; +} + +export type FieldPatternResult = + FieldPatternResultBase & (TChanges extends true ? 
FieldPatternResultChanges : {}); + +export type FieldPatternResultWithChanges = FieldPatternResult; + +interface CategorizeTextOptions { + query: QueryDslQueryContainer; + metadata: string[]; + esClient: ObservabilityElasticsearchClient; + samplingProbability: number; + fields: string[]; + index: string | string[]; + useMlStandardTokenizer: boolean; + size: number; + start: number; + end: number; +} +// eslint-disable-next-line @typescript-eslint/consistent-type-definitions +type CategorizeTextSubAggregations = { + sample: { top_hits: AggregationsTopHitsAggregation }; + minTimestamp: { min: AggregationsMinAggregation }; + maxTimestamp: { max: AggregationsMaxAggregation }; +}; + +interface CategorizeTextAggregationResult { + categorize_text: AggregationsCategorizeTextAggregation; + aggs: CategorizeTextSubAggregations & + ( + | {} + | { + timeseries: { date_histogram: AggregationsDateHistogramAggregation }; + changes: { change_point: { buckets_path: string } }; + } + ); +} + +export async function runCategorizeTextAggregation< + TChanges extends boolean | undefined = undefined +>( + options: CategorizeTextOptions & { includeChanges?: TChanges } +): Promise>>; + +export async function runCategorizeTextAggregation({ + esClient, + fields, + metadata, + index, + query, + samplingProbability, + useMlStandardTokenizer, + includeChanges, + size, + start, + end, +}: CategorizeTextOptions & { includeChanges?: boolean }): Promise< + Array> +> { + const aggs = Object.fromEntries( + fields.map((field): [string, CategorizeTextAggregationResult] => [ + field, + { + categorize_text: { + field, + min_doc_count: 1, + size, + categorization_analyzer: useMlStandardTokenizer + ? 
{ + tokenizer: 'ml_standard', + char_filter: [ + { + type: 'pattern_replace', + pattern: '\\\\n', + replacement: '', + } as unknown as string, + ], + } + : categorizationAnalyzer, + }, + aggs: { + minTimestamp: { + min: { + field: '@timestamp', + }, + }, + maxTimestamp: { + max: { + field: '@timestamp', + }, + }, + ...(includeChanges + ? { + timeseries: { + date_histogram: { + field: '@timestamp', + min_doc_count: 0, + extended_bounds: { + min: start, + max: end, + }, + fixed_interval: `${calculateAuto + .atLeast(30, moment.duration(end - start, 'ms'))! + .asMilliseconds()}ms`, + }, + }, + changes: { + change_point: { + buckets_path: 'timeseries>_count', + }, + }, + } + : {}), + sample: { + top_hits: { + size: 1, + _source: false, + fields: [field, ...metadata], + sort: { + _score: { + order: 'desc', + }, + }, + highlight: { + fields: { + '*': {}, + }, + }, + }, + }, + }, + }, + ]) + ); + + const response = await esClient.search('get_log_patterns', { + index, + size: 0, + track_total_hits: false, + query: { + bool: { + filter: [query, ...rangeQuery(start, end)], + }, + }, + aggregations: { + sampler: { + random_sampler: { + probability: samplingProbability, + }, + aggs, + }, + }, + }); + + if (!response.aggregations) { + return []; + } + + const fieldAggregates = omit(response.aggregations.sampler, 'seed', 'doc_count', 'probability'); + + return Object.entries(fieldAggregates).flatMap(([fieldName, aggregate]) => { + const buckets = aggregate.buckets; + + return buckets.map((bucket) => { + return { + field: fieldName, + count: bucket.doc_count, + pattern: bucket.key, + regex: bucket.regex, + sample: bucket.sample.hits.hits[0].fields![fieldName][0] as string, + highlight: bucket.sample.hits.hits[0].highlight ?? {}, + metadata: bucket.sample.hits.hits[0].fields!, + firstOccurrence: new Date(bucket.minTimestamp.value!).toISOString(), + lastOccurrence: new Date(bucket.maxTimestamp.value!).toISOString(), + ...('timeseries' in bucket + ? 
{ + timeseries: bucket.timeseries.buckets.map((dateBucket) => ({ + x: dateBucket.key, + y: dateBucket.doc_count, + })), + change: Object.entries(bucket.changes.type).map( + ([changePointType, change]): FieldPatternResultChanges['change'] => { + return { + type: changePointType as ChangePointType, + significance: + change.p_value !== undefined ? pValueToLabel(change.p_value) : null, + change_point: change.change_point, + p_value: change.p_value, + timestamp: + change.change_point !== undefined + ? bucket.timeseries.buckets[change.change_point].key_as_string + : undefined, + }; + } + )[0], + } + : {}), + }; + }); + }); +} + +interface LogPatternOptions { + esClient: ObservabilityElasticsearchClient; + start: number; + end: number; + index: string | string[]; + kuery: string; + metadata?: string[]; + fields: string[]; +} + +export async function getLogPatterns( + options: LogPatternOptions & { includeChanges?: TChanges } +): Promise>>; + +export async function getLogPatterns({ + esClient, + start, + end, + index, + kuery, + includeChanges, + metadata = [], + fields, +}: LogPatternOptions & { includeChanges?: boolean }): Promise>> { + const fieldCapsResponse = await esClient.fieldCaps('get_field_caps_for_log_pattern_analysis', { + fields, + index_filter: { + bool: { + filter: [...rangeQuery(start, end)], + }, + }, + index, + types: ['text', 'match_only_text'], + }); + + const fieldsInFieldCaps = Object.keys(fieldCapsResponse.fields); + + if (!fieldsInFieldCaps.length) { + return []; + } + + const totalDocsResponse = await esClient.search('get_total_docs_for_log_pattern_analysis', { + index, + size: 0, + track_total_hits: true, + query: { + bool: { + filter: [...kqlQuery(kuery), ...rangeQuery(start, end)], + }, + }, + }); + + const totalHits = totalDocsResponse.hits.total.value; + + if (totalHits === 0) { + return []; + } + + let samplingProbability = 100_000 / totalHits; + + if (samplingProbability >= 0.5) { + samplingProbability = 1; + } + + const fieldGroups = 
includeChanges + ? fieldsInFieldCaps.map((field) => [field]) + : [fieldsInFieldCaps]; + + const allPatterns = await Promise.all( + fieldGroups.map(async (fieldGroup) => { + const topMessagePatterns = await runCategorizeTextAggregation({ + esClient, + index, + fields: fieldGroup, + query: { + bool: { + filter: kqlQuery(kuery), + }, + }, + samplingProbability, + useMlStandardTokenizer: false, + size: 100, + start, + end, + includeChanges, + metadata, + }); + + if (topMessagePatterns.length === 0) { + return []; + } + + const patternsToExclude = topMessagePatterns.filter((pattern) => { + // elasticsearch will barf because the query is too complex. this measures + // the # of groups to capture for a measure of complexity. + const complexity = pattern.regex.match(/(\.\+\?)|(\.\*\?)/g)?.length ?? 0; + return ( + complexity <= 25 && + // anything less than 50 messages should be re-processed with the ml_standard tokenizer + pattern.count > 50 + ); + }); + + const rareMessagePatterns = await runCategorizeTextAggregation({ + esClient, + index, + fields: fieldGroup, + start, + end, + query: { + bool: { + filter: kqlQuery(kuery), + must_not: [ + ...patternsToExclude.map((pattern) => { + return { + bool: { + filter: [ + { + regexp: { + [pattern.field]: { + value: pattern.regex, + }, + }, + }, + { + match: { + [pattern.field]: { + query: pattern.pattern, + fuzziness: 0, + operator: 'and' as const, + auto_generate_synonyms_phrase_query: false, + }, + }, + }, + ], + }, + }; + }), + ], + }, + }, + size: 1000, + includeChanges, + samplingProbability: 1, + useMlStandardTokenizer: true, + metadata, + }); + + return [...patternsToExclude, ...rareMessagePatterns]; + }) + ); + + return uniqBy( + orderBy(allPatterns.flat(), (pattern) => pattern.count, 'desc'), + (pattern) => pattern.sample + ); +} diff --git a/x-pack/packages/observability/observability_utils/observability_utils_server/tsconfig.json 
b/x-pack/packages/observability/observability_utils/observability_utils_server/tsconfig.json index f6dd781184b86..33d7e75322f00 100644 --- a/x-pack/packages/observability/observability_utils/observability_utils_server/tsconfig.json +++ b/x-pack/packages/observability/observability_utils/observability_utils_server/tsconfig.json @@ -24,6 +24,8 @@ "@kbn/alerting-plugin", "@kbn/rule-registry-plugin", "@kbn/rule-data-utils", + "@kbn/aiops-log-pattern-analysis", + "@kbn/calculate-auto", "@kbn/utility-types", "@kbn/task-manager-plugin", ] diff --git a/x-pack/platform/packages/shared/ai-infra/inference-common/index.ts b/x-pack/platform/packages/shared/ai-infra/inference-common/index.ts index 4b5ef3a5cfda1..603192fb96db4 100644 --- a/x-pack/platform/packages/shared/ai-infra/inference-common/index.ts +++ b/x-pack/platform/packages/shared/ai-infra/inference-common/index.ts @@ -13,6 +13,9 @@ export { type AssistantMessage, type ToolMessage, type UserMessage, + type MessageOf, + type AssistantMessageOf, + type ToolMessageOf, type ToolSchemaType, type FromToolSchema, type ToolSchema, diff --git a/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts b/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts index 3daa898ab2e1a..cedc8297d75bc 100644 --- a/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts +++ b/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/index.ts @@ -33,6 +33,9 @@ export { type AssistantMessage, type UserMessage, type ToolMessage, + type AssistantMessageOf, + type MessageOf, + type ToolMessageOf, } from './messages'; export { type ToolSchema, type ToolSchemaType, type FromToolSchema } from './tool_schema'; export { diff --git a/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/messages.ts b/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/messages.ts index ca74b094e0a3b..43d03cf130c01 
100644 --- a/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/messages.ts +++ b/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/messages.ts @@ -5,7 +5,7 @@ * 2.0. */ -import type { ToolCall } from './tools'; +import type { ToolCall, ToolCallsOf, ToolNamesOf, ToolOptions, ToolResponsesOf } from './tools'; /** * Enum for all possible {@link Message} roles. @@ -52,17 +52,32 @@ export type AssistantMessage = MessageBase & { /** * Represents a tool invocation result, following a request from the LLM to execute a tool. */ -export type ToolMessage | unknown> = - MessageBase & { - /** - * The call id matching the {@link ToolCall} this tool message is for. - */ - toolCallId: string; - /** - * The response from the tool invocation. - */ - response: TToolResponse; - }; +export type ToolMessage< + TName extends string = string, + TToolResponse extends Record | unknown = Record | unknown, + TToolData extends Record | undefined = Record | undefined +> = MessageBase & { + /** + * The name of the tool called. Used for refining the type of the response. + */ + name: TName; + /** + * The call id matching the {@link ToolCall} this tool message is for. + */ + toolCallId: string; + /** + * The response from the tool invocation. + */ + response: TToolResponse; +} & (TToolData extends undefined + ? {} + : { + /** + * Additional data from the tool invocation, that is not sent to the LLM + * but can be used to attach baggage (such as timeseries or debug data) + */ + data: TToolData; + }); /** * Mixin composed of all the possible types of messages in a chatComplete discussion. @@ -72,4 +87,30 @@ export type ToolMessage | unknown> = * - {@link AssistantMessage} * - {@link ToolMessage} */ -export type Message = UserMessage | AssistantMessage | ToolMessage; +export type Message = UserMessage | AssistantMessage | ToolMessage; + +/** + * Utility type to get the Assistant message type of a {@link ToolOptions} type. 
+ */ +export type AssistantMessageOf = Omit< + AssistantMessage, + 'toolCalls' +> & + ToolCallsOf; + +/** + * Utility type to get the Tool message type of a {@link ToolOptions} type. + */ + +export type ToolMessageOf = ToolMessage< + ToolNamesOf, + ToolResponsesOf +>; + +/** + * Utility type to get the mixin Message type of a {@link ToolOptions} type. + */ +export type MessageOf = + | UserMessage + | AssistantMessageOf + | ToolMessageOf; diff --git a/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/tools.ts b/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/tools.ts index 0c7d5c6755f31..5cd52d5f58883 100644 --- a/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/tools.ts +++ b/x-pack/platform/packages/shared/ai-infra/inference-common/src/chat_complete/tools.ts @@ -8,24 +8,24 @@ import type { ValuesType } from 'utility-types'; import { FromToolSchema, ToolSchema } from './tool_schema'; -type Assert = TValue extends TType ? TValue & TType : never; - type ToolsOfChoice = TToolOptions['toolChoice'] extends { function: infer TToolName; } ? TToolName extends keyof TToolOptions['tools'] - ? Pick + ? TToolName extends string + ? Pick + : TToolOptions['tools'] : TToolOptions['tools'] : TToolOptions['tools']; /** * Utility type to infer the tool calls response shape. */ -type ToolResponsesOf | undefined> = +export type ToolResponsesOf | undefined> = TTools extends Record ? Array< ValuesType<{ - [TName in keyof TTools]: ToolResponseOf, TTools[TName]>; + [TName in keyof TTools & string]: ToolCall>; }> > : never[]; @@ -33,10 +33,11 @@ type ToolResponsesOf | undefined> /** * Utility type to infer the tool call response shape. */ -type ToolResponseOf = ToolCall< - TName, - TToolDefinition extends { schema: ToolSchema } ? FromToolSchema : {} ->; +export type ToolResponseOf = TToolDefinition extends { + schema: ToolSchema; +} + ? FromToolSchema + : {}; /** * Tool invocation choice type. 
@@ -129,6 +130,10 @@ export interface ToolCall< name: TName; } & (TArguments extends Record ? { arguments: TArguments } : {}); } +/** + * Utility type to get the tool names of ToolOptions + */ +export type ToolNamesOf = keyof TToolOptions['tools'] & string; /** * Tool-related parameters of {@link ChatCompleteAPI} diff --git a/x-pack/platform/packages/shared/ai-infra/inference-common/src/output/api.ts b/x-pack/platform/packages/shared/ai-infra/inference-common/src/output/api.ts index 3355042910a61..cd90394cd67d3 100644 --- a/x-pack/platform/packages/shared/ai-infra/inference-common/src/output/api.ts +++ b/x-pack/platform/packages/shared/ai-infra/inference-common/src/output/api.ts @@ -96,6 +96,17 @@ export interface OutputOptions< * Defaults to false. */ stream?: TStream; + + /** + * Optional configuration for retrying the call if an error occurs. + */ + retry?: { + /** + * Whether to retry on validation errors. Can be a number of retries, + * or a boolean, which means one retry. + */ + onValidationError?: boolean | number; + }; } /** diff --git a/x-pack/platform/packages/shared/ai-infra/product-doc-common/package.json b/x-pack/platform/packages/shared/ai-infra/product-doc-common/package.json index 839d411a2efb9..2d7c67f17728d 100644 --- a/x-pack/platform/packages/shared/ai-infra/product-doc-common/package.json +++ b/x-pack/platform/packages/shared/ai-infra/product-doc-common/package.json @@ -3,4 +3,4 @@ "private": true, "version": "1.0.0", "license": "Elastic License 2.0" -} \ No newline at end of file +} diff --git a/x-pack/platform/packages/shared/ml/aiops_log_pattern_analysis/categorization_analyzer.ts b/x-pack/platform/packages/shared/ml/aiops_log_pattern_analysis/categorization_analyzer.ts new file mode 100644 index 0000000000000..3bdf8dea9ee60 --- /dev/null +++ b/x-pack/platform/packages/shared/ml/aiops_log_pattern_analysis/categorization_analyzer.ts @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { AggregationsCustomCategorizeTextAnalyzer } from '@elastic/elasticsearch/lib/api/types'; + +// This is a copy of the default categorization analyzer but using the 'standard' tokenizer rather than the 'ml_standard' tokenizer. +// The 'ml_standard' tokenizer splits tokens in a way that was observed to give better categories in testing many years ago, however, +// the downside of these better categories is then a potential failure to match the original documents when creating a filter for Discover. +// A future enhancement would be to check which analyzer is specified in the mappings for the source field and to use +// that instead of unconditionally using 'standard'. +// However for an initial fix, using the standard analyzer will be more likely to match the results from the majority of searches. 
+export const categorizationAnalyzer: AggregationsCustomCategorizeTextAnalyzer = { + char_filter: ['first_line_with_letters'], + tokenizer: 'standard', + filter: [ + // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect + { + type: 'stop', + stopwords: [ + 'Monday', + 'Tuesday', + 'Wednesday', + 'Thursday', + 'Friday', + 'Saturday', + 'Sunday', + 'Mon', + 'Tue', + 'Wed', + 'Thu', + 'Fri', + 'Sat', + 'Sun', + 'January', + 'February', + 'March', + 'April', + 'May', + 'June', + 'July', + 'August', + 'September', + 'October', + 'November', + 'December', + 'Jan', + 'Feb', + 'Mar', + 'Apr', + 'May', + 'Jun', + 'Jul', + 'Aug', + 'Sep', + 'Oct', + 'Nov', + 'Dec', + 'GMT', + 'UTC', + ], + }, + // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect + { + type: 'limit', + max_token_count: '100', + }, + ], +}; diff --git a/x-pack/platform/packages/shared/ml/aiops_log_pattern_analysis/create_category_request.ts b/x-pack/platform/packages/shared/ml/aiops_log_pattern_analysis/create_category_request.ts index c1d6f82c9e582..4e8e2268fed5a 100644 --- a/x-pack/platform/packages/shared/ml/aiops_log_pattern_analysis/create_category_request.ts +++ b/x-pack/platform/packages/shared/ml/aiops_log_pattern_analysis/create_category_request.ts @@ -5,16 +5,14 @@ * 2.0. 
*/ -import type { - QueryDslQueryContainer, - AggregationsCustomCategorizeTextAnalyzer, -} from '@elastic/elasticsearch/lib/api/types'; +import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types'; import type { MappingRuntimeFields } from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; import { isPopulatedObject } from '@kbn/ml-is-populated-object/src/is_populated_object'; import type { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils'; import { createDefaultQuery } from '@kbn/aiops-common/create_default_query'; +import { categorizationAnalyzer } from './categorization_analyzer'; const CATEGORY_LIMIT = 1000; const EXAMPLE_LIMIT = 4; @@ -121,67 +119,3 @@ export function createCategoryRequest( }, }; } - -// This is a copy of the default categorization analyzer but using the 'standard' tokenizer rather than the 'ml_standard' tokenizer. -// The 'ml_standard' tokenizer splits tokens in a way that was observed to give better categories in testing many years ago, however, -// the downside of these better categories is then a potential failure to match the original documents when creating a filter for Discover. -// A future enhancement would be to check which analyzer is specified in the mappings for the source field and to use -// that instead of unconditionally using 'standard'. -// However for an initial fix, using the standard analyzer will be more likely to match the results from the majority of searches. 
-const categorizationAnalyzer: AggregationsCustomCategorizeTextAnalyzer = { - char_filter: ['first_line_with_letters'], - tokenizer: 'standard', - filter: [ - // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect - { - type: 'stop', - stopwords: [ - 'Monday', - 'Tuesday', - 'Wednesday', - 'Thursday', - 'Friday', - 'Saturday', - 'Sunday', - 'Mon', - 'Tue', - 'Wed', - 'Thu', - 'Fri', - 'Sat', - 'Sun', - 'January', - 'February', - 'March', - 'April', - 'May', - 'June', - 'July', - 'August', - 'September', - 'October', - 'November', - 'December', - 'Jan', - 'Feb', - 'Mar', - 'Apr', - 'May', - 'Jun', - 'Jul', - 'Aug', - 'Sep', - 'Oct', - 'Nov', - 'Dec', - 'GMT', - 'UTC', - ], - }, - // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect - { - type: 'limit', - max_token_count: '100', - }, - ], -}; diff --git a/x-pack/platform/plugins/private/translations/translations/fr-FR.json b/x-pack/platform/plugins/private/translations/translations/fr-FR.json index 3a94d0a039966..3e1da4621dad2 100644 --- a/x-pack/platform/plugins/private/translations/translations/fr-FR.json +++ b/x-pack/platform/plugins/private/translations/translations/fr-FR.json @@ -25990,7 +25990,6 @@ "xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "Ajouter un graphique d'observation", "xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "Sélectionnez une source de données pour générer un graphique d'aperçu", "xpack.investigateApp.appTitle": "Investigations", - "xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "Aidez-moi à résoudre la cause de cet échec", "xpack.investigateApp.defaultChart.error_equation.description": "Vérifiez l'équation.", "xpack.investigateApp.defaultChart.error_equation.title": "Une erreur s'est produite lors de l'affichage du graphique", "xpack.investigateApp.defaultChart.noData.title": "Aucune donnée graphique disponible", 
diff --git a/x-pack/platform/plugins/private/translations/translations/ja-JP.json b/x-pack/platform/plugins/private/translations/translations/ja-JP.json index 91ea24cbcd7e4..8a903f40c4dc0 100644 --- a/x-pack/platform/plugins/private/translations/translations/ja-JP.json +++ b/x-pack/platform/plugins/private/translations/translations/ja-JP.json @@ -25849,7 +25849,6 @@ "xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "観測グラフを追加", "xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "データソースを選択して、プレビューグラフを生成", "xpack.investigateApp.appTitle": "調査", - "xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "このエラーの調査を支援", "xpack.investigateApp.defaultChart.error_equation.description": "式を確認してください。", "xpack.investigateApp.defaultChart.error_equation.title": "グラフの表示中にエラーが発生しました", "xpack.investigateApp.defaultChart.noData.title": "グラフデータがありません", diff --git a/x-pack/platform/plugins/private/translations/translations/zh-CN.json b/x-pack/platform/plugins/private/translations/translations/zh-CN.json index c7fbc414a3379..ed461f1a877ad 100644 --- a/x-pack/platform/plugins/private/translations/translations/zh-CN.json +++ b/x-pack/platform/plugins/private/translations/translations/zh-CN.json @@ -25412,7 +25412,6 @@ "xpack.investigateApp.addObservationUI.h2.addAnObservationChartLabel": "添加观察图表", "xpack.investigateApp.addObservationUI.p.selectADataSourceLabel": "选择数据源以生成预览图表", "xpack.investigateApp.appTitle": "调查", - "xpack.investigateApp.assistantHypothesis.observabilityAIAssistantContextualInsight.helpMeInvestigateThisLabel": "帮助我调查此故障", "xpack.investigateApp.defaultChart.error_equation.description": "检查方程。", "xpack.investigateApp.defaultChart.error_equation.title": "渲染图表时出错", "xpack.investigateApp.defaultChart.noData.title": "没有可用图表数据", diff --git a/x-pack/platform/plugins/shared/inference/common/output/create_output_api.test.ts 
b/x-pack/platform/plugins/shared/inference/common/output/create_output_api.test.ts index b5d380fa9aac6..c65720aae2e48 100644 --- a/x-pack/platform/plugins/shared/inference/common/output/create_output_api.test.ts +++ b/x-pack/platform/plugins/shared/inference/common/output/create_output_api.test.ts @@ -12,6 +12,7 @@ import { ChatCompletionEventType, } from '@kbn/inference-common'; import { createOutputApi } from './create_output_api'; +import { createToolValidationError } from '../../server/chat_complete/errors'; describe('createOutputApi', () => { let chatComplete: jest.Mock; @@ -119,4 +120,80 @@ describe('createOutputApi', () => { }, ]); }); + + describe('when using retry', () => { + const unvalidatedFailedToolCall = { + function: { + name: 'myFunction', + arguments: JSON.stringify({ foo: 'bar' }), + }, + toolCallId: 'foo', + }; + + const validationError = createToolValidationError('Validation failed', { + toolCalls: [unvalidatedFailedToolCall], + }); + + it('retries once when onValidationError is a boolean', async () => { + chatComplete.mockRejectedValueOnce(validationError); + chatComplete.mockResolvedValueOnce( + Promise.resolve({ content: 'retried content', toolCalls: [unvalidatedFailedToolCall] }) + ); + + const output = createOutputApi(chatComplete); + + const response = await output({ + id: 'retry-id', + stream: false, + connectorId: '.retry-connector', + input: 'input message', + retry: { + onValidationError: true, + }, + }); + + expect(chatComplete).toHaveBeenCalledTimes(2); + expect(response).toEqual({ + id: 'retry-id', + content: 'retried content', + output: unvalidatedFailedToolCall.function.arguments, + }); + }); + + it('retries the number of specified attempts', async () => { + chatComplete.mockRejectedValue(validationError); + + const output = createOutputApi(chatComplete); + + await expect( + output({ + id: 'retry-id', + stream: false, + connectorId: '.retry-connector', + input: 'input message', + retry: { + onValidationError: 2, + }, + }) + 
).rejects.toThrow('Validation failed'); + + expect(chatComplete).toHaveBeenCalledTimes(3); + }); + + it('throws an error if retry is provided in streaming mode', () => { + const output = createOutputApi(chatComplete); + + expect(() => + output({ + id: 'stream-retry-id', + stream: true, + connectorId: '.stream-retry-connector', + input: 'input message', + retry: { + onValidationError: 1, + }, + }) + ).toThrowError('Retry options are not supported in streaming mode'); + }); + }); }); diff --git a/x-pack/platform/plugins/shared/inference/common/output/create_output_api.ts b/x-pack/platform/plugins/shared/inference/common/output/create_output_api.ts index d263f733bf4ee..3e65cb283dd45 100644 --- a/x-pack/platform/plugins/shared/inference/common/output/create_output_api.ts +++ b/x-pack/platform/plugins/shared/inference/common/output/create_output_api.ts @@ -10,17 +10,22 @@ import { ChatCompletionEventType, MessageRole, OutputAPI, + OutputCompositeResponse, OutputEventType, OutputOptions, ToolSchema, + isToolValidationError, withoutTokenCountEvents, } from '@kbn/inference-common'; import { isObservable, map } from 'rxjs'; import { ensureMultiTurn } from '../utils/ensure_multi_turn'; +type DefaultOutputOptions = OutputOptions; + export function createOutputApi(chatCompleteApi: ChatCompleteAPI): OutputAPI; + export function createOutputApi(chatCompleteApi: ChatCompleteAPI) { - return ({ + return function callOutputApi({ id, connectorId, input, @@ -29,19 +34,26 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) { previousMessages, functionCalling, stream, - }: OutputOptions) => { + retry, + }: DefaultOutputOptions): OutputCompositeResponse { + if (stream && retry !== undefined) { + throw new Error(`Retry options are not supported in streaming mode`); + } + + const messages = ensureMultiTurn([ + ...(previousMessages || []), + { + role: MessageRole.User, + content: input, + }, + ]); + const response = chatCompleteApi({ connectorId, stream, functionCalling, 
system, - messages: ensureMultiTurn([ - ...(previousMessages || []), - { - role: MessageRole.User, - content: input, - }, - ]), + messages, ...(schema ? { tools: { @@ -79,16 +91,55 @@ export function createOutputApi(chatCompleteApi: ChatCompleteAPI) { }) ); } else { - return response.then((chatResponse) => { - return { - id, - content: chatResponse.content, - output: - chatResponse.toolCalls.length && 'arguments' in chatResponse.toolCalls[0].function - ? chatResponse.toolCalls[0].function.arguments - : undefined, - }; - }); + return response.then( + (chatResponse) => { + return { + id, + content: chatResponse.content, + output: + chatResponse.toolCalls.length && 'arguments' in chatResponse.toolCalls[0].function + ? chatResponse.toolCalls[0].function.arguments + : undefined, + }; + }, + (error: Error) => { + if (isToolValidationError(error) && retry?.onValidationError) { + const retriesLeft = + typeof retry.onValidationError === 'number' ? retry.onValidationError : 1; + + return callOutputApi({ + id, + connectorId, + input, + schema, + system, + previousMessages: messages.concat( + { + role: MessageRole.Assistant as const, + content: '', + toolCalls: error.meta.toolCalls!, + }, + ...(error.meta.toolCalls?.map((toolCall) => { + return { + name: toolCall.function.name, + role: MessageRole.Tool as const, + toolCallId: toolCall.toolCallId, + response: { + error: error.meta, + }, + }; + }) ?? 
[]) + ), + functionCalling, + stream: false, + retry: { + onValidationError: retriesLeft - 1, + }, + }) as OutputCompositeResponse; + } + throw error; + } + ); } }; } diff --git a/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/bedrock/bedrock_claude_adapter.test.ts b/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/bedrock/bedrock_claude_adapter.test.ts index ca6f60dd45a55..565727b7f57fe 100644 --- a/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/bedrock/bedrock_claude_adapter.test.ts +++ b/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/bedrock/bedrock_claude_adapter.test.ts @@ -170,6 +170,7 @@ describe('bedrockClaudeAdapter', () => { ], }, { + name: 'my_function', role: MessageRole.Tool, toolCallId: '0', response: { diff --git a/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/gemini/gemini_adapter.test.ts b/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/gemini/gemini_adapter.test.ts index c3410b2af3623..95a46f73d5d1f 100644 --- a/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/gemini/gemini_adapter.test.ts +++ b/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/gemini/gemini_adapter.test.ts @@ -172,6 +172,7 @@ describe('geminiAdapter', () => { ], }, { + name: 'my_function', role: MessageRole.Tool, toolCallId: '0', response: { diff --git a/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/openai/openai_adapter.test.ts b/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/openai/openai_adapter.test.ts index 2d0154313b632..48544f1bb0fb1 100644 --- a/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/openai/openai_adapter.test.ts +++ b/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/openai/openai_adapter.test.ts @@ -142,6 +142,7 @@ describe('openAIAdapter', () => { ], }, { + name: 'my_function', role: 
MessageRole.Tool, toolCallId: '0', response: { diff --git a/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/openai/openai_adapter.ts b/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/openai/openai_adapter.ts index fa412f335800d..49b6bb5142023 100644 --- a/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/openai/openai_adapter.ts +++ b/x-pack/platform/plugins/shared/inference/server/chat_complete/adapters/openai/openai_adapter.ts @@ -58,7 +58,6 @@ export const openAIAdapter: InferenceConnectorAdapter = { request = { stream, messages: messagesToOpenAI({ system: wrapped.system, messages: wrapped.messages }), - temperature: 0, }; } else { request = { @@ -66,7 +65,6 @@ export const openAIAdapter: InferenceConnectorAdapter = { messages: messagesToOpenAI({ system, messages }), tool_choice: toolChoiceToOpenAI(toolChoice), tools: toolsToOpenAI(tools), - temperature: 0, }; } diff --git a/x-pack/platform/plugins/shared/inference/server/chat_complete/api.ts b/x-pack/platform/plugins/shared/inference/server/chat_complete/api.ts index 13b1c8d87270c..e58c94759e165 100644 --- a/x-pack/platform/plugins/shared/inference/server/chat_complete/api.ts +++ b/x-pack/platform/plugins/shared/inference/server/chat_complete/api.ts @@ -5,7 +5,7 @@ * 2.0. 
*/ -import { last } from 'lodash'; +import { last, omit } from 'lodash'; import { defer, switchMap, throwError } from 'rxjs'; import type { Logger } from '@kbn/logging'; import type { KibanaRequest } from '@kbn/core-http-server'; @@ -51,14 +51,26 @@ export function createChatCompleteApi({ request, actions, logger }: CreateChatCo const connectorType = connector.type; const inferenceAdapter = getInferenceAdapter(connectorType); + const messagesWithoutData = messages.map((message) => omit(message, 'data')); + if (!inferenceAdapter) { return throwError(() => createInferenceRequestError(`Adapter for type ${connectorType} not implemented`, 400) ); } - logger.debug(() => `Sending request: ${JSON.stringify(last(messages))}`); - logger.trace(() => JSON.stringify({ messages, toolChoice, tools, system })); + logger.debug( + () => `Sending request, last message is: ${JSON.stringify(last(messagesWithoutData))}` + ); + + logger.trace(() => + JSON.stringify({ + messages: messagesWithoutData, + toolChoice, + tools, + system, + }) + ); return inferenceAdapter.chatComplete({ system, diff --git a/x-pack/platform/plugins/shared/inference/server/chat_complete/errors.ts b/x-pack/platform/plugins/shared/inference/server/chat_complete/errors.ts index a830f57fec559..00ef2d90ca1f7 100644 --- a/x-pack/platform/plugins/shared/inference/server/chat_complete/errors.ts +++ b/x-pack/platform/plugins/shared/inference/server/chat_complete/errors.ts @@ -44,7 +44,7 @@ export function createToolValidationError( name?: string; arguments?: string; errorsText?: string; - toolCalls?: UnvalidatedToolCall[]; + toolCalls: UnvalidatedToolCall[]; } ): ChatCompletionToolValidationError { return new InferenceTaskError(ChatCompletionErrorCode.ToolValidationError, message, meta); diff --git a/x-pack/platform/plugins/shared/inference/server/chat_complete/simulated_function_calling/wrap_with_simulated_function_calling.ts 
b/x-pack/platform/plugins/shared/inference/server/chat_complete/simulated_function_calling/wrap_with_simulated_function_calling.ts index 4eb6cfd8d50e1..d2cb0bfae4999 100644 --- a/x-pack/platform/plugins/shared/inference/server/chat_complete/simulated_function_calling/wrap_with_simulated_function_calling.ts +++ b/x-pack/platform/plugins/shared/inference/server/chat_complete/simulated_function_calling/wrap_with_simulated_function_calling.ts @@ -79,7 +79,7 @@ export function wrapWithSimulatedFunctionCalling({ }; } -const convertToolResponseMessage = (message: ToolMessage): UserMessage => { +const convertToolResponseMessage = (message: ToolMessage): UserMessage => { return { role: MessageRole.User, content: JSON.stringify({ diff --git a/x-pack/platform/plugins/shared/inference/server/chat_complete/utils/chunks_into_message.test.ts b/x-pack/platform/plugins/shared/inference/server/chat_complete/utils/chunks_into_message.test.ts index c6e5b032120a3..f7c1115305f57 100644 --- a/x-pack/platform/plugins/shared/inference/server/chat_complete/utils/chunks_into_message.test.ts +++ b/x-pack/platform/plugins/shared/inference/server/chat_complete/utils/chunks_into_message.test.ts @@ -183,7 +183,7 @@ describe('chunksIntoMessage', () => { } await expect(async () => getMessage()).rejects.toThrowErrorMatchingInlineSnapshot( - `"Tool call arguments for myFunction were invalid"` + `"Tool call arguments for myFunction (001) were invalid"` ); }); diff --git a/x-pack/platform/plugins/shared/inference/server/chat_complete/utils/chunks_into_message.ts b/x-pack/platform/plugins/shared/inference/server/chat_complete/utils/chunks_into_message.ts index fe9b745f442fc..77c4164348cc6 100644 --- a/x-pack/platform/plugins/shared/inference/server/chat_complete/utils/chunks_into_message.ts +++ b/x-pack/platform/plugins/shared/inference/server/chat_complete/utils/chunks_into_message.ts @@ -5,17 +5,17 @@ * 2.0. 
*/ -import { last, map, merge, OperatorFunction, scan, share } from 'rxjs'; -import type { Logger } from '@kbn/logging'; import { - UnvalidatedToolCall, - ToolOptions, ChatCompletionChunkEvent, ChatCompletionEventType, ChatCompletionMessageEvent, ChatCompletionTokenCountEvent, + ToolOptions, + UnvalidatedToolCall, withoutTokenCountEvents, } from '@kbn/inference-common'; +import type { Logger } from '@kbn/logging'; +import { OperatorFunction, map, merge, share, toArray } from 'rxjs'; import { validateToolCalls } from '../../util/validate_tool_calls'; export function chunksIntoMessage({ @@ -37,38 +37,36 @@ export function chunksIntoMessage({ shared$, shared$.pipe( withoutTokenCountEvents(), - scan( - (prev, chunk) => { - prev.content += chunk.content ?? ''; + toArray(), + map((chunks): ChatCompletionMessageEvent => { + const concatenatedChunk = chunks.reduce( + (prev, chunk) => { + prev.content += chunk.content ?? ''; + + chunk.tool_calls?.forEach((toolCall) => { + let prevToolCall = prev.tool_calls[toolCall.index]; + if (!prevToolCall) { + prev.tool_calls[toolCall.index] = { + function: { + name: '', + arguments: '', + }, + toolCallId: '', + }; - chunk.tool_calls?.forEach((toolCall) => { - let prevToolCall = prev.tool_calls[toolCall.index]; - if (!prevToolCall) { - prev.tool_calls[toolCall.index] = { - function: { - name: '', - arguments: '', - }, - toolCallId: '', - }; + prevToolCall = prev.tool_calls[toolCall.index]; + } - prevToolCall = prev.tool_calls[toolCall.index]; - } + prevToolCall.function.name += toolCall.function.name; + prevToolCall.function.arguments += toolCall.function.arguments; + prevToolCall.toolCallId += toolCall.toolCallId; + }); - prevToolCall.function.name += toolCall.function.name; - prevToolCall.function.arguments += toolCall.function.arguments; - prevToolCall.toolCallId += toolCall.toolCallId; - }); + return prev; + }, + { content: '', tool_calls: [] as UnvalidatedToolCall[] } + ); - return prev; - }, - { - content: '', - tool_calls: [] as 
UnvalidatedToolCall[], - } - ), - last(), - map((concatenatedChunk): ChatCompletionMessageEvent => { logger.debug(() => `Received completed message: ${JSON.stringify(concatenatedChunk)}`); const validatedToolCalls = validateToolCalls({ diff --git a/x-pack/platform/plugins/shared/inference/server/routes/chat_complete.ts b/x-pack/platform/plugins/shared/inference/server/routes/chat_complete.ts index 8b4cc49dfaa46..84e3dd57cded1 100644 --- a/x-pack/platform/plugins/shared/inference/server/routes/chat_complete.ts +++ b/x-pack/platform/plugins/shared/inference/server/routes/chat_complete.ts @@ -76,9 +76,11 @@ const chatCompleteBodySchema: Type = schema.object({ name: schema.maybe(schema.string()), }), schema.object({ + name: schema.string(), role: schema.literal(MessageRole.Tool), toolCallId: schema.string(), response: schema.recordOf(schema.string(), schema.any()), + data: schema.maybe(schema.recordOf(schema.string(), schema.any())), }), ]) ), diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/actions/generate_esql.ts b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/actions/generate_esql.ts index 3d8701eba72db..5c2612aa0a4d4 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/actions/generate_esql.ts +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/actions/generate_esql.ts @@ -34,6 +34,7 @@ export const generateEsqlTask = ({ docBase, functionCalling, logger, + system, }: { connectorId: string; systemMessage: string; @@ -43,6 +44,7 @@ export const generateEsqlTask = ({ docBase: EsqlDocumentBase; functionCalling?: FunctionCallingMode; logger: Pick; + system?: string; }) => { return function askLlmToRespond({ documentationRequest: { commands, functions }, @@ -97,7 +99,7 @@ export const generateEsqlTask = ({ When converting queries from one language to ES|QL, make sure that the functions are available and documented in ES|QL. E.g., for SPL's LEN, use LENGTH. For IF, use CASE. 
- `, + ${system ? `## Additional instructions\n\n${system}` : ''}`, messages: [ ...messages, { @@ -106,6 +108,7 @@ export const generateEsqlTask = ({ toolCalls: [fakeRequestDocsToolCall], }, { + name: fakeRequestDocsToolCall.function.name, role: MessageRole.Tool, response: { documentation: requestedDocumentation, diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/task.ts b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/task.ts index 56c48b73f4994..801d80a30174e 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/task.ts +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/task.ts @@ -21,6 +21,7 @@ export function naturalLanguageToEsql({ toolChoice, logger, functionCalling, + system, ...rest }: NlToEsqlTaskParams): Observable> { return from(loadDocBase()).pipe( @@ -41,6 +42,7 @@ export function naturalLanguageToEsql({ tools, toolChoice, }, + system, }); return requestDocumentation({ diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/types.ts b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/types.ts index db3ac3b493481..5a1477524dbd4 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/types.ts +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/types.ts @@ -29,5 +29,6 @@ export type NlToEsqlTaskParams = { connectorId: string; logger: Pick; functionCalling?: FunctionCallingMode; + system?: string; } & TToolOptions & ({ input: string } | { messages: Message[] }); diff --git a/x-pack/platform/plugins/shared/inference/server/util/validate_tool_calls.test.ts b/x-pack/platform/plugins/shared/inference/server/util/validate_tool_calls.test.ts index 57b030771c6c0..8691a50cda207 100644 --- a/x-pack/platform/plugins/shared/inference/server/util/validate_tool_calls.test.ts +++ b/x-pack/platform/plugins/shared/inference/server/util/validate_tool_calls.test.ts @@ -108,7 +108,7 @@ 
describe('validateToolCalls', () => { }); } expect(() => validate()).toThrowErrorMatchingInlineSnapshot( - `"Tool call arguments for my_function were invalid"` + `"Tool call arguments for my_function (1) were invalid"` ); try { @@ -119,6 +119,15 @@ describe('validateToolCalls', () => { arguments: JSON.stringify({ foo: 'bar' }), errorsText: `data must have required property 'bar'`, name: 'my_function', + toolCalls: [ + { + function: { + arguments: JSON.stringify({ foo: 'bar' }), + name: 'my_function', + }, + toolCallId: '1', + }, + ], }); } else { fail('Expected toolValidationError'); diff --git a/x-pack/platform/plugins/shared/inference/server/util/validate_tool_calls.ts b/x-pack/platform/plugins/shared/inference/server/util/validate_tool_calls.ts index ffc2482774b23..3698a93206c07 100644 --- a/x-pack/platform/plugins/shared/inference/server/util/validate_tool_calls.ts +++ b/x-pack/platform/plugins/shared/inference/server/util/validate_tool_calls.ts @@ -54,11 +54,12 @@ export function validateToolCalls({ if (!valid) { throw createToolValidationError( - `Tool call arguments for ${toolCall.function.name} were invalid`, + `Tool call arguments for ${toolCall.function.name} (${toolCall.toolCallId}) were invalid`, { name: toolCall.function.name, errorsText: validator.errorsText(), arguments: toolCall.function.arguments, + toolCalls, } ); } diff --git a/x-pack/plugins/observability_solution/inventory/tsconfig.json b/x-pack/plugins/observability_solution/inventory/tsconfig.json index 561ca62eaf97e..54b6336ab3be2 100644 --- a/x-pack/plugins/observability_solution/inventory/tsconfig.json +++ b/x-pack/plugins/observability_solution/inventory/tsconfig.json @@ -51,6 +51,9 @@ "@kbn/rule-data-utils", "@kbn/spaces-plugin", "@kbn/cloud-plugin", + "@kbn/observability-utils-browser", + "@kbn/observability-utils-server", + "@kbn/observability-utils-common", "@kbn/storybook", "@kbn/dashboard-plugin", "@kbn/deeplinks-analytics", diff --git 
a/x-pack/plugins/observability_solution/investigate_app/.storybook/get_mock_investigate_app_services.tsx b/x-pack/plugins/observability_solution/investigate_app/.storybook/get_mock_investigate_app_services.tsx index 42ccc20d52a71..d0e9e936dd598 100644 --- a/x-pack/plugins/observability_solution/investigate_app/.storybook/get_mock_investigate_app_services.tsx +++ b/x-pack/plugins/observability_solution/investigate_app/.storybook/get_mock_investigate_app_services.tsx @@ -17,6 +17,7 @@ import { SearchBar, IUnifiedSearchPluginServices } from '@kbn/unified-search-plu import { KibanaContextProvider } from '@kbn/kibana-react-plugin/public'; import { merge } from 'lodash'; import { Storage } from '@kbn/kibana-utils-plugin/public'; +import { of } from 'rxjs'; import type { EsqlQueryMeta } from '../public/services/esql'; import type { InvestigateAppServices } from '../public/services/types'; import { InvestigateAppKibanaContext } from '../public/hooks/use_kibana'; @@ -54,6 +55,10 @@ export function getMockInvestigateAppContext(): DeeplyMockedKeys Promise.resolve()), + stream: jest.fn().mockImplementation(() => of()) as any, + }, }; const core = coreMock.createStart(); diff --git a/x-pack/plugins/observability_solution/investigate_app/kibana.jsonc b/x-pack/plugins/observability_solution/investigate_app/kibana.jsonc index e105cacf75d05..eb69c6f9ea534 100644 --- a/x-pack/plugins/observability_solution/investigate_app/kibana.jsonc +++ b/x-pack/plugins/observability_solution/investigate_app/kibana.jsonc @@ -2,8 +2,8 @@ "type": "plugin", "id": "@kbn/investigate-app-plugin", "owner": "@elastic/obs-ux-management-team", - "group": "observability", "visibility": "private", + "group": "observability", "plugin": { "id": "investigateApp", "server": true, @@ -24,14 +24,22 @@ "observability", "licensing", "ruleRegistry", + "inference", + "alerting", + "spaces", + "slo", + "apmDataAccess", "usageCollection" ], + "optionalPlugins": [ + "observabilityAIAssistant", + 
"observabilityAIAssistantApp" + ], "requiredBundles": [ "esql", "kibanaReact", "kibanaUtils" ], - "optionalPlugins": ["observabilityAIAssistant"], "extraPublicDirs": [] } } diff --git a/x-pack/plugins/observability_solution/investigate_app/public/api/index.ts b/x-pack/plugins/observability_solution/investigate_app/public/api/index.ts index af02f4a15e740..21f3251bd0d34 100644 --- a/x-pack/plugins/observability_solution/investigate_app/public/api/index.ts +++ b/x-pack/plugins/observability_solution/investigate_app/public/api/index.ts @@ -11,7 +11,7 @@ import type { ReturnOf, RouteRepositoryClient, } from '@kbn/server-route-repository'; -import { formatRequest } from '@kbn/server-route-repository-utils/src/format_request'; +import { createRepositoryClient } from '@kbn/server-route-repository-client'; import type { InvestigateAppServerRouteRepository } from '../../server'; type FetchOptions = Omit & { @@ -25,15 +25,15 @@ export type InvestigateAppAPIClientOptions = Omit< signal: AbortSignal | null; }; -export type InvestigateAppAPIClient = RouteRepositoryClient< +export type InvestigateAppRepositoryClient = RouteRepositoryClient< InvestigateAppServerRouteRepository, InvestigateAppAPIClientOptions ->['fetch']; +>; -export type AutoAbortedInvestigateAppAPIClient = RouteRepositoryClient< +export type AutoAbortedInvestigateAppRepositoryClient = RouteRepositoryClient< InvestigateAppServerRouteRepository, Omit ->['fetch']; +>; export type InvestigateAppAPIEndpoint = keyof InvestigateAppServerRouteRepository; @@ -45,19 +45,6 @@ export type APIReturnType = ReturnO export type InvestigateAppAPIClientRequestParamsOf = ClientRequestParamsOf; -export function createCallInvestigateAppAPI(core: CoreStart | CoreSetup) { - return ((endpoint, options) => { - const { params } = options as unknown as { - params?: Partial>; - }; - - const { method, pathname, version } = formatRequest(endpoint, params?.path); - - return core.http[method](pathname, { - ...options, - body: params && 
params.body ? JSON.stringify(params.body) : undefined, - query: params?.query, - version, - }); - }) as InvestigateAppAPIClient; +export function createInvestigateAppRepositoryClient(core: CoreStart | CoreSetup) { + return createRepositoryClient(core) as InvestigateAppRepositoryClient; } diff --git a/x-pack/plugins/observability_solution/investigate_app/public/pages/details/components/assistant_hypothesis/assistant_hypothesis.tsx b/x-pack/plugins/observability_solution/investigate_app/public/pages/details/components/assistant_hypothesis/assistant_hypothesis.tsx index f63cbb9c01618..cf993e53790cb 100644 --- a/x-pack/plugins/observability_solution/investigate_app/public/pages/details/components/assistant_hypothesis/assistant_hypothesis.tsx +++ b/x-pack/plugins/observability_solution/investigate_app/public/pages/details/components/assistant_hypothesis/assistant_hypothesis.tsx @@ -4,19 +4,22 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. */ -import dedent from 'dedent'; -import { - ALERT_RULE_PARAMETERS, - ALERT_START, - ALERT_RULE_CATEGORY, - ALERT_REASON, -} from '@kbn/rule-data-utils'; import { i18n } from '@kbn/i18n'; -import { EntityWithSource } from '@kbn/investigation-shared'; -import React, { useCallback } from 'react'; +import type { RootCauseAnalysisEvent } from '@kbn/observability-ai-server/root_cause_analysis'; +import { EcsFieldsResponse } from '@kbn/rule-registry-plugin/common'; +import React, { useState, useRef, useEffect } from 'react'; +import { omit } from 'lodash'; +import { + ALERT_FLAPPING_HISTORY, + ALERT_RULE_EXECUTION_TIMESTAMP, + ALERT_RULE_EXECUTION_UUID, + EVENT_ACTION, + EVENT_KIND, +} from '@kbn/rule-registry-plugin/common/technical_rule_data_field_names'; +import { isRequestAbortedError } from '@kbn/server-route-repository-client'; import { useKibana } from '../../../../hooks/use_kibana'; import { useInvestigation } from '../../contexts/investigation_context'; -import { useFetchEntities } from 
'../../../../hooks/use_fetch_entities'; +import { useUpdateInvestigation } from '../../../../hooks/use_update_investigation'; export interface InvestigationContextualInsight { key: string; @@ -25,98 +28,177 @@ export interface InvestigationContextualInsight { } export function AssistantHypothesis({ investigationId }: { investigationId: string }) { - const { alert } = useInvestigation(); const { + alert, + globalParams: { timeRange }, + investigation, + } = useInvestigation(); + const { + core: { notifications }, + services: { investigateAppRepositoryClient }, dependencies: { start: { - observabilityAIAssistant: { - ObservabilityAIAssistantContextualInsight, - getContextualInsightMessages, - }, + observabilityAIAssistant: { useGenAIConnectors }, + observabilityAIAssistantApp: { RootCauseAnalysisContainer }, }, }, } = useKibana(); - const { data: entitiesData } = useFetchEntities({ - investigationId, - serviceName: alert?.['service.name'] ? `${alert?.['service.name']}` : undefined, - serviceEnvironment: alert?.['service.environment'] - ? `${alert?.['service.environment']}` - : undefined, - hostName: alert?.['host.name'] ? `${alert?.['host.name']}` : undefined, - containerId: alert?.['container.id'] ? 
`${alert?.['container.id']}` : undefined, - }); - - const getAlertContextMessages = useCallback(async () => { - if (!getContextualInsightMessages || !alert) { - return []; + + const { mutateAsync: updateInvestigation } = useUpdateInvestigation(); + + const { loading: loadingConnector, selectedConnector } = useGenAIConnectors(); + + const serviceName = alert?.['service.name'] as string | undefined; + + const [events, setEvents] = useState([]); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(undefined); + + const controllerRef = useRef(new AbortController()); + + useEffect(() => { + if (investigation?.rootCauseAnalysis) { + setEvents(investigation.rootCauseAnalysis.events); } + }, [investigation?.rootCauseAnalysis]); + + const [completeInBackground, setCompleteInBackground] = useState(true); + + const runRootCauseAnalysis = ({ + alert: nonNullishAlert, + connectorId, + serviceName: nonNullishServiceName, + }: { + alert: EcsFieldsResponse; + connectorId: string; + serviceName: string; + }) => { + const rangeFrom = timeRange.from; + + const rangeTo = timeRange.to; - const entities = entitiesData?.entities ?? []; - - const entityContext = entities?.length - ? ` - Alerts can optionally be associated with entities. Entities can be services, hosts, containers, or other resources. Entities can have metrics associated with them. - - The alert that triggered this investigation is associated with the following entities: ${entities - .map((entity, index) => { - return dedent(` - ## Entity ${index + 1}: - ${formatEntityMetrics(entity)}; - `); - }) - .join('/n/n')}` - : ''; - - return getContextualInsightMessages({ - message: `I am investigating a failure in my system. I was made aware of the failure by an alert and I am trying to understand the root cause of the issue.`, - instructions: dedent( - `I'm an SRE. I am investigating a failure in my system. I was made aware of the failure via an alert. 
Your current task is to help me identify the root cause of the failure in my system. - - The rule that triggered the alert is a ${ - alert[ALERT_RULE_CATEGORY] - } rule. The alert started at ${alert[ALERT_START]}. The alert reason is ${ - alert[ALERT_REASON] - }. The rule parameters are ${JSON.stringify(ALERT_RULE_PARAMETERS)}. - - ${entityContext} - - Based on the alert details, suggest a root cause and next steps to mitigate the issue. - - I do not have the alert details or entity details in front of me, so be sure to repeat the alert reason (${ - alert[ALERT_REASON] - }), when the alert was triggered (${ - alert[ALERT_START] - }), and the entity metrics in your response. - - When displaying the entity metrics, please convert the metrics to a human-readable format. For example, convert "logRate" to "Log Rate" and "errorRate" to "Error Rate". - ` - ), - }); - }, [alert, getContextualInsightMessages, entitiesData?.entities]); - - if (!ObservabilityAIAssistantContextualInsight) { + setLoading(true); + + setError(undefined); + + setEvents([]); + + investigateAppRepositoryClient + .stream('POST /internal/observability/investigation/root_cause_analysis', { + params: { + body: { + investigationId, + connectorId, + context: `The user is investigating an alert for the ${serviceName} service, + and wants to find the root cause. 
Here is the alert: + + ${JSON.stringify(sanitizeAlert(nonNullishAlert))}`, + rangeFrom, + rangeTo, + serviceName: nonNullishServiceName, + completeInBackground, + }, + }, + signal: controllerRef.current.signal, + }) + .subscribe({ + next: (event) => { + setEvents((prev) => { + return prev.concat(event.event); + }); + }, + error: (nextError) => { + if (!isRequestAbortedError(nextError)) { + notifications.toasts.addError(nextError, { + title: i18n.translate( + 'xpack.investigateApp.assistantHypothesis.failedToLoadAnalysis', + { + defaultMessage: `Failed to load analysis`, + } + ), + }); + setError(nextError); + } else { + setError( + new Error( + i18n.translate('xpack.investigateApp.assistantHypothesis.analysisAborted', { + defaultMessage: `Analysis was aborted`, + }) + ) + ); + } + + setLoading(false); + }, + complete: () => { + setLoading(false); + }, + }); + }; + + if (!serviceName) { return null; } - return alert && entitiesData ? ( - { + setCompleteInBackground(() => !completeInBackground); + }} + onStopAnalysisClick={() => { + controllerRef.current.abort(); + controllerRef.current = new AbortController(); + }} + onClearAnalysisClick={() => { + setEvents([]); + if (investigation?.rootCauseAnalysis) { + updateInvestigation({ + investigationId, + payload: { + rootCauseAnalysis: { + events: [], + }, + }, + }); + } + }} + onResetAnalysisClick={() => { + controllerRef.current.abort(); + controllerRef.current = new AbortController(); + if (alert && selectedConnector && serviceName) { + runRootCauseAnalysis({ + alert, + connectorId: selectedConnector, + serviceName, + }); + } + }} + error={error} + onStartAnalysisClick={() => { + if (alert && selectedConnector && serviceName) { + runRootCauseAnalysis({ + alert, + connectorId: selectedConnector, + serviceName, + }); + } + }} /> - ) : null; + ); +} + +function sanitizeAlert(alert: EcsFieldsResponse) { + return omit( + alert, + ALERT_RULE_EXECUTION_TIMESTAMP, + '_index', + ALERT_FLAPPING_HISTORY, + EVENT_ACTION, + 
EVENT_KIND, + ALERT_RULE_EXECUTION_UUID, + '@timestamp' + ); } -const formatEntityMetrics = (entity: EntityWithSource): string => { - const entityMetrics = Object.entries(entity.metrics) - .map(([key, value]) => `${key}: ${value}`) - .join(', '); - const entitySources = entity.sources.map((source) => source.dataStream).join(', '); - return dedent(` - Entity name: ${entity.display_name}; - Entity type: ${entity.type}; - Entity metrics: ${entityMetrics}; - Entity data streams: ${entitySources} - `); -}; diff --git a/x-pack/plugins/observability_solution/investigate_app/public/plugin.tsx b/x-pack/plugins/observability_solution/investigate_app/public/plugin.tsx index 5ec88f9d72468..92c94cef94521 100644 --- a/x-pack/plugins/observability_solution/investigate_app/public/plugin.tsx +++ b/x-pack/plugins/observability_solution/investigate_app/public/plugin.tsx @@ -27,6 +27,7 @@ import type { InvestigateAppSetupDependencies, InvestigateAppStartDependencies, } from './types'; +import { createInvestigateAppRepositoryClient, InvestigateAppRepositoryClient } from './api'; const getCreateEsqlService = once(() => import('./services/esql').then((m) => m.createEsqlService)); @@ -41,6 +42,7 @@ export class InvestigateAppPlugin { logger: Logger; config: ConfigSchema; + repositoryClient!: InvestigateAppRepositoryClient; constructor(context: PluginInitializerContext) { this.logger = context.logger.get(); @@ -51,6 +53,8 @@ export class InvestigateAppPlugin coreSetup: CoreSetup, pluginsSetup: InvestigateAppSetupDependencies ): InvestigateAppPublicSetup { + this.repositoryClient = createInvestigateAppRepositoryClient(coreSetup); + coreSetup.application.register({ id: INVESTIGATE_APP_ID, title: i18n.translate('xpack.investigateApp.appTitle', { @@ -93,6 +97,7 @@ export class InvestigateAppPlugin lens: pluginsStart.lens, }), charts: pluginsStart.charts, + investigateAppRepositoryClient: this.repositoryClient, }; ReactDOM.render( @@ -127,6 +132,7 @@ export class InvestigateAppPlugin start: 
pluginsStart, }, services: { + investigateAppRepositoryClient: this.repositoryClient, esql: createEsqlService({ data: pluginsStart.data, dataViews: pluginsStart.dataViews, diff --git a/x-pack/plugins/observability_solution/investigate_app/public/services/types.ts b/x-pack/plugins/observability_solution/investigate_app/public/services/types.ts index 257ed5a7aeaca..04a2a7d063e9f 100644 --- a/x-pack/plugins/observability_solution/investigate_app/public/services/types.ts +++ b/x-pack/plugins/observability_solution/investigate_app/public/services/types.ts @@ -7,8 +7,10 @@ import { ChartsPluginStart } from '@kbn/charts-plugin/public'; import type { EsqlService } from './esql'; +import type { InvestigateAppRepositoryClient } from '../api'; export interface InvestigateAppServices { esql: EsqlService; charts: ChartsPluginStart; + investigateAppRepositoryClient: InvestigateAppRepositoryClient; } diff --git a/x-pack/plugins/observability_solution/investigate_app/public/types.ts b/x-pack/plugins/observability_solution/investigate_app/public/types.ts index a2d7b5227a201..da81bcdbc0c92 100644 --- a/x-pack/plugins/observability_solution/investigate_app/public/types.ts +++ b/x-pack/plugins/observability_solution/investigate_app/public/types.ts @@ -8,6 +8,10 @@ import type { ObservabilityAIAssistantPublicSetup, ObservabilityAIAssistantPublicStart, } from '@kbn/observability-ai-assistant-plugin/public'; +import type { + ObservabilityAIAssistantAppPublicSetup, + ObservabilityAIAssistantAppPublicStart, +} from '@kbn/observability-ai-assistant-app-plugin/public'; import { ChartsPluginStart } from '@kbn/charts-plugin/public'; import type { ContentManagementPublicStart } from '@kbn/content-management-plugin/public'; import type { DataPublicPluginSetup, DataPublicPluginStart } from '@kbn/data-plugin/public'; @@ -43,6 +47,7 @@ export interface InvestigateAppSetupDependencies { investigate: InvestigatePublicSetup; observabilityShared: ObservabilitySharedPluginSetup; 
observabilityAIAssistant: ObservabilityAIAssistantPublicSetup; + observabilityAIAssistantApp: ObservabilityAIAssistantAppPublicSetup; lens: LensPublicSetup; dataViews: DataViewsPublicPluginSetup; data: DataPublicPluginSetup; @@ -58,6 +63,7 @@ export interface InvestigateAppStartDependencies { investigate: InvestigatePublicStart; observabilityShared: ObservabilitySharedPluginStart; observabilityAIAssistant: ObservabilityAIAssistantPublicStart; + observabilityAIAssistantApp: ObservabilityAIAssistantAppPublicStart; lens: LensPublicStart; dataViews: DataViewsPublicPluginStart; data: DataPublicPluginStart; diff --git a/x-pack/plugins/observability_solution/investigate_app/server/routes/get_global_investigate_app_server_route_repository.ts b/x-pack/plugins/observability_solution/investigate_app/server/routes/get_global_investigate_app_server_route_repository.ts index 494e13efcba95..1728e6b69b7d3 100644 --- a/x-pack/plugins/observability_solution/investigate_app/server/routes/get_global_investigate_app_server_route_repository.ts +++ b/x-pack/plugins/observability_solution/investigate_app/server/routes/get_global_investigate_app_server_route_repository.ts @@ -15,18 +15,19 @@ import { findInvestigationsParamsSchema, getAllInvestigationStatsParamsSchema, getAllInvestigationTagsParamsSchema, + getEntitiesParamsSchema, + GetEntitiesResponse, + getEventsParamsSchema, + GetEventsResponse, getInvestigationItemsParamsSchema, getInvestigationNotesParamsSchema, getInvestigationParamsSchema, updateInvestigationItemParamsSchema, updateInvestigationNoteParamsSchema, updateInvestigationParamsSchema, - getEventsParamsSchema, - GetEventsResponse, - getEntitiesParamsSchema, - GetEntitiesResponse, } from '@kbn/investigation-shared'; import { ScopedAnnotationsClient } from '@kbn/observability-plugin/server'; +import { createEntitiesESClient } from '../clients/create_entities_es_client'; import { createInvestigation } from '../services/create_investigation'; import { createInvestigationItem } 
from '../services/create_investigation_item'; import { createInvestigationNote } from '../services/create_investigation_note'; @@ -34,20 +35,20 @@ import { deleteInvestigation } from '../services/delete_investigation'; import { deleteInvestigationItem } from '../services/delete_investigation_item'; import { deleteInvestigationNote } from '../services/delete_investigation_note'; import { findInvestigations } from '../services/find_investigations'; +import { AlertsClient, getAlertsClient } from '../services/get_alerts_client'; +import { getAllInvestigationStats } from '../services/get_all_investigation_stats'; import { getAllInvestigationTags } from '../services/get_all_investigation_tags'; +import { getEntitiesWithSource } from '../services/get_entities'; +import { getAlertEvents, getAnnotationEvents } from '../services/get_events'; import { getInvestigation } from '../services/get_investigation'; import { getInvestigationItems } from '../services/get_investigation_items'; import { getInvestigationNotes } from '../services/get_investigation_notes'; import { investigationRepositoryFactory } from '../services/investigation_repository'; import { updateInvestigation } from '../services/update_investigation'; -import { getAlertEvents, getAnnotationEvents } from '../services/get_events'; -import { AlertsClient, getAlertsClient } from '../services/get_alerts_client'; import { updateInvestigationItem } from '../services/update_investigation_item'; import { updateInvestigationNote } from '../services/update_investigation_note'; import { createInvestigateAppServerRoute } from './create_investigate_app_server_route'; -import { getAllInvestigationStats } from '../services/get_all_investigation_stats'; -import { getEntitiesWithSource } from '../services/get_entities'; -import { createEntitiesESClient } from '../clients/create_entities_es_client'; +import { rootCauseAnalysisRoute } from './rca/route'; const createInvestigationRoute = createInvestigateAppServerRoute({ endpoint: 
'POST /api/observability/investigations 2023-10-31', @@ -400,6 +401,7 @@ export function getGlobalInvestigateAppServerRouteRepository() { ...getEntitiesRoute, ...getAllInvestigationStatsRoute, ...getAllInvestigationTagsRoute, + ...rootCauseAnalysisRoute, }; } diff --git a/x-pack/plugins/observability_solution/investigate_app/server/routes/rca/route.ts b/x-pack/plugins/observability_solution/investigate_app/server/routes/rca/route.ts new file mode 100644 index 0000000000000..c4b71d7faa4b9 --- /dev/null +++ b/x-pack/plugins/observability_solution/investigate_app/server/routes/rca/route.ts @@ -0,0 +1,163 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { Observable, catchError, from, of, share, switchMap, toArray } from 'rxjs'; +import { ServerSentEventBase } from '@kbn/sse-utils'; +import { + RootCauseAnalysisEvent, + runRootCauseAnalysis, +} from '@kbn/observability-ai-server/root_cause_analysis'; +import { z } from '@kbn/zod'; +import datemath from '@elastic/datemath'; +import { OBSERVABILITY_LOGS_DATA_ACCESS_LOG_SOURCES_ID } from '@kbn/management-settings-ids'; +import { createObservabilityEsClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client'; +import { preconditionFailed } from '@hapi/boom'; +import { createInvestigateAppServerRoute } from '../create_investigate_app_server_route'; +import { investigationRepositoryFactory } from '../../services/investigation_repository'; + +export const rootCauseAnalysisRoute = createInvestigateAppServerRoute({ + endpoint: 'POST /internal/observability/investigation/root_cause_analysis', + options: { + tags: [], + }, + params: z.object({ + body: z.object({ + investigationId: z.string(), + rangeFrom: z.string(), + rangeTo: z.string(), + serviceName: z.string(), + context: 
z.string(), + connectorId: z.string(), + completeInBackground: z.boolean().optional(), + }), + }), + handler: async ({ + params, + plugins, + request, + context: requestContext, + logger, + }): Promise>> => { + const { + body: { + investigationId, + context, + rangeFrom, + rangeTo, + serviceName, + connectorId, + completeInBackground, + }, + } = params; + + if (!plugins.observabilityAIAssistant) { + throw preconditionFailed('Observability AI Assistant plugin is not available'); + } + + const start = datemath.parse(rangeFrom)?.valueOf()!; + const end = datemath.parse(rangeTo)?.valueOf()!; + + const coreContext = await requestContext.core; + + const coreEsClient = coreContext.elasticsearch.client.asCurrentUser; + const soClient = coreContext.savedObjects.client; + const uiSettingsClient = coreContext.uiSettings.client; + + const repository = investigationRepositoryFactory({ soClient, logger }); + + const esClient = createObservabilityEsClient({ + client: coreEsClient, + logger, + plugin: 'investigateApp', + }); + + const [ + investigation, + rulesClient, + alertsClient, + inferenceClient, + observabilityAIAssistantClient, + spaceId = 'default', + apmIndices, + logSources, + sloSummaryIndices, + ] = await Promise.all([ + repository.findById(investigationId), + (await plugins.alerting.start()).getRulesClientWithRequest(request), + (await plugins.ruleRegistry.start()).getRacClientWithRequest(request), + (await plugins.inference.start()).getClient({ request }), + plugins + .observabilityAIAssistant!.start() + .then((observabilityAIAssistantStart) => + observabilityAIAssistantStart.service.getClient({ request, scopes: ['observability'] }) + ), + (await plugins.spaces?.start())?.spacesService.getSpaceId(request), + plugins.apmDataAccess.setup.getApmIndices(soClient), + uiSettingsClient.get(OBSERVABILITY_LOGS_DATA_ACCESS_LOG_SOURCES_ID) as Promise, + (await plugins.slo.start()).getSloClientWithRequest(request).getSummaryIndices(), + ]); + + const next$ = 
runRootCauseAnalysis({ + alertsClient, + connectorId, + start, + end, + esClient, + inferenceClient, + indices: { + logs: logSources, + traces: [apmIndices.span, apmIndices.error, apmIndices.transaction], + sloSummaries: sloSummaryIndices, + }, + rulesClient, + observabilityAIAssistantClient, + serviceName, + spaceId, + context, + logger, + }).pipe( + switchMap((event) => { + return of({ + type: 'event' as const, + event, + }); + }) + ); + + if (completeInBackground) { + const shared$ = next$.pipe(share()); + + shared$ + .pipe( + toArray(), + catchError(() => { + return of(); + }), + switchMap((events) => { + return from( + repository.save({ + ...investigation, + rootCauseAnalysis: { + events: events.map(({ event }) => event), + }, + }) + ); + }) + ) + .subscribe({ + error: (error) => { + logger.error(`Failed to update investigation: ${error.message}`); + logger.error(error); + }, + }); + + return shared$; + } + + return next$; + }, +}); diff --git a/x-pack/plugins/observability_solution/investigate_app/server/types.ts b/x-pack/plugins/observability_solution/investigate_app/server/types.ts index 8803221000d5b..705339dfbf2e9 100644 --- a/x-pack/plugins/observability_solution/investigate_app/server/types.ts +++ b/x-pack/plugins/observability_solution/investigate_app/server/types.ts @@ -5,11 +5,23 @@ * 2.0. 
*/ -import { ObservabilityPluginSetup } from '@kbn/observability-plugin/server'; -import { +import type { ObservabilityPluginSetup } from '@kbn/observability-plugin/server'; +import type { RuleRegistryPluginSetupContract, RuleRegistryPluginStartContract, } from '@kbn/rule-registry-plugin/server'; +import type { AlertingServerSetup, AlertingServerStart } from '@kbn/alerting-plugin/server/plugin'; +import type { SLOServerStart, SLOServerSetup } from '@kbn/slo-plugin/server'; +import type { InferenceServerStart, InferenceServerSetup } from '@kbn/inference-plugin/server'; +import type { SpacesPluginSetup, SpacesPluginStart } from '@kbn/spaces-plugin/server'; +import type { + ApmDataAccessPluginStart, + ApmDataAccessPluginSetup, +} from '@kbn/apm-data-access-plugin/server'; +import type { + ObservabilityAIAssistantServerStart, + ObservabilityAIAssistantServerSetup, +} from '@kbn/observability-ai-assistant-plugin/server'; import { UsageCollectionSetup } from '@kbn/usage-collection-plugin/server'; /* eslint-disable @typescript-eslint/no-empty-interface*/ @@ -19,11 +31,23 @@ export interface ConfigSchema {} export interface InvestigateAppSetupDependencies { observability: ObservabilityPluginSetup; ruleRegistry: RuleRegistryPluginSetupContract; + slo: SLOServerSetup; + alerting: AlertingServerSetup; + inference: InferenceServerSetup; + spaces?: SpacesPluginSetup; + apmDataAccess: ApmDataAccessPluginSetup; + observabilityAIAssistant?: ObservabilityAIAssistantServerSetup; usageCollection: UsageCollectionSetup; } export interface InvestigateAppStartDependencies { ruleRegistry: RuleRegistryPluginStartContract; + slo: SLOServerStart; + alerting: AlertingServerStart; + inference: InferenceServerStart; + spaces?: SpacesPluginStart; + apmDataAccess: ApmDataAccessPluginStart; + observabilityAIAssistant?: ObservabilityAIAssistantServerStart; } export interface InvestigateAppServerSetup {} diff --git a/x-pack/plugins/observability_solution/investigate_app/tsconfig.json 
b/x-pack/plugins/observability_solution/investigate_app/tsconfig.json index bc67b591a57b8..1bce5cad1c796 100644 --- a/x-pack/plugins/observability_solution/investigate_app/tsconfig.json +++ b/x-pack/plugins/observability_solution/investigate_app/tsconfig.json @@ -17,57 +17,67 @@ ".storybook/**/*.js" ], "kbn_references": [ - "@kbn/core", - "@kbn/react-kibana-context-theme", - "@kbn/shared-ux-link-redirect-app", - "@kbn/kibana-react-plugin", - "@kbn/i18n", - "@kbn/embeddable-plugin", - "@kbn/observability-ai-assistant-plugin", - "@kbn/lens-plugin", "@kbn/esql", - "@kbn/esql-utils", - "@kbn/data-plugin", - "@kbn/es-types", - "@kbn/field-types", - "@kbn/expressions-plugin", - "@kbn/deeplinks-observability", - "@kbn/logging", + "@kbn/core", "@kbn/data-views-plugin", - "@kbn/observability-shared-plugin", - "@kbn/config-schema", - "@kbn/investigate-plugin", - "@kbn/dataset-quality-plugin", - "@kbn/utility-types-jest", - "@kbn/content-management-plugin", + "@kbn/expressions-plugin", "@kbn/kibana-utils-plugin", - "@kbn/visualization-utils", + "@kbn/utility-types-jest", + "@kbn/es-types", + "@kbn/data-plugin", + "@kbn/embeddable-plugin", "@kbn/unified-search-plugin", - "@kbn/es-query", + "@kbn/kibana-react-plugin", "@kbn/server-route-repository", - "@kbn/security-plugin", - "@kbn/ui-actions-plugin", - "@kbn/server-route-repository-utils", - "@kbn/core-saved-objects-server", - "@kbn/rule-registry-plugin", + "@kbn/server-route-repository-client", + "@kbn/react-kibana-context-theme", + "@kbn/shared-ux-link-redirect-app", "@kbn/shared-ux-router", + "@kbn/i18n", "@kbn/investigation-shared", - "@kbn/core-security-common", - "@kbn/saved-objects-finder-plugin", - "@kbn/presentation-containers", + "@kbn/lens-plugin", + "@kbn/rule-registry-plugin", + "@kbn/security-plugin", + "@kbn/rule-data-utils", + "@kbn/investigate-plugin", + "@kbn/observability-utils-browser", "@kbn/lens-embeddable-utils", "@kbn/i18n-react", - "@kbn/zod", - "@kbn/observability-plugin", - "@kbn/licensing-plugin", 
- "@kbn/rule-data-utils", + "@kbn/es-query", + "@kbn/saved-objects-finder-plugin", + "@kbn/presentation-containers", + "@kbn/observability-ai-server", + "@kbn/charts-plugin", + "@kbn/observability-shared-plugin", + "@kbn/core-security-common", + "@kbn/deeplinks-observability", + "@kbn/logging", + "@kbn/esql-utils", + "@kbn/observability-ai-assistant-plugin", + "@kbn/observability-ai-assistant-app-plugin", + "@kbn/content-management-plugin", + "@kbn/dataset-quality-plugin", + "@kbn/ui-actions-plugin", + "@kbn/field-types", "@kbn/entities-schema", - "@kbn/core-elasticsearch-server", + "@kbn/observability-plugin", + "@kbn/config-schema", + "@kbn/visualization-utils", + "@kbn/usage-collection-plugin", "@kbn/calculate-auto", "@kbn/ml-random-sampler-utils", - "@kbn/charts-plugin", - "@kbn/observability-utils-browser", - "@kbn/usage-collection-plugin", + "@kbn/zod", "@kbn/inference-common", + "@kbn/core-elasticsearch-server", + "@kbn/sse-utils", + "@kbn/management-settings-ids", + "@kbn/observability-utils-server", + "@kbn/licensing-plugin", + "@kbn/core-saved-objects-server", + "@kbn/alerting-plugin", + "@kbn/slo-plugin", + "@kbn/inference-plugin", + "@kbn/spaces-plugin", + "@kbn/apm-data-access-plugin", ], } diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant/common/convert_messages_for_inference.ts b/x-pack/plugins/observability_solution/observability_ai_assistant/common/convert_messages_for_inference.ts index 974b002ea93c6..229183ed142a7 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant/common/convert_messages_for_inference.ts +++ b/x-pack/plugins/observability_solution/observability_ai_assistant/common/convert_messages_for_inference.ts @@ -52,6 +52,7 @@ export function convertMessagesForInference(messages: Message[]): InferenceMessa } inferenceMessages.push({ + name: message.message.name!, role: InferenceMessageRole.Tool, response: JSON.parse(message.message.content ?? 
'{}'), toolCallId: toolCallRequest.toolCalls![0].toolCallId, diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant/public/components/message_panel/message_text.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant/public/components/message_panel/message_text.tsx index 85fa0f4609903..a473e34ba2a0c 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant/public/components/message_panel/message_text.tsx +++ b/x-pack/plugins/observability_solution/observability_ai_assistant/public/components/message_panel/message_text.tsx @@ -156,7 +156,14 @@ export function MessageText({ loading, content, onActionClick }: Props) { }, table: (props) => ( <> - + ), diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant/server/index.ts b/x-pack/plugins/observability_solution/observability_ai_assistant/server/index.ts index 0ad41969cedc2..b84234164f8c8 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant/server/index.ts +++ b/x-pack/plugins/observability_solution/observability_ai_assistant/server/index.ts @@ -18,6 +18,8 @@ export type { ObservabilityAIAssistantServerSetup, } from './types'; +export type { ObservabilityAIAssistantClient } from './service/client'; + export { aiAssistantLogsIndexPattern, aiAssistantSimulatedFunctionCalling, @@ -26,6 +28,9 @@ export { export { streamIntoObservable } from './service/util/stream_into_observable'; +export { createFunctionRequestMessage } from '../common/utils/create_function_request_message'; +export { createFunctionResponseMessage } from '../common/utils/create_function_response_message'; + export const config: PluginConfigDescriptor = { deprecations: ({ unusedFromRoot }) => [ unusedFromRoot('xpack.observability.aiAssistant.enabled', { diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/client/index.test.ts 
b/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/client/index.test.ts index 89e7aa4cbb4de..2456499b2d66f 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/client/index.test.ts +++ b/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/client/index.test.ts @@ -12,8 +12,6 @@ import { last, merge, repeat } from 'lodash'; import { Subject, Observable } from 'rxjs'; import { EventEmitter, type Readable } from 'stream'; import { finished } from 'stream/promises'; -import type { InferenceClient } from '@kbn/inference-plugin/server'; -import { ChatCompletionEventType as InferenceChatCompletionEventType } from '@kbn/inference-common'; import { ObservabilityAIAssistantClient } from '.'; import { MessageRole, type Message } from '../../../common'; import { @@ -21,6 +19,8 @@ import { MessageAddEvent, StreamingChatResponseEventType, } from '../../../common/conversation_complete'; +import { ChatCompletionEventType as InferenceChatCompletionEventType } from '@kbn/inference-common'; +import { InferenceClient } from '@kbn/inference-plugin/server'; import { createFunctionResponseMessage } from '../../../common/utils/create_function_response_message'; import { CONTEXT_FUNCTION_NAME } from '../../functions/context'; import { ChatFunctionClient } from '../chat_function_client'; @@ -1530,6 +1530,7 @@ describe('Observability AI Assistant client', () => { const parsed = last(body.messages); expect(parsed).toEqual({ + name: 'get_top_alerts', role: 'tool', response: { message: 'Function response exceeded the maximum length allowed and was truncated', diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/client/index.ts b/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/client/index.ts index 688bd7a2ec860..c03f7d6333825 100644 --- 
a/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/client/index.ts +++ b/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/client/index.ts @@ -627,9 +627,11 @@ export class ObservabilityAIAssistantClient { recall = async ({ queries, categories, + limit, }: { queries: Array<{ text: string; boost?: number }>; categories?: string[]; + limit?: { size?: number; tokenCount?: number }; }): Promise => { return ( this.dependencies.knowledgeBaseService?.recall({ @@ -639,6 +641,8 @@ export class ObservabilityAIAssistantClient { categories, esClient: this.dependencies.esClient, uiSettingsClient: this.dependencies.uiSettingsClient, + // KnowledgeBaseService.recall reads `limit.tokens`, not `limit.tokenCount`: translate the public key so a caller-supplied token budget is honored + limit: { size: limit?.size, tokens: limit?.tokenCount }, }) || [] ); }; diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/index.ts b/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/index.ts index 1cf1cdc326fdf..bb77dfc768d95 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/index.ts +++ b/x-pack/plugins/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/index.ts @@ -134,6 +134,7 @@ export class KnowledgeBaseService { namespace, esClient, uiSettingsClient, + limit = {}, }: { queries: Array<{ text: string; boost?: number }>; categories?: string[]; @@ -141,6 +142,7 @@ export class KnowledgeBaseService { namespace: string; esClient: { asCurrentUser: ElasticsearchClient; asInternalUser: ElasticsearchClient }; uiSettingsClient: IUiSettingsClient; + limit?: { tokens?: number; size?: number }; }): Promise => { if (!this.dependencies.config.enableKnowledgeBase) { return []; @@ -186,9 +188,9 @@ export class KnowledgeBaseService { documentsFromKb.concat(documentsFromConnectors), 'score', 'desc' - ).slice(0, 20); + ).slice(0, limit.size ?? 20); - const MAX_TOKENS = 4000; + const maxTokens = limit.tokens ??
4_000; let tokenCount = 0; @@ -197,7 +199,7 @@ export class KnowledgeBaseService { for (const entry of sortedEntries) { returnedEntries.push(entry); tokenCount += encode(entry.text).length; - if (tokenCount >= MAX_TOKENS) { + if (tokenCount >= maxTokens) { break; } } diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant/server/types.ts b/x-pack/plugins/observability_solution/observability_ai_assistant/server/types.ts index ece417d968a13..3ee66bfaed664 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant/server/types.ts +++ b/x-pack/plugins/observability_solution/observability_ai_assistant/server/types.ts @@ -23,7 +23,7 @@ import type { CloudSetup, CloudStart } from '@kbn/cloud-plugin/server'; import type { ServerlessPluginSetup, ServerlessPluginStart } from '@kbn/serverless/server'; import type { RuleRegistryPluginStartContract } from '@kbn/rule-registry-plugin/server'; import type { AlertingServerSetup, AlertingServerStart } from '@kbn/alerting-plugin/server'; -import type { InferenceServerStart } from '@kbn/inference-plugin/server'; +import type { InferenceServerSetup, InferenceServerStart } from '@kbn/inference-plugin/server'; import type { ObservabilityAIAssistantService } from './service'; export interface ObservabilityAIAssistantServerSetup { @@ -50,6 +50,7 @@ export interface ObservabilityAIAssistantPluginSetupDependencies { cloud?: CloudSetup; serverless?: ServerlessPluginSetup; alerting: AlertingServerSetup; + inference: InferenceServerSetup; } export interface ObservabilityAIAssistantPluginStartDependencies { diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant/tsconfig.json b/x-pack/plugins/observability_solution/observability_ai_assistant/tsconfig.json index 77b81c9c72882..a79df51d65af7 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant/tsconfig.json +++ b/x-pack/plugins/observability_solution/observability_ai_assistant/tsconfig.json @@ -46,6 +46,7 @@ 
"@kbn/management-settings-ids", "@kbn/ai-assistant-common", "@kbn/inference-common", + "@kbn/inference-plugin", "@kbn/core-lifecycle-server", "@kbn/server-route-repository-utils", "@kbn/inference-plugin" diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/.storybook/storybook_decorator.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/.storybook/storybook_decorator.tsx new file mode 100644 index 0000000000000..ce283bbeccf0a --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/.storybook/storybook_decorator.tsx @@ -0,0 +1,11 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import React, { ComponentType } from 'react'; + +export function KibanaReactStorybookDecorator(Story: ComponentType) { + return ; +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/kibana.jsonc b/x-pack/plugins/observability_solution/observability_ai_assistant_app/kibana.jsonc index 957ca0272c087..8d509e50059a7 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant_app/kibana.jsonc +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/kibana.jsonc @@ -33,6 +33,8 @@ "features", "inference", "logsDataAccess", + "spaces", + "slo", "llmTasks" ], "optionalPlugins": [ @@ -44,4 +46,4 @@ ], "extraPublicDirs": [] } -} \ No newline at end of file +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/charts/spark_plot.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/charts/spark_plot.tsx index ddb57a127fe19..afd7e9f4bceb3 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/charts/spark_plot.tsx 
+++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/charts/spark_plot.tsx @@ -5,26 +5,45 @@ * 2.0. */ import { + AnnotationDomainType, BarSeries, Chart, CurveType, + LineAnnotation, LineSeries, PartialTheme, + Position, ScaleType, Settings, Tooltip, - LineAnnotation, - AnnotationDomainType, - Position, } from '@elastic/charts'; -import React from 'react'; +import { EuiFlexGroup, EuiPanel, EuiText } from '@elastic/eui'; +import { UI_SETTINGS } from '@kbn/data-service'; import { i18n } from '@kbn/i18n'; +import moment from 'moment'; +import React from 'react'; import { useChartTheme } from '../../hooks/use_chart_theme'; +import { useKibana } from '../../hooks/use_kibana'; + +function AnnotationTooltip({ timestamp, label }: { timestamp: number; label: React.ReactNode }) { + const dateFormat = useKibana().services.uiSettings.get(UI_SETTINGS.DATE_FORMAT); + const formattedTime = moment(timestamp).format(dateFormat); + + return ( + + + {formattedTime} + {label} + + + ); +} export function SparkPlot({ type, timeseries, annotations, + compressed, }: { type: 'line' | 'bar'; timeseries: Array<{ x: number; y: number | null }>; @@ -33,8 +52,9 @@ export function SparkPlot({ x: number; color: string; icon: React.ReactNode; - label: string; + label: React.ReactNode; }>; + compressed?: boolean; }) { const defaultChartTheme = useChartTheme(); @@ -56,7 +76,7 @@ export function SparkPlot({ { return ( { + return ( + + ); + }} /> ); })} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/entity_badge/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/entity_badge/index.tsx new file mode 100644 index 0000000000000..edfc956a448d1 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/entity_badge/index.tsx @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import { EuiBadge, EuiFlexGroup, EuiFlexItem } from '@elastic/eui'; +import React from 'react'; + +export function EntityBadge({ + entity, + color, +}: { + entity: Record; + color?: React.ComponentProps['color']; +}) { + return ( + + {Object.entries(entity).map(([field, value]) => ( + + {`${field}:${value}`} + + ))} + + ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/mock/complete_root_cause_analysis.json b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/mock/complete_root_cause_analysis.json new file mode 100644 index 0000000000000..ec2ce7810bc97 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/mock/complete_root_cause_analysis.json @@ -0,0 +1,19532 @@ +[ + { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "toolCallId": "call_IURenJnAu7MGepsXonmWtEbx", + "function": { + "name": "observe", + "arguments": { + "observe": true + } + } + } + ] + }, + { + "name": "observe", + "response": { + "content": "## Observations\n\nThe alert indicates that the `controller` service is experiencing a high number of 500 errors for the `/api/cart` endpoint. The error rate is 98.78%, significantly above the threshold of 1% over a 1-minute period. 
This is a pronounced and rapid onset of errors that suggests a severe and immediate issue with the `controller` service or its dependencies starting at `2024-11-02T10:56:42.498Z`.\n\n## Symptoms\n\n- **Onset**: Rapid\n- **Severity**: Pronounced\n- **Time of Change**: `2024-11-02T10:56:42.498Z`\n\nGiven the sudden and high volume of errors, the symptoms suggest a significant event such as a deployment, configuration change, or a failure in a critical dependency.\n\n## Next Steps\n\nThe next step is to further investigate the logs for the `controller` service to identify any changes or error patterns around the specified time.\n\n**Entity to Investigate**:\n- `service.name`: `controller`" + }, + "data": { + "content": "## Observations\n\nThe alert indicates that the `controller` service is experiencing a high number of 500 errors for the `/api/cart` endpoint. The error rate is 98.78%, significantly above the threshold of 1% over a 1-minute period. This is a pronounced and rapid onset of errors that suggests a severe and immediate issue with the `controller` service or its dependencies starting at `2024-11-02T10:56:42.498Z`.\n\n## Symptoms\n\n- **Onset**: Rapid\n- **Severity**: Pronounced\n- **Time of Change**: `2024-11-02T10:56:42.498Z`\n\nGiven the sudden and high volume of errors, the symptoms suggest a significant event such as a deployment, configuration change, or a failure in a critical dependency.\n\n## Next Steps\n\nThe next step is to further investigate the logs for the `controller` service to identify any changes or error patterns around the specified time.\n\n**Entity to Investigate**:\n- `service.name`: `controller`", + "investigations": [] + }, + "role": "tool", + "toolCallId": "call_IURenJnAu7MGepsXonmWtEbx" + }, + { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "toolCallId": "call_6Vcfeg2e2IUC0gSX0bwejZl1", + "function": { + "name": "investigateEntity", + "arguments": { + "context": "Investigating the alert for the controller service, 
which indicates a high number of 500 errors for the /api/cart endpoint. Error rate is 98.78%, above the threshold of 1% over a 1-minute duration. Start time of the issue is 2024-11-02T10:56:42.498Z. Will review controller service logs for changes or error patterns around this time.", + "entity": { + "field": "service.name", + "value": "controller" + } + } + } + } + ] + }, + { + "name": "investigateEntity", + "role": "tool", + "toolCallId": "call_6Vcfeg2e2IUC0gSX0bwejZl1", + "response": { + "entity": { + "service.name": "controller" + }, + "relatedEntities": [ + { + "entity": { + "service.name": "frontend" + }, + "reason": "The frontend service interacts closely with the controller (ingress-nginx-controller). Requests directed to /api/cart initiate from frontendproxy which routes to frontend. Given the high error rate at the /api/cart endpoint, the issue might involve downstream calls from frontend to dependent services.", + "confidence": "high" + }, + { + "entity": { + "service.name": "cartservice" + }, + "reason": "The /api/cart endpoint directly involves manipulating or retrieving data from cartservice. Given the 500 errors on requests routed via the controller, it's likely that cartservice might be either causing or being affected by these failures.", + "confidence": "high" + }, + { + "entity": { + "service.name": "frontend-web" + }, + "reason": "URL referrer and path fragments suggest client-side interactions impacting the controller's ingress. Issues in the client's request construction or timing might be indirectly impacting how the controller handles these requests.", + "confidence": "moderate" + }, + { + "entity": { + "service.name": "checkoutservice" + }, + "reason": "Though the controller deals directly with cart interactions, completion of cart actions could trigger subsequent processes in checkoutservice. 
Errors during cart operations could thereby propagate issues if mismanaged upstream.", + "confidence": "low" + }, + { + "entity": { + "service.name": "coredns" + }, + "reason": "DNS resolution issues could lead to inability to locate the correct backend service IP for controller, causing errors in routing HTTP requests.", + "confidence": "low" + }, + { + "entity": { + "service.name": "frontend" + }, + "reason": "The `frontend` service is called by the `frontendproxy`, which is indicated as the `attributes.upstream.name` in `controller` logs. In the context provided, the `frontend` communicates with other backend services to process user interactions and might be an upstream dependency for the `controller`. The problem might cascade from `frontendproxy` to `frontend`.", + "confidence": "high" + }, + { + "entity": { + "service.name": "cartservice" + }, + "reason": "The `cartservice` handles shopping cart actions which map the `controller`'s `/api/cart` endpoint. If the `cartservice` has issues, they would cascade to the `controller` /api/cart endpoint that depends on this service for cart management functions.", + "confidence": "high" + }, + { + "entity": { + "service.name": "currencyservice" + }, + "reason": "Handling currency conversions, indirectly connected to cart operations. The issues might arise if currency conversion fails during cart operations, contributing to the 500 errors in the `controller`.", + "confidence": "moderate" + }, + { + "entity": { + "service.name": "frauddetectionservice" + }, + "reason": "No specific logs point directly; contextual relevance due to transaction handling. 
If fraud detection triggers false flags, it can affect cart transactions.", + "confidence": "low" + }, + { + "entity": { + "service.name": "cartservice" + }, + "reason": "The `cartservice` directly manages shopping cart operations over the gRPC protocol, and errors here could lead to repeated 500 errors in the `controller`.", + "confidence": "high" + }, + { + "entity": { + "service.name": "frontend" + }, + "reason": "The `frontend` service handles user interactions that are passed to the `controller`. Issues in `frontend` could lead to failures in `controller`, resulting in 500 errors.", + "confidence": "medium to high" + }, + { + "entity": { + "service.name": "loadgenerator" + }, + "reason": "The `loadgenerator` simulates high user traffic which might strain other services and indirectly cause 500 errors in the `controller`.", + "confidence": "medium" + }, + { + "entity": { + "service.name": "etcd" + }, + "reason": "As a control plane activity tracker for Ingress, indirect impacts from `etcd` inconsistencies can propagate as issues in services interacting via Ingress, including `controller`.", + "confidence": "medium" + } + ], + "summary": "### Characteristics of the `controller` Entity\n\n**1. Infrastructure & Environment:**\n\nThe `controller` service is deployed within a Kubernetes environment, specifically in the `ingress-nginx` namespace. The service is associated with a `controller` container running within the `ingress-nginx-controller-bc57996ff-r6blf` pod. This pod is scheduled on a node identified as `minikube`, and the service is part of the `ingress-nginx-controller` deployment. Additionally, the infrastructure is based on an ARM64 architecture, running Ubuntu 24.04.1 LTS, with deployment environment set to `opentelemetry-demo`.\n\n**2. Communication Characteristics:**\n\nThe `controller` service functions as an Ingress controller, managing incoming traffic and routing it to backend services. 
It processes HTTP requests, typically version 1.1, and communicates with other services via HTTP protocols. Key endpoints involved in the communication include:\n- Inbound requests are received at endpoints like `/api/cart` and `/api/checkout`.\n- Outbound requests are routed primarily to the `frontendproxy` service, evidenced by the upstream address `10.244.0.26:8080`.\n\nThe service handles both `GET` and `POST` requests, and its communication paths include requests from user agents like \"HeadlessChrome\" and \"python-requests\".\n\n**3. Context of Entity in Investigation:**\n\n**Reason for Investigation:**\nThe `controller` service is currently under investigation due to an alert triggered by a high number of 500 errors (98.78% error rate) occurring at the `/api/cart` endpoint. The significant spike in error rates is concerning and suggests a potential issue within the service or its interaction with dependencies.\n\n**Relation to Other Entities:**\nThe high error rates on the `controller` are likely impacting the overall user experience of the application, notably through the `/api/cart` functionality, which interacts with the `cartservice`. As the `controller` service forwards requests to the `frontendproxy`, which in turn interacts with various backend services like `cartservice`, `checkoutservice`, `currencyservice`, and others, the ripple effect of these errors might be felt across multiple services in the architecture. This interconnectedness necessitates thorough investigation of `controller's` logs and error patterns to isolate the root cause and mitigate the disruption in the flow of HTTP requests.\n\nBy understanding the behavior and interaction of the `controller` service within the broader microservices architecture, we aim to identify and resolve the underlying issue causing the increase in 500 errors. 
This involves examining logs for patterns, determining if recent changes or specific conditions triggered the fault, and analyzing dependencies that could contribute to the error rates observed. This step is integral to restoring normal operation and maintaining service reliability and user satisfaction.\n\n### Analysis of Log Patterns for service.name:controller\n\n#### Pattern Group: Request Errors\n- **Pattern:**\n ```\n .*?10\\\\.244\\\\.0\\\\.38.+?02.+?2024.+?10.+?0000.+?api.+?HTTP.+?1\\\\.1.+?python.+?requests.+?2\\\\.31\\\\.0.+?default.+?my.+?otel.+?demo.+?frontendproxy.+?8080.+?10\\\\.244\\\\.0\\\\.26.+?8080.*?\n ```\n - **Sample:**\n ```\n 10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n ```\n - **Description:** This pattern represents request logs where an HTTP POST request to the `/api/cart` endpoint resulted in a `500 Internal Server Error`. The requests are from a client using `python-requests/2.31.0`.\n\n- **Count:** 263 occurrences\n\n- **Timeseries Analysis:**\n - **Trend:** The number of occurrences show fluctuation with no significant change in the pattern.\n - **Log Counts:** \n - Example timestamps and counts:\n - `2024-11-02T10:56:00.000Z: 6`\n - `2024-11-02T10:56:30.000Z: 2`\n - `2024-11-02T10:57:00.000Z: 0`\n\n### Summary\nBased on the provided logs patterns, the `controller` service is experiencing a high number of `500 Internal Server Errors` when handling POST requests to the `/api/cart` endpoint. This is evident from the consistent error logs observed at different timestamps. The alert threshold breach likely correlates with these error occurrences.\n\nNo other unusual patterns such as connection issues, startup messages, or garbage collection messages were identified in the logs around the error events. 
Further investigation on the backend or potential dependency issues (e.g., `cartservice`) might be needed to determine the exact cause of these errors.\n\n### Timeline of significant events\n\n- **2024-11-02T10:56:42.498Z**\n - **Alert Triggered**: High number of 500 errors for the /api/cart endpoint. Error rate is 98.78%, above the threshold of 1% over a 1-minute duration.\n \n- **2024-11-02T10:56:00.000Z**\n - **Log Entry**: 6 occurrences of HTTP POST requests to the `/api/cart` endpoint resulting in `500 Internal Server Error`.\n - **Example Log**:\n ```\n 10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n ```\n \n- **2024-11-02T10:56:30.000Z**\n - **Log Entry**: 2 occurrences of HTTP POST requests to the `/api/cart` endpoint resulting in `500 Internal Server Error`.\n - **Example Log**:\n ```\n 10.244.0.38 - - [02/Nov/2024:10:56:35 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n ```\n\n### Context and reasoning\n\nThe `controller` service plays a critical role in handling incoming HTTP requests within the Kubernetes environment, specifically acting as an Ingress controller within the `ingress-nginx` namespace. The recent alert, triggered by a high number of 500 errors, indicates a significant disruption in the normal operations of the `controller` service, particularly impacting the `/api/cart` endpoint. 
The log entries leading up to and following the alert highlight a pattern of consistent `500 Internal Server Error` responses, specifically around the timestamp in question.\n\nThe pattern group identified, related to request errors, shows repeated failures for HTTP POST requests from a client using `python-requests/2.31.0`, likely directed towards the `cartservice` via the `frontendproxy`. This high error rate suggests an underlying issue either within the `controller` service itself, its communication path to the `frontendproxy`, or potentially further downstream with the backend services like `cartservice`.\n\nNo other unusual log patterns, such as connection issues or any system degradation messages were noted, indicating the problem is likely concentrated around the interaction between `controller` and its immediate dependencies rather than a broader system-wide issue. By correlating the timestamps and repeated error logs, a clear link can be drawn between the `500 Internal Server Error` responses and the timeline, further supporting the hypothesis of a specific fault affecting the `/api/cart` endpoint.\n\nFurther investigation into the dependencies of the `controller` service, especially focusing on the interactions with the `frontendproxy` and subsequently `cartservice`, is warranted to isolate and rectify the root cause of these error patterns." + }, + "data": { + "attachments": { + "alerts": [ + { + "kibana.alert.reason": "500 Errors is 98.78049, above the threshold of 1. 
(duration: 1 min, data view: otel_logs_data (Automated by Demo CLI), group: controller,/api/cart)", + "kibana.alert.evaluation.values": [ + 98.78048780487805 + ], + "kibana.alert.evaluation.threshold": [ + 1 + ], + "kibana.alert.group": [ + { + "field": "service.name", + "value": "controller" + }, + { + "field": "url.path", + "value": "/api/cart" + } + ], + "tags": [ + "demo", + "cli-created" + ], + "service.name": "controller", + "kibana.alert.rule.category": "Custom threshold", + "kibana.alert.rule.consumer": "logs", + "kibana.alert.rule.execution.uuid": "4187827d-686e-4098-98e3-21e13495798e", + "kibana.alert.rule.name": "NGINX 500s", + "kibana.alert.rule.parameters": { + "criteria": [ + { + "comparator": ">", + "metrics": [ + { + "name": "A", + "filter": "http.response.status_code:*", + "aggType": "count" + }, + { + "name": "B", + "filter": "http.response.status_code>=500", + "aggType": "count" + } + ], + "threshold": [ + 1 + ], + "timeSize": 1, + "timeUnit": "m", + "equation": "(B/A) * 100", + "label": "500 Errors" + } + ], + "alertOnNoData": false, + "alertOnGroupDisappear": false, + "searchConfiguration": { + "query": { + "query": "k8s.namespace.name: \"ingress-nginx\" AND url.path: /api/*", + "language": "kuery" + }, + "index": "otel_logs_data" + }, + "groupBy": [ + "service.name", + "url.path" + ] + }, + "kibana.alert.rule.producer": "observability", + "kibana.alert.rule.revision": 0, + "kibana.alert.rule.rule_type_id": "observability.rules.custom_threshold", + "kibana.alert.rule.tags": [ + "demo", + "cli-created" + ], + "kibana.alert.rule.uuid": "9055220c-8fb1-4f9f-be7c-0a33eb2bafc5", + "kibana.space_ids": [ + "default" + ], + "@timestamp": "2024-11-02T10:56:42.498Z", + "event.action": "open", + "event.kind": "signal", + "kibana.alert.rule.execution.timestamp": "2024-11-02T10:56:42.498Z", + "kibana.alert.action_group": "custom_threshold.fired", + "kibana.alert.flapping": false, + "kibana.alert.flapping_history": [ + true + ], + "kibana.alert.instance.id": 
"controller,/api/cart", + "kibana.alert.maintenance_window_ids": [], + "kibana.alert.consecutive_matches": 1, + "kibana.alert.status": "active", + "kibana.alert.uuid": "b97a1b20-d14b-4835-b948-bd7a423284eb", + "kibana.alert.severity_improving": false, + "kibana.alert.workflow_status": "open", + "kibana.alert.duration.us": 0, + "kibana.alert.start": "2024-11-02T10:56:42.498Z", + "kibana.alert.time_range": { + "gte": "2024-11-02T10:56:42.498Z" + }, + "kibana.version": "9.0.0" + }, + { + "kibana.alert.reason": "500 Errors is 100, above the threshold of 1. (duration: 1 min, data view: otel_logs_data (Automated by Demo CLI), group: controller,/api/checkout)", + "kibana.alert.evaluation.values": [ + 100 + ], + "kibana.alert.evaluation.threshold": [ + 1 + ], + "kibana.alert.group": [ + { + "field": "service.name", + "value": "controller" + }, + { + "field": "url.path", + "value": "/api/checkout" + } + ], + "tags": [ + "demo", + "cli-created" + ], + "service.name": "controller", + "kibana.alert.rule.category": "Custom threshold", + "kibana.alert.rule.consumer": "logs", + "kibana.alert.rule.execution.uuid": "4187827d-686e-4098-98e3-21e13495798e", + "kibana.alert.rule.name": "NGINX 500s", + "kibana.alert.rule.parameters": { + "criteria": [ + { + "comparator": ">", + "metrics": [ + { + "name": "A", + "filter": "http.response.status_code:*", + "aggType": "count" + }, + { + "name": "B", + "filter": "http.response.status_code>=500", + "aggType": "count" + } + ], + "threshold": [ + 1 + ], + "timeSize": 1, + "timeUnit": "m", + "equation": "(B/A) * 100", + "label": "500 Errors" + } + ], + "alertOnNoData": false, + "alertOnGroupDisappear": false, + "searchConfiguration": { + "query": { + "query": "k8s.namespace.name: \"ingress-nginx\" AND url.path: /api/*", + "language": "kuery" + }, + "index": "otel_logs_data" + }, + "groupBy": [ + "service.name", + "url.path" + ] + }, + "kibana.alert.rule.producer": "observability", + "kibana.alert.rule.revision": 0, + 
"kibana.alert.rule.rule_type_id": "observability.rules.custom_threshold", + "kibana.alert.rule.tags": [ + "demo", + "cli-created" + ], + "kibana.alert.rule.uuid": "9055220c-8fb1-4f9f-be7c-0a33eb2bafc5", + "kibana.space_ids": [ + "default" + ], + "@timestamp": "2024-11-02T10:56:42.498Z", + "event.action": "open", + "event.kind": "signal", + "kibana.alert.rule.execution.timestamp": "2024-11-02T10:56:42.498Z", + "kibana.alert.action_group": "custom_threshold.fired", + "kibana.alert.flapping": false, + "kibana.alert.flapping_history": [ + true + ], + "kibana.alert.instance.id": "controller,/api/checkout", + "kibana.alert.maintenance_window_ids": [], + "kibana.alert.consecutive_matches": 1, + "kibana.alert.status": "active", + "kibana.alert.uuid": "78472a9c-63a2-41ba-9812-47cebd48d833", + "kibana.alert.severity_improving": false, + "kibana.alert.workflow_status": "open", + "kibana.alert.duration.us": 0, + "kibana.alert.start": "2024-11-02T10:56:42.498Z", + "kibana.alert.time_range": { + "gte": "2024-11-02T10:56:42.498Z" + }, + "kibana.version": "9.0.0" + } + ], + "slos": [], + "analysis": { + "total": 56181, + "sampled": 1000, + "fields": [ + "@timestamp:date - 994 distinct values", + "app.label.component:keyword - 1 distinct values (`controller`)", + "attributes.event.name.text:text - 1 distinct values (`nginx.ingress.controller.error`)", + "attributes.event.name:keyword - 1 distinct values (`nginx.ingress.controller.error`)", + "attributes.http.request.id:keyword - 1000 distinct values (`87c973a800b3bd34d3679f0a12263c40`, `ee99fada68450d77be61deaae1f04008`, `80312e7e98e9171c8010e442784e7a04`, `a931c4f7d964f91f0a0750f3221dfa50`, `8c2e99672e225b279a7741671c976c93`, `eed4ff70db044f81adeda15bc1f3ad4c`, `dd730f83b894fc2adedfaf950cf5884c`, `4e35653560a824d6693d1e024906e9ce`, `36d6dfae2fe5d6fecbfaadd4fac5bde1`, `43c774bea806c21478f7ee57a449222b`, 990 more values)", + "attributes.http.request.method:keyword - 2 distinct values (`POST`, `GET`)", + 
"attributes.http.request.referrer:keyword - 2 distinct values (`http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/`, `http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart`)", + "attributes.http.request.size:long - 61 distinct values (`351`, `374`, `373`, `809`, `383`, `506`, `342`, `350`, `538`, `479`, 51 more values)", + "attributes.http.request.time:float - 90 distinct values (`3.172`, `1.103`, `0.053`, `4.434`, `0.019`, `3.328`, `1.01`, `3.197`, `3.133`, `2.566`, 80 more values)", + "attributes.http.response.body.size:long - 130 distinct values (`1144`, `1120`, `1024`, `19`, `1336`, `1198`, `1070`, `111157`, `111163`, `1271`, 120 more values)", + "attributes.http.response.status_code:long - 4 distinct values (`499`, `500`, `308`, `200`)", + "attributes.http.version:keyword - 1 distinct values (`1.1`)", + "attributes.log.file.path.text:text - 1 distinct values (`/var/log/pods/ingress-nginx_ingress-nginx-controller-bc57996ff-r6blf_35200065-5cde-4a7e-9018-0f0a4e2c5bb4/controller/0.log`)", + "attributes.log.file.path:keyword - 1 distinct values (`/var/log/pods/ingress-nginx_ingress-nginx-controller-bc57996ff-r6blf_35200065-5cde-4a7e-9018-0f0a4e2c5bb4/controller/0.log`)", + "attributes.log.iostream:keyword - 1 distinct values (`stdout`)", + "attributes.network.protocol.name.text:text - 1 distinct values (`http`)", + "attributes.network.protocol.name:keyword - 1 distinct values (`http`)", + "attributes.source.address:keyword - 1 distinct values (`10.244.0.38`)", + "attributes.upstream.address:keyword - 1 distinct values (`10.244.0.26:8080`)", + "attributes.upstream.name.text:text - 1 distinct values (`default-my-otel-demo-frontendproxy-8080`)", + "attributes.upstream.name:keyword - 1 distinct values (`default-my-otel-demo-frontendproxy-8080`)", + "attributes.upstream.response.size:keyword - 117 distinct values (`1149`, `1120`, `1029`, `19`, `1336`, `1203`, `1070`, `1276`, `1356`, `1105`, 107 more values)", + 
"attributes.upstream.response.status_code:keyword - 3 distinct values (`500`, `308`, `200`)", + "attributes.upstream.response.time:keyword - 89 distinct values (`3.171`, `1.007`, `1.103`, `0.053`, `4.434`, `3.328`, `1.010`, `3.197`, `3.132`, `2.565`, 79 more values)", + "attributes.url.domain:keyword - 2 distinct values (`icons`, ``)", + "attributes.url.extension:keyword - 7 distinct values (`css`, `Service/ResolveAll`, `Service/EventStream`, `png`, `svg`, `jpg`, `js`)", + "attributes.url.original.text:text - 167 distinct values (`/api/recommendations?productIds=&sessionId=cc71f7a4-0303-4404-8864-820628e21ca0¤cyCode=USD`, `/api/cart?sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF`, 165 more values)", + "attributes.url.original:keyword - 167 distinct values (`/api/recommendations?productIds=&sessionId=cc71f7a4-0303-4404-8864-820628e21ca0¤cyCode=USD`, `/api/cart?sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF`, `/api/recommendations?productIds=&sessionId=389794a9-8298-4921-a2fd-523803ed73c3¤cyCode=`, `/api/cart?sessionId=140befa2-624a-4e98-a1c2-fa9d9ccbfbdd¤cyCode=`, `/api/recommendations?productIds=&sessionId=4cbd2d72-6719-4619-b0a8-09415f9ea500¤cyCode=USD`, `/api/data/`, `/api/cart?sessionId=b9f19d90-1824-463c-b9f6-95c894a8fc64¤cyCode=`, `/api/recommendations?productIds=&sessionId=933d52cf-c574-49e7-8432-86f958764dbe¤cyCode=`, `/api/recommendations?productIds=&sessionId=213624a6-bf31-46c4-aa67-8e2c102a8763¤cyCode=USD`, `/api/cart?sessionId=221a8b46-29b7-4d66-9536-62e1a0d3ad75¤cyCode=CHF`, 157 more values)", + "attributes.url.path:keyword - 43 distinct values (`/api/products/0PUK6V6EV0`, `/api/products/9SIQT8TOJO`, `/api/products/LS4PSXUNUM`, `/api/data/`, `/api/products/L9ECAV7KIM`, `/api/data`, `/api/products/66VCHSJNUP`, `/api/products/2ZYFJ3GM2N`, `/cart`, `/images/products/NationalParkFoundationExplorascope.jpg`, 33 more values)", + "attributes.url.query:keyword - 126 distinct values 
(`productIds=&sessionId=cc71f7a4-0303-4404-8864-820628e21ca0¤cyCode=USD`, `sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF`, `productIds=&sessionId=389794a9-8298-4921-a2fd-523803ed73c3¤cyCode=`, `sessionId=140befa2-624a-4e98-a1c2-fa9d9ccbfbdd¤cyCode=`, `productIds=&sessionId=4cbd2d72-6719-4619-b0a8-09415f9ea500¤cyCode=USD`, `sessionId=b9f19d90-1824-463c-b9f6-95c894a8fc64¤cyCode=`, `productIds=&sessionId=933d52cf-c574-49e7-8432-86f958764dbe¤cyCode=`, `productIds=&sessionId=213624a6-bf31-46c4-aa67-8e2c102a8763¤cyCode=USD`, `sessionId=221a8b46-29b7-4d66-9536-62e1a0d3ad75¤cyCode=CHF`, `productIds=&sessionId=0c122e28-e8fb-4ab5-9889-ac797b5f17cf¤cyCode=USD`, 116 more values)", + "attributes.url.scheme:keyword - 1 distinct values (``)", + "attributes.user_agent.name.text:text - 2 distinct values (`Python Requests`, `HeadlessChrome`)", + "attributes.user_agent.name:keyword - 2 distinct values (`Python Requests`, `HeadlessChrome`)", + "attributes.user_agent.original.text:text - 2 distinct values (`python-requests/2.31.0`, `Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36`)", + "attributes.user_agent.original:keyword - 2 distinct values (`python-requests/2.31.0`, `Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36`)", + "attributes.user_agent.version:keyword - 2 distinct values (`2.31`, `120.0.6099`)", + "body.text:text - 1000 distinct values (`10.244.0.38 - - [02/Nov/2024:10:37:56 +0000] \"GET /api/currency? 
HTTP/1.1\" 200 199 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 598 0.007 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 199 0.007 200 87c973a800b3bd34d3679f0a12263c40\n`, `10.244.0.38 - - [02/Nov/2024:10:46:11 +0000] \"GET /_next/static/chunks/pages/_app-c923ae378a182a07.js HTTP/1.1\" 200 111150 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 484 0.017 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 111008 0.018 200 ee99fada68450d77be61deaae1f04008\n`, 998 more values)", + "data_stream.dataset:keyword - 1 distinct values (`nginx_ingress_controller.error.otel`)", + "data_stream.namespace:keyword - 1 distinct values (`default`)", + "data_stream.type:keyword - 1 distinct values (`logs`)", + "deployment.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "dropped_attributes_count:long - 1 distinct values (`0`)", + "event.dataset:keyword - 1 distinct values (`nginx_ingress_controller.error.otel`)", + "event.name:keyword - 1 distinct values (`nginx.ingress.controller.error`)", + "host.arch:keyword - 1 distinct values (`arm64`)", + "host.architecture:keyword - 1 distinct values (`arm64`)", + "host.cpu.cache.l2.size:long - 1 distinct values (`0`)", + "host.cpu.family:keyword - 1 distinct values (``)", + "host.cpu.model.id:keyword - 1 distinct values (`0x000`)", + "host.cpu.model.name:keyword - 1 distinct values (``)", + "host.cpu.stepping:keyword - 1 distinct values (`0`)", + "host.cpu.vendor.id:keyword - 1 distinct values (`Apple`)", + "host.ip:ip - 2 distinct values (`10.244.0.19`, `fe80::28ce:acff:fe42:368e`)", + "host.mac:keyword - 1 distinct values (`2A-CE-AC-42-36-8E`)", + "host.os.full:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS 
(Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "host.os.platform:keyword - 1 distinct values (`linux`)", + "http.request.id:keyword - 1000 distinct values (`87c973a800b3bd34d3679f0a12263c40`, `ee99fada68450d77be61deaae1f04008`, `80312e7e98e9171c8010e442784e7a04`, `a931c4f7d964f91f0a0750f3221dfa50`, `8c2e99672e225b279a7741671c976c93`, `eed4ff70db044f81adeda15bc1f3ad4c`, `dd730f83b894fc2adedfaf950cf5884c`, `4e35653560a824d6693d1e024906e9ce`, `36d6dfae2fe5d6fecbfaadd4fac5bde1`, `43c774bea806c21478f7ee57a449222b`, 990 more values)", + "http.request.method:keyword - 2 distinct values (`POST`, `GET`)", + "http.request.referrer:keyword - 2 distinct values (`http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/`, `http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart`)", + "http.request.size:long - 61 distinct values (`351`, `374`, `373`, `809`, `383`, `506`, `342`, `350`, `538`, `479`, 51 more values)", + "http.request.time:float - 90 distinct values (`3.172`, `1.103`, `0.053`, `4.434`, `0.019`, `3.328`, `1.01`, `3.197`, `3.133`, `2.566`, 80 more values)", + "http.response.body.size:long - 130 distinct values (`1144`, `1120`, `1024`, `19`, `1336`, `1198`, `1070`, `111157`, `111163`, `1271`, 120 more values)", + "http.response.status_code:long - 4 distinct values (`499`, `500`, `308`, `200`)", + "http.version:keyword - 1 distinct values (`1.1`)", + "k8s.container.name:keyword - 1 distinct values (`controller`)", + "k8s.container.restart_count:keyword - 1 distinct values (`0`)", + "k8s.deployment.name:keyword - 1 distinct values (`ingress-nginx-controller`)", + "k8s.namespace.name:keyword - 1 distinct values (`ingress-nginx`)", + "k8s.node.name:keyword - 1 distinct values (`minikube`)", + "k8s.pod.name:keyword - 1 distinct values (`ingress-nginx-controller-bc57996ff-r6blf`)", + "k8s.pod.start_time:keyword - 1 distinct values (`2024-10-26T08:56:56Z`)", + 
"k8s.pod.uid:keyword - 1 distinct values (`35200065-5cde-4a7e-9018-0f0a4e2c5bb4`)", + "kubernetes.deployment.name:keyword - 1 distinct values (`ingress-nginx-controller`)", + "kubernetes.namespace:keyword - 1 distinct values (`ingress-nginx`)", + "kubernetes.node.name:keyword - 1 distinct values (`minikube`)", + "kubernetes.pod.name:keyword - 1 distinct values (`ingress-nginx-controller-bc57996ff-r6blf`)", + "kubernetes.pod.uid:keyword - 1 distinct values (`35200065-5cde-4a7e-9018-0f0a4e2c5bb4`)", + "log.file.path:keyword - 1 distinct values (`/var/log/pods/ingress-nginx_ingress-nginx-controller-bc57996ff-r6blf_35200065-5cde-4a7e-9018-0f0a4e2c5bb4/controller/0.log`)", + "log.iostream:keyword - 1 distinct values (`stdout`)", + "message:text - 1000 distinct values (`10.244.0.38 - - [02/Nov/2024:10:37:56 +0000] \"GET /api/currency? HTTP/1.1\" 200 199 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 598 0.007 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 199 0.007 200 87c973a800b3bd34d3679f0a12263c40\n`, `10.244.0.38 - - [02/Nov/2024:10:46:11 +0000] \"GET /_next/static/chunks/pages/_app-c923ae378a182a07.js HTTP/1.1\" 200 111150 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 484 0.017 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 111008 0.018 200 ee99fada68450d77be61deaae1f04008\n`, 998 more values)", + "network.protocol.name:keyword - 1 distinct values (`http`)", + "observed_timestamp:date_nanos - 1000 distinct values (`2024-11-02T10:37:56.158731715Z`, `2024-11-02T10:46:11.961228541Z`, `2024-11-02T10:38:02.758802635Z`, `2024-11-02T10:54:51.563521629Z`, `2024-11-02T10:37:05.149442469Z`, `2024-11-02T10:49:01.961126259Z`, `2024-11-02T10:56:19.96308167Z`, 
`2024-11-02T10:42:06.760264887Z`, `2024-11-02T10:49:45.76210446Z`, `2024-11-02T10:54:02.562840801Z`, 990 more values)", + "os.description:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS (Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "os.type:keyword - 1 distinct values (`linux`)", + "resource.attributes.app.label.component:keyword - 1 distinct values (`controller`)", + "resource.attributes.deployment.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "resource.attributes.host.arch:keyword - 1 distinct values (`arm64`)", + "resource.attributes.host.cpu.cache.l2.size:long - 1 distinct values (`0`)", + "resource.attributes.host.cpu.family:keyword - 1 distinct values (``)", + "resource.attributes.host.cpu.model.id:keyword - 1 distinct values (`0x000`)", + "resource.attributes.host.cpu.model.name.text:text - 1 distinct values (``)", + "resource.attributes.host.cpu.model.name:keyword - 1 distinct values (``)", + "resource.attributes.host.cpu.stepping:keyword - 1 distinct values (`0`)", + "resource.attributes.host.cpu.vendor.id:keyword - 1 distinct values (`Apple`)", + "resource.attributes.host.ip:ip - 2 distinct values (`10.244.0.19`, `fe80::28ce:acff:fe42:368e`)", + "resource.attributes.host.mac:keyword - 1 distinct values (`2A-CE-AC-42-36-8E`)", + "resource.attributes.host.name:keyword - 1 distinct values (`otel-daemonset-opentelemetry-collector-agent-7jlpk`)", + "resource.attributes.k8s.container.name.text:text - 1 distinct values (`controller`)", + "resource.attributes.k8s.container.name:keyword - 1 distinct values (`controller`)", + "resource.attributes.k8s.container.restart_count:keyword - 1 distinct values (`0`)", + "resource.attributes.k8s.deployment.name:keyword - 1 distinct values (`ingress-nginx-controller`)", + "resource.attributes.k8s.namespace.name:keyword - 1 distinct values (`ingress-nginx`)", + "resource.attributes.k8s.node.name:keyword - 1 
distinct values (`minikube`)", + "resource.attributes.k8s.pod.name:keyword - 1 distinct values (`ingress-nginx-controller-bc57996ff-r6blf`)", + "resource.attributes.k8s.pod.start_time:keyword - 1 distinct values (`2024-10-26T08:56:56Z`)", + "resource.attributes.k8s.pod.uid:keyword - 1 distinct values (`35200065-5cde-4a7e-9018-0f0a4e2c5bb4`)", + "resource.attributes.os.description:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS (Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "resource.attributes.os.type:keyword - 1 distinct values (`linux`)", + "resource.attributes.service.name.text:text - 1 distinct values (`controller`)", + "resource.attributes.service.name:keyword - 1 distinct values (`controller`)", + "resource.dropped_attributes_count:long - 1 distinct values (`0`)", + "resource.schema_url:keyword - 1 distinct values (`https://opentelemetry.io/schemas/1.6.1`)", + "scope.dropped_attributes_count:long - 1 distinct values (`0`)", + "service.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "service.name:keyword - 1 distinct values (`controller`)", + "severity_number:byte - 1 distinct values (`0`)", + "source.address:keyword - 1 distinct values (`10.244.0.38`)", + "upstream.address:keyword - 1 distinct values (`10.244.0.26:8080`)", + "upstream.name:keyword - 1 distinct values (`default-my-otel-demo-frontendproxy-8080`)", + "upstream.response.size:keyword - 117 distinct values (`1149`, `1120`, `1029`, `19`, `1336`, `1203`, `1070`, `1276`, `1356`, `1105`, 107 more values)", + "upstream.response.status_code:keyword - 3 distinct values (`500`, `308`, `200`)", + "upstream.response.time:keyword - 89 distinct values (`3.171`, `1.007`, `1.103`, `0.053`, `4.434`, `3.328`, `1.010`, `3.197`, `3.132`, `2.565`, 79 more values)", + "url.domain:keyword - 2 distinct values (`icons`, ``)", + "url.extension:keyword - 7 distinct values (`css`, `Service/ResolveAll`, 
`Service/EventStream`, `png`, `svg`, `jpg`, `js`)", + "url.original:keyword - 167 distinct values (`/api/recommendations?productIds=&sessionId=cc71f7a4-0303-4404-8864-820628e21ca0¤cyCode=USD`, `/api/cart?sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF`, `/api/recommendations?productIds=&sessionId=389794a9-8298-4921-a2fd-523803ed73c3¤cyCode=`, `/api/cart?sessionId=140befa2-624a-4e98-a1c2-fa9d9ccbfbdd¤cyCode=`, `/api/recommendations?productIds=&sessionId=4cbd2d72-6719-4619-b0a8-09415f9ea500¤cyCode=USD`, `/api/data/`, `/api/cart?sessionId=b9f19d90-1824-463c-b9f6-95c894a8fc64¤cyCode=`, `/api/recommendations?productIds=&sessionId=933d52cf-c574-49e7-8432-86f958764dbe¤cyCode=`, `/api/recommendations?productIds=&sessionId=213624a6-bf31-46c4-aa67-8e2c102a8763¤cyCode=USD`, `/api/cart?sessionId=221a8b46-29b7-4d66-9536-62e1a0d3ad75¤cyCode=CHF`, 157 more values)", + "url.path:keyword - 43 distinct values (`/api/products/0PUK6V6EV0`, `/api/products/9SIQT8TOJO`, `/api/products/LS4PSXUNUM`, `/api/data/`, `/api/products/L9ECAV7KIM`, `/api/data`, `/api/products/66VCHSJNUP`, `/api/products/2ZYFJ3GM2N`, `/cart`, `/images/products/NationalParkFoundationExplorascope.jpg`, 33 more values)", + "url.query:keyword - 126 distinct values (`productIds=&sessionId=cc71f7a4-0303-4404-8864-820628e21ca0¤cyCode=USD`, `sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF`, `productIds=&sessionId=389794a9-8298-4921-a2fd-523803ed73c3¤cyCode=`, `sessionId=140befa2-624a-4e98-a1c2-fa9d9ccbfbdd¤cyCode=`, `productIds=&sessionId=4cbd2d72-6719-4619-b0a8-09415f9ea500¤cyCode=USD`, `sessionId=b9f19d90-1824-463c-b9f6-95c894a8fc64¤cyCode=`, `productIds=&sessionId=933d52cf-c574-49e7-8432-86f958764dbe¤cyCode=`, `productIds=&sessionId=213624a6-bf31-46c4-aa67-8e2c102a8763¤cyCode=USD`, `sessionId=221a8b46-29b7-4d66-9536-62e1a0d3ad75¤cyCode=CHF`, `productIds=&sessionId=0c122e28-e8fb-4ab5-9889-ac797b5f17cf¤cyCode=USD`, 116 more values)", + "url.scheme:keyword - 1 distinct values (``)", + 
"user_agent.name:keyword - 2 distinct values (`Python Requests`, `HeadlessChrome`)", + "user_agent.original:keyword - 2 distinct values (`python-requests/2.31.0`, `Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36`)", + "user_agent.version:keyword - 2 distinct values (`2.31`, `120.0.6099`)" + ] + }, + "ownPatterns": [ + { + "field": "message", + "count": 54850, + "pattern": "HTTP/1.1 Mozilla/5.0 X11 Linux aarch64 AppleWebKit/537.36 KHTML like Gecko HeadlessChrome/120.0.6099.28 Safari/537.36 default-my-otel-demo-frontendproxy-8080", + "regex": ".*?HTTP/1\\.1.+?Mozilla/5\\.0.+?X11.+?Linux.+?aarch64.+?AppleWebKit/537\\.36.+?KHTML.+?like.+?Gecko.+?HeadlessChrome/120\\.0\\.6099\\.28.+?Safari/537\\.36.+?default-my-otel-demo-frontendproxy-8080.*?", + "sample": "10.244.0.38 - - [02/Nov/2024:10:54:54 +0000] \"GET /icons/Chevron.svg HTTP/1.1\" 200 805 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 508 0.001 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 810 0.001 200 8693bfefd0668ca27fff0e929785a2c5\n", + "highlight": { + "service.name": [ + "controller" + ], + "resource.attributes.service.name": [ + "controller" + ] + }, + "metadata": { + "message": [ + "10.244.0.38 - - [02/Nov/2024:10:54:54 +0000] \"GET /icons/Chevron.svg HTTP/1.1\" 200 805 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 508 0.001 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 810 0.001 200 8693bfefd0668ca27fff0e929785a2c5\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.057Z", + "lastOccurrence": "2024-11-02T10:56:59.420Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 1385 + }, + { + "x": 1730543820000, + "y": 1172 + }, + { + 
"x": 1730543850000, + "y": 1520 + }, + { + "x": 1730543880000, + "y": 1312 + }, + { + "x": 1730543910000, + "y": 986 + }, + { + "x": 1730543940000, + "y": 1030 + }, + { + "x": 1730543970000, + "y": 1248 + }, + { + "x": 1730544000000, + "y": 1395 + }, + { + "x": 1730544030000, + "y": 1371 + }, + { + "x": 1730544060000, + "y": 1289 + }, + { + "x": 1730544090000, + "y": 1467 + }, + { + "x": 1730544120000, + "y": 1223 + }, + { + "x": 1730544150000, + "y": 1429 + }, + { + "x": 1730544180000, + "y": 1329 + }, + { + "x": 1730544210000, + "y": 1224 + }, + { + "x": 1730544240000, + "y": 1424 + }, + { + "x": 1730544270000, + "y": 1409 + }, + { + "x": 1730544300000, + "y": 1405 + }, + { + "x": 1730544330000, + "y": 1322 + }, + { + "x": 1730544360000, + "y": 1326 + }, + { + "x": 1730544390000, + "y": 1416 + }, + { + "x": 1730544420000, + "y": 1391 + }, + { + "x": 1730544450000, + "y": 1320 + }, + { + "x": 1730544480000, + "y": 1384 + }, + { + "x": 1730544510000, + "y": 1457 + }, + { + "x": 1730544540000, + "y": 1415 + }, + { + "x": 1730544570000, + "y": 1386 + }, + { + "x": 1730544600000, + "y": 1337 + }, + { + "x": 1730544630000, + "y": 1315 + }, + { + "x": 1730544660000, + "y": 1259 + }, + { + "x": 1730544690000, + "y": 1403 + }, + { + "x": 1730544720000, + "y": 1352 + }, + { + "x": 1730544750000, + "y": 1396 + }, + { + "x": 1730544780000, + "y": 1325 + }, + { + "x": 1730544810000, + "y": 1365 + }, + { + "x": 1730544840000, + "y": 1387 + }, + { + "x": 1730544870000, + "y": 1414 + }, + { + "x": 1730544900000, + "y": 1210 + }, + { + "x": 1730544930000, + "y": 1344 + }, + { + "x": 1730544960000, + "y": 1444 + }, + { + "x": 1730544990000, + "y": 1264 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "dip", + "significance": "medium", + "change_point": 4, + "p_value": 0.00005345763188668862, + "timestamp": "2024-11-02T10:38:30.000Z" + }, + "shortId": "hhpu", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 1069, + 
"pattern": "GET HTTP/1.1 python-requests/2.31.0 default-my-otel-demo-frontendproxy-8080", + "regex": ".*?GET.+?HTTP/1\\.1.+?python-requests/2\\.31\\.0.+?default-my-otel-demo-frontendproxy-8080.*?", + "sample": "10.244.0.38 - - [02/Nov/2024:10:55:01 +0000] \"GET /api/data HTTP/1.1\" 200 177 \"-\" \"python-requests/2.31.0\" 350 0.003 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 177 0.003 200 6cf0fcfd1259f68166701125d3c9761c\n", + "highlight": { + "service.name": [ + "controller" + ], + "resource.attributes.service.name": [ + "controller" + ] + }, + "metadata": { + "message": [ + "10.244.0.38 - - [02/Nov/2024:10:55:01 +0000] \"GET /api/data HTTP/1.1\" 200 177 \"-\" \"python-requests/2.31.0\" 350 0.003 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 177 0.003 200 6cf0fcfd1259f68166701125d3c9761c\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:32.391Z", + "lastOccurrence": "2024-11-02T10:56:57.316Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 25 + }, + { + "x": 1730543820000, + "y": 27 + }, + { + "x": 1730543850000, + "y": 26 + }, + { + "x": 1730543880000, + "y": 26 + }, + { + "x": 1730543910000, + "y": 26 + }, + { + "x": 1730543940000, + "y": 27 + }, + { + "x": 1730543970000, + "y": 24 + }, + { + "x": 1730544000000, + "y": 25 + }, + { + "x": 1730544030000, + "y": 22 + }, + { + "x": 1730544060000, + "y": 27 + }, + { + "x": 1730544090000, + "y": 20 + }, + { + "x": 1730544120000, + "y": 23 + }, + { + "x": 1730544150000, + "y": 29 + }, + { + "x": 1730544180000, + "y": 25 + }, + { + "x": 1730544210000, + "y": 23 + }, + { + "x": 1730544240000, + "y": 27 + }, + { + "x": 1730544270000, + "y": 26 + }, + { + "x": 1730544300000, + "y": 27 + }, + { + "x": 1730544330000, + "y": 25 + }, + { + "x": 1730544360000, + "y": 31 + }, + { + "x": 1730544390000, + "y": 21 + }, + { + "x": 1730544420000, + "y": 27 + }, + { + "x": 1730544450000, + "y": 25 + }, + { + "x": 1730544480000, + "y": 29 + }, + { + "x": 1730544510000, + "y": 23 + }, + { + "x": 
1730544540000, + "y": 27 + }, + { + "x": 1730544570000, + "y": 24 + }, + { + "x": 1730544600000, + "y": 31 + }, + { + "x": 1730544630000, + "y": 22 + }, + { + "x": 1730544660000, + "y": 24 + }, + { + "x": 1730544690000, + "y": 27 + }, + { + "x": 1730544720000, + "y": 27 + }, + { + "x": 1730544750000, + "y": 27 + }, + { + "x": 1730544780000, + "y": 24 + }, + { + "x": 1730544810000, + "y": 25 + }, + { + "x": 1730544840000, + "y": 26 + }, + { + "x": 1730544870000, + "y": 29 + }, + { + "x": 1730544900000, + "y": 33 + }, + { + "x": 1730544930000, + "y": 34 + }, + { + "x": 1730544960000, + "y": 25 + }, + { + "x": 1730544990000, + "y": 28 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "jmoh", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 1068, + "pattern": "10.244.0.38 02 2024 10 0000 GET HTTP 1.1 python requests 2.31.0 default my otel demo frontendproxy 8080 10.244.0.26 8080", + "regex": ".*?10\\.244\\.0\\.38.+?02.+?2024.+?10.+?0000.+?GET.+?HTTP.+?1\\.1.+?python.+?requests.+?2\\.31\\.0.+?default.+?my.+?otel.+?demo.+?frontendproxy.+?8080.+?10\\.244\\.0\\.26.+?8080.*?", + "sample": "10.244.0.38 - - [02/Nov/2024:10:55:06 +0000] \"GET /api/cart HTTP/1.1\" 200 24 \"-\" \"python-requests/2.31.0\" 350 0.003 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 24 0.003 200 33934468a35902ed5b6dde14b1aca6a0\n", + "highlight": { + "service.name": [ + "controller" + ], + "resource.attributes.service.name": [ + "controller" + ] + }, + "metadata": { + "message": [ + "10.244.0.38 - - [02/Nov/2024:10:55:06 +0000] \"GET /api/cart HTTP/1.1\" 200 24 \"-\" \"python-requests/2.31.0\" 350 0.003 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 24 0.003 200 33934468a35902ed5b6dde14b1aca6a0\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:32.391Z", + "lastOccurrence": "2024-11-02T10:56:57.316Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 25 
+ }, + { + "x": 1730543820000, + "y": 27 + }, + { + "x": 1730543850000, + "y": 26 + }, + { + "x": 1730543880000, + "y": 26 + }, + { + "x": 1730543910000, + "y": 26 + }, + { + "x": 1730543940000, + "y": 27 + }, + { + "x": 1730543970000, + "y": 24 + }, + { + "x": 1730544000000, + "y": 25 + }, + { + "x": 1730544030000, + "y": 22 + }, + { + "x": 1730544060000, + "y": 27 + }, + { + "x": 1730544090000, + "y": 20 + }, + { + "x": 1730544120000, + "y": 23 + }, + { + "x": 1730544150000, + "y": 29 + }, + { + "x": 1730544180000, + "y": 25 + }, + { + "x": 1730544210000, + "y": 23 + }, + { + "x": 1730544240000, + "y": 27 + }, + { + "x": 1730544270000, + "y": 26 + }, + { + "x": 1730544300000, + "y": 27 + }, + { + "x": 1730544330000, + "y": 25 + }, + { + "x": 1730544360000, + "y": 31 + }, + { + "x": 1730544390000, + "y": 21 + }, + { + "x": 1730544420000, + "y": 27 + }, + { + "x": 1730544450000, + "y": 25 + }, + { + "x": 1730544480000, + "y": 29 + }, + { + "x": 1730544510000, + "y": 23 + }, + { + "x": 1730544540000, + "y": 27 + }, + { + "x": 1730544570000, + "y": 24 + }, + { + "x": 1730544600000, + "y": 31 + }, + { + "x": 1730544630000, + "y": 22 + }, + { + "x": 1730544660000, + "y": 24 + }, + { + "x": 1730544690000, + "y": 27 + }, + { + "x": 1730544720000, + "y": 27 + }, + { + "x": 1730544750000, + "y": 27 + }, + { + "x": 1730544780000, + "y": 24 + }, + { + "x": 1730544810000, + "y": 25 + }, + { + "x": 1730544840000, + "y": 26 + }, + { + "x": 1730544870000, + "y": 29 + }, + { + "x": 1730544900000, + "y": 32 + }, + { + "x": 1730544930000, + "y": 34 + }, + { + "x": 1730544960000, + "y": 25 + }, + { + "x": 1730544990000, + "y": 28 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "vzwi", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 262, + "pattern": "POST HTTP/1.1 python-requests/2.31.0 default-my-otel-demo-frontendproxy-8080", + "regex": 
".*?POST.+?HTTP/1\\.1.+?python-requests/2\\.31\\.0.+?default-my-otel-demo-frontendproxy-8080.*?", + "sample": "10.244.0.38 - - [02/Nov/2024:10:55:03 +0000] \"POST /api/checkout HTTP/1.1\" 200 984 \"-\" \"python-requests/2.31.0\" 780 0.068 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 984 0.069 200 f87d6397ec8b5740f05f1f3ca07adf50\n", + "highlight": { + "service.name": [ + "controller" + ], + "resource.attributes.service.name": [ + "controller" + ] + }, + "metadata": { + "message": [ + "10.244.0.38 - - [02/Nov/2024:10:55:03 +0000] \"POST /api/checkout HTTP/1.1\" 200 984 \"-\" \"python-requests/2.31.0\" 780 0.068 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 984 0.069 200 f87d6397ec8b5740f05f1f3ca07adf50\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:34.220Z", + "lastOccurrence": "2024-11-02T10:56:45.575Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 5 + }, + { + "x": 1730543820000, + "y": 10 + }, + { + "x": 1730543850000, + "y": 14 + }, + { + "x": 1730543880000, + "y": 3 + }, + { + "x": 1730543910000, + "y": 6 + }, + { + "x": 1730543940000, + "y": 12 + }, + { + "x": 1730543970000, + "y": 2 + }, + { + "x": 1730544000000, + "y": 6 + }, + { + "x": 1730544030000, + "y": 7 + }, + { + "x": 1730544060000, + "y": 5 + }, + { + "x": 1730544090000, + "y": 5 + }, + { + "x": 1730544120000, + "y": 5 + }, + { + "x": 1730544150000, + "y": 5 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 9 + }, + { + "x": 1730544240000, + "y": 5 + }, + { + "x": 1730544270000, + "y": 6 + }, + { + "x": 1730544300000, + "y": 14 + }, + { + "x": 1730544330000, + "y": 7 + }, + { + "x": 1730544360000, + "y": 10 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 4 + }, + { + "x": 1730544450000, + "y": 6 + }, + { + "x": 1730544480000, + "y": 6 + }, + { + "x": 1730544510000, + "y": 2 + }, + { + "x": 1730544540000, + "y": 3 + }, + { + "x": 1730544570000, + "y": 9 + }, + { + "x": 1730544600000, + "y": 5 + }, + { 
+ "x": 1730544630000, + "y": 9 + }, + { + "x": 1730544660000, + "y": 3 + }, + { + "x": 1730544690000, + "y": 14 + }, + { + "x": 1730544720000, + "y": 13 + }, + { + "x": 1730544750000, + "y": 2 + }, + { + "x": 1730544780000, + "y": 5 + }, + { + "x": 1730544810000, + "y": 3 + }, + { + "x": 1730544840000, + "y": 7 + }, + { + "x": 1730544870000, + "y": 5 + }, + { + "x": 1730544900000, + "y": 11 + }, + { + "x": 1730544930000, + "y": 11 + }, + { + "x": 1730544960000, + "y": 6 + }, + { + "x": 1730544990000, + "y": 2 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "vdjm", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 263, + "pattern": "10.244.0.38 02 2024 10 0000 api HTTP 1.1 python requests 2.31.0 default my otel demo frontendproxy 8080 10.244.0.26 8080", + "regex": ".*?10\\.244\\.0\\.38.+?02.+?2024.+?10.+?0000.+?api.+?HTTP.+?1\\.1.+?python.+?requests.+?2\\.31\\.0.+?default.+?my.+?otel.+?demo.+?frontendproxy.+?8080.+?10\\.244\\.0\\.26.+?8080.*?", + "sample": "10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n", + "highlight": { + "service.name": [ + "controller" + ], + "resource.attributes.service.name": [ + "controller" + ] + }, + "metadata": { + "message": [ + "10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:34.220Z", + "lastOccurrence": "2024-11-02T10:56:45.575Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 5 + }, + { + "x": 1730543820000, + "y": 10 + }, + { + "x": 1730543850000, + "y": 14 + }, + { + "x": 1730543880000, + "y": 3 + 
}, + { + "x": 1730543910000, + "y": 6 + }, + { + "x": 1730543940000, + "y": 12 + }, + { + "x": 1730543970000, + "y": 2 + }, + { + "x": 1730544000000, + "y": 6 + }, + { + "x": 1730544030000, + "y": 7 + }, + { + "x": 1730544060000, + "y": 5 + }, + { + "x": 1730544090000, + "y": 5 + }, + { + "x": 1730544120000, + "y": 5 + }, + { + "x": 1730544150000, + "y": 5 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 9 + }, + { + "x": 1730544240000, + "y": 5 + }, + { + "x": 1730544270000, + "y": 6 + }, + { + "x": 1730544300000, + "y": 14 + }, + { + "x": 1730544330000, + "y": 7 + }, + { + "x": 1730544360000, + "y": 10 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 4 + }, + { + "x": 1730544450000, + "y": 6 + }, + { + "x": 1730544480000, + "y": 6 + }, + { + "x": 1730544510000, + "y": 2 + }, + { + "x": 1730544540000, + "y": 3 + }, + { + "x": 1730544570000, + "y": 9 + }, + { + "x": 1730544600000, + "y": 5 + }, + { + "x": 1730544630000, + "y": 9 + }, + { + "x": 1730544660000, + "y": 3 + }, + { + "x": 1730544690000, + "y": 14 + }, + { + "x": 1730544720000, + "y": 13 + }, + { + "x": 1730544750000, + "y": 2 + }, + { + "x": 1730544780000, + "y": 5 + }, + { + "x": 1730544810000, + "y": 3 + }, + { + "x": 1730544840000, + "y": 7 + }, + { + "x": 1730544870000, + "y": 5 + }, + { + "x": 1730544900000, + "y": 12 + }, + { + "x": 1730544930000, + "y": 11 + }, + { + "x": 1730544960000, + "y": 6 + }, + { + "x": 1730544990000, + "y": 2 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "hxyo", + "relevance": "critical", + "interesting": true + } + ], + "patternsFromOtherEntities": [], + "searches": [ + { + "fragments": [ + "10.244.0.38:8080", + "10.244.0.38", + "8080" + ], + "appearsAs": "This IP address and port are referenced as attributes.source.address, attributes.upstream.address in the investigated entity service.name:controller." 
+ }, + { + "fragments": [ + "http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart", + "/api/cart" + ], + "appearsAs": "These URL fragments appear as attributes.http.request.referrer, attributes.url.original in the investigated entity service.name:controller." + }, + { + "fragments": [ + "/api/cart?sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF", + "sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57", + "currencyCode=CHF" + ], + "appearsAs": "These URL fragments appear as attributes.url.query in the investigated entity service.name:controller." + }, + { + "fragments": [ + "default-my-otel-demo-frontendproxy-8080", + "frontendproxy", + "frontend" + ], + "appearsAs": "These service names appear as attributes.upstream.name in the investigated entity service.name:controller." + }, + { + "fragments": [ + "e6bae77cfb5b598f1d725bc6cc52d06d", + "506" + ], + "appearsAs": " These fragments appear as attributes.http.request.time, attributes.http.response.status_code in the investigated entity service.name:controller." + } + ], + "relatedEntitiesSummaries": [ + "### Related Entities Analysis\n\n#### 1. Entity: service.name:frontend\n- **Indicators (Strength: Average to Strong):**\n - Field: `attributes.upstream.name` in service.name:controller - `default-my-otel-demo-frontendproxy-8080`\n - Field: `kubernetes.pod.name.text` in service.name:frontend - `my-otel-demo-frontend-5bbf4d78bc-qtwdr`\n - URL paths in logs that suggest interactions with the frontend service, specifically `/api/cart`.\n- **Reasoning:** The `frontend` service interacts closely with the `controller` (ingress-nginx-controller). Requests directed to `/api/cart` initiate from `frontendproxy` which routes to `frontend`. Given the high error rate at the `/api/cart` endpoint, the issue might involve downstream calls from `frontend` to dependent services.\n- **Overall Likeliness:** High. 
The `frontend` service is one of the primary services processing requests (user interactions) that subsequently hit the `controller`.\n\n#### 2. Entity: service.name:cartservice\n- **Indicators (Strength: Strong):**\n - Correspondence of session IDs in the request URLs or logs between controller and cartservice.\n - Mention of cart failures in `frontend` interactions - indicated by error logs involving failed RPC calls to `cartservice`.\n - Logs for service.name:cartservice with messages such as `GetCartAsync called with userId=1d9f48dd`.\n- **Reasoning:** The `/api/cart` endpoint directly involves manipulating or retrieving data from `cartservice`. Given the 500 errors on requests routed via the `controller`, it's likely that `cartservice` might be either causing or being affected by these failures.\n- **Overall Likeliness:** High. Direct involvement of `cartservice` is plausible given the issue is associated specifically with cart interactions.\n\n#### 3. Entity: service.name:frontend-web\n- **Indicators (Strength: Average):**\n - URL referrer and path fragments suggest client-side interactions impact the controller’s ingress.\n - URL fields from both controller logs and frontend-web logs such as `http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart`.\n- **Reasoning:** `frontend-web` represents the client-side application layer. Issues in the client's request construction or timing might be indirectly impacting how the `controller` handles these requests.\n- **Overall Likeliness:** Moderate. While not directly involved in backend processing, interactions originating from the frontend-web could potentially impact the backend flow.\n\n#### 4. 
Entity: service.name:checkoutservice\n- **Indicators (Strength: Weak):**\n - Shared URL fragments related to the checkout process (`/api/cart` could be an intermediate step).\n - Common use of `sessionId` and `currencyCode` seen in request attributes.\n- **Reasoning:** Though the controller deals directly with cart interactions, completion of cart actions could trigger subsequent processes in `checkoutservice`. Errors during cart operations could thereby propagate issues if mismanaged upstream.\n- **Overall Likeliness:** Low to Moderate. Contextually relevant but perhaps indirectly linked unless specific error propagation is identified.\n\n#### 5. Entity: service.name:coredns\n- **Indicators (Strength: Weak to Average):**\n - DNS logs showing interactions such as `10.244.0.38:45693 - 8080 \"A IN ingress-nginx-controller.ingress-nginx.svc.cluster.local. UDP 74`.\n- **Reasoning:** DNS resolution issues could lead to inability to locate the correct backend service IP for `controller`, causing errors in routing HTTP requests.\n- **Overall Likeliness:** Low. Potential indirect influence but less likely to be the primary cause.\n\n#### Possible Relationship Summary\nGiven these findings:\n\n1. **frontend** and **cartservice** are the most relevant entities with strong indicators of their dependencies and direct involvement in handling requests relevant to the `/api/cart` endpoint. Investigating these services in conjunction with `controller` will likely reveal root causes.\n2. **frontend-web** could be examined for potential client-side initiation issues leading to increased errors, but it remains secondary.\n3. **checkoutservice** may be potentially linked through a sequence of dependencies initiated post-cart operations but is less immediate.\n4. 
**coredns** could be impacting resolution, yet this remains an unlikely principal cause unless supported by specific DNS errors.\n\nFocus on examining logs and dependencies from `cartservice` and `frontend` to pinpoint exact causes for the reported 500 errors at the `/api/cart` endpoint managed by the controller service.", + "## Analysis and Identification of Related Entities\n\n### Relationship 1: `frontendproxy` (default-my-otel-demo-frontendproxy-8080)\n- **Related entity:** `service.name:frontendproxy`\n- **Indicator strength:** Strong, based on the matching `attributes.upstream.name` field.\n - **Indications:**\n - From `controller` logs: `attributes.upstream.name` = `default-my-otel-demo-frontendproxy-8080`\n - Upstream IP address: `attributes.upstream.address` = `10.244.0.26:8080` in controller logs coinciding with requests that resulted in `500` errors.\n- **Reason:** The `frontendproxy` service routes traffic to the backend services, including the `controller`. Since the `controller` service receives requests and directs traffic to `frontendproxy`, it points to a possible upstream dependency causing the 500 errors during the requests to `/api/cart`.\n- **Overall likeliness:** High. This entity is directly involved since it routes traffic within the system, and its downstream performance or errors reflect on the `controller`.\n\n### Relationship 2: `frontend`\n- **Related entity:** `service.name:frontend`\n- **Indicator strength:** Moderate, observing the infrastructure context and the dependency chain.\n - **Indications:** \n - The `frontend` service is called by the `frontendproxy`, which is indicated as the `attributes.upstream.name` in `controller` logs.\n- **Reason:** In the context provided, the `frontend` communicates with other backend services to process user interactions and might be an upstream dependency for the `controller`. The problem might cascade from `frontendproxy` to `frontend`.\n- **Overall likeliness:** High. 
This service is critical in handling user interactions, and problems here could cause issue propagation to `controller`.\n\n### Relationship 3: `cartservice`\n- **Related entity:** `service.name:cartservice`\n- **Indicator strength:** Moderate, based on its role in the architecture and likely correlation.\n - **Indications:**\n - The `cartservice` handles shopping cart actions which map the `controller`'s `/api/cart` endpoint.\n - **Reason:** If the `cartservice` has issues, they would cascade to the `controller` /api/cart endpoint that depends on this service for cart management functions.\n- **Overall likeliness:** High. It closely correlates with the endpoint's functionality and possible 500 errors due to backend service inconsistencies.\n\n### Relationship 4: `currencyservice`\n- **Related entity:** `service.name:currencyservice`\n- **Indicator strength:** Weak, contextual dependency but crucial interplay with `cart` operations.\n - **Indications:**\n - Handling currency conversions, indirectly connected to cart operations.\n- **Reason:** The issues might arise if currency conversion fails during cart operations, contributing to the 500 errors in the `controller`.\n- **Overall likeliness:** Moderate. Essential for part of the request chains during cart operations, but primary errors might occur upstream.\n\n### Relationship 5: `productcatalogservice`\n- **Related entity:** `service.name:productcatalogservice`\n- **Indicator strength:** Weak, as no direct log correlation but relevant in the context.\n - **Indications:**\n - Provides product data, indirectly required during cart operations.\n- **Reason:** Cart operations fetching/validating product data might fail, contributing to the `controller` errors when /api/cart requests involve fetching product data.\n- **Overall likeliness:** Moderate. 
Possible issues here can impact `cartservice` processes and propagate as 500 errors.\n\n### Relationship 6: `frauddetectionservice`\n- **Related entity:** `service.name:frauddetectionservice`\n- **Indicator strength:** Weak, speculative and based more on potential correlation.\n - **Indications:**\n - No specific logs point directly; contextual relevance due to transaction handling.\n- **Reason:** If fraud detection triggers false flags, it can affect cart transactions.\n- **Overall likeliness:** Low. Unlikely direct contributor and more as a consequential involvement in transaction failures.\n\n### Summary\n\nThe `frontendproxy`, `frontend`, and `cartservice` are the most probable related entities with high relevance to the errors indicated in the `controller` service. Investigations should focus on these first, focusing on log patterns and errors cascading through these entities causing the 500 errors on `/api/cart`. Indirect relations like `currencyservice`, `productcatalogservice`, and `frauddetectionservice` can also be explored as part of the broader context.", + "During the investigation of the alert for the `controller` service, we are identifying possible relationships to other entities. We will meticulously examine the evidence from logs, traces, and other relevant data to uncover dependencies and potential causes for the high number of 500 errors on the `/api/cart` endpoint.\n\n### Highly Relevant Entities and Relationships\n\n#### 1. 
Entity: service.name:frontendproxy\n\n- **Indicators of Evidence**\n - **Strong Indicator**: `attributes.upstream.name: default-my-otel-demo-frontendproxy-8080`\n - **Field Values in Investigated Entity (`controller`)**:\n - `\"attributes.upstream.name\": \"default-my-otel-demo-frontendproxy-8080\"`\n - `\"attributes.upstream.response.status_code\": [\"500\", \"200\", \"308\"]`\n - **Fingerprinting Evidence**:\n - `\"message\": \"10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \\\"POST /api/cart HTTP/1.1\\\" 500 32 \\\"-\\\" \\\"python-requests/2.31.0\\\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\"`\n\n- **Relationship and Context**\n - The `frontendproxy` service acts as the reverse proxy, routing incoming HTTP traffic to different backend services, including the `controller`. The errors appearing in the `controller` log indicate communication issues between the `controller` and the `frontendproxy`.\n\n- **Overall Likeliness**: High\n - Given the primary role of the `frontendproxy` in routing traffic to `controller`, coupled with the direct evidence in the logs, this entity is crucial to the situation at hand.\n\n#### 2. Entity: service.name:frontend\n\n- **Indicators of Evidence**\n - **Average Indicator**: `features.service.name: frontend`\n - **Field Values in Investigated Entity (`controller`)**:\n - `\"attributes.http.request.origin\": \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart\"`\n - `\"/api/cart` endpoint related queries`\n\n- **Relationship and Context**:\n - The `frontend` service handles key operations related to user interactions before requests are passed to various backend services, including the `controller`. 
Issues in `frontend`, like poor handling of requests or incorrect data forwarding, might cascade downstream, resulting in 500 errors in the `controller`.\n\n- **Overall Likeliness**: Medium to High\n - Given the data pathway from user interactions handled by `frontend`, it’s essential to determine if `frontend` anomalies are contributing to the failure in `controller`.\n\n#### 3. Entity: service.name:cartservice\n\n- **Indicators of Evidence**\n - **Average Indicator**: `/api/cart` requests typically involving `cartservice`\n - **Field Values in Investigated Entity (`controller`)**:\n - `500 errors at \"/api/cart\"`\n\n- **Relationship and Context**:\n - The `cartservice` directly manages shopping cart operations over the gRPC protocol. While the HTTP API from `controller` might indicate communication failures with `cartservice`, these errors could also originate from improper data requests or transaction handling within `cartservice`.\n\n- **Overall Likeliness**: High\n - Investigating the interaction with `cartservice` can help distinguish if business logic errors or miscommunication between services lead to repeated 500 errors.\n\n### Moderately Relevant Entities\n\n#### 4. Entity: service.name:loadgenerator\n\n- **Indicators of Evidence**\n - **Weak Indicator**: Simulated high traffic which might put strain on other services causing errors.\n - **Field Values in Investigated Entity (`controller`)**:\n - General logged high traffic instances or increased load around the alert time.\n\n- **Relationship and Context**:\n - The `loadgenerator` mimics external user traffic managed by the Ingress and ultimately passed through nodes like `controller`. 
Excessive load can manifest as backend service overloads or timeouts, indirectly causing 500 errors in `controller`.\n\n- **Overall Likeliness**: Medium\n - Given the traffic simulation aspect, its role is probable but direct correlation to specific error occurrences in `controller` needs corroboration with other data points.\n\n### Less Likely but Contextually Relevant Entities\n\n#### 5. Entity: service.name:etcd\n\n- **Indicators of Evidence**\n - **Weak Indicator**: General mention in logs for Ingress control plane activity\n - **Field Values in Investigated Entity (`controller`)**:\n - `\"response count\": 0`, signifying Ingress data handling might be amiss during high traffic\n\n- **Relationship and Context**:\n - Possible indirect impact wherein control plane malfunctions or data inconsistencies in the `etcd` state can propagate as issues dispersed across services interacting via Ingress, including `controller`.\n\n- **Overall Likeliness**: Medium\n - This needs verifying integration with upstream Ingress state logs to validate contributions to observed `controller` issues.\n\n### Further Investigations and Likelihood Summary\n\nBased on logs and identified relationships, we will prioritize looking into:\n\n1. **Frontendproxy** - Checking its health, routing efficiency, and connections to `controller`.\n2. **Frontend** - Verifying its data handling during user interactions that lead to API cart errors.\n3. **Cartservice** - Ensuring logical transactions and gRPC communications, ruling out core service bugs.\n\nWe will cross-reference error logs, API traces, and dependency interactions tightly intertwined with the `controller` to form a comprehensive approach to resolution." + ], + "kbEntries": [ + { + "id": "System architecture", + "text": "The architecture described here outlines a microservices-based system, where each service is implemented in a distinct programming language and communicates via gRPC, HTTP, or TCP. 
This system is designed to handle simulated user traffic, supported by a variety of interconnected services and components.\n\n### System Architecture\n\n1. **`loadgenerator`** - Simulates external user traffic by sending HTTP requests, which are managed by an Nginx ingress controller. This ingress directs traffic to the `frontendproxy` service.\n\n2. **`frontendproxy` (Envoy)** - Acts as a reverse proxy, routing incoming traffic from `loadgenerator` to `frontend`.\n\n3. **`frontend` (Node.js)** - The core service for user interactions, receiving HTTP traffic from `frontendproxy` and interfacing with various backend services to fulfill requests.\n\n4. **`frontend-web` (RUM)** - A Real User Monitoring (RUM) layer that runs in the user's browser, enabling insights into end-user experiences and frontend performance.\n\n5. **`adservice`** - Delivers advertisements to the `frontend` using gRPC, enhancing the user experience with relevant ad content.\n\n6. **`cartservice`** - Manages shopping cart data, including adding and removing items. It communicates over gRPC and leverages a Redis cache for data persistence.\n\n7. **`currencyservice`** - Handles currency conversions and facilitates interactions between `cartservice` and `checkoutservice` over gRPC.\n\n8. **`checkoutservice`** - Coordinates the checkout process, calling various services for payments, shipping, and emails. It utilizes both gRPC and HTTP protocols to aggregate the necessary information for order completion.\n\n9. **`emailservice`** - Sends order confirmation emails to users via gRPC, triggered by interactions with `checkoutservice`.\n\n10. **`productcatalogservice`** - Maintains the product catalog, storing details about available items and providing this data to other services via gRPC.\n\n11. **`recommendationservice`** - Generates personalized product recommendations, accessed by `frontend` over gRPC.\n\n12. 
**`shippingservice`** - Manages shipping information, providing essential data to `checkoutservice` over gRPC.\n\n13. **`quoteservice`** - Supplies shipping quotes over HTTP, which are accessed by `shippingservice` to estimate shipping costs.\n\n14. **`paymentservice`** - Processes payment transactions through gRPC, enabling secure and efficient payments for `checkoutservice`.\n\n15. **`accountingservice`** - Responsible for recording transactions, it connects to a Kafka queue and interacts over TCP.\n\n16. **`frauddetectionservice`** - Monitors orders for potential fraud, also interfacing with the Kafka queue over TCP to receive relevant transaction data.\n\n17. **`imageprovider` (Nginx)** - Serves static images for the frontend interface, accessible over HTTP.\n\n18. **`queue` (Kafka)** - Acts as a central message broker, facilitating communication between `accountingservice` and `frauddetectionservice`.\n\n### Key Components\nThe system is structured to enable robust, distributed communication across services, allowing each component to focus on a specific aspect of the user experience or business logic. 
The ingress controller, in particular, plays a critical role in directing traffic, ensuring that incoming requests from `loadgenerator` are correctly routed through `frontendproxy` and into the system’s core services.\n\nThis architecture ensures that the system can handle high traffic loads and provides a modular, flexible framework to meet user demands while monitoring for performance and security.\n", + "tokens": 733, + "score": 3 + } + ] + } + } + }, + { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "toolCallId": "call_DB13TclS3Gc9zUgCrWLrvDuu", + "function": { + "name": "observe", + "arguments": { + "observe": true + } + } + } + ] + }, + { + "name": "observe", + "response": { + "content": "### Context and Ongoing Investigation\n\nThe initial context indicated that the `controller` service was experiencing an alert due to a high number of `500 Internal Server Errors` on the `/api/cart` endpoint. The error rate was significantly high (98.78%), suggesting a severe issue within the `controller` service or its dependencies. During the initial investigation of the `controller` service, it was determined that the errors resulted from HTTP POST requests to the `/api/cart` endpoint emanating from a client using `python-requests/2.31.0`.\n\n### Key Observations and Analysis from Investigations\n\n#### 1. `controller` Service Characteristics\n\n- **Environment**: The `controller` service runs in a Kubernetes environment (`ingress-nginx` namespace), with the pod named `ingress-nginx-controller-bc57996ff-r6blf` on node `minikube`.\n- **Functionality**: Acts as an Ingress controller managing HTTP requests routing to backend services including the `frontendproxy`.\n- **Observations**:\n - Pattern showing consistent `500 Internal Server Error` for POST requests to `/api/cart`.\n - Most of these errors originate from requests routed through the `frontendproxy`.\n\n#### 2. 
Related Entities and Their Possible Impact\n\nEntities that closely interact with `controller` or are otherwise significant to `controller’s` functionality were assessed:\n\n1. **`frontend`**:\n - Interacts with the `controller` for routing requests.\n - Downstream dependencies and interactions might propagate errors.\n\n2. **`cartservice`**:\n - Directly involved in handling requests for the `/api/cart` endpoint.\n - Any issues here could cause `controller` errors.\n\n3. **`frontend-web`**:\n - Client-side interactions leading to requests to `/api/cart` via `controller`.\n - Potential timing or construction issues in requests.\n\n4. **Other entities (`checkoutservice`, `coredns`)**:\n - Indirectly related with varying degrees of confidence on impact.\n\n### Timeline of Significant Events\n\n1. **2024-11-02T10:56:00.000Z**:\n - **Observation**:\n - 6 occurrences of HTTP POST requests to the `/api/cart` endpoint resulting in `500 Internal Server Error`.\n - Example Log:\n ```\n 10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n ```\n\n2. **2024-11-02T10:56:30.000Z**:\n - **Observation**:\n - 2 occurrences of HTTP POST requests to the `/api/cart` endpoint resulting in `500 Internal Server Error`.\n - Example Log:\n ```\n 10.244.0.38 - - [02/Nov/2024:10:56:35 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n ```\n\n3. **2024-11-02T10:56:42.498Z**:\n - **Alert Triggered**:\n - High number of 500 errors for the /api/cart endpoint. 
Error rate is 98.78%, above the threshold of 1% over a 1-minute duration.\n\n### Context and Reasoning\n\nThe `controller` service functions as the main ingress gateway, routing HTTP requests to the appropriate backend services including the `frontendproxy`, and more specifically the downstream `cartservice`. The nature of the high error rates observed at `/api/cart`, particularly for POST requests, suggests the problem could originate either at the `cartservice` backend or along the routing path between `controller`, `frontendproxy`, and `cartservice`.\n\n#### Potential Root Causes\n- **Backend Service Issue**:\n Given the direct involvement of `cartservice` in `/api/cart` operations, issues here could easily propagate into repeated 500 errors seen by the `controller`.\n- **Routing/Network Issues**:\n Communication path issues (e.g., `frontendproxy` to `cartservice`) causing errors in routing HTTP requests could result in these errors.\n\n### Next Steps\n\nGiven the strong patterns:\n\n1. **Investigate `cartservice`**:\n - As the downstream service handling `/api/cart` requests, issues here would directly contribute to the observed error patterns in `controller`.\n\n2. **Investigate `frontendproxy`**:\n - Since requests are routed through this service to `cartservice`, any routing or network issues here might also contribute to the errors.\n\nUpon investigating both entities, performing more detailed inspections of the logs and interactions might reveal issues that propagate these failures. This should help isolate the fault in the communication path or within the specific services causing the `500 Internal Server Errors` seen in `controller`.\n\n**Entities to Investigate Next**:\n- **cartservice**\n- **frontendproxy**\n\nThese investigations will provide further insights into whether the root cause is localized to `cartservice` itself or issues are emergent from the routing path involving `frontendproxy`." 
+ }, + "data": { + "content": "### Context and Ongoing Investigation\n\nThe initial context indicated that the `controller` service was experiencing an alert due to a high number of `500 Internal Server Errors` on the `/api/cart` endpoint. The error rate was significantly high (98.78%), suggesting a severe issue within the `controller` service or its dependencies. During the initial investigation of the `controller` service, it was determined that the errors resulted from HTTP POST requests to the `/api/cart` endpoint emanating from a client using `python-requests/2.31.0`.\n\n### Key Observations and Analysis from Investigations\n\n#### 1. `controller` Service Characteristics\n\n- **Environment**: The `controller` service runs in a Kubernetes environment (`ingress-nginx` namespace), with the pod named `ingress-nginx-controller-bc57996ff-r6blf` on node `minikube`.\n- **Functionality**: Acts as an Ingress controller managing HTTP requests routing to backend services including the `frontendproxy`.\n- **Observations**:\n - Pattern showing consistent `500 Internal Server Error` for POST requests to `/api/cart`.\n - Most of these errors originate from requests routed through the `frontendproxy`.\n\n#### 2. Related Entities and Their Possible Impact\n\nEntities that closely interact with `controller` or are otherwise significant to `controller’s` functionality were assessed:\n\n1. **`frontend`**:\n - Interacts with the `controller` for routing requests.\n - Downstream dependencies and interactions might propagate errors.\n\n2. **`cartservice`**:\n - Directly involved in handling requests for the `/api/cart` endpoint.\n - Any issues here could cause `controller` errors.\n\n3. **`frontend-web`**:\n - Client-side interactions leading to requests to `/api/cart` via `controller`.\n - Potential timing or construction issues in requests.\n\n4. 
**Other entities (`checkoutservice`, `coredns`)**:\n - Indirectly related with varying degrees of confidence on impact.\n\n### Timeline of Significant Events\n\n1. **2024-11-02T10:56:00.000Z**:\n - **Observation**:\n - 6 occurrences of HTTP POST requests to the `/api/cart` endpoint resulting in `500 Internal Server Error`.\n - Example Log:\n ```\n 10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n ```\n\n2. **2024-11-02T10:56:30.000Z**:\n - **Observation**:\n - 2 occurrences of HTTP POST requests to the `/api/cart` endpoint resulting in `500 Internal Server Error`.\n - Example Log:\n ```\n 10.244.0.38 - - [02/Nov/2024:10:56:35 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n ```\n\n3. **2024-11-02T10:56:42.498Z**:\n - **Alert Triggered**:\n - High number of 500 errors for the /api/cart endpoint. Error rate is 98.78%, above the threshold of 1% over a 1-minute duration.\n\n### Context and Reasoning\n\nThe `controller` service functions as the main ingress gateway, routing HTTP requests to the appropriate backend services including the `frontendproxy`, and more specifically the downstream `cartservice`. 
The nature of the high error rates observed at `/api/cart`, particularly for POST requests, suggests the problem could originate either at the `cartservice` backend or along the routing path between `controller`, `frontendproxy`, and `cartservice`.\n\n#### Potential Root Causes\n- **Backend Service Issue**:\n Given the direct involvement of `cartservice` in `/api/cart` operations, issues here could easily propagate into repeated 500 errors seen by the `controller`.\n- **Routing/Network Issues**:\n Communication path issues (e.g., `frontendproxy` to `cartservice`) causing errors in routing HTTP requests could result in these errors.\n\n### Next Steps\n\nGiven the strong patterns:\n\n1. **Investigate `cartservice`**:\n - As the downstream service handling `/api/cart` requests, issues here would directly contribute to the observed error patterns in `controller`.\n\n2. **Investigate `frontendproxy`**:\n - Since requests are routed through this service to `cartservice`, any routing or network issues here might also contribute to the errors.\n\nUpon investigating both entities, performing more detailed inspections of the logs and interactions might reveal issues that propagate these failures. This should help isolate the fault in the communication path or within the specific services causing the `500 Internal Server Errors` seen in `controller`.\n\n**Entities to Investigate Next**:\n- **cartservice**\n- **frontendproxy**\n\nThese investigations will provide further insights into whether the root cause is localized to `cartservice` itself or issues are emergent from the routing path involving `frontendproxy`.", + "investigations": [ + { + "attachments": { + "alerts": [ + { + "kibana.alert.reason": "500 Errors is 98.78049, above the threshold of 1. 
(duration: 1 min, data view: otel_logs_data (Automated by Demo CLI), group: controller,/api/cart)", + "kibana.alert.evaluation.values": [ + 98.78048780487805 + ], + "kibana.alert.evaluation.threshold": [ + 1 + ], + "kibana.alert.group": [ + { + "field": "service.name", + "value": "controller" + }, + { + "field": "url.path", + "value": "/api/cart" + } + ], + "tags": [ + "demo", + "cli-created" + ], + "service.name": "controller", + "kibana.alert.rule.category": "Custom threshold", + "kibana.alert.rule.consumer": "logs", + "kibana.alert.rule.execution.uuid": "4187827d-686e-4098-98e3-21e13495798e", + "kibana.alert.rule.name": "NGINX 500s", + "kibana.alert.rule.parameters": { + "criteria": [ + { + "comparator": ">", + "metrics": [ + { + "name": "A", + "filter": "http.response.status_code:*", + "aggType": "count" + }, + { + "name": "B", + "filter": "http.response.status_code>=500", + "aggType": "count" + } + ], + "threshold": [ + 1 + ], + "timeSize": 1, + "timeUnit": "m", + "equation": "(B/A) * 100", + "label": "500 Errors" + } + ], + "alertOnNoData": false, + "alertOnGroupDisappear": false, + "searchConfiguration": { + "query": { + "query": "k8s.namespace.name: \"ingress-nginx\" AND url.path: /api/*", + "language": "kuery" + }, + "index": "otel_logs_data" + }, + "groupBy": [ + "service.name", + "url.path" + ] + }, + "kibana.alert.rule.producer": "observability", + "kibana.alert.rule.revision": 0, + "kibana.alert.rule.rule_type_id": "observability.rules.custom_threshold", + "kibana.alert.rule.tags": [ + "demo", + "cli-created" + ], + "kibana.alert.rule.uuid": "9055220c-8fb1-4f9f-be7c-0a33eb2bafc5", + "kibana.space_ids": [ + "default" + ], + "@timestamp": "2024-11-02T10:56:42.498Z", + "event.action": "open", + "event.kind": "signal", + "kibana.alert.rule.execution.timestamp": "2024-11-02T10:56:42.498Z", + "kibana.alert.action_group": "custom_threshold.fired", + "kibana.alert.flapping": false, + "kibana.alert.flapping_history": [ + true + ], + "kibana.alert.instance.id": 
"controller,/api/cart", + "kibana.alert.maintenance_window_ids": [], + "kibana.alert.consecutive_matches": 1, + "kibana.alert.status": "active", + "kibana.alert.uuid": "b97a1b20-d14b-4835-b948-bd7a423284eb", + "kibana.alert.severity_improving": false, + "kibana.alert.workflow_status": "open", + "kibana.alert.duration.us": 0, + "kibana.alert.start": "2024-11-02T10:56:42.498Z", + "kibana.alert.time_range": { + "gte": "2024-11-02T10:56:42.498Z" + }, + "kibana.version": "9.0.0" + }, + { + "kibana.alert.reason": "500 Errors is 100, above the threshold of 1. (duration: 1 min, data view: otel_logs_data (Automated by Demo CLI), group: controller,/api/checkout)", + "kibana.alert.evaluation.values": [ + 100 + ], + "kibana.alert.evaluation.threshold": [ + 1 + ], + "kibana.alert.group": [ + { + "field": "service.name", + "value": "controller" + }, + { + "field": "url.path", + "value": "/api/checkout" + } + ], + "tags": [ + "demo", + "cli-created" + ], + "service.name": "controller", + "kibana.alert.rule.category": "Custom threshold", + "kibana.alert.rule.consumer": "logs", + "kibana.alert.rule.execution.uuid": "4187827d-686e-4098-98e3-21e13495798e", + "kibana.alert.rule.name": "NGINX 500s", + "kibana.alert.rule.parameters": { + "criteria": [ + { + "comparator": ">", + "metrics": [ + { + "name": "A", + "filter": "http.response.status_code:*", + "aggType": "count" + }, + { + "name": "B", + "filter": "http.response.status_code>=500", + "aggType": "count" + } + ], + "threshold": [ + 1 + ], + "timeSize": 1, + "timeUnit": "m", + "equation": "(B/A) * 100", + "label": "500 Errors" + } + ], + "alertOnNoData": false, + "alertOnGroupDisappear": false, + "searchConfiguration": { + "query": { + "query": "k8s.namespace.name: \"ingress-nginx\" AND url.path: /api/*", + "language": "kuery" + }, + "index": "otel_logs_data" + }, + "groupBy": [ + "service.name", + "url.path" + ] + }, + "kibana.alert.rule.producer": "observability", + "kibana.alert.rule.revision": 0, + 
"kibana.alert.rule.rule_type_id": "observability.rules.custom_threshold", + "kibana.alert.rule.tags": [ + "demo", + "cli-created" + ], + "kibana.alert.rule.uuid": "9055220c-8fb1-4f9f-be7c-0a33eb2bafc5", + "kibana.space_ids": [ + "default" + ], + "@timestamp": "2024-11-02T10:56:42.498Z", + "event.action": "open", + "event.kind": "signal", + "kibana.alert.rule.execution.timestamp": "2024-11-02T10:56:42.498Z", + "kibana.alert.action_group": "custom_threshold.fired", + "kibana.alert.flapping": false, + "kibana.alert.flapping_history": [ + true + ], + "kibana.alert.instance.id": "controller,/api/checkout", + "kibana.alert.maintenance_window_ids": [], + "kibana.alert.consecutive_matches": 1, + "kibana.alert.status": "active", + "kibana.alert.uuid": "78472a9c-63a2-41ba-9812-47cebd48d833", + "kibana.alert.severity_improving": false, + "kibana.alert.workflow_status": "open", + "kibana.alert.duration.us": 0, + "kibana.alert.start": "2024-11-02T10:56:42.498Z", + "kibana.alert.time_range": { + "gte": "2024-11-02T10:56:42.498Z" + }, + "kibana.version": "9.0.0" + } + ], + "slos": [], + "analysis": { + "total": 56181, + "sampled": 1000, + "fields": [ + "@timestamp:date - 994 distinct values", + "app.label.component:keyword - 1 distinct values (`controller`)", + "attributes.event.name.text:text - 1 distinct values (`nginx.ingress.controller.error`)", + "attributes.event.name:keyword - 1 distinct values (`nginx.ingress.controller.error`)", + "attributes.http.request.id:keyword - 1000 distinct values (`87c973a800b3bd34d3679f0a12263c40`, `ee99fada68450d77be61deaae1f04008`, `80312e7e98e9171c8010e442784e7a04`, `a931c4f7d964f91f0a0750f3221dfa50`, `8c2e99672e225b279a7741671c976c93`, `eed4ff70db044f81adeda15bc1f3ad4c`, `dd730f83b894fc2adedfaf950cf5884c`, `4e35653560a824d6693d1e024906e9ce`, `36d6dfae2fe5d6fecbfaadd4fac5bde1`, `43c774bea806c21478f7ee57a449222b`, 990 more values)", + "attributes.http.request.method:keyword - 2 distinct values (`POST`, `GET`)", + 
"attributes.http.request.referrer:keyword - 2 distinct values (`http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/`, `http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart`)", + "attributes.http.request.size:long - 61 distinct values (`351`, `374`, `373`, `809`, `383`, `506`, `342`, `350`, `538`, `479`, 51 more values)", + "attributes.http.request.time:float - 90 distinct values (`3.172`, `1.103`, `0.053`, `4.434`, `0.019`, `3.328`, `1.01`, `3.197`, `3.133`, `2.566`, 80 more values)", + "attributes.http.response.body.size:long - 130 distinct values (`1144`, `1120`, `1024`, `19`, `1336`, `1198`, `1070`, `111157`, `111163`, `1271`, 120 more values)", + "attributes.http.response.status_code:long - 4 distinct values (`499`, `500`, `308`, `200`)", + "attributes.http.version:keyword - 1 distinct values (`1.1`)", + "attributes.log.file.path.text:text - 1 distinct values (`/var/log/pods/ingress-nginx_ingress-nginx-controller-bc57996ff-r6blf_35200065-5cde-4a7e-9018-0f0a4e2c5bb4/controller/0.log`)", + "attributes.log.file.path:keyword - 1 distinct values (`/var/log/pods/ingress-nginx_ingress-nginx-controller-bc57996ff-r6blf_35200065-5cde-4a7e-9018-0f0a4e2c5bb4/controller/0.log`)", + "attributes.log.iostream:keyword - 1 distinct values (`stdout`)", + "attributes.network.protocol.name.text:text - 1 distinct values (`http`)", + "attributes.network.protocol.name:keyword - 1 distinct values (`http`)", + "attributes.source.address:keyword - 1 distinct values (`10.244.0.38`)", + "attributes.upstream.address:keyword - 1 distinct values (`10.244.0.26:8080`)", + "attributes.upstream.name.text:text - 1 distinct values (`default-my-otel-demo-frontendproxy-8080`)", + "attributes.upstream.name:keyword - 1 distinct values (`default-my-otel-demo-frontendproxy-8080`)", + "attributes.upstream.response.size:keyword - 117 distinct values (`1149`, `1120`, `1029`, `19`, `1336`, `1203`, `1070`, `1276`, `1356`, `1105`, 107 more values)", + 
"attributes.upstream.response.status_code:keyword - 3 distinct values (`500`, `308`, `200`)", + "attributes.upstream.response.time:keyword - 89 distinct values (`3.171`, `1.007`, `1.103`, `0.053`, `4.434`, `3.328`, `1.010`, `3.197`, `3.132`, `2.565`, 79 more values)", + "attributes.url.domain:keyword - 2 distinct values (`icons`, ``)", + "attributes.url.extension:keyword - 7 distinct values (`css`, `Service/ResolveAll`, `Service/EventStream`, `png`, `svg`, `jpg`, `js`)", + "attributes.url.original.text:text - 167 distinct values (`/api/recommendations?productIds=&sessionId=cc71f7a4-0303-4404-8864-820628e21ca0¤cyCode=USD`, `/api/cart?sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF`, 165 more values)", + "attributes.url.original:keyword - 167 distinct values (`/api/recommendations?productIds=&sessionId=cc71f7a4-0303-4404-8864-820628e21ca0¤cyCode=USD`, `/api/cart?sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF`, `/api/recommendations?productIds=&sessionId=389794a9-8298-4921-a2fd-523803ed73c3¤cyCode=`, `/api/cart?sessionId=140befa2-624a-4e98-a1c2-fa9d9ccbfbdd¤cyCode=`, `/api/recommendations?productIds=&sessionId=4cbd2d72-6719-4619-b0a8-09415f9ea500¤cyCode=USD`, `/api/data/`, `/api/cart?sessionId=b9f19d90-1824-463c-b9f6-95c894a8fc64¤cyCode=`, `/api/recommendations?productIds=&sessionId=933d52cf-c574-49e7-8432-86f958764dbe¤cyCode=`, `/api/recommendations?productIds=&sessionId=213624a6-bf31-46c4-aa67-8e2c102a8763¤cyCode=USD`, `/api/cart?sessionId=221a8b46-29b7-4d66-9536-62e1a0d3ad75¤cyCode=CHF`, 157 more values)", + "attributes.url.path:keyword - 43 distinct values (`/api/products/0PUK6V6EV0`, `/api/products/9SIQT8TOJO`, `/api/products/LS4PSXUNUM`, `/api/data/`, `/api/products/L9ECAV7KIM`, `/api/data`, `/api/products/66VCHSJNUP`, `/api/products/2ZYFJ3GM2N`, `/cart`, `/images/products/NationalParkFoundationExplorascope.jpg`, 33 more values)", + "attributes.url.query:keyword - 126 distinct values 
(`productIds=&sessionId=cc71f7a4-0303-4404-8864-820628e21ca0¤cyCode=USD`, `sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF`, `productIds=&sessionId=389794a9-8298-4921-a2fd-523803ed73c3¤cyCode=`, `sessionId=140befa2-624a-4e98-a1c2-fa9d9ccbfbdd¤cyCode=`, `productIds=&sessionId=4cbd2d72-6719-4619-b0a8-09415f9ea500¤cyCode=USD`, `sessionId=b9f19d90-1824-463c-b9f6-95c894a8fc64¤cyCode=`, `productIds=&sessionId=933d52cf-c574-49e7-8432-86f958764dbe¤cyCode=`, `productIds=&sessionId=213624a6-bf31-46c4-aa67-8e2c102a8763¤cyCode=USD`, `sessionId=221a8b46-29b7-4d66-9536-62e1a0d3ad75¤cyCode=CHF`, `productIds=&sessionId=0c122e28-e8fb-4ab5-9889-ac797b5f17cf¤cyCode=USD`, 116 more values)", + "attributes.url.scheme:keyword - 1 distinct values (``)", + "attributes.user_agent.name.text:text - 2 distinct values (`Python Requests`, `HeadlessChrome`)", + "attributes.user_agent.name:keyword - 2 distinct values (`Python Requests`, `HeadlessChrome`)", + "attributes.user_agent.original.text:text - 2 distinct values (`python-requests/2.31.0`, `Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36`)", + "attributes.user_agent.original:keyword - 2 distinct values (`python-requests/2.31.0`, `Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36`)", + "attributes.user_agent.version:keyword - 2 distinct values (`2.31`, `120.0.6099`)", + "body.text:text - 1000 distinct values (`10.244.0.38 - - [02/Nov/2024:10:37:56 +0000] \"GET /api/currency? 
HTTP/1.1\" 200 199 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 598 0.007 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 199 0.007 200 87c973a800b3bd34d3679f0a12263c40\n`, `10.244.0.38 - - [02/Nov/2024:10:46:11 +0000] \"GET /_next/static/chunks/pages/_app-c923ae378a182a07.js HTTP/1.1\" 200 111150 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 484 0.017 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 111008 0.018 200 ee99fada68450d77be61deaae1f04008\n`, 998 more values)", + "data_stream.dataset:keyword - 1 distinct values (`nginx_ingress_controller.error.otel`)", + "data_stream.namespace:keyword - 1 distinct values (`default`)", + "data_stream.type:keyword - 1 distinct values (`logs`)", + "deployment.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "dropped_attributes_count:long - 1 distinct values (`0`)", + "event.dataset:keyword - 1 distinct values (`nginx_ingress_controller.error.otel`)", + "event.name:keyword - 1 distinct values (`nginx.ingress.controller.error`)", + "host.arch:keyword - 1 distinct values (`arm64`)", + "host.architecture:keyword - 1 distinct values (`arm64`)", + "host.cpu.cache.l2.size:long - 1 distinct values (`0`)", + "host.cpu.family:keyword - 1 distinct values (``)", + "host.cpu.model.id:keyword - 1 distinct values (`0x000`)", + "host.cpu.model.name:keyword - 1 distinct values (``)", + "host.cpu.stepping:keyword - 1 distinct values (`0`)", + "host.cpu.vendor.id:keyword - 1 distinct values (`Apple`)", + "host.ip:ip - 2 distinct values (`10.244.0.19`, `fe80::28ce:acff:fe42:368e`)", + "host.mac:keyword - 1 distinct values (`2A-CE-AC-42-36-8E`)", + "host.os.full:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS 
(Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "host.os.platform:keyword - 1 distinct values (`linux`)", + "http.request.id:keyword - 1000 distinct values (`87c973a800b3bd34d3679f0a12263c40`, `ee99fada68450d77be61deaae1f04008`, `80312e7e98e9171c8010e442784e7a04`, `a931c4f7d964f91f0a0750f3221dfa50`, `8c2e99672e225b279a7741671c976c93`, `eed4ff70db044f81adeda15bc1f3ad4c`, `dd730f83b894fc2adedfaf950cf5884c`, `4e35653560a824d6693d1e024906e9ce`, `36d6dfae2fe5d6fecbfaadd4fac5bde1`, `43c774bea806c21478f7ee57a449222b`, 990 more values)", + "http.request.method:keyword - 2 distinct values (`POST`, `GET`)", + "http.request.referrer:keyword - 2 distinct values (`http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/`, `http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart`)", + "http.request.size:long - 61 distinct values (`351`, `374`, `373`, `809`, `383`, `506`, `342`, `350`, `538`, `479`, 51 more values)", + "http.request.time:float - 90 distinct values (`3.172`, `1.103`, `0.053`, `4.434`, `0.019`, `3.328`, `1.01`, `3.197`, `3.133`, `2.566`, 80 more values)", + "http.response.body.size:long - 130 distinct values (`1144`, `1120`, `1024`, `19`, `1336`, `1198`, `1070`, `111157`, `111163`, `1271`, 120 more values)", + "http.response.status_code:long - 4 distinct values (`499`, `500`, `308`, `200`)", + "http.version:keyword - 1 distinct values (`1.1`)", + "k8s.container.name:keyword - 1 distinct values (`controller`)", + "k8s.container.restart_count:keyword - 1 distinct values (`0`)", + "k8s.deployment.name:keyword - 1 distinct values (`ingress-nginx-controller`)", + "k8s.namespace.name:keyword - 1 distinct values (`ingress-nginx`)", + "k8s.node.name:keyword - 1 distinct values (`minikube`)", + "k8s.pod.name:keyword - 1 distinct values (`ingress-nginx-controller-bc57996ff-r6blf`)", + "k8s.pod.start_time:keyword - 1 distinct values (`2024-10-26T08:56:56Z`)", + 
"k8s.pod.uid:keyword - 1 distinct values (`35200065-5cde-4a7e-9018-0f0a4e2c5bb4`)", + "kubernetes.deployment.name:keyword - 1 distinct values (`ingress-nginx-controller`)", + "kubernetes.namespace:keyword - 1 distinct values (`ingress-nginx`)", + "kubernetes.node.name:keyword - 1 distinct values (`minikube`)", + "kubernetes.pod.name:keyword - 1 distinct values (`ingress-nginx-controller-bc57996ff-r6blf`)", + "kubernetes.pod.uid:keyword - 1 distinct values (`35200065-5cde-4a7e-9018-0f0a4e2c5bb4`)", + "log.file.path:keyword - 1 distinct values (`/var/log/pods/ingress-nginx_ingress-nginx-controller-bc57996ff-r6blf_35200065-5cde-4a7e-9018-0f0a4e2c5bb4/controller/0.log`)", + "log.iostream:keyword - 1 distinct values (`stdout`)", + "message:text - 1000 distinct values (`10.244.0.38 - - [02/Nov/2024:10:37:56 +0000] \"GET /api/currency? HTTP/1.1\" 200 199 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 598 0.007 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 199 0.007 200 87c973a800b3bd34d3679f0a12263c40\n`, `10.244.0.38 - - [02/Nov/2024:10:46:11 +0000] \"GET /_next/static/chunks/pages/_app-c923ae378a182a07.js HTTP/1.1\" 200 111150 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 484 0.017 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 111008 0.018 200 ee99fada68450d77be61deaae1f04008\n`, 998 more values)", + "network.protocol.name:keyword - 1 distinct values (`http`)", + "observed_timestamp:date_nanos - 1000 distinct values (`2024-11-02T10:37:56.158731715Z`, `2024-11-02T10:46:11.961228541Z`, `2024-11-02T10:38:02.758802635Z`, `2024-11-02T10:54:51.563521629Z`, `2024-11-02T10:37:05.149442469Z`, `2024-11-02T10:49:01.961126259Z`, `2024-11-02T10:56:19.96308167Z`, 
`2024-11-02T10:42:06.760264887Z`, `2024-11-02T10:49:45.76210446Z`, `2024-11-02T10:54:02.562840801Z`, 990 more values)", + "os.description:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS (Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "os.type:keyword - 1 distinct values (`linux`)", + "resource.attributes.app.label.component:keyword - 1 distinct values (`controller`)", + "resource.attributes.deployment.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "resource.attributes.host.arch:keyword - 1 distinct values (`arm64`)", + "resource.attributes.host.cpu.cache.l2.size:long - 1 distinct values (`0`)", + "resource.attributes.host.cpu.family:keyword - 1 distinct values (``)", + "resource.attributes.host.cpu.model.id:keyword - 1 distinct values (`0x000`)", + "resource.attributes.host.cpu.model.name.text:text - 1 distinct values (``)", + "resource.attributes.host.cpu.model.name:keyword - 1 distinct values (``)", + "resource.attributes.host.cpu.stepping:keyword - 1 distinct values (`0`)", + "resource.attributes.host.cpu.vendor.id:keyword - 1 distinct values (`Apple`)", + "resource.attributes.host.ip:ip - 2 distinct values (`10.244.0.19`, `fe80::28ce:acff:fe42:368e`)", + "resource.attributes.host.mac:keyword - 1 distinct values (`2A-CE-AC-42-36-8E`)", + "resource.attributes.host.name:keyword - 1 distinct values (`otel-daemonset-opentelemetry-collector-agent-7jlpk`)", + "resource.attributes.k8s.container.name.text:text - 1 distinct values (`controller`)", + "resource.attributes.k8s.container.name:keyword - 1 distinct values (`controller`)", + "resource.attributes.k8s.container.restart_count:keyword - 1 distinct values (`0`)", + "resource.attributes.k8s.deployment.name:keyword - 1 distinct values (`ingress-nginx-controller`)", + "resource.attributes.k8s.namespace.name:keyword - 1 distinct values (`ingress-nginx`)", + "resource.attributes.k8s.node.name:keyword - 1 
distinct values (`minikube`)", + "resource.attributes.k8s.pod.name:keyword - 1 distinct values (`ingress-nginx-controller-bc57996ff-r6blf`)", + "resource.attributes.k8s.pod.start_time:keyword - 1 distinct values (`2024-10-26T08:56:56Z`)", + "resource.attributes.k8s.pod.uid:keyword - 1 distinct values (`35200065-5cde-4a7e-9018-0f0a4e2c5bb4`)", + "resource.attributes.os.description:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS (Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "resource.attributes.os.type:keyword - 1 distinct values (`linux`)", + "resource.attributes.service.name.text:text - 1 distinct values (`controller`)", + "resource.attributes.service.name:keyword - 1 distinct values (`controller`)", + "resource.dropped_attributes_count:long - 1 distinct values (`0`)", + "resource.schema_url:keyword - 1 distinct values (`https://opentelemetry.io/schemas/1.6.1`)", + "scope.dropped_attributes_count:long - 1 distinct values (`0`)", + "service.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "service.name:keyword - 1 distinct values (`controller`)", + "severity_number:byte - 1 distinct values (`0`)", + "source.address:keyword - 1 distinct values (`10.244.0.38`)", + "upstream.address:keyword - 1 distinct values (`10.244.0.26:8080`)", + "upstream.name:keyword - 1 distinct values (`default-my-otel-demo-frontendproxy-8080`)", + "upstream.response.size:keyword - 117 distinct values (`1149`, `1120`, `1029`, `19`, `1336`, `1203`, `1070`, `1276`, `1356`, `1105`, 107 more values)", + "upstream.response.status_code:keyword - 3 distinct values (`500`, `308`, `200`)", + "upstream.response.time:keyword - 89 distinct values (`3.171`, `1.007`, `1.103`, `0.053`, `4.434`, `3.328`, `1.010`, `3.197`, `3.132`, `2.565`, 79 more values)", + "url.domain:keyword - 2 distinct values (`icons`, ``)", + "url.extension:keyword - 7 distinct values (`css`, `Service/ResolveAll`, 
`Service/EventStream`, `png`, `svg`, `jpg`, `js`)", + "url.original:keyword - 167 distinct values (`/api/recommendations?productIds=&sessionId=cc71f7a4-0303-4404-8864-820628e21ca0¤cyCode=USD`, `/api/cart?sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF`, `/api/recommendations?productIds=&sessionId=389794a9-8298-4921-a2fd-523803ed73c3¤cyCode=`, `/api/cart?sessionId=140befa2-624a-4e98-a1c2-fa9d9ccbfbdd¤cyCode=`, `/api/recommendations?productIds=&sessionId=4cbd2d72-6719-4619-b0a8-09415f9ea500¤cyCode=USD`, `/api/data/`, `/api/cart?sessionId=b9f19d90-1824-463c-b9f6-95c894a8fc64¤cyCode=`, `/api/recommendations?productIds=&sessionId=933d52cf-c574-49e7-8432-86f958764dbe¤cyCode=`, `/api/recommendations?productIds=&sessionId=213624a6-bf31-46c4-aa67-8e2c102a8763¤cyCode=USD`, `/api/cart?sessionId=221a8b46-29b7-4d66-9536-62e1a0d3ad75¤cyCode=CHF`, 157 more values)", + "url.path:keyword - 43 distinct values (`/api/products/0PUK6V6EV0`, `/api/products/9SIQT8TOJO`, `/api/products/LS4PSXUNUM`, `/api/data/`, `/api/products/L9ECAV7KIM`, `/api/data`, `/api/products/66VCHSJNUP`, `/api/products/2ZYFJ3GM2N`, `/cart`, `/images/products/NationalParkFoundationExplorascope.jpg`, 33 more values)", + "url.query:keyword - 126 distinct values (`productIds=&sessionId=cc71f7a4-0303-4404-8864-820628e21ca0¤cyCode=USD`, `sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF`, `productIds=&sessionId=389794a9-8298-4921-a2fd-523803ed73c3¤cyCode=`, `sessionId=140befa2-624a-4e98-a1c2-fa9d9ccbfbdd¤cyCode=`, `productIds=&sessionId=4cbd2d72-6719-4619-b0a8-09415f9ea500¤cyCode=USD`, `sessionId=b9f19d90-1824-463c-b9f6-95c894a8fc64¤cyCode=`, `productIds=&sessionId=933d52cf-c574-49e7-8432-86f958764dbe¤cyCode=`, `productIds=&sessionId=213624a6-bf31-46c4-aa67-8e2c102a8763¤cyCode=USD`, `sessionId=221a8b46-29b7-4d66-9536-62e1a0d3ad75¤cyCode=CHF`, `productIds=&sessionId=0c122e28-e8fb-4ab5-9889-ac797b5f17cf¤cyCode=USD`, 116 more values)", + "url.scheme:keyword - 1 distinct values (``)", + 
"user_agent.name:keyword - 2 distinct values (`Python Requests`, `HeadlessChrome`)", + "user_agent.original:keyword - 2 distinct values (`python-requests/2.31.0`, `Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36`)", + "user_agent.version:keyword - 2 distinct values (`2.31`, `120.0.6099`)" + ] + }, + "ownPatterns": [ + { + "field": "message", + "count": 54850, + "pattern": "HTTP/1.1 Mozilla/5.0 X11 Linux aarch64 AppleWebKit/537.36 KHTML like Gecko HeadlessChrome/120.0.6099.28 Safari/537.36 default-my-otel-demo-frontendproxy-8080", + "regex": ".*?HTTP/1\\.1.+?Mozilla/5\\.0.+?X11.+?Linux.+?aarch64.+?AppleWebKit/537\\.36.+?KHTML.+?like.+?Gecko.+?HeadlessChrome/120\\.0\\.6099\\.28.+?Safari/537\\.36.+?default-my-otel-demo-frontendproxy-8080.*?", + "sample": "10.244.0.38 - - [02/Nov/2024:10:54:54 +0000] \"GET /icons/Chevron.svg HTTP/1.1\" 200 805 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 508 0.001 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 810 0.001 200 8693bfefd0668ca27fff0e929785a2c5\n", + "highlight": { + "service.name": [ + "controller" + ], + "resource.attributes.service.name": [ + "controller" + ] + }, + "metadata": { + "message": [ + "10.244.0.38 - - [02/Nov/2024:10:54:54 +0000] \"GET /icons/Chevron.svg HTTP/1.1\" 200 805 \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/\" \"Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36\" 508 0.001 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 810 0.001 200 8693bfefd0668ca27fff0e929785a2c5\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.057Z", + "lastOccurrence": "2024-11-02T10:56:59.420Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 1385 + }, + { + "x": 1730543820000, + "y": 1172 + }, + { + 
"x": 1730543850000, + "y": 1520 + }, + { + "x": 1730543880000, + "y": 1312 + }, + { + "x": 1730543910000, + "y": 986 + }, + { + "x": 1730543940000, + "y": 1030 + }, + { + "x": 1730543970000, + "y": 1248 + }, + { + "x": 1730544000000, + "y": 1395 + }, + { + "x": 1730544030000, + "y": 1371 + }, + { + "x": 1730544060000, + "y": 1289 + }, + { + "x": 1730544090000, + "y": 1467 + }, + { + "x": 1730544120000, + "y": 1223 + }, + { + "x": 1730544150000, + "y": 1429 + }, + { + "x": 1730544180000, + "y": 1329 + }, + { + "x": 1730544210000, + "y": 1224 + }, + { + "x": 1730544240000, + "y": 1424 + }, + { + "x": 1730544270000, + "y": 1409 + }, + { + "x": 1730544300000, + "y": 1405 + }, + { + "x": 1730544330000, + "y": 1322 + }, + { + "x": 1730544360000, + "y": 1326 + }, + { + "x": 1730544390000, + "y": 1416 + }, + { + "x": 1730544420000, + "y": 1391 + }, + { + "x": 1730544450000, + "y": 1320 + }, + { + "x": 1730544480000, + "y": 1384 + }, + { + "x": 1730544510000, + "y": 1457 + }, + { + "x": 1730544540000, + "y": 1415 + }, + { + "x": 1730544570000, + "y": 1386 + }, + { + "x": 1730544600000, + "y": 1337 + }, + { + "x": 1730544630000, + "y": 1315 + }, + { + "x": 1730544660000, + "y": 1259 + }, + { + "x": 1730544690000, + "y": 1403 + }, + { + "x": 1730544720000, + "y": 1352 + }, + { + "x": 1730544750000, + "y": 1396 + }, + { + "x": 1730544780000, + "y": 1325 + }, + { + "x": 1730544810000, + "y": 1365 + }, + { + "x": 1730544840000, + "y": 1387 + }, + { + "x": 1730544870000, + "y": 1414 + }, + { + "x": 1730544900000, + "y": 1210 + }, + { + "x": 1730544930000, + "y": 1344 + }, + { + "x": 1730544960000, + "y": 1444 + }, + { + "x": 1730544990000, + "y": 1264 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "dip", + "significance": "medium", + "change_point": 4, + "p_value": 0.00005345763188668862, + "timestamp": "2024-11-02T10:38:30.000Z" + }, + "shortId": "hhpu", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 1069, + 
"pattern": "GET HTTP/1.1 python-requests/2.31.0 default-my-otel-demo-frontendproxy-8080", + "regex": ".*?GET.+?HTTP/1\\.1.+?python-requests/2\\.31\\.0.+?default-my-otel-demo-frontendproxy-8080.*?", + "sample": "10.244.0.38 - - [02/Nov/2024:10:55:01 +0000] \"GET /api/data HTTP/1.1\" 200 177 \"-\" \"python-requests/2.31.0\" 350 0.003 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 177 0.003 200 6cf0fcfd1259f68166701125d3c9761c\n", + "highlight": { + "service.name": [ + "controller" + ], + "resource.attributes.service.name": [ + "controller" + ] + }, + "metadata": { + "message": [ + "10.244.0.38 - - [02/Nov/2024:10:55:01 +0000] \"GET /api/data HTTP/1.1\" 200 177 \"-\" \"python-requests/2.31.0\" 350 0.003 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 177 0.003 200 6cf0fcfd1259f68166701125d3c9761c\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:32.391Z", + "lastOccurrence": "2024-11-02T10:56:57.316Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 25 + }, + { + "x": 1730543820000, + "y": 27 + }, + { + "x": 1730543850000, + "y": 26 + }, + { + "x": 1730543880000, + "y": 26 + }, + { + "x": 1730543910000, + "y": 26 + }, + { + "x": 1730543940000, + "y": 27 + }, + { + "x": 1730543970000, + "y": 24 + }, + { + "x": 1730544000000, + "y": 25 + }, + { + "x": 1730544030000, + "y": 22 + }, + { + "x": 1730544060000, + "y": 27 + }, + { + "x": 1730544090000, + "y": 20 + }, + { + "x": 1730544120000, + "y": 23 + }, + { + "x": 1730544150000, + "y": 29 + }, + { + "x": 1730544180000, + "y": 25 + }, + { + "x": 1730544210000, + "y": 23 + }, + { + "x": 1730544240000, + "y": 27 + }, + { + "x": 1730544270000, + "y": 26 + }, + { + "x": 1730544300000, + "y": 27 + }, + { + "x": 1730544330000, + "y": 25 + }, + { + "x": 1730544360000, + "y": 31 + }, + { + "x": 1730544390000, + "y": 21 + }, + { + "x": 1730544420000, + "y": 27 + }, + { + "x": 1730544450000, + "y": 25 + }, + { + "x": 1730544480000, + "y": 29 + }, + { + "x": 1730544510000, + "y": 23 + }, + { + "x": 
1730544540000, + "y": 27 + }, + { + "x": 1730544570000, + "y": 24 + }, + { + "x": 1730544600000, + "y": 31 + }, + { + "x": 1730544630000, + "y": 22 + }, + { + "x": 1730544660000, + "y": 24 + }, + { + "x": 1730544690000, + "y": 27 + }, + { + "x": 1730544720000, + "y": 27 + }, + { + "x": 1730544750000, + "y": 27 + }, + { + "x": 1730544780000, + "y": 24 + }, + { + "x": 1730544810000, + "y": 25 + }, + { + "x": 1730544840000, + "y": 26 + }, + { + "x": 1730544870000, + "y": 29 + }, + { + "x": 1730544900000, + "y": 33 + }, + { + "x": 1730544930000, + "y": 34 + }, + { + "x": 1730544960000, + "y": 25 + }, + { + "x": 1730544990000, + "y": 28 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "jmoh", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 1068, + "pattern": "10.244.0.38 02 2024 10 0000 GET HTTP 1.1 python requests 2.31.0 default my otel demo frontendproxy 8080 10.244.0.26 8080", + "regex": ".*?10\\.244\\.0\\.38.+?02.+?2024.+?10.+?0000.+?GET.+?HTTP.+?1\\.1.+?python.+?requests.+?2\\.31\\.0.+?default.+?my.+?otel.+?demo.+?frontendproxy.+?8080.+?10\\.244\\.0\\.26.+?8080.*?", + "sample": "10.244.0.38 - - [02/Nov/2024:10:55:06 +0000] \"GET /api/cart HTTP/1.1\" 200 24 \"-\" \"python-requests/2.31.0\" 350 0.003 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 24 0.003 200 33934468a35902ed5b6dde14b1aca6a0\n", + "highlight": { + "service.name": [ + "controller" + ], + "resource.attributes.service.name": [ + "controller" + ] + }, + "metadata": { + "message": [ + "10.244.0.38 - - [02/Nov/2024:10:55:06 +0000] \"GET /api/cart HTTP/1.1\" 200 24 \"-\" \"python-requests/2.31.0\" 350 0.003 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 24 0.003 200 33934468a35902ed5b6dde14b1aca6a0\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:32.391Z", + "lastOccurrence": "2024-11-02T10:56:57.316Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 25 
+ }, + { + "x": 1730543820000, + "y": 27 + }, + { + "x": 1730543850000, + "y": 26 + }, + { + "x": 1730543880000, + "y": 26 + }, + { + "x": 1730543910000, + "y": 26 + }, + { + "x": 1730543940000, + "y": 27 + }, + { + "x": 1730543970000, + "y": 24 + }, + { + "x": 1730544000000, + "y": 25 + }, + { + "x": 1730544030000, + "y": 22 + }, + { + "x": 1730544060000, + "y": 27 + }, + { + "x": 1730544090000, + "y": 20 + }, + { + "x": 1730544120000, + "y": 23 + }, + { + "x": 1730544150000, + "y": 29 + }, + { + "x": 1730544180000, + "y": 25 + }, + { + "x": 1730544210000, + "y": 23 + }, + { + "x": 1730544240000, + "y": 27 + }, + { + "x": 1730544270000, + "y": 26 + }, + { + "x": 1730544300000, + "y": 27 + }, + { + "x": 1730544330000, + "y": 25 + }, + { + "x": 1730544360000, + "y": 31 + }, + { + "x": 1730544390000, + "y": 21 + }, + { + "x": 1730544420000, + "y": 27 + }, + { + "x": 1730544450000, + "y": 25 + }, + { + "x": 1730544480000, + "y": 29 + }, + { + "x": 1730544510000, + "y": 23 + }, + { + "x": 1730544540000, + "y": 27 + }, + { + "x": 1730544570000, + "y": 24 + }, + { + "x": 1730544600000, + "y": 31 + }, + { + "x": 1730544630000, + "y": 22 + }, + { + "x": 1730544660000, + "y": 24 + }, + { + "x": 1730544690000, + "y": 27 + }, + { + "x": 1730544720000, + "y": 27 + }, + { + "x": 1730544750000, + "y": 27 + }, + { + "x": 1730544780000, + "y": 24 + }, + { + "x": 1730544810000, + "y": 25 + }, + { + "x": 1730544840000, + "y": 26 + }, + { + "x": 1730544870000, + "y": 29 + }, + { + "x": 1730544900000, + "y": 32 + }, + { + "x": 1730544930000, + "y": 34 + }, + { + "x": 1730544960000, + "y": 25 + }, + { + "x": 1730544990000, + "y": 28 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "vzwi", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 262, + "pattern": "POST HTTP/1.1 python-requests/2.31.0 default-my-otel-demo-frontendproxy-8080", + "regex": 
".*?POST.+?HTTP/1\\.1.+?python-requests/2\\.31\\.0.+?default-my-otel-demo-frontendproxy-8080.*?", + "sample": "10.244.0.38 - - [02/Nov/2024:10:55:03 +0000] \"POST /api/checkout HTTP/1.1\" 200 984 \"-\" \"python-requests/2.31.0\" 780 0.068 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 984 0.069 200 f87d6397ec8b5740f05f1f3ca07adf50\n", + "highlight": { + "service.name": [ + "controller" + ], + "resource.attributes.service.name": [ + "controller" + ] + }, + "metadata": { + "message": [ + "10.244.0.38 - - [02/Nov/2024:10:55:03 +0000] \"POST /api/checkout HTTP/1.1\" 200 984 \"-\" \"python-requests/2.31.0\" 780 0.068 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 984 0.069 200 f87d6397ec8b5740f05f1f3ca07adf50\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:34.220Z", + "lastOccurrence": "2024-11-02T10:56:45.575Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 5 + }, + { + "x": 1730543820000, + "y": 10 + }, + { + "x": 1730543850000, + "y": 14 + }, + { + "x": 1730543880000, + "y": 3 + }, + { + "x": 1730543910000, + "y": 6 + }, + { + "x": 1730543940000, + "y": 12 + }, + { + "x": 1730543970000, + "y": 2 + }, + { + "x": 1730544000000, + "y": 6 + }, + { + "x": 1730544030000, + "y": 7 + }, + { + "x": 1730544060000, + "y": 5 + }, + { + "x": 1730544090000, + "y": 5 + }, + { + "x": 1730544120000, + "y": 5 + }, + { + "x": 1730544150000, + "y": 5 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 9 + }, + { + "x": 1730544240000, + "y": 5 + }, + { + "x": 1730544270000, + "y": 6 + }, + { + "x": 1730544300000, + "y": 14 + }, + { + "x": 1730544330000, + "y": 7 + }, + { + "x": 1730544360000, + "y": 10 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 4 + }, + { + "x": 1730544450000, + "y": 6 + }, + { + "x": 1730544480000, + "y": 6 + }, + { + "x": 1730544510000, + "y": 2 + }, + { + "x": 1730544540000, + "y": 3 + }, + { + "x": 1730544570000, + "y": 9 + }, + { + "x": 1730544600000, + "y": 5 + }, + { 
+ "x": 1730544630000, + "y": 9 + }, + { + "x": 1730544660000, + "y": 3 + }, + { + "x": 1730544690000, + "y": 14 + }, + { + "x": 1730544720000, + "y": 13 + }, + { + "x": 1730544750000, + "y": 2 + }, + { + "x": 1730544780000, + "y": 5 + }, + { + "x": 1730544810000, + "y": 3 + }, + { + "x": 1730544840000, + "y": 7 + }, + { + "x": 1730544870000, + "y": 5 + }, + { + "x": 1730544900000, + "y": 11 + }, + { + "x": 1730544930000, + "y": 11 + }, + { + "x": 1730544960000, + "y": 6 + }, + { + "x": 1730544990000, + "y": 2 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "vdjm", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 263, + "pattern": "10.244.0.38 02 2024 10 0000 api HTTP 1.1 python requests 2.31.0 default my otel demo frontendproxy 8080 10.244.0.26 8080", + "regex": ".*?10\\.244\\.0\\.38.+?02.+?2024.+?10.+?0000.+?api.+?HTTP.+?1\\.1.+?python.+?requests.+?2\\.31\\.0.+?default.+?my.+?otel.+?demo.+?frontendproxy.+?8080.+?10\\.244\\.0\\.26.+?8080.*?", + "sample": "10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n", + "highlight": { + "service.name": [ + "controller" + ], + "resource.attributes.service.name": [ + "controller" + ] + }, + "metadata": { + "message": [ + "10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:34.220Z", + "lastOccurrence": "2024-11-02T10:56:45.575Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 5 + }, + { + "x": 1730543820000, + "y": 10 + }, + { + "x": 1730543850000, + "y": 14 + }, + { + "x": 1730543880000, + "y": 3 + 
}, + { + "x": 1730543910000, + "y": 6 + }, + { + "x": 1730543940000, + "y": 12 + }, + { + "x": 1730543970000, + "y": 2 + }, + { + "x": 1730544000000, + "y": 6 + }, + { + "x": 1730544030000, + "y": 7 + }, + { + "x": 1730544060000, + "y": 5 + }, + { + "x": 1730544090000, + "y": 5 + }, + { + "x": 1730544120000, + "y": 5 + }, + { + "x": 1730544150000, + "y": 5 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 9 + }, + { + "x": 1730544240000, + "y": 5 + }, + { + "x": 1730544270000, + "y": 6 + }, + { + "x": 1730544300000, + "y": 14 + }, + { + "x": 1730544330000, + "y": 7 + }, + { + "x": 1730544360000, + "y": 10 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 4 + }, + { + "x": 1730544450000, + "y": 6 + }, + { + "x": 1730544480000, + "y": 6 + }, + { + "x": 1730544510000, + "y": 2 + }, + { + "x": 1730544540000, + "y": 3 + }, + { + "x": 1730544570000, + "y": 9 + }, + { + "x": 1730544600000, + "y": 5 + }, + { + "x": 1730544630000, + "y": 9 + }, + { + "x": 1730544660000, + "y": 3 + }, + { + "x": 1730544690000, + "y": 14 + }, + { + "x": 1730544720000, + "y": 13 + }, + { + "x": 1730544750000, + "y": 2 + }, + { + "x": 1730544780000, + "y": 5 + }, + { + "x": 1730544810000, + "y": 3 + }, + { + "x": 1730544840000, + "y": 7 + }, + { + "x": 1730544870000, + "y": 5 + }, + { + "x": 1730544900000, + "y": 12 + }, + { + "x": 1730544930000, + "y": 11 + }, + { + "x": 1730544960000, + "y": 6 + }, + { + "x": 1730544990000, + "y": 2 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "hxyo", + "relevance": "critical", + "interesting": true + } + ], + "patternsFromOtherEntities": [], + "searches": [ + { + "fragments": [ + "10.244.0.38:8080", + "10.244.0.38", + "8080" + ], + "appearsAs": "This IP address and port are referenced as attributes.source.address, attributes.upstream.address in the investigated entity service.name:controller." 
+ }, + { + "fragments": [ + "http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart", + "/api/cart" + ], + "appearsAs": "These URL fragments appear as attributes.http.request.referrer, attributes.url.original in the investigated entity service.name:controller." + }, + { + "fragments": [ + "/api/cart?sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57¤cyCode=CHF", + "sessionId=1d9f48dd-3313-4f9f-b83b-ff87635eca57", + "currencyCode=CHF" + ], + "appearsAs": "These URL fragments appear as attributes.url.query in the investigated entity service.name:controller." + }, + { + "fragments": [ + "default-my-otel-demo-frontendproxy-8080", + "frontendproxy", + "frontend" + ], + "appearsAs": "These service names appear as attributes.upstream.name in the investigated entity service.name:controller." + }, + { + "fragments": [ + "e6bae77cfb5b598f1d725bc6cc52d06d", + "506" + ], + "appearsAs": " These fragments appear as attributes.http.request.time, attributes.http.response.status_code in the investigated entity service.name:controller." + } + ], + "relatedEntitiesSummaries": [ + "### Related Entities Analysis\n\n#### 1. Entity: service.name:frontend\n- **Indicators (Strength: Average to Strong):**\n - Field: `attributes.upstream.name` in service.name:controller - `default-my-otel-demo-frontendproxy-8080`\n - Field: `kubernetes.pod.name.text` in service.name:frontend - `my-otel-demo-frontend-5bbf4d78bc-qtwdr`\n - URL paths in logs that suggest interactions with the frontend service, specifically `/api/cart`.\n- **Reasoning:** The `frontend` service interacts closely with the `controller` (ingress-nginx-controller). Requests directed to `/api/cart` initiate from `frontendproxy` which routes to `frontend`. Given the high error rate at the `/api/cart` endpoint, the issue might involve downstream calls from `frontend` to dependent services.\n- **Overall Likeliness:** High. 
The `frontend` service is one of the primary services processing requests (user interactions) that subsequently hit the `controller`.\n\n#### 2. Entity: service.name:cartservice\n- **Indicators (Strength: Strong):**\n - Correspondence of session IDs in the request URLs or logs between controller and cartservice.\n - Mention of cart failures in `frontend` interactions - indicated by error logs involving failed RPC calls to `cartservice`.\n - Logs for service.name:cartservice with messages such as `GetCartAsync called with userId=1d9f48dd`.\n- **Reasoning:** The `/api/cart` endpoint directly involves manipulating or retrieving data from `cartservice`. Given the 500 errors on requests routed via the `controller`, it's likely that `cartservice` might be either causing or being affected by these failures.\n- **Overall Likeliness:** High. Direct involvement of `cartservice` is plausible given the issue is associated specifically with cart interactions.\n\n#### 3. Entity: service.name:frontend-web\n- **Indicators (Strength: Average):**\n - URL referrer and path fragments suggest client-side interactions impact the controller’s ingress.\n - URL fields from both controller logs and frontend-web logs such as `http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart`.\n- **Reasoning:** `frontend-web` represents the client-side application layer. Issues in the client's request construction or timing might be indirectly impacting how the `controller` handles these requests.\n- **Overall Likeliness:** Moderate. While not directly involved in backend processing, interactions originating from the frontend-web could potentially impact the backend flow.\n\n#### 4. 
Entity: service.name:checkoutservice\n- **Indicators (Strength: Weak):**\n - Shared URL fragments related to the checkout process (`/api/cart` could be an intermediate step).\n - Common use of `sessionId` and `currencyCode` seen in request attributes.\n- **Reasoning:** Though the controller deals directly with cart interactions, completion of cart actions could trigger subsequent processes in `checkoutservice`. Errors during cart operations could thereby propagate issues if mismanaged upstream.\n- **Overall Likeliness:** Low to Moderate. Contextually relevant but perhaps indirectly linked unless specific error propagation is identified.\n\n#### 5. Entity: service.name:coredns\n- **Indicators (Strength: Weak to Average):**\n - DNS logs showing interactions such as `10.244.0.38:45693 - 8080 \"A IN ingress-nginx-controller.ingress-nginx.svc.cluster.local. UDP 74`.\n- **Reasoning:** DNS resolution issues could lead to inability to locate the correct backend service IP for `controller`, causing errors in routing HTTP requests.\n- **Overall Likeliness:** Low. Potential indirect influence but less likely to be the primary cause.\n\n#### Possible Relationship Summary\nGiven these findings:\n\n1. **frontend** and **cartservice** are the most relevant entities with strong indicators of their dependencies and direct involvement in handling requests relevant to the `/api/cart` endpoint. Investigating these services in conjunction with `controller` will likely reveal root causes.\n2. **frontend-web** could be examined for potential client-side initiation issues leading to increased errors, but it remains secondary.\n3. **checkoutservice** may be potentially linked through a sequence of dependencies initiated post-cart operations but is less immediate.\n4. 
**coredns** could be impacting resolution, yet this remains an unlikely principal cause unless supported by specific DNS errors.\n\nFocus on examining logs and dependencies from `cartservice` and `frontend` to pinpoint exact causes for the reported 500 errors at the `/api/cart` endpoint managed by the controller service.", + "## Analysis and Identification of Related Entities\n\n### Relationship 1: `frontendproxy` (default-my-otel-demo-frontendproxy-8080)\n- **Related entity:** `service.name:frontendproxy`\n- **Indicator strength:** Strong, based on the matching `attributes.upstream.name` field.\n - **Indications:**\n - From `controller` logs: `attributes.upstream.name` = `default-my-otel-demo-frontendproxy-8080`\n - Upstream IP address: `attributes.upstream.address` = `10.244.0.26:8080` in controller logs coinciding with requests that resulted in `500` errors.\n- **Reason:** The `frontendproxy` service routes traffic to the backend services, including the `controller`. Since the `controller` service receives requests and directs traffic to `frontendproxy`, it points to a possible upstream dependency causing the 500 errors during the requests to `/api/cart`.\n- **Overall likeliness:** High. This entity is directly involved since it routes traffic within the system, and its downstream performance or errors reflect on the `controller`.\n\n### Relationship 2: `frontend`\n- **Related entity:** `service.name:frontend`\n- **Indicator strength:** Moderate, observing the infrastructure context and the dependency chain.\n - **Indications:** \n - The `frontend` service is called by the `frontendproxy`, which is indicated as the `attributes.upstream.name` in `controller` logs.\n- **Reason:** In the context provided, the `frontend` communicates with other backend services to process user interactions and might be an upstream dependency for the `controller`. The problem might cascade from `frontendproxy` to `frontend`.\n- **Overall likeliness:** High. 
This service is critical in handling user interactions, and problems here could cause issue propagation to `controller`.\n\n### Relationship 3: `cartservice`\n- **Related entity:** `service.name:cartservice`\n- **Indicator strength:** Moderate, based on its role in the architecture and likely correlation.\n - **Indications:**\n - The `cartservice` handles shopping cart actions which map the `controller`'s `/api/cart` endpoint.\n - **Reason:** If the `cartservice` has issues, they would cascade to the `controller` /api/cart endpoint that depends on this service for cart management functions.\n- **Overall likeliness:** High. It closely correlates with the endpoint's functionality and possible 500 errors due to backend service inconsistencies.\n\n### Relationship 4: `currencyservice`\n- **Related entity:** `service.name:currencyservice`\n- **Indicator strength:** Weak, contextual dependency but crucial interplay with `cart` operations.\n - **Indications:**\n - Handling currency conversions, indirectly connected to cart operations.\n- **Reason:** The issues might arise if currency conversion fails during cart operations, contributing to the 500 errors in the `controller`.\n- **Overall likeliness:** Moderate. Essential for part of the request chains during cart operations, but primary errors might occur upstream.\n\n### Relationship 5: `productcatalogservice`\n- **Related entity:** `service.name:productcatalogservice`\n- **Indicator strength:** Weak, as no direct log correlation but relevant in the context.\n - **Indications:**\n - Provides product data, indirectly required during cart operations.\n- **Reason:** Cart operations fetching/validating product data might fail, contributing to the `controller` errors when /api/cart requests involve fetching product data.\n- **Overall likeliness:** Moderate. 
Possible issues here can impact `cartservice` processes and propagate as 500 errors.\n\n### Relationship 6: `frauddetectionservice`\n- **Related entity:** `service.name:frauddetectionservice`\n- **Indicator strength:** Weak, speculative and based more on potential correlation.\n - **Indications:**\n - No specific logs point directly; contextual relevance due to transaction handling.\n- **Reason:** If fraud detection triggers false flags, it can affect cart transactions.\n- **Overall likeliness:** Low. Unlikely direct contributor and more as a consequential involvement in transaction failures.\n\n### Summary\n\nThe `frontendproxy`, `frontend`, and `cartservice` are the most probable related entities with high relevance to the errors indicated in the `controller` service. Investigations should focus on these first, focusing on log patterns and errors cascading through these entities causing the 500 errors on `/api/cart`. Indirect relations like `currencyservice`, `productcatalogservice`, and `frauddetectionservice` can also be explored as part of the broader context.", + "During the investigation of the alert for the `controller` service, we are identifying possible relationships to other entities. We will meticulously examine the evidence from logs, traces, and other relevant data to uncover dependencies and potential causes for the high number of 500 errors on the `/api/cart` endpoint.\n\n### Highly Relevant Entities and Relationships\n\n#### 1. 
Entity: service.name:frontendproxy\n\n- **Indicators of Evidence**\n - **Strong Indicator**: `attributes.upstream.name: default-my-otel-demo-frontendproxy-8080`\n - **Field Values in Investigated Entity (`controller`)**:\n - `\"attributes.upstream.name\": \"default-my-otel-demo-frontendproxy-8080\"`\n - `\"attributes.upstream.response.status_code\": [\"500\", \"200\", \"308\"]`\n - **Fingerprinting Evidence**:\n - `\"message\": \"10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \\\"POST /api/cart HTTP/1.1\\\" 500 32 \\\"-\\\" \\\"python-requests/2.31.0\\\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\"`\n\n- **Relationship and Context**\n - The `frontendproxy` service acts as the reverse proxy, routing incoming HTTP traffic to different backend services, including the `controller`. The errors appearing in the `controller` log indicate communication issues between the `controller` and the `frontendproxy`.\n\n- **Overall Likeliness**: High\n - Given the primary role of the `frontendproxy` in routing traffic to `controller`, coupled with the direct evidence in the logs, this entity is crucial to the situation at hand.\n\n#### 2. Entity: service.name:frontend\n\n- **Indicators of Evidence**\n - **Average Indicator**: `features.service.name: frontend`\n - **Field Values in Investigated Entity (`controller`)**:\n - `\"attributes.http.request.origin\": \"http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart\"`\n - `\"/api/cart` endpoint related queries`\n\n- **Relationship and Context**:\n - The `frontend` service handles key operations related to user interactions before requests are passed to various backend services, including the `controller`. 
Issues in `frontend`, like poor handling of requests or incorrect data forwarding, might cascade downstream, resulting in 500 errors in the `controller`.\n\n- **Overall Likeliness**: Medium to High\n - Given the data pathway from user interactions handled by `frontend`, it’s essential to determine if `frontend` anomalies are contributing to the failure in `controller`.\n\n#### 3. Entity: service.name:cartservice\n\n- **Indicators of Evidence**\n - **Average Indicator**: `/api/cart` requests typically involving `cartservice`\n - **Field Values in Investigated Entity (`controller`)**:\n - `500 errors at \"/api/cart\"`\n\n- **Relationship and Context**:\n - The `cartservice` directly manages shopping cart operations over the gRPC protocol. While the HTTP API from `controller` might indicate communication failures with `cartservice`, these errors could also originate from improper data requests or transaction handling within `cartservice`.\n\n- **Overall Likeliness**: High\n - Investigating the interaction with `cartservice` can help distinguish if business logic errors or miscommunication between services lead to repeated 500 errors.\n\n### Moderately Relevant Entities\n\n#### 4. Entity: service.name:loadgenerator\n\n- **Indicators of Evidence**\n - **Weak Indicator**: Simulated high traffic which might put strain on other services causing errors.\n - **Field Values in Investigated Entity (`controller`)**:\n - General logged high traffic instances or increased load around the alert time.\n\n- **Relationship and Context**:\n - The `loadgenerator` mimics external user traffic managed by the Ingress and ultimately passed through nodes like `controller`. 
Excessive load can manifest as backend service overloads or timeouts, indirectly causing 500 errors in `controller`.\n\n- **Overall Likeliness**: Medium\n - Given the traffic simulation aspect, its role is probable but direct correlation to specific error occurrences in `controller` needs corroboration with other data points.\n\n### Less Likely but Contextually Relevant Entities\n\n#### 5. Entity: service.name:etcd\n\n- **Indicators of Evidence**\n - **Weak Indicator**: General mention in logs for Ingress control plane activity\n - **Field Values in Investigated Entity (`controller`)**:\n - `\"response count\": 0`, signifying Ingress data handling might be amiss during high traffic\n\n- **Relationship and Context**:\n - Possible indirect impact wherein control plane malfunctions or data inconsistencies in the `etcd` state can propagate as issues dispersed across services interacting via Ingress, including `controller`.\n\n- **Overall Likeliness**: Medium\n - This needs verifying integration with upstream Ingress state logs to validate contributions to observed `controller` issues.\n\n### Further Investigations and Likelihood Summary\n\nBased on logs and identified relationships, we will prioritize looking into:\n\n1. **Frontendproxy** - Checking its health, routing efficiency, and connections to `controller`.\n2. **Frontend** - Verifying its data handling during user interactions that lead to API cart errors.\n3. **Cartservice** - Ensuring logical transactions and gRPC communications, ruling out core service bugs.\n\nWe will cross-reference error logs, API traces, and dependency interactions tightly intertwined with the `controller` to form a comprehensive approach to resolution." + ], + "kbEntries": [ + { + "id": "System architecture", + "text": "The architecture described here outlines a microservices-based system, where each service is implemented in a distinct programming language and communicates via gRPC, HTTP, or TCP. 
This system is designed to handle simulated user traffic, supported by a variety of interconnected services and components.\n\n### System Architecture\n\n1. **`loadgenerator`** - Simulates external user traffic by sending HTTP requests, which are managed by an Nginx ingress controller. This ingress directs traffic to the `frontendproxy` service.\n\n2. **`frontendproxy` (Envoy)** - Acts as a reverse proxy, routing incoming traffic from `loadgenerator` to `frontend`.\n\n3. **`frontend` (Node.js)** - The core service for user interactions, receiving HTTP traffic from `frontendproxy` and interfacing with various backend services to fulfill requests.\n\n4. **`frontend-web` (RUM)** - A Real User Monitoring (RUM) layer that runs in the user's browser, enabling insights into end-user experiences and frontend performance.\n\n5. **`adservice`** - Delivers advertisements to the `frontend` using gRPC, enhancing the user experience with relevant ad content.\n\n6. **`cartservice`** - Manages shopping cart data, including adding and removing items. It communicates over gRPC and leverages a Redis cache for data persistence.\n\n7. **`currencyservice`** - Handles currency conversions and facilitates interactions between `cartservice` and `checkoutservice` over gRPC.\n\n8. **`checkoutservice`** - Coordinates the checkout process, calling various services for payments, shipping, and emails. It utilizes both gRPC and HTTP protocols to aggregate the necessary information for order completion.\n\n9. **`emailservice`** - Sends order confirmation emails to users via gRPC, triggered by interactions with `checkoutservice`.\n\n10. **`productcatalogservice`** - Maintains the product catalog, storing details about available items and providing this data to other services via gRPC.\n\n11. **`recommendationservice`** - Generates personalized product recommendations, accessed by `frontend` over gRPC.\n\n12. 
**`shippingservice`** - Manages shipping information, providing essential data to `checkoutservice` over gRPC.\n\n13. **`quoteservice`** - Supplies shipping quotes over HTTP, which are accessed by `shippingservice` to estimate shipping costs.\n\n14. **`paymentservice`** - Processes payment transactions through gRPC, enabling secure and efficient payments for `checkoutservice`.\n\n15. **`accountingservice`** - Responsible for recording transactions, it connects to a Kafka queue and interacts over TCP.\n\n16. **`frauddetectionservice`** - Monitors orders for potential fraud, also interfacing with the Kafka queue over TCP to receive relevant transaction data.\n\n17. **`imageprovider` (Nginx)** - Serves static images for the frontend interface, accessible over HTTP.\n\n18. **`queue` (Kafka)** - Acts as a central message broker, facilitating communication between `accountingservice` and `frauddetectionservice`.\n\n### Key Components\nThe system is structured to enable robust, distributed communication across services, allowing each component to focus on a specific aspect of the user experience or business logic. The ingress controller, in particular, plays a critical role in directing traffic, ensuring that incoming requests from `loadgenerator` are correctly routed through `frontendproxy` and into the system’s core services.\n\nThis architecture ensures that the system can handle high traffic loads and provides a modular, flexible framework to meet user demands while monitoring for performance and security.\n", + "tokens": 733, + "score": 3 + } + ] + }, + "entity": { + "service.name": "controller" + }, + "relatedEntities": [ + { + "entity": { + "service.name": "frontend" + }, + "reason": "The frontend service interacts closely with the controller (ingress-nginx-controller). Requests directed to /api/cart initiate from frontendproxy which routes to frontend. 
Given the high error rate at the /api/cart endpoint, the issue might involve downstream calls from frontend to dependent services.", + "confidence": "high" + }, + { + "entity": { + "service.name": "cartservice" + }, + "reason": "The /api/cart endpoint directly involves manipulating or retrieving data from cartservice. Given the 500 errors on requests routed via the controller, it's likely that cartservice might be either causing or being affected by these failures.", + "confidence": "high" + }, + { + "entity": { + "service.name": "frontend-web" + }, + "reason": "URL referrer and path fragments suggest client-side interactions impacting the controller's ingress. Issues in the client's request construction or timing might be indirectly impacting how the controller handles these requests.", + "confidence": "moderate" + }, + { + "entity": { + "service.name": "checkoutservice" + }, + "reason": "Though the controller deals directly with cart interactions, completion of cart actions could trigger subsequent processes in checkoutservice. Errors during cart operations could thereby propagate issues if mismanaged upstream.", + "confidence": "low" + }, + { + "entity": { + "service.name": "coredns" + }, + "reason": "DNS resolution issues could lead to inability to locate the correct backend service IP for controller, causing errors in routing HTTP requests.", + "confidence": "low" + }, + { + "entity": { + "service.name": "frontend" + }, + "reason": "The `frontend` service is called by the `frontendproxy`, which is indicated as the `attributes.upstream.name` in `controller` logs. In the context provided, the `frontend` communicates with other backend services to process user interactions and might be an upstream dependency for the `controller`. 
The problem might cascade from `frontendproxy` to `frontend`.", + "confidence": "high" + }, + { + "entity": { + "service.name": "cartservice" + }, + "reason": "The `cartservice` handles shopping cart actions which map the `controller`'s `/api/cart` endpoint. If the `cartservice` has issues, they would cascade to the `controller` /api/cart endpoint that depends on this service for cart management functions.", + "confidence": "high" + }, + { + "entity": { + "service.name": "currencyservice" + }, + "reason": "Handling currency conversions, indirectly connected to cart operations. The issues might arise if currency conversion fails during cart operations, contributing to the 500 errors in the `controller`.", + "confidence": "moderate" + }, + { + "entity": { + "service.name": "frauddetectionservice" + }, + "reason": "No specific logs point directly; contextual relevance due to transaction handling. If fraud detection triggers false flags, it can affect cart transactions.", + "confidence": "low" + }, + { + "entity": { + "service.name": "cartservice" + }, + "reason": "The `cartservice` directly manages shopping cart operations over the gRPC protocol, and errors here could lead to repeated 500 errors in the `controller`.", + "confidence": "high" + }, + { + "entity": { + "service.name": "frontend" + }, + "reason": "The `frontend` service handles user interactions that are passed to the `controller`. 
Issues in `frontend` could lead to failures in `controller`, resulting in 500 errors.", + "confidence": "medium to high" + }, + { + "entity": { + "service.name": "loadgenerator" + }, + "reason": "The `loadgenerator` simulates high user traffic which might strain other services and indirectly cause 500 errors in the `controller`.", + "confidence": "medium" + }, + { + "entity": { + "service.name": "etcd" + }, + "reason": "As a control plane activity tracker for Ingress, indirect impacts from `etcd` inconsistencies can propagate as issues in services interacting via Ingress, including `controller`.", + "confidence": "medium" + } + ], + "summary": "### Characteristics of the `controller` Entity\n\n**1. Infrastructure & Environment:**\n\nThe `controller` service is deployed within a Kubernetes environment, specifically in the `ingress-nginx` namespace. The service is associated with a `controller` container running within the `ingress-nginx-controller-bc57996ff-r6blf` pod. This pod is scheduled on a node identified as `minikube`, and the service is part of the `ingress-nginx-controller` deployment. Additionally, the infrastructure is based on an ARM64 architecture, running Ubuntu 24.04.1 LTS, with deployment environment set to `opentelemetry-demo`.\n\n**2. Communication Characteristics:**\n\nThe `controller` service functions as an Ingress controller, managing incoming traffic and routing it to backend services. It processes HTTP requests, typically version 1.1, and communicates with other services via HTTP protocols. Key endpoints involved in the communication include:\n- Inbound requests are received at endpoints like `/api/cart` and `/api/checkout`.\n- Outbound requests are routed primarily to the `frontendproxy` service, evidenced by the upstream address `10.244.0.26:8080`.\n\nThe service handles both `GET` and `POST` requests, and its communication paths include requests from user agents like \"HeadlessChrome\" and \"python-requests\".\n\n**3. 
Context of Entity in Investigation:**\n\n**Reason for Investigation:**\nThe `controller` service is currently under investigation due to an alert triggered by a high number of 500 errors (98.78% error rate) occurring at the `/api/cart` endpoint. The significant spike in error rates is concerning and suggests a potential issue within the service or its interaction with dependencies.\n\n**Relation to Other Entities:**\nThe high error rates on the `controller` are likely impacting the overall user experience of the application, notably through the `/api/cart` functionality, which interacts with the `cartservice`. As the `controller` service forwards requests to the `frontendproxy`, which in turn interacts with various backend services like `cartservice`, `checkoutservice`, `currencyservice`, and others, the ripple effect of these errors might be felt across multiple services in the architecture. This interconnectedness necessitates thorough investigation of `controller's` logs and error patterns to isolate the root cause and mitigate the disruption in the flow of HTTP requests.\n\nBy understanding the behavior and interaction of the `controller` service within the broader microservices architecture, we aim to identify and resolve the underlying issue causing the increase in 500 errors. This involves examining logs for patterns, determining if recent changes or specific conditions triggered the fault, and analyzing dependencies that could contribute to the error rates observed. 
This step is integral to restoring normal operation and maintaining service reliability and user satisfaction.\n\n### Analysis of Log Patterns for service.name:controller\n\n#### Pattern Group: Request Errors\n- **Pattern:**\n ```\n .*?10\\\\.244\\\\.0\\\\.38.+?02.+?2024.+?10.+?0000.+?api.+?HTTP.+?1\\\\.1.+?python.+?requests.+?2\\\\.31\\\\.0.+?default.+?my.+?otel.+?demo.+?frontendproxy.+?8080.+?10\\\\.244\\\\.0\\\\.26.+?8080.*?\n ```\n - **Sample:**\n ```\n 10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n ```\n - **Description:** This pattern represents request logs where an HTTP POST request to the `/api/cart` endpoint resulted in a `500 Internal Server Error`. The requests are from a client using `python-requests/2.31.0`.\n\n- **Count:** 263 occurrences\n\n- **Timeseries Analysis:**\n - **Trend:** The number of occurrences show fluctuation with no significant change in the pattern.\n - **Log Counts:** \n - Example timestamps and counts:\n - `2024-11-02T10:56:00.000Z: 6`\n - `2024-11-02T10:56:30.000Z: 2`\n - `2024-11-02T10:57:00.000Z: 0`\n\n### Summary\nBased on the provided logs patterns, the `controller` service is experiencing a high number of `500 Internal Server Errors` when handling POST requests to the `/api/cart` endpoint. This is evident from the consistent error logs observed at different timestamps. The alert threshold breach likely correlates with these error occurrences.\n\nNo other unusual patterns such as connection issues, startup messages, or garbage collection messages were identified in the logs around the error events. 
Further investigation on the backend or potential dependency issues (e.g., `cartservice`) might be needed to determine the exact cause of these errors.\n\n### Timeline of significant events\n\n- **2024-11-02T10:56:42.498Z**\n - **Alert Triggered**: High number of 500 errors for the /api/cart endpoint. Error rate is 98.78%, above the threshold of 1% over a 1-minute duration.\n \n- **2024-11-02T10:56:00.000Z**\n - **Log Entry**: 6 occurrences of HTTP POST requests to the `/api/cart` endpoint resulting in `500 Internal Server Error`.\n - **Example Log**:\n ```\n 10.244.0.38 - - [02/Nov/2024:10:56:08 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n ```\n \n- **2024-11-02T10:56:30.000Z**\n - **Log Entry**: 2 occurrences of HTTP POST requests to the `/api/cart` endpoint resulting in `500 Internal Server Error`.\n - **Example Log**:\n ```\n 10.244.0.38 - - [02/Nov/2024:10:56:35 +0000] \"POST /api/cart HTTP/1.1\" 500 32 \"-\" \"python-requests/2.31.0\" 506 0.002 [default-my-otel-demo-frontendproxy-8080] [] 10.244.0.26:8080 32 0.002 500 e6bae77cfb5b598f1d725bc6cc52d06d\n ```\n\n### Context and reasoning\n\nThe `controller` service plays a critical role in handling incoming HTTP requests within the Kubernetes environment, specifically acting as an Ingress controller within the `ingress-nginx` namespace. The recent alert, triggered by a high number of 500 errors, indicates a significant disruption in the normal operations of the `controller` service, particularly impacting the `/api/cart` endpoint. 
The log entries leading up to and following the alert highlight a pattern of consistent `500 Internal Server Error` responses, specifically around the timestamp in question.\n\nThe pattern group identified, related to request errors, shows repeated failures for HTTP POST requests from a client using `python-requests/2.31.0`, likely directed towards the `cartservice` via the `frontendproxy`. This high error rate suggests an underlying issue either within the `controller` service itself, its communication path to the `frontendproxy`, or potentially further downstream with the backend services like `cartservice`.\n\nNo other unusual log patterns, such as connection issues or any system degradation messages were noted, indicating the problem is likely concentrated around the interaction between `controller` and its immediate dependencies rather than a broader system-wide issue. By correlating the timestamps and repeated error logs, a clear link can be drawn between the `500 Internal Server Error` responses and the timeline, further supporting the hypothesis of a specific fault affecting the `/api/cart` endpoint.\n\nFurther investigation into the dependencies of the `controller` service, especially focusing on the interactions with the `frontendproxy` and subsequently `cartservice`, is warranted to isolate and rectify the root cause of these error patterns." + } + ] + }, + "role": "tool", + "toolCallId": "call_DB13TclS3Gc9zUgCrWLrvDuu" + }, + { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "toolCallId": "call_kstyq8qXoaWRX4CJQkC6wbhR", + "function": { + "name": "investigateEntity", + "arguments": { + "context": "As the downstream service handling /api/cart requests, issues here would directly contribute to error patterns in controller. High errors were observed in the controller for POST requests to the /api/cart endpoint resulting in '500 Internal Server Error'. 
Starting error rate was significantly high (98.78%), around 2024-11-02T10:56:42.498Z.", + "entity": { + "field": "service.name", + "value": "cartservice" + } + } + } + }, + { + "toolCallId": "call_zNoUwjdb2DJhxjhshLkgqlZw", + "function": { + "name": "investigateEntity", + "arguments": { + "context": "Requests are routed through this service to cartservice. Any routing or network issues here might also contribute to the 500 errors seen in controller. High error rate (98.78%) in controller for POST requests to the /api/cart endpoint starting at 2024-11-02T10:56:42.498Z.", + "entity": { + "field": "service.name", + "value": "frontend" + } + } + } + } + ] + }, + { + "name": "investigateEntity", + "role": "tool", + "toolCallId": "call_zNoUwjdb2DJhxjhshLkgqlZw", + "response": { + "entity": { + "service.name": "frontend" + }, + "relatedEntities": [ + { + "entity": { + "service.name": "controller" + }, + "reason": "`controller` service handles endpoints like `/api/cart`, which corresponds to failed transactions. They share common IP address and port patterns, indicating network interaction.", + "confidence": "very high" + }, + { + "entity": { + "service.name": "cartservice" + }, + "reason": "`frontend` service attempts to interact with `cartservice`. Connection errors from `frontend` indicate it is calling `cartservice` and facing issues getting responses, potentially causing the 500 errors.", + "confidence": "very high" + }, + { + "entity": { + "service.name": "frontend-web" + }, + "reason": "`frontend-web` captures real user interactions with the `/api/cart` endpoint. 
Helps trace the errors and delays externally perceived by users.", + "confidence": "moderate" + }, + { + "entity": { + "service.name": "coredns" + }, + "reason": "`coredns` is responsible for DNS resolution within the cluster and issues here could result in connectivity problems seen in `frontend`.", + "confidence": "moderate" + } + ], + "summary": "### Investigation of Entity `service.name:frontend`\n\n#### Context of Investigation\n\nThe `frontend` service is critical in this investigation due to its role in the overall request handling and the observed high error rate (98.78%) in the `controller` service for POST requests to the `/api/cart` endpoint. This issue began at `2024-11-02T10:56:42.498Z`. The `frontend` service routes requests to `cartservice`, a key interaction in this error occurrence.\n\n#### Infrastructure & Environment\n\n- **Environment**: The `frontend` operates within the `opentelemetry-demo` deployment environment.\n- **Deployment**: It is deployed on Kubernetes (k8s), specifically within the `default` namespace. \n- **Pod Information**: The pod name is `my-otel-demo-frontend-5bbf4d78bc-qtwdr`, on the `minikube` node. Each pod has a unique UID (`a9fcfff9-3524-411e-8ac9-4e4341c1a121`), with logs stored at `/var/log/pods/`.\n- **Resource Attributes**: Hosts `Apple` Arm64 architecture processor (`host.cpu.vendor.id: Apple`), running `Ubuntu 24.04.1 LTS (Noble Numbat)`, indicating a Linux-based system.\n\n#### Communication Characteristics \n\n- **Protocols**: \n - The `frontend` service communicates primarily using HTTP for inbound requests and gRPC for backend service interactions. 
\n - gRPC is specifically used when the `frontend` communicates with `cartservice`, highlighted by recorded RPC errors in the logs.\n- **Endpoints**:\n - The `frontend` gets requests from `frontendproxy` (Envoy), acting as the reverse proxy.\n - The service then routes these requests to different backend services, including `cartservice`, via gRPC.\n - The critical IP noted in logs is `10.110.191.164:8080`, pointing to a potential endpoint for the `cartservice`.\n\n#### Context of Entity in Investigation\n\n- **Reason for Investigation**: The `frontend` service is under scrutiny due to its pivotal role in processing and routing incoming user requests. Any disruption in its communication or processing mechanism directly affects the functionality of connected services, notably `cartservice`, leading to high error rates observed in the `controller`.\n- **Relation to Other Entities**:\n - **Previous Entity**: `controller` - The `controller` service logs indicate a high number of 500 errors were triggered during POST requests to `/api/cart`.\n - **Dependencies**: `frontend` relies on successful communication with `cartservice` to fulfill these cart-related requests.\n- **Architectural Role**: The `frontend` is the nucleus for user interactions within this microservices architecture. Any network or communication issue in `frontend` can cascade, influencing its backend dependencies like `cartservice` and leading to broader service disruptions.\n\nThis comprehensive analysis of the `frontend` service’s infrastructure, communication protocols, and its investigative context emphasizes its critical position in the service chain responsible for the reported errors. Further scrutiny of the communication logs, particularly focusing on the gRPC interactions with `cartservice`, is essential to identify and mitigate the root cause of these failures.\n\nObservations for service `frontend`.\n\n### Connection Issues to Upstream Dependency\n\n1. 
**Pattern**: \"No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080\"\n - **Sample**: \"14 UNAVAILABLE: No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080 (2024-11-02T10:56:12.143Z)\"\n - **Timeseries Observations**: \n - No occurrences before 2024-11-02T10:55:30.000Z.\n - Sharp increase at 2024-11-02T10:55:30.000Z (291 occurrences).\n - Further increase at 2024-11-02T10:56:00.000Z (381 occurrences).\n - Next timestamp at 2024-11-02T10:57:00.000Z showing a return to 0 occurrences.\n\n2. **Pattern**: \"INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"\n - **Sample**: \"13 INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"\n - **Timeseries Observations**:\n - No occurrences before 2024-11-02T10:55:30.000Z.\n - Slight increase at 2024-11-02T10:55:30.000Z (2 occurrences).\n - One more occurrence at 2024-11-02T10:56:00.000Z (1 occurrence).\n - No occurrences after 2024-11-02T10:56:30.000Z.\n\nThe significant spike in connection errors starting from 2024-11-02T10:55:30.000Z aligns with the high error rate in the `controller` service, suggesting a strong correlation between these established connection issues in the `frontend` service and the 500 errors seen in the `controller` service for POST requests to the `/api/cart` endpoint around the same time.\n\n### Possibly Relevant Log Patterns from coredns\n\n1. **Pattern**: \"INFO 10.244.0.26 AAAA IN my-otel-demo-frontend udp 39 false 512 NXDOMAIN qr,rd,ra 39\"\n - **Sample**: \"[INFO] 10.244.0.26:59273 - 48617 \"AAAA IN my-otel-demo-frontend. 
udp 39 false 512\" NXDOMAIN qr,rd,ra 39 0.003716084s\"\n - **Timeseries Observations**:\n - Frequent steady occurrences of 6 events every 30 seconds.\n - No notable changes or spikes coinciding with the connection issues observed in the `frontend` service.\n\nFrom these observations, it appears that the connection refusal errors in the `frontend` service are likely caused by issues in communication with the `cartservice`, potentially as a result of intermittent network disruptions or dependency service failures at the IP 10.110.191.164. Further investigation should focus on the health and connectivity status of the `cartservice` around the specified time period.\n\n### Timeline of Significant Events\n\n1. **2024-11-02T10:55:30.000Z**\n - **Event**: Sharp increase in connection error logs.\n - **Log Pattern**: \"No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080\"\n - **Details**: 291 occurrences reported.\n\n2. **2024-11-02T10:55:30.000Z**\n - **Event**: Appearance of an internal error in accessing cart services.\n - **Log Pattern**: \"INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error\"\n - **Details**: 2 occurrences reported.\n\n3. **2024-11-02T10:56:00.000Z**\n - **Event**: Further increase in connection error logs.\n - **Log Pattern**: \"No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080\"\n - **Details**: 381 occurrences reported.\n\n4. **2024-11-02T10:56:00.000Z**\n - **Event**: Additional occurrence of an internal error in accessing cart services.\n - **Log Pattern**: \"INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error\"\n - **Details**: 1 occurrence reported.\n\n5. 
**2024-11-02T10:56:42.498Z**\n - **Event**: High error rate (98.78%) in the `controller` service for POST requests to the `/api/cart` endpoint begins.\n - **Details**: Corresponds to the timing of observed spikes in connection errors in the `frontend` service.\n\n6. **2024-11-02T10:57:00.000Z**\n - **Event**: Connection error logs return to zero occurrences.\n - **Log Pattern**: \"No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080\"\n - **Details**: 0 occurrences reported.\n\n### Context and Reasoning\n\nThe `frontend` service is critical in this investigation due to its role in routing requests to the `cartservice`. The high error rates observed in the `controller` service's POST requests to the `/api/cart` endpoint started precisely at 2024-11-02T10:56:42.498Z, which is within the timeframe when the `frontend` service experienced significant connection issues with the `cartservice`.\n\nUpon analyzing log patterns, it is evident that the communication disruptions between the `frontend` service and the `cartservice` began at 2024-11-02T10:55:30.000Z with a sharp increase in connection errors, and these errors persisted until approximately 2024-11-02T10:57:00.000Z. During this period, numerous `ECONNREFUSED` errors were logged, indicating the `frontend` service's inability to establish a connection to the `cartservice` (IP 10.110.191.164:8080). This aligns with the timeline of the high error rates in the `controller` service, suggesting a direct correlation.\n\nFurthermore, the internal errors logged by the `frontend` service related to RPC errors and failed attempts to get the user cart during checkout reaffirm these connection problems. 
These errors underscore an impaired communication pathway to the `cartservice`, affecting the `controller` service's ability to process POST requests to the `/api/cart` endpoint.\n\nTherefore, the evidence strongly indicates that network or connection failures between the `frontend` service and its upstream dependency, `cartservice`, are the principal cause of the cascading 500 errors seen in the `controller` service. This highlights the critical position of the `frontend` service in ensuring stable communication pathways within the microservice architecture." + }, + "data": { + "attachments": { + "alerts": [], + "slos": [], + "analysis": { + "total": 98017, + "sampled": 1000, + "fields": [ + "@timestamp:date - 327 distinct values", + "app.label.component:keyword - 1 distinct values (`frontend`)", + "attributes.log.file.path.text:text - 1 distinct values (`/var/log/pods/default_my-otel-demo-frontend-5bbf4d78bc-qtwdr_a9fcfff9-3524-411e-8ac9-4e4341c1a121/frontend/0.log`)", + "attributes.log.file.path:keyword - 1 distinct values (`/var/log/pods/default_my-otel-demo-frontend-5bbf4d78bc-qtwdr_a9fcfff9-3524-411e-8ac9-4e4341c1a121/frontend/0.log`)", + "attributes.log.iostream:keyword - 1 distinct values (`stderr`)", + "body.text:text - 36 distinct values (` code: 13,\n`, ` details: 'cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"',\n`, 34 more values)", + "data_stream.dataset:keyword - 1 distinct values (`generic.otel`)", + "data_stream.namespace:keyword - 1 distinct values (`default`)", + "data_stream.type:keyword - 1 distinct values (`logs`)", + "deployment.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "dropped_attributes_count:long - 1 distinct values (`0`)", + "event.dataset:keyword - 1 distinct values (`generic.otel`)", + "host.arch:keyword - 1 distinct values (`arm64`)", + 
"host.architecture:keyword - 1 distinct values (`arm64`)", + "host.cpu.cache.l2.size:long - 1 distinct values (`0`)", + "host.cpu.family:keyword - 1 distinct values (``)", + "host.cpu.model.id:keyword - 1 distinct values (`0x000`)", + "host.cpu.model.name:keyword - 1 distinct values (``)", + "host.cpu.stepping:keyword - 1 distinct values (`0`)", + "host.cpu.vendor.id:keyword - 1 distinct values (`Apple`)", + "host.ip:ip - 2 distinct values (`10.244.0.19`, `fe80::28ce:acff:fe42:368e`)", + "host.mac:keyword - 1 distinct values (`2A-CE-AC-42-36-8E`)", + "host.os.full:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS (Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "host.os.platform:keyword - 1 distinct values (`linux`)", + "k8s.container.name:keyword - 1 distinct values (`frontend`)", + "k8s.container.restart_count:keyword - 1 distinct values (`0`)", + "k8s.deployment.name:keyword - 1 distinct values (`my-otel-demo-frontend`)", + "k8s.namespace.name:keyword - 1 distinct values (`default`)", + "k8s.node.name:keyword - 1 distinct values (`minikube`)", + "k8s.pod.name:keyword - 1 distinct values (`my-otel-demo-frontend-5bbf4d78bc-qtwdr`)", + "k8s.pod.start_time:keyword - 1 distinct values (`2024-10-26T09:00:25Z`)", + "k8s.pod.uid:keyword - 1 distinct values (`a9fcfff9-3524-411e-8ac9-4e4341c1a121`)", + "kubernetes.deployment.name:keyword - 1 distinct values (`my-otel-demo-frontend`)", + "kubernetes.namespace:keyword - 1 distinct values (`default`)", + "kubernetes.node.name:keyword - 1 distinct values (`minikube`)", + "kubernetes.pod.name:keyword - 1 distinct values (`my-otel-demo-frontend-5bbf4d78bc-qtwdr`)", + "kubernetes.pod.uid:keyword - 1 distinct values (`a9fcfff9-3524-411e-8ac9-4e4341c1a121`)", + "log.file.path:keyword - 1 distinct values (`/var/log/pods/default_my-otel-demo-frontend-5bbf4d78bc-qtwdr_a9fcfff9-3524-411e-8ac9-4e4341c1a121/frontend/0.log`)", + "log.iostream:keyword 
- 1 distinct values (`stderr`)", + "message:text - 36 distinct values (` code: 13,\n`, ` details: 'cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"',\n`, 34 more values)", + "observed_timestamp:date_nanos - 1000 distinct values (`2024-11-02T10:56:50.564274379Z`, `2024-11-02T10:55:46.563453752Z`, `2024-11-02T10:56:52.76272763Z`, `2024-11-02T10:56:14.764728626Z`, `2024-11-02T10:55:51.964080879Z`, `2024-11-02T10:55:51.764216463Z`, `2024-11-02T10:56:50.564063629Z`, `2024-11-02T10:55:38.563141804Z`, `2024-11-02T10:55:53.963540672Z`, `2024-11-02T10:56:32.762493176Z`, 990 more values)", + "os.description:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS (Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "os.type:keyword - 1 distinct values (`linux`)", + "resource.attributes.app.label.component:keyword - 1 distinct values (`frontend`)", + "resource.attributes.deployment.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "resource.attributes.host.arch:keyword - 1 distinct values (`arm64`)", + "resource.attributes.host.cpu.cache.l2.size:long - 1 distinct values (`0`)", + "resource.attributes.host.cpu.family:keyword - 1 distinct values (``)", + "resource.attributes.host.cpu.model.id:keyword - 1 distinct values (`0x000`)", + "resource.attributes.host.cpu.model.name.text:text - 1 distinct values (``)", + "resource.attributes.host.cpu.model.name:keyword - 1 distinct values (``)", + "resource.attributes.host.cpu.stepping:keyword - 1 distinct values (`0`)", + "resource.attributes.host.cpu.vendor.id:keyword - 1 distinct values (`Apple`)", + "resource.attributes.host.ip:ip - 2 distinct values (`10.244.0.19`, `fe80::28ce:acff:fe42:368e`)", + "resource.attributes.host.mac:keyword - 1 distinct values (`2A-CE-AC-42-36-8E`)", + 
"resource.attributes.host.name:keyword - 1 distinct values (`otel-daemonset-opentelemetry-collector-agent-7jlpk`)", + "resource.attributes.k8s.container.name.text:text - 1 distinct values (`frontend`)", + "resource.attributes.k8s.container.name:keyword - 1 distinct values (`frontend`)", + "resource.attributes.k8s.container.restart_count:keyword - 1 distinct values (`0`)", + "resource.attributes.k8s.deployment.name:keyword - 1 distinct values (`my-otel-demo-frontend`)", + "resource.attributes.k8s.namespace.name:keyword - 1 distinct values (`default`)", + "resource.attributes.k8s.node.name:keyword - 1 distinct values (`minikube`)", + "resource.attributes.k8s.pod.name:keyword - 1 distinct values (`my-otel-demo-frontend-5bbf4d78bc-qtwdr`)", + "resource.attributes.k8s.pod.start_time:keyword - 1 distinct values (`2024-10-26T09:00:25Z`)", + "resource.attributes.k8s.pod.uid:keyword - 1 distinct values (`a9fcfff9-3524-411e-8ac9-4e4341c1a121`)", + "resource.attributes.os.description:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS (Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "resource.attributes.os.type:keyword - 1 distinct values (`linux`)", + "resource.attributes.service.name.text:text - 1 distinct values (`frontend`)", + "resource.attributes.service.name:keyword - 1 distinct values (`frontend`)", + "resource.dropped_attributes_count:long - 1 distinct values (`0`)", + "resource.schema_url:keyword - 1 distinct values (`https://opentelemetry.io/schemas/1.6.1`)", + "scope.dropped_attributes_count:long - 1 distinct values (`0`)", + "service.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "service.name:keyword - 1 distinct values (`frontend`)", + "severity_number:byte - 1 distinct values (`0`)" + ] + }, + "ownPatterns": [ + { + "field": "message", + "count": 3756, + "pattern": "at", + "regex": ".*?at.*?", + "sample": " at 
/app/.next/server/pages/api/cart.js:1:1025\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at /app/.next/server/pages/api/cart.js:1:1025\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 1103 + }, + { + "x": 1730544960000, + "y": 1411 + }, + { + "x": 
1730544990000, + "y": 1242 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "qlqu", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 2366, + "pattern": "at app node_modules grpc grpc js build src", + "regex": ".*?at.+?app.+?node_modules.+?grpc.+?grpc.+?js.+?build.+?src.*?", + "sample": " at Object.onReceiveStatus (/app/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:360:141)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at Object.onReceiveStatus (/app/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:360:141)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, 
+ "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 693 + }, + { + "x": 1730544960000, + "y": 896 + }, + { + "x": 1730544990000, + "y": 777 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "roms", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 1014, + "pattern": "at app node_modules opentelemetry instrumentation grpc build src", + "regex": ".*?at.+?app.+?node_modules.+?opentelemetry.+?instrumentation.+?grpc.+?build.+?src.*?", + "sample": " at ServiceClientImpl.clientMethodTrace [as getCart] (/app/node_modules/@opentelemetry/instrumentation-grpc/build/src/instrumentation.js:211:42)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at ServiceClientImpl.clientMethodTrace [as getCart] (/app/node_modules/@opentelemetry/instrumentation-grpc/build/src/instrumentation.js:211:42)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + 
"y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 297 + }, + { + "x": 1730544960000, + "y": 384 + }, + { + "x": 1730544990000, + "y": 333 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "eyqk", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 351, + "pattern": "at node:internal process task_queues", + "regex": ".*?at.+?node:internal.+?process.+?task_queues.*?", + "sample": " at process.processTicksAndRejections (node:internal/process/task_queues:77:11)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at process.processTicksAndRejections (node:internal/process/task_queues:77:11)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 
1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 104 + }, + { + "x": 1730544960000, + "y": 129 + }, + { + "x": 1730544990000, + "y": 118 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "mpyi", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at AsyncLocalStorage.run node:async_hooks 346 14", + "regex": 
".*?at.+?AsyncLocalStorage\\.run.+?node:async_hooks.+?346.+?14.*?", + "sample": " at AsyncLocalStorage.run (node:async_hooks:346:14)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at AsyncLocalStorage.run (node:async_hooks:346:14)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 
1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "jsoq", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at AsyncLocalStorageContextManager.with app node_modules opentelemetry context async hooks build src AsyncLocalStorageContextManager.js 33 40", + "regex": ".*?at.+?AsyncLocalStorageContextManager\\.with.+?app.+?node_modules.+?opentelemetry.+?context.+?async.+?hooks.+?build.+?src.+?AsyncLocalStorageContextManager\\.js.+?33.+?40.*?", + "sample": " at AsyncLocalStorageContextManager.with (/app/node_modules/@opentelemetry/context-async-hooks/build/src/AsyncLocalStorageContextManager.js:33:40)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at AsyncLocalStorageContextManager.with (/app/node_modules/@opentelemetry/context-async-hooks/build/src/AsyncLocalStorageContextManager.js:33:40)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + 
{ + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "viwd", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at ContextAPI.with app node_modules opentelemetry api build src api context.js 60 46", + "regex": ".*?at.+?ContextAPI\\.with.+?app.+?node_modules.+?opentelemetry.+?api.+?build.+?src.+?api.+?context\\.js.+?60.+?46.*?", + "sample": " at ContextAPI.with (/app/node_modules/@opentelemetry/api/build/src/api/context.js:60:46)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at ContextAPI.with (/app/node_modules/@opentelemetry/api/build/src/api/context.js:60:46)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 
1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "agsa", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at new ZoneAwarePromise app node_modules zone.js bundles zone.umd.js 1340 33", + "regex": ".*?at.+?new.+?ZoneAwarePromise.+?app.+?node_modules.+?zone\\.js.+?bundles.+?zone\\.umd\\.js.+?1340.+?33.*?", + "sample": " at new ZoneAwarePromise 
(/app/node_modules/zone.js/bundles/zone.umd.js:1340:33) {\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at new ZoneAwarePromise (/app/node_modules/zone.js/bundles/zone.umd.js:1340:33) {\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 
1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "dhes", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "code", + "regex": ".*?code.*?", + "sample": " code: 14,\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " code: 14,\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 
1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "feod", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "for call at", + "regex": ".*?for.+?call.+?at.*?", + "sample": "for call at\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + "for call at\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + 
}, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "enzg", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at ServiceClientImpl anonymous /app/node_modules/@grpc/grpc-js/build/src/make-client.js", + "regex": ".*?at.+?ServiceClientImpl.+?anonymous.+?/app/node_modules/@grpc/grpc-js/build/src/make-client\\.js.*?", + "sample": " at ServiceClientImpl. (/app/node_modules/@grpc/grpc-js/build/src/make-client.js:105:19)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at ServiceClientImpl. 
(/app/node_modules/@grpc/grpc-js/build/src/make-client.js:105:19)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "iook", + "relevance": "normal", + 
"interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at ServiceClientImpl.makeUnaryRequest /app/node_modules/@grpc/grpc-js/build/src/client.js", + "regex": ".*?at.+?ServiceClientImpl\\.makeUnaryRequest.+?/app/node_modules/@grpc/grpc-js/build/src/client\\.js.*?", + "sample": " at ServiceClientImpl.makeUnaryRequest (/app/node_modules/@grpc/grpc-js/build/src/client.js:161:32)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at ServiceClientImpl.makeUnaryRequest (/app/node_modules/@grpc/grpc-js/build/src/client.js:161:32)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 
1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "jvlx", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 335, + "pattern": "metadata Metadata internalRepr Map 0 options", + "regex": ".*?metadata.+?Metadata.+?internalRepr.+?Map.+?0.+?options.*?", + "sample": " metadata: Metadata { internalRepr: Map(0) {}, options: {} }\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " metadata: Metadata { internalRepr: Map(0) {}, options: {} }\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, 
+ "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 97 + }, + { + "x": 1730544960000, + "y": 127 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "wczz", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 3, + "pattern": "internalRepr Map content-type Array", + "regex": ".*?internalRepr.+?Map.+?content-type.+?Array.*?", + "sample": " internalRepr: Map(1) { 'content-type' => [Array] },\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " internalRepr: Map(1) { 'content-type' => [Array] },\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:43.710Z", + "lastOccurrence": "2024-11-02T10:56:15.039Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + 
{ + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "pcyb", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 3, + "pattern": "metadata Metadata", + "regex": ".*?metadata.+?Metadata.*?", + "sample": " metadata: Metadata {\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " metadata: Metadata {\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:43.710Z", + "lastOccurrence": "2024-11-02T10:56:15.039Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { 
+ "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "tcak", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 3, + "pattern": "options", + "regex": ".*?options.*?", + "sample": " options: {}\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " options: {}\n" + ] + }, + 
"firstOccurrence": "2024-11-02T10:55:43.710Z", + "lastOccurrence": "2024-11-02T10:56:15.039Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "xbsw", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 1005, + "pattern": "No 
connection established Last error connect ECONNREFUSED 10.110.191.164 8080 2024 11 02T10", + "regex": ".*?No.+?connection.+?established.+?Last.+?error.+?connect.+?ECONNREFUSED.+?10\\.110\\.191\\.164.+?8080.+?2024.+?11.+?02T10.*?", + "sample": "14 UNAVAILABLE: No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080 (2024-11-02T10:56:12.143Z)", + "highlight": { + "service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + "14 UNAVAILABLE: No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080 (2024-11-02T10:56:12.143Z)" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.676Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + 
"x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 291 + }, + { + "x": 1730544960000, + "y": 381 + }, + { + "x": 1730544990000, + "y": 333 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "pssf", + "relevance": "critical", + "interesting": true + }, + { + "field": "error.exception.message", + "count": 3, + "pattern": "INTERNAL cart failure failed to get user cart during checkout rpc error code Unavailable desc connection error desc transport Error while dialing dial tcp connect connection refused", + "regex": ".*?INTERNAL.+?cart.+?failure.+?failed.+?to.+?get.+?user.+?cart.+?during.+?checkout.+?rpc.+?error.+?code.+?Unavailable.+?desc.+?connection.+?error.+?desc.+?transport.+?Error.+?while.+?dialing.+?dial.+?tcp.+?connect.+?connection.+?refused.*?", + "sample": "13 INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"", + "highlight": { + "service.name": [ + "frontend" + ] + }, + "metadata": { + "error.exception.message": [ + "13 INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"" + ] + }, + "firstOccurrence": "2024-11-02T10:55:43.710Z", + "lastOccurrence": "2024-11-02T10:56:15.038Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + 
"x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "crai", + "relevance": "critical", + "interesting": true + } + ], + "patternsFromOtherEntities": [ + { + "field": "message", + "count": 246, + "pattern": "INFO 10.244.0.26 AAAA IN my otel demo frontend udp 39 false 512 NXDOMAIN qr rd ra 39", + "regex": ".*?INFO.+?10\\.244\\.0\\.26.+?AAAA.+?IN.+?my.+?otel.+?demo.+?frontend.+?udp.+?39.+?false.+?512.+?NXDOMAIN.+?qr.+?rd.+?ra.+?39.*?", + "sample": "[INFO] 10.244.0.26:59273 - 48617 \"AAAA IN my-otel-demo-frontend. 
udp 39 false 512\" NXDOMAIN qr,rd,ra 39 0.003716084s\n", + "highlight": { + "body.text": [ + "[INFO] 10.244.0.26:59273 - 48617 \"AAAA IN my-otel-demo-frontend. udp 39 false 512\" NXDOMAIN qr,rd,ra" + ], + "message": [ + "[INFO] 10.244.0.26:59273 - 48617 \"AAAA IN my-otel-demo-frontend. udp 39 false 512\" NXDOMAIN qr,rd,ra" + ] + }, + "metadata": { + "service.name": [ + "coredns" + ], + "message": [ + "[INFO] 10.244.0.26:59273 - 48617 \"AAAA IN my-otel-demo-frontend. udp 39 false 512\" NXDOMAIN qr,rd,ra 39 0.003716084s\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.917Z", + "lastOccurrence": "2024-11-02T10:56:58.646Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 6 + }, + { + "x": 1730543820000, + "y": 6 + }, + { + "x": 1730543850000, + "y": 6 + }, + { + "x": 1730543880000, + "y": 6 + }, + { + "x": 1730543910000, + "y": 6 + }, + { + "x": 1730543940000, + "y": 6 + }, + { + "x": 1730543970000, + "y": 6 + }, + { + "x": 1730544000000, + "y": 6 + }, + { + "x": 1730544030000, + "y": 6 + }, + { + "x": 1730544060000, + "y": 6 + }, + { + "x": 1730544090000, + "y": 6 + }, + { + "x": 1730544120000, + "y": 6 + }, + { + "x": 1730544150000, + "y": 6 + }, + { + "x": 1730544180000, + "y": 6 + }, + { + "x": 1730544210000, + "y": 6 + }, + { + "x": 1730544240000, + "y": 6 + }, + { + "x": 1730544270000, + "y": 6 + }, + { + "x": 1730544300000, + "y": 6 + }, + { + "x": 1730544330000, + "y": 6 + }, + { + "x": 1730544360000, + "y": 6 + }, + { + "x": 1730544390000, + "y": 6 + }, + { + "x": 1730544420000, + "y": 6 + }, + { + "x": 1730544450000, + "y": 6 + }, + { + "x": 1730544480000, + "y": 6 + }, + { + "x": 1730544510000, + "y": 6 + }, + { + "x": 1730544540000, + "y": 6 + }, + { + "x": 1730544570000, + "y": 6 + }, + { + "x": 1730544600000, + "y": 6 + }, + { + "x": 1730544630000, + "y": 6 + }, + { + "x": 1730544660000, + "y": 6 + }, + { + "x": 1730544690000, + "y": 6 + }, + { + "x": 1730544720000, + "y": 6 + }, + { + "x": 1730544750000, + "y": 6 + }, + { + "x": 1730544780000, + 
"y": 6 + }, + { + "x": 1730544810000, + "y": 6 + }, + { + "x": 1730544840000, + "y": 6 + }, + { + "x": 1730544870000, + "y": 6 + }, + { + "x": 1730544900000, + "y": 6 + }, + { + "x": 1730544930000, + "y": 6 + }, + { + "x": 1730544960000, + "y": 6 + }, + { + "x": 1730544990000, + "y": 6 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "rlzf", + "relevance": "normal", + "interesting": false + } + ], + "searches": [ + { + "fragments": [ + "10.110.191.164:8080", + "10.110.191.164", + "8080" + ], + "appearsAs": "This IP address and port are referenced in the investigated entity 'frontend'." + }, + { + "fragments": [ + "/api/cart", + "cartservice", + "/api" + ], + "appearsAs": "These URL fragments appear as attributes.request.url in the investigated entity 'frontend'. They could be related to 'cartservice'." + }, + { + "fragments": [ + "000aa", + "000bbb" + ], + "appearsAs": "These ids appear as span.id and parent.id in the investigated entity 'frontend'. They could be referring to spans found on upstream or downstream services" + } + ], + "relatedEntitiesSummaries": [ + "## Possible Relationships to `service.name:frontend`\n\n### 1. Entity: `service.name:controller`\n- **Indicators:**\n - #### Average Indicator:\n - **IP Address and Port:**\n - `frontend`: `10.110.191.164:8080`\n - `controller`: `10.244.0.26:8080`\n - **URL Fragment:**\n - `frontend`: `/api/cart`\n - `controller`: `/api/cart?session`\n- **Relationship Reasoning:**\n - The `controller` service is highly likely to be making requests to, or routing through, the `frontend` service as evidenced by the URL fragments referring to the `/api/cart` endpoint seen in both entities. 
Additionally, they share common IP address and port patterns indicating network interaction.\n- **Relevance Assessment:**\n - **Very High**: Given that the `controller` service handles endpoints like `/api/cart`, which directly correspond to transactions failing with a high error rate, this entity is a critical part of the interaction chain and should be closely examined.\n\n### 2. Entity: `service.name:cartservice`\n- **Indicators:**\n - #### Strong Indicator:\n - **Direct Call References:**\n - `frontend`: `details: 'cart failure: failed to get user cart during checkout: rpc error: ... desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"'\n - `cartservice`: `url.full.text:\"http://my-otel-demo-cartservice:8080/oteldemo.CartService/GetCart\"`\n- **Relationship Reasoning:**\n - The `frontend` service attempts to interact with the `cartservice` as observed by the endpoint `/oteldemo.CartService/GetCart`. Connection errors from `frontend` indicate it is calling `cartservice` and facing issues getting responses, potentially causing the 500 errors.\n- **Relevance Assessment:**\n - **Very High**: Since `cartservice` manages user cart data and the connection errors reported in `frontend` logs directly correspond to attempts to communicate with `cartservice`, this entity is a probable cause of the service failures being observed.\n\n### 3. 
Entity: `service.name:frontend-web`\n- **Indicators:**\n - #### Average Indicator:\n - **URL Fragment:**\n - `frontend`: `/api/cart`\n - `frontend-web`: `/api/cart`\n - #### Weak Indicator:\n - **RUM Layer Mentions:**\n - `frontend`: `frontend-web (RUM)`\n - `frontend-web`: Described in System Architecture.\n- **Relationship Reasoning:**\n - `frontend-web` captures real user interactions with the `/api/cart` endpoint providing insights into the `frontend` service which might help trace the errors and delays externally perceived by users.\n- **Relevance Assessment:**\n - **Moderate**: While `frontend-web` can show the surface errors users are experiencing, it is more of an observability layer rather than directly causing the 500 errors or impacting the service interactions upstream.\n\n### 4. Entity: `service.name:frontendproxy`\n- **Indicators:**\n - #### Average Indicator:\n - **Network Routing Indicator:**\n - `frontend`: Receives traffic from `frontendproxy`\n - `frontendproxy`: Directs traffic to `frontend`\n - #### IP Address Mention:\n - Both services reference `8080`, indicating shared routing or proxying.\n- **Relationship Reasoning:**\n - `frontendproxy` routes traffic from external sources, including simulated user requests, to `frontend`. Any issues in this proxy layer could impact the incoming traffic to `frontend`, causing errors.\n- **Relevance Assessment:**\n - **High**: Considering `frontendproxy` handles traffic routing, any misconfiguration or failures here could lead to the issues observed on the `frontend`.\n\n### 5. 
Entity: `service.name:coredns`\n- **Indicators:**\n - #### Weak Indicators:\n - **DNS Resolution Events:**\n - Logs showing DNS queries involving `frontend`.\n - **IP Address and Port:**\n - Shared network interactions over common ports like `8080`.\n- **Relationship Reasoning:**\n - While not directly related to application logic, `coredns` is responsible for DNS resolution within the cluster, and issues here could result in connectivity problems seen in `frontend` when trying to reach `cartservice`.\n- **Relevance Assessment:**\n - **Moderate**: While less likely to be the root cause, DNS misconfigurations or failures could indirectly contribute to connectivity issues within services like `frontend`.\n\n### Summary\n\n1. **service.name:controller** - Very High relevance due to direct endpoint interaction.\n2. **service.name:cartservice** - Very High relevance for being the direct interaction target with observed failures.\n3. **service.name:frontend-web** - Moderate relevance in monitoring layer but not direct interaction.\n4. **service.name:frontendproxy** - High relevance for routing traffic to `frontend`.\n5. **service.name:coredns** - Moderate relevance for possible DNS resolution issues." + ], + "kbEntries": [ + { + "id": "System architecture", + "text": "The architecture described here outlines a microservices-based system, where each service is implemented in a distinct programming language and communicates via gRPC, HTTP, or TCP. This system is designed to handle simulated user traffic, supported by a variety of interconnected services and components.\n\n### System Architecture\n\n1. **`loadgenerator`** - Simulates external user traffic by sending HTTP requests, which are managed by an Nginx ingress controller. This ingress directs traffic to the `frontendproxy` service.\n\n2. **`frontendproxy` (Envoy)** - Acts as a reverse proxy, routing incoming traffic from `loadgenerator` to `frontend`.\n\n3. 
**`frontend` (Node.js)** - The core service for user interactions, receiving HTTP traffic from `frontendproxy` and interfacing with various backend services to fulfill requests.\n\n4. **`frontend-web` (RUM)** - A Real User Monitoring (RUM) layer that runs in the user's browser, enabling insights into end-user experiences and frontend performance.\n\n5. **`adservice`** - Delivers advertisements to the `frontend` using gRPC, enhancing the user experience with relevant ad content.\n\n6. **`cartservice`** - Manages shopping cart data, including adding and removing items. It communicates over gRPC and leverages a Redis cache for data persistence.\n\n7. **`currencyservice`** - Handles currency conversions and facilitates interactions between `cartservice` and `checkoutservice` over gRPC.\n\n8. **`checkoutservice`** - Coordinates the checkout process, calling various services for payments, shipping, and emails. It utilizes both gRPC and HTTP protocols to aggregate the necessary information for order completion.\n\n9. **`emailservice`** - Sends order confirmation emails to users via gRPC, triggered by interactions with `checkoutservice`.\n\n10. **`productcatalogservice`** - Maintains the product catalog, storing details about available items and providing this data to other services via gRPC.\n\n11. **`recommendationservice`** - Generates personalized product recommendations, accessed by `frontend` over gRPC.\n\n12. **`shippingservice`** - Manages shipping information, providing essential data to `checkoutservice` over gRPC.\n\n13. **`quoteservice`** - Supplies shipping quotes over HTTP, which are accessed by `shippingservice` to estimate shipping costs.\n\n14. **`paymentservice`** - Processes payment transactions through gRPC, enabling secure and efficient payments for `checkoutservice`.\n\n15. **`accountingservice`** - Responsible for recording transactions, it connects to a Kafka queue and interacts over TCP.\n\n16. 
**`frauddetectionservice`** - Monitors orders for potential fraud, also interfacing with the Kafka queue over TCP to receive relevant transaction data.\n\n17. **`imageprovider` (Nginx)** - Serves static images for the frontend interface, accessible over HTTP.\n\n18. **`queue` (Kafka)** - Acts as a central message broker, facilitating communication between `accountingservice` and `frauddetectionservice`.\n\n### Key Components\nThe system is structured to enable robust, distributed communication across services, allowing each component to focus on a specific aspect of the user experience or business logic. The ingress controller, in particular, plays a critical role in directing traffic, ensuring that incoming requests from `loadgenerator` are correctly routed through `frontendproxy` and into the system’s core services.\n\nThis architecture ensures that the system can handle high traffic loads and provides a modular, flexible framework to meet user demands while monitoring for performance and security.\n", + "tokens": 733, + "score": 5 + } + ] + } + } + }, + { + "name": "investigateEntity", + "role": "tool", + "toolCallId": "call_kstyq8qXoaWRX4CJQkC6wbhR", + "response": { + "entity": { + "service.name": "cartservice" + }, + "relatedEntities": [ + { + "entity": { + "service.name": "controller" + }, + "reason": "The controller service is responsible for managing /api/cart requests. Thus, it acts as a direct upstream service to cartservice. 
High errors were observed in the controller when interacting with cartservice.", + "confidence": "high" + }, + { + "entity": { + "service.name": "frontend" + }, + "reason": "The frontend service interacts directly with the controller, handling user interactions that lead to /api/cart requests which are eventually processed by cartservice.", + "confidence": "high" + }, + { + "entity": { + "service.name": "currencyservice" + }, + "reason": "The currencyservice interacts with cartservice to handle currency conversions, acting as an upstream dependency for cartservice when performing currency-related operations on cart items.", + "confidence": "medium-high" + }, + { + "entity": { + "service.name": "checkoutservice" + }, + "reason": "The checkoutservice relies on cart data from cartservice during the checkout process, making it a downstream service dependent on the correct functioning of cartservice.", + "confidence": "medium" + }, + { + "entity": { + "service.name": "loadgenerator" + }, + "reason": "The loadgenerator service generates simulated traffic which impacts cartservice directly when testing the system's resilience.", + "confidence": "medium" + }, + { + "entity": { + "service.name": "adservice" + }, + "reason": "The adservice might indirectly impact cartservice through the frontend service by affecting the overall user experience and potentially causing indirect latency.", + "confidence": "low-medium" + }, + { + "entity": { + "service.name": "frontend" + }, + "reason": "The `frontend` service is highly likely to be relevant as it has a direct interaction with the `cartservice` and would be significantly impacted by any issues within the `cartservice`.", + "confidence": "very high" + }, + { + "entity": { + "service.name": "valkey" + }, + "reason": "The `valkey` service is highly relevant because it is the data store for `cartservice`, directly affecting its behavior and performance.", + "confidence": "high" + }, + { + "entity": { + "service.name": 
"currencyservice" + }, + "reason": "The `currencyservice` may interact with `cartservice` to handle price conversions for the items in the cart. Problems with currency conversion could cause unexpected data formats or communication failures, leading to `500 Internal Server Error` in `cartservice`.", + "confidence": "moderate" + }, + { + "entity": { + "service.name": "checkoutservice" + }, + "reason": "The `checkoutservice` could potentially depend on `cartservice` to validate and finalize items in the user's cart as part of the checkout process. If the `cartservice` fails during these operations, it could propagate errors back up to the user level through `checkoutservice`.", + "confidence": "moderate" + } + ], + "summary": "Based on the context provided, including the system architecture and data samples, the entity `service.name:cartservice` can be described as follows:\n\n### Infrastructure & Environment\n`cartservice` operates within a microservices-based architecture. It is containerized and runs within a Kubernetes environment. Specifically, the data sample indicates that the service is deployed in the `default` Kubernetes namespace and is running on a pod named `my-otel-demo-cartservice-67575c6f84-vngzw` with an IP address of `10.244.0.61`. The service environment is tagged as `opentelemetry-demo`, indicating the use of OpenTelemetry for observability purposes. The infrastructure includes a Redis cache used by the cart service for data persistence.\n\n### Communication Characteristics (Protocols and Endpoints)\n`cartservice` primarily communicates using gRPC. It handles shopping cart data including adding and removing items. The service interacts with Redis (as indicated by the `labels.db_redis_flags` attribute). 
Additionally, the service integrates with external feature flag providers as seen from the `feature_flag_key` and `feature_flag_provider_name` attributes, which suggests it evaluates feature flags, possibly targeting the feature key `cartServiceFailure`. \n\nThe communication endpoints and protocols from the sample data are:\n- gRPC methods: `/oteldemo.CartService/EmptyCart`, `/oteldemo.CartService/AddItem`, `/oteldemo.CartService/GetCart`\n- HTTP endpoint: `http://my-otel-demo-flagd:8013/flagd.evaluation.v1.Service/ResolveBoolean`\n- Redis database interactions: `HGET`, `HMSET`, `EXPIRE` commands targeting various identifiers like `my-otel-demo-valkey:6379`.\n\n### Context of Entity in Investigation\nThe cart service (`cartservice`) is being investigated as it is the downstream service handling `/api/cart` requests. Issues in `cartservice` are directly contributing to the error patterns in the controller layer for the `/api/cart` endpoint. A high error rate, culminating in `500 Internal Server Error` responses, started around `2024-11-02T10:56:42.498Z`. The context indicates that the cart service plays a crucial role in managing shopping cart operations.\n\nGiven the observed high error rates and the 500 errors, `cartservice` may be experiencing issues in processing gRPC calls, feature flag evaluations, or Redis caching, which could be causing failures that propagate upstream to the controller.\n\n### Related Entities and Previous Investigations\nPreviously, errors in the controller for POST requests to `/api/cart` have been observed. This investigation is a continuation, focusing on the downstream `cartservice` which is responsible for handling these requests. 
This service's interaction with other entities such as Redis (for caching cart data) and external feature flag services (potentially toggling functionality) suggest that any issues in these integrations could impact the `cartservice` performance and result in the observed errors upstream in the controller.\n\nThis detailed analysis moves us to further inspect specific logs, traces, and any interdependent service metrics that may provide clues about the root cause of the high error rate in `cartservice`.\n\n### Observations for service.name:cartservice\n\n#### Startup Messages\n1. **Pattern: \"OTEL-SDK: [224] Instrument 'process.runtime.dotnet.jit.compilation_time', Meter 'OpenTelemetry.Instrumentation.Runtime' has been deactivated.\"**\n - **Count:** 78\n - **Trend:** There was a spike at **2024-11-02T10:55:30.000Z** with 78 occurrences. Before this timestamp, the value was consistently 0.\n\n2. **Pattern: \"OTEL-SDK: [224] 'MeterProvider' Disposed.\"**\n - **Count:** 4\n - **Trend:** Occurrences were persistently recorded as 0 until a brief spike at **2024-11-02T10:55:30.000Z** with 4 instances.\n\n3. **Pattern: \"exiting...\"**\n - **Count:** 3\n - **Trend:** Consistently 0 until a minor rise at **2024-11-02T10:55:30.000Z**, with 3 instances recorded.\n\n4. **Pattern: \"Application is shutting down...\"**\n - **Count:** 2\n - **Trend:** Log entries were consistently at 0 with a brief spike at **2024-11-02T10:55:30.000Z** showing 2 instances.\n\n#### Fatal Errors\n5. **Pattern: \"FATAL: Could not start, bad entrypoint!\"**\n - **Count:** 3\n - **Trend:** This pattern shows a significant change at **2024-11-02T10:55:30.000Z** with 2 occurrences at the change point and one additional at **2024-11-02T10:56:30.000Z**. Before this period, the occurrence rate was consistently 0.\n\n### Conclusion\n\nThe patterns observed indicate that there were significant startup issues with the `cartservice` around the **2024-11-02T10:55:30.000Z** mark. 
Specifically, the `cartservice` encountered a fatal error (\"FATAL: Could not start, bad entrypoint!\") that aligns precisely with the rise in errors noted during this period. This error indicates that the service failed to start due to a misconfiguration or issues with the entry point. This corresponds with the sudden spike in errors and `500 Internal Server Error` responses observed in the downstream controller making POST requests to the `/api/cart` endpoint. Furthermore, logs related to the service's deactivation and shutdown support the perspective that the service was not running successfully or consistently after this time.\n\n### Timeline of significant events\n\n- **2024-11-02T10:55:30.000Z**: \n - **Log Message**: “OTEL-SDK: [224] Instrument 'process.runtime.dotnet.jit.compilation_time', Meter 'OpenTelemetry.Instrumentation.Runtime' has been deactivated.”\n - **Description**: Spike detected with 78 occurrences, suggesting a potential issue within the service's runtime environment.\n\n- **2024-11-02T10:55:30.000Z**: \n - **Log Message**: “OTEL-SDK: [224] 'MeterProvider' Disposed.”\n - **Description**: Sudden spike of 4 occurrences suggesting a significant change in the service’s telemetry setup or a potential abnormal shutdown initiation.\n\n- **2024-11-02T10:55:30.000Z**: \n - **Log Message**: “exiting...”\n - **Description**: Log entries rose to 3 incidences, signaling the cart service may be initiating an unexpected exit.\n\n- **2024-11-02T10:55:30.000Z**: \n - **Log Message**: “Application is shutting down...”\n - **Description**: 2 instances of this log message indicating the application is attempting to shut down around this time.\n\n- **2024-11-02T10:55:30.000Z**: \n - **Log Message**: “FATAL: Could not start, bad entrypoint!”\n - **Description**: Abrupt fatal error occurs 2 times, implying a critical misconfiguration or startup failure.\n\n- **2024-11-02T10:56:30.000Z**: \n - **Log Message**: “FATAL: Could not start, bad entrypoint!”\n - 
**Description**: Another occurrence of the critical fatal error, indicating the issue persisted beyond the initial spike and may have contributed to continued disruptions.\n\n- **2024-11-02T10:56:42.498Z**: \n - **Alert**: High error rate observed in the controller (98.78% error rate for POST requests to /api/cart).\n - **Description**: Starts the period of significantly high error rates contributing to 500 Internal Server Error responses. \n\n### Context and reasoning\n\nGiven the high error rate (98.78%) starting from **2024-11-02T10:56:42.498Z** for POST requests to `/api/cart`, the `cartservice` is a critical component for investigation as it handles these requests directly. From logs, there is a clear indication that significant problems began around **2024-11-02T10:55:30.000Z**, approximately one minute before the heightened error rate was observed in the upstream controller.\n\n**1. Startup Issues**: Multiple startup-related log patterns such as instruments deactivation, disposal of the MeterProvider, and application exit/shutdown messages indicate that cartservice experienced troubles initializing. Notably, the log entries “FATAL: Could not start, bad entrypoint!” directly show critical failures in service entry points likely causing the downstream failures.\n\n**2. Fatal Errors**: The fatal errors precisely correlate with the timeframe where errors spiked in the upstream service (controller). The error messages “FATAL: Could not start, bad entrypoint!” occurring multiple times reflect a misconfiguration or critical code issue that prevented the service from running properly.\n\nThus, the evidence from log patterns strongly suggests that startup and initialization problems in `cartservice`, resulting in its inconsistent availability and operational failures, is the root cause of the observed high error rates and 500 Internal Server Errors in the controller's POST requests to `/api/cart`." 
+ }, + "data": { + "attachments": { + "alerts": [], + "slos": [], + "analysis": { + "total": 41440, + "sampled": 1000, + "fields": [ + "@timestamp:date - 964 distinct values", + "agent.name.text:text - 1 distinct values (`opentelemetry/dotnet/elastic-dotnet`)", + "agent.name:keyword - 1 distinct values (`opentelemetry/dotnet/elastic-dotnet`)", + "agent.version:keyword - 1 distinct values (`1.0.0-alpha.4`)", + "container.id:keyword - 1 distinct values (`577d6250ad96a5d0016a42ed87bd5c3a63b86edd2af0a0c04b54a1cec2a1a50a`)", + "data_stream.dataset:keyword - 1 distinct values (`apm`)", + "data_stream.namespace:keyword - 1 distinct values (`default`)", + "data_stream.type:keyword - 1 distinct values (`traces`)", + "destination.address:keyword - 2 distinct values (`my-otel-demo-flagd`, `my-otel-demo-valkey`)", + "destination.port:long - 2 distinct values (`8013`, `6379`)", + "event.outcome:keyword - 1 distinct values (`success`)", + "event.success_count:byte - 1 distinct values (`1`)", + "host.hostname:keyword - 1 distinct values (`minikube`)", + "host.name.text:text - 1 distinct values (`minikube`)", + "host.name:keyword - 1 distinct values (`minikube`)", + "http.request.method:keyword - 1 distinct values (`POST`)", + "http.response.status_code:long - 1 distinct values (`200`)", + "kubernetes.namespace:keyword - 1 distinct values (`default`)", + "kubernetes.node.name.text:text - 1 distinct values (`minikube`)", + "kubernetes.node.name:keyword - 1 distinct values (`minikube`)", + "kubernetes.pod.name.text:text - 1 distinct values (`my-otel-demo-cartservice-67575c6f84-vngzw`)", + "kubernetes.pod.name:keyword - 1 distinct values (`my-otel-demo-cartservice-67575c6f84-vngzw`)", + "kubernetes.pod.uid:keyword - 1 distinct values (`8e441d5c-4d52-42f2-937e-1f3b84a6baed`)", + "labels.app_product_id:keyword - 8 distinct values (`66VCHSJNUP`, `L9ECAV7KIM`, `OLJCESPC7Z`, `0PUK6V6EV0`, `9SIQT8TOJO`, `LS4PSXUNUM`, `HQTGWGPNH4`, `6E92ZMYYFZ`)", + "labels.app_user_id:keyword - 418 
distinct values (`11c5f5d4-37b4-46d4-a04e-a97fd71b8597`, `9017f637-12af-4671-b18c-0f86a58a7bbe`, `349478d0-862e-4b14-9d54-3120dd57865f`, `2a55d360-40c5-4a32-844b-4182e8050ca3`, `b9944e91-18d6-430c-90a5-4969e1917a2d`, `1ffeffe2-4e4a-47c4-8b33-e691f116c8cc`, `92ac0a92-4a23-4aa7-866b-44cd6b5d6e26`, `76765dfc-80f8-49db-8377-249e6930dd35`, `d5c375cc-6e38-45f8-b121-30fcb3ae1001`, `bdc5ebd9-a2ee-4dbf-8dae-b88a2b2b3643`, 408 more values)", + "labels.db_redis_flags:keyword - 2 distinct values (`DemandMaster`, `None`)", + "labels.feature_flag_key:keyword - 1 distinct values (`cartServiceFailure`)", + "labels.feature_flag_provider_name:keyword - 1 distinct values (`flagd Provider`)", + "labels.feature_flag_variant:keyword - 1 distinct values (`off`)", + "labels.grpc_method:keyword - 3 distinct values (`/oteldemo.CartService/EmptyCart`, `/oteldemo.CartService/AddItem`, `/oteldemo.CartService/GetCart`)", + "labels.grpc_status_code:keyword - 1 distinct values (`0`)", + "labels.http_route:keyword - 3 distinct values (`/oteldemo.CartService/EmptyCart`, `/oteldemo.CartService/AddItem`, `/oteldemo.CartService/GetCart`)", + "labels.k8s_deployment_name:keyword - 1 distinct values (`my-otel-demo-cartservice`)", + "labels.k8s_pod_ip:ip - 1 distinct values (`10.244.0.61`)", + "labels.k8s_pod_start_time:keyword - 1 distinct values (`2024-11-02T10:23:09Z`)", + "labels.network_protocol_version:keyword - 1 distinct values (`2`)", + "labels.service_namespace:keyword - 1 distinct values (`opentelemetry-demo`)", + "labels.transaction_id:keyword - 995 distinct values (`6b71cc6a08847a01`, `796c855656f17eb0`, `aaa6049d68006044`, `60ad645003eda9b7`, `be3e883fd0dd1a58`, `47247987037e78b3`, `180228f348c3d599`, `587dca65bb2463f3`, `c438d3e7c15d9a4f`, `0a9f738aa8e11c7e`, 985 more values)", + "numeric_labels.app_cart_items_count:scaled_float - 10 distinct values (`8`, `1`, `16`, `2`, `20`, `5`, `4`, `10`, `3`, `0`)", + "numeric_labels.app_product_quantity:scaled_float - 6 distinct values (`5`, `1`, `2`, 
`10`, `3`, `4`)", + "numeric_labels.db_redis_database_index:scaled_float - 1 distinct values (`0`)", + "observer.hostname:keyword - 1 distinct values (`Darios-MacBook-Pro-2.local`)", + "observer.type:keyword - 1 distinct values (`apm-server`)", + "observer.version:keyword - 1 distinct values (`8.15.0`)", + "parent.id:keyword - 999 distinct values (`76a22c35a2b363fb`, `6e363f007dd857fc`, `499ee35b7e0dc791`, `f05ead7365bb815b`, `46709d74aeb1ac57`, `c1e7aeb4f3b66351`, `d6e80a03ce62827e`, `5eff08ed000af546`, `80be4180f1c8985e`, `9154a028a223da28`, 989 more values)", + "processor.event:keyword - 2 distinct values (`transaction`, `span`)", + "service.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "service.framework.name.text:text - 1 distinct values (`Microsoft.AspNetCore`)", + "service.framework.name:keyword - 1 distinct values (`Microsoft.AspNetCore`)", + "service.language.name.text:text - 1 distinct values (`dotnet`)", + "service.language.name:keyword - 1 distinct values (`dotnet`)", + "service.name.text:text - 1 distinct values (`cartservice`)", + "service.name:keyword - 1 distinct values (`cartservice`)", + "service.node.name.text:text - 1 distinct values (`8e441d5c-4d52-42f2-937e-1f3b84a6baed`)", + "service.node.name:keyword - 1 distinct values (`8e441d5c-4d52-42f2-937e-1f3b84a6baed`)", + "service.target.name.text:text - 3 distinct values (`my-otel-demo-flagd:8013`, `flagd.evaluation.v1.Service`, 1 more values)", + "service.target.name:keyword - 3 distinct values (`my-otel-demo-flagd:8013`, `flagd.evaluation.v1.Service`, `my-otel-demo-valkey:6379`)", + "service.target.type:keyword - 3 distinct values (`http`, `grpc`, `redis`)", + "span.db.statement:keyword - 455 distinct values (`HGET dd1f04f3-aaf1-4a76-85e4-8c64a930684e`, `EXPIRE 2538c162-9908-11ef-8217-d64fba5f23b1`, `HGET 958266ce-2169-4a2a-bcd5-090fd75cf50b`, `HGET ba92c7a8-6623-4267-8512-9cf0cb23bbf8`, `HGET d96a9e44-a9ed-4f64-8f46-5d4acd05a1fa`, `HGET 4a0d4f33-09ee-4008-a826-027dd6e889c6`, 
`HGET 5bc59fca-3c11-4d71-9496-c9c29b9d37d1`, `HGET 0497c8a8-2da2-4ff7-80a7-ce647916277c`, `HGET 87d37a8f-cc58-483a-aad0-fc0c91421544`, `HGET 4dea82bc-1c58-4f0a-97fd-3a1369544dc0`, 445 more values)", + "span.db.type:keyword - 1 distinct values (`redis`)", + "span.destination.service.name.text:text - 3 distinct values (`http://my-otel-demo-flagd:8013`, `my-otel-demo-flagd:8013`, 1 more values)", + "span.destination.service.name:keyword - 3 distinct values (`http://my-otel-demo-flagd:8013`, `my-otel-demo-flagd:8013`, `my-otel-demo-valkey:6379`)", + "span.destination.service.resource:keyword - 2 distinct values (`my-otel-demo-flagd:8013`, `my-otel-demo-valkey:6379`)", + "span.destination.service.type:keyword - 2 distinct values (`external`, `db`)", + "span.duration.us:long - 399 distinct values (`501`, `154`, `355`, `3766`, `818`, `657`, `203`, `127`, `715`, `1092`, 389 more values)", + "span.id:keyword - 1000 distinct values (`6b71cc6a08847a01`, `796c855656f17eb0`, `aaa6049d68006044`, `60ad645003eda9b7`, `be3e883fd0dd1a58`, `47247987037e78b3`, `180228f348c3d599`, `587dca65bb2463f3`, `c438d3e7c15d9a4f`, `0a9f738aa8e11c7e`, 990 more values)", + "span.name.text:text - 5 distinct values (`POST`, `flagd.evaluation.v1.Service/ResolveBoolean`, 3 more values)", + "span.name:keyword - 5 distinct values (`POST`, `flagd.evaluation.v1.Service/ResolveBoolean`, `HMSET`, `EXPIRE`, `HGET`)", + "span.representative_count:scaled_float - 1 distinct values (`1`)", + "span.subtype:keyword - 3 distinct values (`http`, `grpc`, `redis`)", + "span.type:keyword - 2 distinct values (`external`, `db`)", + "tags:keyword - 1 distinct values (`_geoip_database_unavailable_GeoLite2-City.mmdb`)", + "timestamp.us:long - 1000 distinct values (`1730544111451828`, `1730544606786118`, `1730544573583321`, `1730544068214001`, `1730543971566630`, `1730544035740835`, `1730543892242334`, `1730544845301548`, `1730543919324800`, `1730544697032712`, 990 more values)", + "trace.id:keyword - 920 distinct values 
(`de1ed71d39ca87650a8401ae46f2cac0`, `1283785f372e3335e30ab82c53008ea6`, `be3fc18a45763f00d43954ef09300042`, `bffb395c41d9a23aa7acc6b1993f64c6`, `68df2f10f7b09e787c688046ae76f6ff`, `3ba13e99d34a48133415c71b9b523321`, `6405704384b681e891e1067b1c1e6e68`, `b137359ec568679804d48c699dd49df5`, `cce045bbbdbb711a2f5cae192c337cb3`, `85743f477e557162900c88409231222a`, 910 more values)", + "transaction.duration.us:long - 441 distinct values (`2497`, `2659`, `610`, `458`, `3158`, `1484`, `436`, `536`, `2602`, `3472`, 431 more values)", + "transaction.id:keyword - 469 distinct values (`60ad645003eda9b7`, `be3e883fd0dd1a58`, `180228f348c3d599`, `0a9f738aa8e11c7e`, `66622c930742b62b`, `2493315b88140ec7`, `7950f85af72baaa0`, `0fda0e8c0af2a975`, `0504e94fae4dd7f8`, `25ec77d0c132ceef`, 459 more values)", + "transaction.name.text:text - 3 distinct values (`POST /oteldemo.CartService/EmptyCart`, `POST /oteldemo.CartService/AddItem`, 1 more values)", + "transaction.name:keyword - 3 distinct values (`POST /oteldemo.CartService/EmptyCart`, `POST /oteldemo.CartService/AddItem`, `POST /oteldemo.CartService/GetCart`)", + "transaction.representative_count:scaled_float - 1 distinct values (`1`)", + "transaction.result:keyword - 1 distinct values (`HTTP 2xx`)", + "transaction.sampled:boolean - 1 distinct values (`true`)", + "transaction.type:keyword - 1 distinct values (`request`)", + "url.domain:keyword - 1 distinct values (`my-otel-demo-cartservice`)", + "url.full.text:text - 3 distinct values (`http://my-otel-demo-cartservice:8080/oteldemo.CartService/EmptyCart`, `http://my-otel-demo-cartservice:8080/oteldemo.CartService/AddItem`, 1 more values)", + "url.full:keyword - 3 distinct values (`http://my-otel-demo-cartservice:8080/oteldemo.CartService/EmptyCart`, `http://my-otel-demo-cartservice:8080/oteldemo.CartService/AddItem`, `http://my-otel-demo-cartservice:8080/oteldemo.CartService/GetCart`)", + "url.original.text:text - 4 distinct values 
(`http://my-otel-demo-flagd:8013/flagd.evaluation.v1.Service/ResolveBoolean`, `/oteldemo.CartService/EmptyCart`, 2 more values)", + "url.original:keyword - 4 distinct values (`http://my-otel-demo-flagd:8013/flagd.evaluation.v1.Service/ResolveBoolean`, `/oteldemo.CartService/EmptyCart`, `/oteldemo.CartService/AddItem`, `/oteldemo.CartService/GetCart`)", + "url.path:keyword - 3 distinct values (`/oteldemo.CartService/EmptyCart`, `/oteldemo.CartService/AddItem`, `/oteldemo.CartService/GetCart`)", + "url.port:long - 1 distinct values (`8080`)", + "url.scheme:keyword - 1 distinct values (`http`)", + "user_agent.device.name.text:text - 1 distinct values (`Other`)", + "user_agent.device.name:keyword - 1 distinct values (`Other`)", + "user_agent.name.text:text - 1 distinct values (`Other`)", + "user_agent.name:keyword - 1 distinct values (`Other`)", + "user_agent.original.text:text - 2 distinct values (`grpc-go/1.64.0`, `grpc-node-js/1.10.11`)", + "user_agent.original:keyword - 2 distinct values (`grpc-go/1.64.0`, `grpc-node-js/1.10.11`)" + ] + }, + "ownPatterns": [ + { + "field": "message", + "count": 8414, + "pattern": "called with userId", + "regex": ".*?called.+?with.+?userId.*?", + "sample": "GetCartAsync called with userId=f96ee88a-4745-45f1-885a-16c3de0c668a", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "GetCartAsync called with userId=f96ee88a-4745-45f1-885a-16c3de0c668a" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.349Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 226 + }, + { + "x": 1730543820000, + "y": 194 + }, + { + "x": 1730543850000, + "y": 270 + }, + { + "x": 1730543880000, + "y": 200 + }, + { + "x": 1730543910000, + "y": 164 + }, + { + "x": 1730543940000, + "y": 198 + }, + { + "x": 1730543970000, + "y": 192 + }, + { + "x": 1730544000000, + "y": 228 + }, + { + "x": 1730544030000, + "y": 230 + }, + { + "x": 1730544060000, + "y": 212 + }, + { + 
"x": 1730544090000, + "y": 222 + }, + { + "x": 1730544120000, + "y": 210 + }, + { + "x": 1730544150000, + "y": 240 + }, + { + "x": 1730544180000, + "y": 200 + }, + { + "x": 1730544210000, + "y": 204 + }, + { + "x": 1730544240000, + "y": 222 + }, + { + "x": 1730544270000, + "y": 216 + }, + { + "x": 1730544300000, + "y": 254 + }, + { + "x": 1730544330000, + "y": 218 + }, + { + "x": 1730544360000, + "y": 228 + }, + { + "x": 1730544390000, + "y": 214 + }, + { + "x": 1730544420000, + "y": 220 + }, + { + "x": 1730544450000, + "y": 218 + }, + { + "x": 1730544480000, + "y": 224 + }, + { + "x": 1730544510000, + "y": 228 + }, + { + "x": 1730544540000, + "y": 228 + }, + { + "x": 1730544570000, + "y": 232 + }, + { + "x": 1730544600000, + "y": 218 + }, + { + "x": 1730544630000, + "y": 232 + }, + { + "x": 1730544660000, + "y": 212 + }, + { + "x": 1730544690000, + "y": 236 + }, + { + "x": 1730544720000, + "y": 246 + }, + { + "x": 1730544750000, + "y": 218 + }, + { + "x": 1730544780000, + "y": 214 + }, + { + "x": 1730544810000, + "y": 210 + }, + { + "x": 1730544840000, + "y": 228 + }, + { + "x": 1730544870000, + "y": 228 + }, + { + "x": 1730544900000, + "y": 224 + }, + { + "x": 1730544930000, + "y": 56 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "bfeu", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 4835, + "pattern": "Enqueued", + "regex": ".*?Enqueued.*?", + "sample": "Enqueued", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "Enqueued" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.349Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 126 + }, + { + "x": 1730543820000, + "y": 121 + }, + { + "x": 1730543850000, + "y": 171 + }, + { + "x": 1730543880000, + "y": 109 + }, + { + 
"x": 1730543910000, + "y": 96 + }, + { + "x": 1730543940000, + "y": 134 + }, + { + "x": 1730543970000, + "y": 99 + }, + { + "x": 1730544000000, + "y": 130 + }, + { + "x": 1730544030000, + "y": 132 + }, + { + "x": 1730544060000, + "y": 119 + }, + { + "x": 1730544090000, + "y": 126 + }, + { + "x": 1730544120000, + "y": 118 + }, + { + "x": 1730544150000, + "y": 133 + }, + { + "x": 1730544180000, + "y": 100 + }, + { + "x": 1730544210000, + "y": 125 + }, + { + "x": 1730544240000, + "y": 124 + }, + { + "x": 1730544270000, + "y": 124 + }, + { + "x": 1730544300000, + "y": 161 + }, + { + "x": 1730544330000, + "y": 126 + }, + { + "x": 1730544360000, + "y": 140 + }, + { + "x": 1730544390000, + "y": 107 + }, + { + "x": 1730544420000, + "y": 120 + }, + { + "x": 1730544450000, + "y": 125 + }, + { + "x": 1730544480000, + "y": 128 + }, + { + "x": 1730544510000, + "y": 120 + }, + { + "x": 1730544540000, + "y": 123 + }, + { + "x": 1730544570000, + "y": 135 + }, + { + "x": 1730544600000, + "y": 120 + }, + { + "x": 1730544630000, + "y": 139 + }, + { + "x": 1730544660000, + "y": 113 + }, + { + "x": 1730544690000, + "y": 154 + }, + { + "x": 1730544720000, + "y": 154 + }, + { + "x": 1730544750000, + "y": 115 + }, + { + "x": 1730544780000, + "y": 120 + }, + { + "x": 1730544810000, + "y": 114 + }, + { + "x": 1730544840000, + "y": 133 + }, + { + "x": 1730544870000, + "y": 127 + }, + { + "x": 1730544900000, + "y": 139 + }, + { + "x": 1730544930000, + "y": 35 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "oacl", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 4835, + "pattern": "ResponseReceived", + "regex": ".*?ResponseReceived.*?", + "sample": "ResponseReceived", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "ResponseReceived" + ] + }, + 
"firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.349Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 126 + }, + { + "x": 1730543820000, + "y": 121 + }, + { + "x": 1730543850000, + "y": 171 + }, + { + "x": 1730543880000, + "y": 109 + }, + { + "x": 1730543910000, + "y": 96 + }, + { + "x": 1730543940000, + "y": 134 + }, + { + "x": 1730543970000, + "y": 99 + }, + { + "x": 1730544000000, + "y": 130 + }, + { + "x": 1730544030000, + "y": 132 + }, + { + "x": 1730544060000, + "y": 119 + }, + { + "x": 1730544090000, + "y": 126 + }, + { + "x": 1730544120000, + "y": 118 + }, + { + "x": 1730544150000, + "y": 133 + }, + { + "x": 1730544180000, + "y": 100 + }, + { + "x": 1730544210000, + "y": 125 + }, + { + "x": 1730544240000, + "y": 124 + }, + { + "x": 1730544270000, + "y": 124 + }, + { + "x": 1730544300000, + "y": 161 + }, + { + "x": 1730544330000, + "y": 126 + }, + { + "x": 1730544360000, + "y": 140 + }, + { + "x": 1730544390000, + "y": 107 + }, + { + "x": 1730544420000, + "y": 120 + }, + { + "x": 1730544450000, + "y": 125 + }, + { + "x": 1730544480000, + "y": 128 + }, + { + "x": 1730544510000, + "y": 120 + }, + { + "x": 1730544540000, + "y": 123 + }, + { + "x": 1730544570000, + "y": 135 + }, + { + "x": 1730544600000, + "y": 120 + }, + { + "x": 1730544630000, + "y": 139 + }, + { + "x": 1730544660000, + "y": 113 + }, + { + "x": 1730544690000, + "y": 154 + }, + { + "x": 1730544720000, + "y": 154 + }, + { + "x": 1730544750000, + "y": 115 + }, + { + "x": 1730544780000, + "y": 120 + }, + { + "x": 1730544810000, + "y": 114 + }, + { + "x": 1730544840000, + "y": 133 + }, + { + "x": 1730544870000, + "y": 127 + }, + { + "x": 1730544900000, + "y": 139 + }, + { + "x": 1730544930000, + "y": 35 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "hhud", + "relevance": "normal", + 
"interesting": false + }, + { + "field": "message", + "count": 4835, + "pattern": "Sent", + "regex": ".*?Sent.*?", + "sample": "Sent", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "Sent" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.349Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 126 + }, + { + "x": 1730543820000, + "y": 121 + }, + { + "x": 1730543850000, + "y": 171 + }, + { + "x": 1730543880000, + "y": 109 + }, + { + "x": 1730543910000, + "y": 96 + }, + { + "x": 1730543940000, + "y": 134 + }, + { + "x": 1730543970000, + "y": 99 + }, + { + "x": 1730544000000, + "y": 130 + }, + { + "x": 1730544030000, + "y": 132 + }, + { + "x": 1730544060000, + "y": 119 + }, + { + "x": 1730544090000, + "y": 126 + }, + { + "x": 1730544120000, + "y": 118 + }, + { + "x": 1730544150000, + "y": 133 + }, + { + "x": 1730544180000, + "y": 100 + }, + { + "x": 1730544210000, + "y": 125 + }, + { + "x": 1730544240000, + "y": 124 + }, + { + "x": 1730544270000, + "y": 124 + }, + { + "x": 1730544300000, + "y": 161 + }, + { + "x": 1730544330000, + "y": 126 + }, + { + "x": 1730544360000, + "y": 140 + }, + { + "x": 1730544390000, + "y": 107 + }, + { + "x": 1730544420000, + "y": 120 + }, + { + "x": 1730544450000, + "y": 125 + }, + { + "x": 1730544480000, + "y": 128 + }, + { + "x": 1730544510000, + "y": 120 + }, + { + "x": 1730544540000, + "y": 123 + }, + { + "x": 1730544570000, + "y": 135 + }, + { + "x": 1730544600000, + "y": 120 + }, + { + "x": 1730544630000, + "y": 139 + }, + { + "x": 1730544660000, + "y": 113 + }, + { + "x": 1730544690000, + "y": 154 + }, + { + "x": 1730544720000, + "y": 154 + }, + { + "x": 1730544750000, + "y": 115 + }, + { + "x": 1730544780000, + "y": 120 + }, + { + "x": 1730544810000, + "y": 114 + }, + { + "x": 1730544840000, + "y": 133 + }, + { + "x": 1730544870000, + "y": 127 + }, + { + "x": 1730544900000, + "y": 139 + }, + { + "x": 1730544930000, + "y": 35 + }, + { 
+ "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "ttbh", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 4441, + "pattern": "info 0", + "regex": ".*?info.+?0.*?", + "sample": "info: cartservice.cartstore.ValkeyCartStore[0]\n", + "highlight": { + "service.name": [ + "cartservice" + ], + "resource.attributes.service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "info: cartservice.cartstore.ValkeyCartStore[0]\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.586Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 117 + }, + { + "x": 1730543820000, + "y": 104 + }, + { + "x": 1730543850000, + "y": 146 + }, + { + "x": 1730543880000, + "y": 103 + }, + { + "x": 1730543910000, + "y": 86 + }, + { + "x": 1730543940000, + "y": 110 + }, + { + "x": 1730543970000, + "y": 97 + }, + { + "x": 1730544000000, + "y": 119 + }, + { + "x": 1730544030000, + "y": 120 + }, + { + "x": 1730544060000, + "y": 110 + }, + { + "x": 1730544090000, + "y": 116 + }, + { + "x": 1730544120000, + "y": 109 + }, + { + "x": 1730544150000, + "y": 124 + }, + { + "x": 1730544180000, + "y": 100 + }, + { + "x": 1730544210000, + "y": 109 + }, + { + "x": 1730544240000, + "y": 115 + }, + { + "x": 1730544270000, + "y": 113 + }, + { + "x": 1730544300000, + "y": 137 + }, + { + "x": 1730544330000, + "y": 114 + }, + { + "x": 1730544360000, + "y": 122 + }, + { + "x": 1730544390000, + "y": 107 + }, + { + "x": 1730544420000, + "y": 113 + }, + { + "x": 1730544450000, + "y": 114 + }, + { + "x": 1730544480000, + "y": 117 + }, + { + "x": 1730544510000, + "y": 116 + }, + { + "x": 1730544540000, + "y": 117 + }, + { + "x": 1730544570000, + "y": 121 + }, + { + "x": 1730544600000, + "y": 112 + }, + { + "x": 1730544630000, + "y": 123 + }, + { + "x": 1730544660000, + "y": 
108 + }, + { + "x": 1730544690000, + "y": 129 + }, + { + "x": 1730544720000, + "y": 132 + }, + { + "x": 1730544750000, + "y": 111 + }, + { + "x": 1730544780000, + "y": 111 + }, + { + "x": 1730544810000, + "y": 108 + }, + { + "x": 1730544840000, + "y": 120 + }, + { + "x": 1730544870000, + "y": 118 + }, + { + "x": 1730544900000, + "y": 120 + }, + { + "x": 1730544930000, + "y": 73 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "eahn", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 4152, + "pattern": "Fetch cart", + "regex": ".*?Fetch.+?cart.*?", + "sample": "Fetch cart", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "Fetch cart" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.349Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 112 + }, + { + "x": 1730543820000, + "y": 94 + }, + { + "x": 1730543850000, + "y": 132 + }, + { + "x": 1730543880000, + "y": 100 + }, + { + "x": 1730543910000, + "y": 80 + }, + { + "x": 1730543940000, + "y": 97 + }, + { + "x": 1730543970000, + "y": 96 + }, + { + "x": 1730544000000, + "y": 113 + }, + { + "x": 1730544030000, + "y": 113 + }, + { + "x": 1730544060000, + "y": 105 + }, + { + "x": 1730544090000, + "y": 111 + }, + { + "x": 1730544120000, + "y": 104 + }, + { + "x": 1730544150000, + "y": 119 + }, + { + "x": 1730544180000, + "y": 100 + }, + { + "x": 1730544210000, + "y": 100 + }, + { + "x": 1730544240000, + "y": 110 + }, + { + "x": 1730544270000, + "y": 107 + }, + { + "x": 1730544300000, + "y": 123 + }, + { + "x": 1730544330000, + "y": 107 + }, + { + "x": 1730544360000, + "y": 112 + }, + { + "x": 1730544390000, + "y": 107 + }, + { + "x": 1730544420000, + "y": 109 + }, + { + "x": 1730544450000, + "y": 108 + }, + { + "x": 1730544480000, 
+ "y": 111 + }, + { + "x": 1730544510000, + "y": 114 + }, + { + "x": 1730544540000, + "y": 114 + }, + { + "x": 1730544570000, + "y": 112 + }, + { + "x": 1730544600000, + "y": 107 + }, + { + "x": 1730544630000, + "y": 114 + }, + { + "x": 1730544660000, + "y": 105 + }, + { + "x": 1730544690000, + "y": 115 + }, + { + "x": 1730544720000, + "y": 119 + }, + { + "x": 1730544750000, + "y": 109 + }, + { + "x": 1730544780000, + "y": 106 + }, + { + "x": 1730544810000, + "y": 105 + }, + { + "x": 1730544840000, + "y": 113 + }, + { + "x": 1730544870000, + "y": 113 + }, + { + "x": 1730544900000, + "y": 109 + }, + { + "x": 1730544930000, + "y": 27 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "xpjx", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 602, + "pattern": "called with userId 11ef 8217 d64fba5f23b1", + "regex": ".*?called.+?with.+?userId.+?11ef.+?8217.+?d64fba5f23b1.*?", + "sample": "GetCartAsync called with userId=fb462b14-9908-11ef-8217-d64fba5f23b1", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "GetCartAsync called with userId=fb462b14-9908-11ef-8217-d64fba5f23b1" + ] + }, + "firstOccurrence": "2024-11-02T10:36:34.219Z", + "lastOccurrence": "2024-11-02T10:55:34.364Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 12 + }, + { + "x": 1730543820000, + "y": 26 + }, + { + "x": 1730543850000, + "y": 34 + }, + { + "x": 1730543880000, + "y": 6 + }, + { + "x": 1730543910000, + "y": 16 + }, + { + "x": 1730543940000, + "y": 30 + }, + { + "x": 1730543970000, + "y": 2 + }, + { + "x": 1730544000000, + "y": 14 + }, + { + "x": 1730544030000, + "y": 18 + }, + { + "x": 1730544060000, + "y": 12 + }, + { + "x": 1730544090000, + "y": 10 + }, + { + "x": 1730544120000, + "y": 12 + }, + { + "x": 1730544150000, + "y": 12 + }, + { + "x": 
1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 22 + }, + { + "x": 1730544240000, + "y": 12 + }, + { + "x": 1730544270000, + "y": 14 + }, + { + "x": 1730544300000, + "y": 36 + }, + { + "x": 1730544330000, + "y": 18 + }, + { + "x": 1730544360000, + "y": 24 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 10 + }, + { + "x": 1730544450000, + "y": 14 + }, + { + "x": 1730544480000, + "y": 14 + }, + { + "x": 1730544510000, + "y": 4 + }, + { + "x": 1730544540000, + "y": 6 + }, + { + "x": 1730544570000, + "y": 26 + }, + { + "x": 1730544600000, + "y": 14 + }, + { + "x": 1730544630000, + "y": 22 + }, + { + "x": 1730544660000, + "y": 8 + }, + { + "x": 1730544690000, + "y": 34 + }, + { + "x": 1730544720000, + "y": 34 + }, + { + "x": 1730544750000, + "y": 4 + }, + { + "x": 1730544780000, + "y": 12 + }, + { + "x": 1730544810000, + "y": 6 + }, + { + "x": 1730544840000, + "y": 16 + }, + { + "x": 1730544870000, + "y": 12 + }, + { + "x": 1730544900000, + "y": 28 + }, + { + "x": 1730544930000, + "y": 8 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "ckdv", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 382, + "pattern": "AddItemAsync called with userId 11ef 8217 d64fba5f23b1 productId quantity", + "regex": ".*?AddItemAsync.+?called.+?with.+?userId.+?11ef.+?8217.+?d64fba5f23b1.+?productId.+?quantity.*?", + "sample": "AddItemAsync called with userId=fc4e8196-9908-11ef-8217-d64fba5f23b1, productId=1YMWWN1N4O, quantity=10", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "AddItemAsync called with userId=fc4e8196-9908-11ef-8217-d64fba5f23b1, productId=1YMWWN1N4O, quantity=10" + ] + }, + "firstOccurrence": "2024-11-02T10:36:34.216Z", + "lastOccurrence": "2024-11-02T10:55:34.363Z", + "timeseries": [ + 
{ + "x": 1730543790000, + "y": 8 + }, + { + "x": 1730543820000, + "y": 14 + }, + { + "x": 1730543850000, + "y": 22 + }, + { + "x": 1730543880000, + "y": 6 + }, + { + "x": 1730543910000, + "y": 8 + }, + { + "x": 1730543940000, + "y": 22 + }, + { + "x": 1730543970000, + "y": 2 + }, + { + "x": 1730544000000, + "y": 10 + }, + { + "x": 1730544030000, + "y": 10 + }, + { + "x": 1730544060000, + "y": 8 + }, + { + "x": 1730544090000, + "y": 10 + }, + { + "x": 1730544120000, + "y": 8 + }, + { + "x": 1730544150000, + "y": 8 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 14 + }, + { + "x": 1730544240000, + "y": 8 + }, + { + "x": 1730544270000, + "y": 10 + }, + { + "x": 1730544300000, + "y": 20 + }, + { + "x": 1730544330000, + "y": 10 + }, + { + "x": 1730544360000, + "y": 16 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 6 + }, + { + "x": 1730544450000, + "y": 10 + }, + { + "x": 1730544480000, + "y": 10 + }, + { + "x": 1730544510000, + "y": 4 + }, + { + "x": 1730544540000, + "y": 6 + }, + { + "x": 1730544570000, + "y": 10 + }, + { + "x": 1730544600000, + "y": 6 + }, + { + "x": 1730544630000, + "y": 14 + }, + { + "x": 1730544660000, + "y": 4 + }, + { + "x": 1730544690000, + "y": 22 + }, + { + "x": 1730544720000, + "y": 18 + }, + { + "x": 1730544750000, + "y": 4 + }, + { + "x": 1730544780000, + "y": 8 + }, + { + "x": 1730544810000, + "y": 6 + }, + { + "x": 1730544840000, + "y": 12 + }, + { + "x": 1730544870000, + "y": 8 + }, + { + "x": 1730544900000, + "y": 16 + }, + { + "x": 1730544930000, + "y": 4 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "habq", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 216, + "pattern": "GetCartAsync called with userId", + "regex": ".*?GetCartAsync.+?called.+?with.+?userId.*?", + 
"sample": "GetCartAsync called with userId=", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "GetCartAsync called with userId=" + ] + }, + "firstOccurrence": "2024-11-02T10:36:36.912Z", + "lastOccurrence": "2024-11-02T10:55:33.022Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 10 + }, + { + "x": 1730543820000, + "y": 4 + }, + { + "x": 1730543850000, + "y": 6 + }, + { + "x": 1730543880000, + "y": 4 + }, + { + "x": 1730543910000, + "y": 6 + }, + { + "x": 1730543940000, + "y": 4 + }, + { + "x": 1730543970000, + "y": 2 + }, + { + "x": 1730544000000, + "y": 10 + }, + { + "x": 1730544030000, + "y": 10 + }, + { + "x": 1730544060000, + "y": 6 + }, + { + "x": 1730544090000, + "y": 2 + }, + { + "x": 1730544120000, + "y": 8 + }, + { + "x": 1730544150000, + "y": 8 + }, + { + "x": 1730544180000, + "y": 4 + }, + { + "x": 1730544210000, + "y": 6 + }, + { + "x": 1730544240000, + "y": 2 + }, + { + "x": 1730544270000, + "y": 4 + }, + { + "x": 1730544300000, + "y": 8 + }, + { + "x": 1730544330000, + "y": 4 + }, + { + "x": 1730544360000, + "y": 4 + }, + { + "x": 1730544390000, + "y": 2 + }, + { + "x": 1730544420000, + "y": 8 + }, + { + "x": 1730544450000, + "y": 4 + }, + { + "x": 1730544480000, + "y": 10 + }, + { + "x": 1730544510000, + "y": 2 + }, + { + "x": 1730544540000, + "y": 12 + }, + { + "x": 1730544570000, + "y": 8 + }, + { + "x": 1730544600000, + "y": 8 + }, + { + "x": 1730544630000, + "y": 6 + }, + { + "x": 1730544660000, + "y": 6 + }, + { + "x": 1730544690000, + "y": 2 + }, + { + "x": 1730544720000, + "y": 6 + }, + { + "x": 1730544750000, + "y": 6 + }, + { + "x": 1730544780000, + "y": 4 + }, + { + "x": 1730544810000, + "y": 2 + }, + { + "x": 1730544840000, + "y": 6 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 8 + }, + { + "x": 1730544930000, + "y": 4 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + 
"change": { + "type": "stationary", + "significance": null + }, + "shortId": "tzfi", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 55, + "pattern": "Empty cart", + "regex": ".*?Empty.+?cart.*?", + "sample": "Empty cart", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "Empty cart" + ] + }, + "firstOccurrence": "2024-11-02T10:36:41.806Z", + "lastOccurrence": "2024-11-02T10:55:32.654Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 1 + }, + { + "x": 1730543820000, + "y": 3 + }, + { + "x": 1730543850000, + "y": 3 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 2 + }, + { + "x": 1730543940000, + "y": 2 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 1 + }, + { + "x": 1730544030000, + "y": 2 + }, + { + "x": 1730544060000, + "y": 1 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 1 + }, + { + "x": 1730544150000, + "y": 1 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 2 + }, + { + "x": 1730544240000, + "y": 1 + }, + { + "x": 1730544270000, + "y": 1 + }, + { + "x": 1730544300000, + "y": 4 + }, + { + "x": 1730544330000, + "y": 2 + }, + { + "x": 1730544360000, + "y": 2 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 1 + }, + { + "x": 1730544450000, + "y": 1 + }, + { + "x": 1730544480000, + "y": 1 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 4 + }, + { + "x": 1730544600000, + "y": 2 + }, + { + "x": 1730544630000, + "y": 2 + }, + { + "x": 1730544660000, + "y": 1 + }, + { + "x": 1730544690000, + "y": 3 + }, + { + "x": 1730544720000, + "y": 4 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 1 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 1 + }, + { + "x": 1730544870000, + "y": 1 + }, + { + "x": 
1730544900000, + "y": 3 + }, + { + "x": 1730544930000, + "y": 1 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "ldwf", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 55, + "pattern": "feature_flag", + "regex": ".*?feature_flag.*?", + "sample": "feature_flag", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "feature_flag" + ] + }, + "firstOccurrence": "2024-11-02T10:36:41.809Z", + "lastOccurrence": "2024-11-02T10:55:32.655Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 1 + }, + { + "x": 1730543820000, + "y": 3 + }, + { + "x": 1730543850000, + "y": 3 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 2 + }, + { + "x": 1730543940000, + "y": 2 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 1 + }, + { + "x": 1730544030000, + "y": 2 + }, + { + "x": 1730544060000, + "y": 1 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 1 + }, + { + "x": 1730544150000, + "y": 1 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 2 + }, + { + "x": 1730544240000, + "y": 1 + }, + { + "x": 1730544270000, + "y": 1 + }, + { + "x": 1730544300000, + "y": 4 + }, + { + "x": 1730544330000, + "y": 2 + }, + { + "x": 1730544360000, + "y": 2 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 1 + }, + { + "x": 1730544450000, + "y": 1 + }, + { + "x": 1730544480000, + "y": 1 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 4 + }, + { + "x": 1730544600000, + "y": 2 + }, + { + "x": 1730544630000, + "y": 2 + }, + { + "x": 1730544660000, + "y": 1 + }, + { + "x": 1730544690000, + "y": 3 + }, + { + "x": 1730544720000, + "y": 4 + }, + { + "x": 
1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 1 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 1 + }, + { + "x": 1730544870000, + "y": 1 + }, + { + "x": 1730544900000, + "y": 3 + }, + { + "x": 1730544930000, + "y": 1 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "noer", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 2, + "pattern": "OTEL-SDK BatchActivityExportProcessor exporting to OtlpTraceExporter dropped items", + "regex": ".*?OTEL-SDK.+?BatchActivityExportProcessor.+?exporting.+?to.+?OtlpTraceExporter.+?dropped.+?items.*?", + "sample": "OTEL-SDK: [224] 'BatchActivityExportProcessor' exporting to 'OtlpTraceExporter' dropped '0' items.", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "OTEL-SDK: [224] 'BatchActivityExportProcessor' exporting to 'OtlpTraceExporter' dropped '0' items." 
+ ] + }, + "firstOccurrence": "2024-11-02T10:55:36.580Z", + "lastOccurrence": "2024-11-02T10:55:36.580Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "fiia", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 1, + 
"pattern": "my-otel-demo-valkey open", + "regex": ".*?my-otel-demo-valkey.+?open.*?", + "sample": "my-otel-demo-valkey (10.106.38.244:6379) open\n", + "highlight": { + "service.name": [ + "cartservice" + ], + "resource.attributes.service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "my-otel-demo-valkey (10.106.38.244:6379) open\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:34.993Z", + "lastOccurrence": "2024-11-02T10:55:34.993Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, 
+ { + "x": 1730544930000, + "y": 1 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "uzyf", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 78, + "pattern": "OTEL-SDK Instrument Meter has been deactivated", + "regex": ".*?OTEL-SDK.+?Instrument.+?Meter.+?has.+?been.+?deactivated.*?", + "sample": "OTEL-SDK: [224] Instrument 'process.runtime.dotnet.jit.compilation_time', Meter 'OpenTelemetry.Instrumentation.Runtime' has been deactivated.", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "OTEL-SDK: [224] Instrument 'process.runtime.dotnet.jit.compilation_time', Meter 'OpenTelemetry.Instrumentation.Runtime' has been deactivated." + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.585Z", + "lastOccurrence": "2024-11-02T10:55:36.586Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 
+ }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 78 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "lciw", + "relevance": "unusual", + "interesting": true + }, + { + "field": "message", + "count": 4, + "pattern": "OTEL-SDK Disposed", + "regex": ".*?OTEL-SDK.+?Disposed.*?", + "sample": "OTEL-SDK: [224] 'MeterProvider' Disposed.", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "OTEL-SDK: [224] 'MeterProvider' Disposed." 
+ ] + }, + "firstOccurrence": "2024-11-02T10:55:36.581Z", + "lastOccurrence": "2024-11-02T10:55:36.586Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 4 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "woqa", + "relevance": "unusual", + "interesting": true + }, + { + "field": "message", + "count": 3, + 
"pattern": "exiting", + "regex": ".*?exiting.*?", + "sample": "exiting...\n", + "highlight": { + "service.name": [ + "cartservice" + ], + "resource.attributes.service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "exiting...\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:55.622Z", + "lastOccurrence": "2024-11-02T10:56:51.618Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 1 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, 
+ "y": 1 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "ucuj", + "relevance": "unusual", + "interesting": true + }, + { + "field": "message", + "count": 2, + "pattern": "Application is shutting down", + "regex": ".*?Application.+?is.+?shutting.+?down.*?", + "sample": "Application is shutting down...", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "Application is shutting down..." + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.560Z", + "lastOccurrence": "2024-11-02T10:55:36.560Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + 
{ + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "cgip", + "relevance": "unusual", + "interesting": true + }, + { + "field": "message", + "count": 3, + "pattern": "FATAL Could not start entrypoint", + "regex": ".*?FATAL.+?Could.+?not.+?start.+?entrypoint.*?", + "sample": "FATAL: Could not start, bad entrypoint!\n", + "highlight": { + "service.name": [ + "cartservice" + ], + "resource.attributes.service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "FATAL: Could not start, bad entrypoint!\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:35.618Z", + "lastOccurrence": "2024-11-02T10:56:31.617Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + 
"x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 1 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "spike", + "significance": "high", + "change_point": 38, + "p_value": 1.8274885276877215e-20, + "timestamp": "2024-11-02T10:55:30.000Z" + }, + "shortId": "reqc", + "relevance": "critical", + "interesting": true + } + ], + "patternsFromOtherEntities": [ + { + "field": "message", + "count": 26, + "pattern": "I1102 replica_set.go Finished syncing logger replicaset-controller kind ReplicaSet key duration", + "regex": ".*?I1102.+?replica_set\\.go.+?Finished.+?syncing.+?logger.+?replicaset-controller.+?kind.+?ReplicaSet.+?key.+?duration.*?", + "sample": "I1102 10:55:33.013871 1 replica_set.go:679] \"Finished syncing\" logger=\"replicaset-controller\" kind=\"ReplicaSet\" key=\"default/my-otel-demo-cartservice-7b585f4fb7\" duration=\"101.333µs\"\n", + "highlight": { + "body.text": [ + "] \"Finished syncing\" logger=\"replicaset-controller\" kind=\"ReplicaSet\" key=\"default/my-otel-demo-cartservice" + ], + "message": [ + "] \"Finished syncing\" logger=\"replicaset-controller\" kind=\"ReplicaSet\" key=\"default/my-otel-demo-cartservice" + ] + }, + "metadata": { + "service.name": [ + "kube-controller-manager" + ], + "message": [ + "I1102 10:55:33.013871 1 replica_set.go:679] \"Finished syncing\" logger=\"replicaset-controller\" kind=\"ReplicaSet\" 
key=\"default/my-otel-demo-cartservice-7b585f4fb7\" duration=\"101.333µs\"\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:33.014Z", + "lastOccurrence": "2024-11-02T10:56:52.568Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 19 + }, + { + "x": 1730544960000, + "y": 2 + }, + { + "x": 1730544990000, + "y": 5 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "spike", + "significance": "high", + "change_point": 38, + "p_value": 
7.313742705200763e-147, + "timestamp": "2024-11-02T10:55:30.000Z" + }, + "shortId": "ofdx", + "relevance": "normal", + "interesting": true + }, + { + "field": "message", + "count": 21, + "pattern": "object apiVersion events.k8s.io/v1 deprecatedCount deprecatedFirstTimestamp deprecatedLastTimestamp deprecatedSource component eventTime null kind Event metadata creationTimestamp managedFields apiVersion v1 fieldsType FieldsV1 fieldsV1", + "regex": ".*?object.+?apiVersion.+?events\\.k8s\\.io/v1.+?deprecatedCount.+?deprecatedFirstTimestamp.+?deprecatedLastTimestamp.+?deprecatedSource.+?component.+?eventTime.+?null.+?kind.+?Event.+?metadata.+?creationTimestamp.+?managedFields.+?apiVersion.+?v1.+?fieldsType.+?FieldsV1.+?fieldsV1.*?", + "sample": "{\"object\":{\"apiVersion\":\"events.k8s.io/v1\",\"deprecatedCount\":2,\"deprecatedFirstTimestamp\":\"2024-11-02T10:56:17Z\",\"deprecatedLastTimestamp\":\"2024-11-02T10:56:52Z\",\"deprecatedSource\":{\"component\":\"kubelet\",\"host\":\"minikube\"},\"eventTime\":null,\"kind\":\"Event\",\"metadata\":{\"creationTimestamp\":\"2024-11-02T10:56:17Z\",\"managedFields\":[{\"apiVersion\":\"v1\",\"fieldsType\":\"FieldsV1\",\"fieldsV1\":{\"f:count\":{},\"f:firstTimestamp\":{},\"f:involvedObject\":{},\"f:lastTimestamp\":{},\"f:message\":{},\"f:reason\":{},\"f:reportingComponent\":{},\"f:reportingInstance\":{},\"f:source\":{\"f:component\":{},\"f:host\":{}},\"f:type\":{}},\"manager\":\"kubelet\",\"operation\":\"Update\",\"time\":\"2024-11-02T10:56:52Z\"}],\"name\":\"my-otel-demo-cartservice-7b585f4fb7-79ccz.1804217ce2cb3041\",\"namespace\":\"default\",\"resourceVersion\":\"375301\",\"uid\":\"cf211b63-a5e4-40c4-bed3-fe90aea76a38\"},\"note\":\"Back-off restarting failed container cartservice in pod 
my-otel-demo-cartservice-7b585f4fb7-79ccz_default(9580334a-7ef9-4ad5-baf6-0ad10ae49853)\",\"reason\":\"BackOff\",\"regarding\":{\"apiVersion\":\"v1\",\"fieldPath\":\"spec.containers{cartservice}\",\"kind\":\"Pod\",\"name\":\"my-otel-demo-cartservice-7b585f4fb7-79ccz\",\"namespace\":\"default\",\"resourceVersion\":\"375163\",\"uid\":\"9580334a-7ef9-4ad5-baf6-0ad10ae49853\"},\"reportingController\":\"kubelet\",\"reportingInstance\":\"minikube\",\"type\":\"Warning\"},\"type\":\"MODIFIED\"}", + "highlight": { + "message": [ + "},\"manager\":\"kubelet\",\"operation\":\"Update\",\"time\":\"2024-11-02T10:56:52Z\"}],\"name\":\"my-otel-demo-cartservice", + "375301\",\"uid\":\"cf211b63-a5e4-40c4-bed3-fe90aea76a38\"},\"note\":\"Back-off restarting failed container cartservice", + "in pod my-otel-demo-cartservice-7b585f4fb7-79ccz_default(9580334a-7ef9-4ad5-baf6-0ad10ae49853)\",\"reason", + "\":\"BackOff\",\"regarding\":{\"apiVersion\":\"v1\",\"fieldPath\":\"spec.containers{cartservice}\",\"kind\":\"Pod\",\"name", + "\":\"my-otel-demo-cartservice-7b585f4fb7-79ccz\",\"namespace\":\"default\",\"resourceVersion\":\"375163\",\"uid\":" + ] + }, + "metadata": { + "service.name": [ + "unknown" + ], + "message": [ + 
"{\"object\":{\"apiVersion\":\"events.k8s.io/v1\",\"deprecatedCount\":2,\"deprecatedFirstTimestamp\":\"2024-11-02T10:56:17Z\",\"deprecatedLastTimestamp\":\"2024-11-02T10:56:52Z\",\"deprecatedSource\":{\"component\":\"kubelet\",\"host\":\"minikube\"},\"eventTime\":null,\"kind\":\"Event\",\"metadata\":{\"creationTimestamp\":\"2024-11-02T10:56:17Z\",\"managedFields\":[{\"apiVersion\":\"v1\",\"fieldsType\":\"FieldsV1\",\"fieldsV1\":{\"f:count\":{},\"f:firstTimestamp\":{},\"f:involvedObject\":{},\"f:lastTimestamp\":{},\"f:message\":{},\"f:reason\":{},\"f:reportingComponent\":{},\"f:reportingInstance\":{},\"f:source\":{\"f:component\":{},\"f:host\":{}},\"f:type\":{}},\"manager\":\"kubelet\",\"operation\":\"Update\",\"time\":\"2024-11-02T10:56:52Z\"}],\"name\":\"my-otel-demo-cartservice-7b585f4fb7-79ccz.1804217ce2cb3041\",\"namespace\":\"default\",\"resourceVersion\":\"375301\",\"uid\":\"cf211b63-a5e4-40c4-bed3-fe90aea76a38\"},\"note\":\"Back-off restarting failed container cartservice in pod my-otel-demo-cartservice-7b585f4fb7-79ccz_default(9580334a-7ef9-4ad5-baf6-0ad10ae49853)\",\"reason\":\"BackOff\",\"regarding\":{\"apiVersion\":\"v1\",\"fieldPath\":\"spec.containers{cartservice}\",\"kind\":\"Pod\",\"name\":\"my-otel-demo-cartservice-7b585f4fb7-79ccz\",\"namespace\":\"default\",\"resourceVersion\":\"375163\",\"uid\":\"9580334a-7ef9-4ad5-baf6-0ad10ae49853\"},\"reportingController\":\"kubelet\",\"reportingInstance\":\"minikube\",\"type\":\"Warning\"},\"type\":\"MODIFIED\"}" + ] + }, + "firstOccurrence": "2024-11-02T10:55:33.051Z", + "lastOccurrence": "2024-11-02T10:56:52.563Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 
1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 16 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, + "y": 4 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "spike", + "significance": "high", + "change_point": 38, + "p_value": 2.9750218550484796e-168, + "timestamp": "2024-11-02T10:55:30.000Z" + }, + "shortId": "lqan", + "relevance": "critical", + "interesting": true + } + ], + "searches": [ + { + "fragments": [ + "AddItemAsync called with userId", + "productId", + "quantity" + ], + "appearsAs": "userId, productId, and quantity are referenced in the request handling log entries in cartservice." + }, + { + "fragments": [ + "GetCartAsync called with userId" + ], + "appearsAs": "userId is referenced in the request handling log entries in cartservice." 
+ }, + { + "fragments": [ + "Empty cart" + ], + "appearsAs": "Empty cart operation logs in cartservice." + }, + { + "fragments": [ + "cartServiceFailure" + ], + "appearsAs": "Feature flag 'cartServiceFailure' in cartservice." + }, + { + "fragments": [ + "10.244.0.61" + ], + "appearsAs": "IP address of the cartservice pod." + }, + { + "fragments": [ + "my-otel-demo-cartservice-67575c6f84-vngzw" + ], + "appearsAs": "Pod name of cartservice." + }, + { + "fragments": [ + "my-otel-demo-valkey:6379" + ], + "appearsAs": "Redis service used by cartservice." + }, + { + "fragments": [ + "/oteldemo.CartService/AddItem", + "/oteldemo.CartService/GetCart", + "/oteldemo.CartService/EmptyCart" + ], + "appearsAs": "Endpoints handled by cartservice." + }, + { + "fragments": [ + "flagd.evaluation.v1.Service" + ], + "appearsAs": "gRPC service involved in feature flag evaluation by cartservice." + }, + { + "fragments": [ + "HGET", + "EXPIRE", + "HMSET" + ], + "appearsAs": "Redis commands executed by cartservice." + } + ], + "relatedEntitiesSummaries": [ + "Based on the context provided, the following entities are potentially related to `service.name:cartservice` as either upstream or downstream dependencies. Below is a detailed analysis based on the indicators of relationships obtained from the observability data:\n\n### 1. Related Entity: `service.name:controller`\n- **Indicators**: \n - **Average Indicator**: The logs highlight high error rates (500 Internal Server Error) in the controller when POST requests to the `/api/cart` endpoint are made.\n - **Field values**: In controller logs, `message` contains references to the `/api/cart` endpoint, which is handled by `cartservice`.\n- **Reasoning**: \n - The `controller` service is responsible for managing /api/cart requests. 
Thus, it acts as a direct upstream service to `cartservice`.\n - It implies `controller` calls `cartservice` as part of handling requests, leading to the observed errors.\n- **Likeliness**: High\n - The `controller` service is likely a highly relevant entity since it directly interfaces with `cartservice`, with the errors surfacing in it when interacting with `cartservice`.\n\n### 2. Related Entity: `service.name:frontend`\n- **Indicators**:\n - **Strong Indicator**: The system architecture documentation shows that `frontend` interacts with various backend services, including those for user interactions such as adding items to the cart.\n - **Field values**: In the knowledge base, `frontend` receives HTTP traffic routed through `frontendproxy` and interacts with `cartservice`.\n- **Reasoning**:\n - `frontend` would act as a direct upstream service that communicates with `controller`, providing requests that eventually hit `cartservice`.\n- **Likeliness**: High\n - As `frontend` handles initial user interactions and routes requests to `cartservice`, it is essential in the request flow leading to `/api/cart` errors.\n\n### 3. Related Entity: `service.name:currencyservice`\n- **Indicators**:\n - **Strong Indicator**: The architecture description indicates `currencyservice` interacts with `cartservice` to handle currency conversions.\n - **Field values**: The entries show gRPC calls between `cartservice` and `currencyservice` for conversion operations.\n- **Reasoning**:\n - `currencyservice` may be an upstream dependency for `cartservice` for any currency-related manipulations on cart items.\n- **Likeliness**: Medium-High\n - While `currencyservice` interacts with `cartservice`, its exact influence on the direct issue might not be as strong as `controller` or `frontend`.\n\n### 4. 
Related Entity: `service.name:checkoutservice`\n- **Indicators**:\n - **Strong Indicator**: The architecture documentation lists `checkoutservice` as coordinating the checkout process involving interactions with `cartservice`.\n - **Field values**: Distributed tracing data could show `checkoutservice` making gRPC or HTTP calls to `cartservice`.\n- **Reasoning**:\n - `checkoutservice` is likely a downstream service that relies on the correct functioning of `cartservice` to gather cart data before proceeding to checkout operations.\n- **Likeliness**: Medium\n - Although primarily a downstream dependency, it processes important data from `cartservice`.\n\n### 5. Related Entity: `service.name:adservice`\n- **Indicators**:\n - **Weak Indicator**: `adservice` interfaces with `frontend` via gRPC to deliver ad content, it also might indirectly influence `cartservice` interactions if ads are part of the cart page.\n - **Field values**: Architectural details state `adservice` operates in the background to enhance user experience but no direct interaction with `cartservice`.\n- **Reasoning**:\n - While not directly connected, any latency in `adservice` may affect the overall user experience, causing indirect latency in calls that reach `cartservice`.\n- **Likeliness**: Low-Medium\n - Indirect connection and latent repercussions on user interactions with cart functionality.\n\n### 6. 
Related Entity: `service.name:loadgenerator`\n- **Indicators**:\n - **Average Indicator**: The `loadgenerator` service generates simulated traffic, handled by `frontendproxy` to stress-test services like `cartservice`.\n - **Field values**: Traffic from `loadgenerator` is routed through `frontendproxy` to reach core services, including `cartservice`.\n- **Reasoning**:\n - As an external input simulator, its traffic generation directly impacts `cartservice` when testing load resilience.\n- **Likeliness**: Medium\n - The errors can be a result of load tests conducted by `loadgenerator` surfacing bottlenecks in `cartservice`.\n\n### Summary\n\nTo conclude, the following entities are likely relevant to the issue in `service.name:cartservice`:\n\n1. **controller**: Direct upstream causing the 500 errors - High relevance.\n2. **frontend**: Upstream service initiating user interactions - High relevance.\n3. **currencyservice**: Upstream service for currency conversion - Medium-High relevance.\n4. **checkoutservice**: Downstream service relying on cart data - Medium relevance.\n5. **adservice**: Possible indirect impact due to latency - Low-Medium relevance.\n6. **loadgenerator**: Simulated traffic testing the system's resilience - Medium relevance.\n\nBy analyzing these entities, you can prioritize investigating `controller`, `frontend`, and `currencyservice` first, as they have the most direct and potentially impactful interactions with `cartservice`.", + "Based on the context provided, the architecture outlined in the knowledge base entry, the document analysis for the `cartservice`, and search keywords, several possible relationships with the `cartservice` can be identified. Below are the entities identified as highly relevant to the `cartservice`, along with the indicators, evidence, reasoning, and overall relevance.\n\n### 1. 
Entity: `service.name:frontend`\n\n#### Indicators and Evidence\n- **Average Indicator - URL Paths:**\n - `http://my-otel-demo-cartservice:8080/oteldemo.CartService/AddItem`\n - `http://my-otel-demo-cartservice:8080/oteldemo.CartService/GetCart`\n - `http://my-otel-demo-cartservice:8080/oteldemo.CartService/EmptyCart`\n- **Average Indicator - Logs:**\n - Observed in the search highlights for `service.name:frontend`: `@opentelemetry/api/build/src/api/context.js:60:46) at ServiceClientImpl.clientMethodTrace [as getCart]`\n- **Knowledge Base:**\n - The `frontend` is the core service for user interactions, receiving HTTP traffic and making requests to the `cartservice`.\n\n#### Relationship and Reasoning\nThe `frontend` service interacts directly with the `cartservice` by making requests to handle user shopping cart operations, such as adding items, retrieving the cart, and emptying the cart. Given the observed log entries and URL paths, it is reasonable to infer that the `frontend` service acts as a caller, while the `cartservice` acts as the callee. The high error rates observed in the context likely originate from the `frontend` making POST requests to the `/api/cart` endpoint handled by the `cartservice`.\n\n#### Likelihood of Relevance\n- **Very High:** The `frontend` service is highly likely to be relevant as it has a direct interaction with the `cartservice` and would be significantly impacted by any issues within the `cartservice`.\n\n### 2. Entity: `service.name:valkey`\n\n#### Indicators and Evidence\n- **Average Indicator - Redis Service:**\n - `my-otel-demo-valkey:6379`\n- **Average Indicator - Log Entries:**\n - Log patterns show Redis command execution by `cartservice`: `HGET`, `EXPIRE`, `HMSET`\n\n#### Relationship and Reasoning\nThe `valkey` service corresponds to the Redis instance that `cartservice` uses for data persistence. 
The `cartservice` makes frequent calls to Redis to perform operations such as fetching, expiring, and setting data, which is crucial for managing shopping cart state. As Redis is an upstream dependency, connection or data integrity issues here could directly contribute to the `500 Internal Server Errors` observed in the `cartservice`.\n\n#### Likelihood of Relevance\n- **High:** The `valkey` service is highly relevant because it is the data store for `cartservice`, directly affecting its behavior and performance.\n\n### 3. Entity: `service.name:currencyservice`\n\n#### Indicators and Evidence\n- **Average Indicator - gRPC Services:**\n - Mentioned in the architecture as handling currency conversions.\n- **Knowledge Base:**\n - Communicates with `cartservice` over gRPC to facilitate currency operations.\n\n#### Relationship and Reasoning\nThe `currencyservice` may interact with `cartservice` to handle price conversions for the items in the cart. Problems with currency conversion could cause unexpected data formats or communication failures, leading to `500 Internal Server Errors` in `cartservice`.\n\n#### Likelihood of Relevance\n- **Moderate:** While the interaction exists, whether it directly contributes to the 500 errors specifically observed at `/api/cart` endpoints would require further investigation.\n\n### 4. Entity: `service.name:checkoutservice`\n\n#### Indicators and Evidence\n- **Strong Indicator - gRPC Services:**\n - Interacts with `checkoutservice` during the checkout process.\n- **Knowledge Base:**\n - Handles interactions including calls to `cartservice` for finalizing orders.\n\n#### Relationship and Reasoning\nThe `checkoutservice` could potentially depend on `cartservice` to validate and finalize items in the user's cart as part of the checkout process. 
If the `cartservice` fails during these operations, it could propagate errors back up to the user level through `checkoutservice`.\n\n#### Likelihood of Relevance\n- **Moderate:** The relevance depends on whether the observed errors are occurring during checkout processes or general cart manipulations.\n\n### 5. Entity: `service.name:flagd`\n\n#### Indicators and Evidence\n- **Average Indicator - Feature Flag Evaluation:**\n - `flagd.evaluation.v1.Service`\n- **Log Metadata:**\n - `labels.feature_flag_key:keyword - cartServiceFailure`\n- **Trace Destination Service:**\n - `my-otel-demo-flagd:8013`\n\n#### Relationship and Reasoning\nThe `flagd` service is responsible for evaluating feature flags, such as the `cartServiceFailure`. If the feature flagging system is improperly toggled (e.g.: feature flag set to fail operations), it might contribute to the high error rates observed in `cartservice`.\n\n#### Likelihood of Relevance\n- **Moderate:** The feature flags might be causing or amplifying existing issues within the `cartservice`.\n\n### Summary\nFrom the observed data, the following entities are highly likely to be related to `cartservice` with respect to the given context:\n\n1. **`frontend`** - High relevance due to direct HTTP interaction with `cartservice`.\n2. **`valkey`** - High relevance as the Redis data store for `cartservice`.\n\nEntities with moderate likelihood:\n\n1. **`currencyservice`** - Related via gRPC for currency operations.\n2. **`checkoutservice`** - Potentially related during checkout processes.\n3. **`flagd`** - Related to feature flag evaluations involving `cartservice`.\n\nThese relationships cover both upstream dependencies affecting `cartservice` and downstream services impacted by `cartservice` issues, aiding in comprehensive issue diagnosis." 
+ ], + "kbEntries": [ + { + "id": "System architecture", + "text": "The architecture described here outlines a microservices-based system, where each service is implemented in a distinct programming language and communicates via gRPC, HTTP, or TCP. This system is designed to handle simulated user traffic, supported by a variety of interconnected services and components.\n\n### System Architecture\n\n1. **`loadgenerator`** - Simulates external user traffic by sending HTTP requests, which are managed by an Nginx ingress controller. This ingress directs traffic to the `frontendproxy` service.\n\n2. **`frontendproxy` (Envoy)** - Acts as a reverse proxy, routing incoming traffic from `loadgenerator` to `frontend`.\n\n3. **`frontend` (Node.js)** - The core service for user interactions, receiving HTTP traffic from `frontendproxy` and interfacing with various backend services to fulfill requests.\n\n4. **`frontend-web` (RUM)** - A Real User Monitoring (RUM) layer that runs in the user's browser, enabling insights into end-user experiences and frontend performance.\n\n5. **`adservice`** - Delivers advertisements to the `frontend` using gRPC, enhancing the user experience with relevant ad content.\n\n6. **`cartservice`** - Manages shopping cart data, including adding and removing items. It communicates over gRPC and leverages a Redis cache for data persistence.\n\n7. **`currencyservice`** - Handles currency conversions and facilitates interactions between `cartservice` and `checkoutservice` over gRPC.\n\n8. **`checkoutservice`** - Coordinates the checkout process, calling various services for payments, shipping, and emails. It utilizes both gRPC and HTTP protocols to aggregate the necessary information for order completion.\n\n9. **`emailservice`** - Sends order confirmation emails to users via gRPC, triggered by interactions with `checkoutservice`.\n\n10. 
**`productcatalogservice`** - Maintains the product catalog, storing details about available items and providing this data to other services via gRPC.\n\n11. **`recommendationservice`** - Generates personalized product recommendations, accessed by `frontend` over gRPC.\n\n12. **`shippingservice`** - Manages shipping information, providing essential data to `checkoutservice` over gRPC.\n\n13. **`quoteservice`** - Supplies shipping quotes over HTTP, which are accessed by `shippingservice` to estimate shipping costs.\n\n14. **`paymentservice`** - Processes payment transactions through gRPC, enabling secure and efficient payments for `checkoutservice`.\n\n15. **`accountingservice`** - Responsible for recording transactions, it connects to a Kafka queue and interacts over TCP.\n\n16. **`frauddetectionservice`** - Monitors orders for potential fraud, also interfacing with the Kafka queue over TCP to receive relevant transaction data.\n\n17. **`imageprovider` (Nginx)** - Serves static images for the frontend interface, accessible over HTTP.\n\n18. **`queue` (Kafka)** - Acts as a central message broker, facilitating communication between `accountingservice` and `frauddetectionservice`.\n\n### Key Components\nThe system is structured to enable robust, distributed communication across services, allowing each component to focus on a specific aspect of the user experience or business logic. 
The ingress controller, in particular, plays a critical role in directing traffic, ensuring that incoming requests from `loadgenerator` are correctly routed through `frontendproxy` and into the system’s core services.\n\nThis architecture ensures that the system can handle high traffic loads and provides a modular, flexible framework to meet user demands while monitoring for performance and security.\n", + "tokens": 733, + "score": 5 + } + ] + } + } + }, + { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "toolCallId": "call_KJwmI3xgY1gcstI4MVd4Olcf", + "function": { + "name": "observe", + "arguments": { + "observe": true + } + } + } + ] + }, + { + "name": "observe", + "response": { + "content": "### Acknowledgement of Investigations\n\nThe investigations of the `cartservice` and `frontend` services have provided valuable insights into the root cause of the high error rates observed in the `controller` service for POST requests to the `/api/cart` endpoint. The initial context indicated a severe and rapid onset of errors beginning at `2024-11-02T10:56:42.498Z`.\n\n### Key Observations\n\n**Investigation of Service `cartservice`**:\n- **Infrastructure & Environment**: The `cartservice` runs in a Kubernetes environment within the `default` namespace and uses Redis for data persistence.\n- **Log Patterns**:\n - Significant startup issues and fatal errors around `2024-11-02T10:55:30.000Z`.\n - A fatal \"bad entrypoint\" error prevented the service from starting correctly, leading to high error rates.\n- **Communication Endpoints**:\n - gRPC methods targeting shopping cart data.\n- **Critical Errors**:\n - \"FATAL: Could not start, bad entrypoint!\" log messages indicate critical failure.\n- **Implications**:\n - Startup issues likely prevented `cartservice` from responding to requests, causing downstream problems observed in the `controller` service.\n\n**Investigation of Service `frontend`**:\n- **Infrastructure & Environment**: The `frontend` service 
operates within a Kubernetes environment, communicating primarily via HTTP and gRPC.\n- **Log Patterns**:\n - Sharp increase in connection errors to the `cartservice` starting around `2024-11-02T10:55:30.000Z`.\n - \"ECONNREFUSED\" errors indicate failed attempts to connect to the `cartservice`.\n- **Communication Endpoints**:\n - Interacts with `cartservice` via gRPC.\n- **Critical Errors**:\n - \"No connection established\" and \"failed to get user cart during checkout\" errors.\n- **Implications**:\n - Communication failures between `frontend` and `cartservice` are strongly correlated with the high error rates in the `controller` service.\n\n### Timeline of Significant Events\n\n**2024-11-02T10:55:30.000Z**:\n- **`cartservice` Logs**:\n - \"OTEL-SDK: Instrument deactivation\" (78 occurrences), \"MeterProvider Disposed\" (4 occurrences), \"exiting...\" (3 occurrences), \"Application is shutting down...\" (2 occurrences), \"FATAL: Could not start, bad entrypoint!\" (2 occurrences).\n - **Meaning**: Indicates various startup and shutdown issues, especially the fatal entrypoint error that likely prevented the service from running correctly.\n\n**2024-11-02T10:55:30.000Z**:\n- **`frontend` Logs**:\n - \"No connection established\" errors (291 occurrences) and \"cart failure due to connection error\" (2 occurrences).\n - **Meaning**: Indicates immediate connection issues between `frontend` and `cartservice`.\n\n**2024-11-02T10:56:00.000Z**:\n- **`frontend` Logs**:\n - Further increase in \"No connection established\" errors (381 occurrences).\n - **Meaning**: Continued significant connection issues.\n\n**2024-11-02T10:56:30.000Z**:\n- **`cartservice` Logs**:\n - Another \"bad entrypoint\" fatal error (1 additional occurrence).\n - **Meaning**: Persistent startup issues continue to cause service failures.\n\n**2024-11-02T10:56:42.498Z**:\n- **Alert Triggered**:\n - High error rate (98.78%) in `controller` for POST requests to `/api/cart`.\n - **Meaning**: The timing 
aligns closely with the observed spikes in connection errors and startup issues, indicating a direct correlation.\n\n**2024-11-02T10:57:00.000Z**:\n- **`frontend` Logs**:\n - Connection errors return to zero occurrences.\n - **Meaning**: The immediate spike in connection issues appears to have subsided.\n\n### Context and Reasoning\n\nThe investigations reveal a clear temporal correlation and a logical causal pathway leading from the critical errors in `cartservice` to the observed high error rates in the `controller` service. The startup and initialization issues in `cartservice` around `2024-11-02T10:55:30.000Z` prevented it from functioning correctly, leading to connection refusal errors when `frontend` tried to access it. In turn, these failures propagated upstream, causing the `controller` to report high error rates for POST requests to the `/api/cart` endpoint.\n\n### Next Steps and Conclusion\n\n**Conclusion**:\n- The root cause of the high error rates in the `controller` service is the startup failure and persistent initialization issues in the `cartservice`.\n- These failures led to widespread connection refusal errors in `frontend`, which directly affected the ability of the `controller` to process requests to the `/api/cart` endpoint.\n\n**Recommendation**:\n- Address the misconfiguration or code issues in `cartservice` that are causing the fatal startup errors.\n- Ensure `cartservice` can reliably start and maintain stable communication with `frontend`.\n\nHaving thoroughly investigated the involved services, identified the key issues, and established the root cause, there is no need to further investigate other entities at this time. Conclude the investigation upon resolving the identified startup and initialization issues in `cartservice`." 
+ }, + "data": { + "content": "### Acknowledgement of Investigations\n\nThe investigations of the `cartservice` and `frontend` services have provided valuable insights into the root cause of the high error rates observed in the `controller` service for POST requests to the `/api/cart` endpoint. The initial context indicated a severe and rapid onset of errors beginning at `2024-11-02T10:56:42.498Z`.\n\n### Key Observations\n\n**Investigation of Service `cartservice`**:\n- **Infrastructure & Environment**: The `cartservice` runs in a Kubernetes environment within the `default` namespace and uses Redis for data persistence.\n- **Log Patterns**:\n - Significant startup issues and fatal errors around `2024-11-02T10:55:30.000Z`.\n - A fatal \"bad entrypoint\" error prevented the service from starting correctly, leading to high error rates.\n- **Communication Endpoints**:\n - gRPC methods targeting shopping cart data.\n- **Critical Errors**:\n - \"FATAL: Could not start, bad entrypoint!\" log messages indicate critical failure.\n- **Implications**:\n - Startup issues likely prevented `cartservice` from responding to requests, causing downstream problems observed in the `controller` service.\n\n**Investigation of Service `frontend`**:\n- **Infrastructure & Environment**: The `frontend` service operates within a Kubernetes environment, communicating primarily via HTTP and gRPC.\n- **Log Patterns**:\n - Sharp increase in connection errors to the `cartservice` starting around `2024-11-02T10:55:30.000Z`.\n - \"ECONNREFUSED\" errors indicate failed attempts to connect to the `cartservice`.\n- **Communication Endpoints**:\n - Interacts with `cartservice` via gRPC.\n- **Critical Errors**:\n - \"No connection established\" and \"failed to get user cart during checkout\" errors.\n- **Implications**:\n - Communication failures between `frontend` and `cartservice` are strongly correlated with the high error rates in the `controller` service.\n\n### Timeline of Significant 
Events\n\n**2024-11-02T10:55:30.000Z**:\n- **`cartservice` Logs**:\n - \"OTEL-SDK: Instrument deactivation\" (78 occurrences), \"MeterProvider Disposed\" (4 occurrences), \"exiting...\" (3 occurrences), \"Application is shutting down...\" (2 occurrences), \"FATAL: Could not start, bad entrypoint!\" (2 occurrences).\n - **Meaning**: Indicates various startup and shutdown issues, especially the fatal entrypoint error that likely prevented the service from running correctly.\n\n**2024-11-02T10:55:30.000Z**:\n- **`frontend` Logs**:\n - \"No connection established\" errors (291 occurrences) and \"cart failure due to connection error\" (2 occurrences).\n - **Meaning**: Indicates immediate connection issues between `frontend` and `cartservice`.\n\n**2024-11-02T10:56:00.000Z**:\n- **`frontend` Logs**:\n - Further increase in \"No connection established\" errors (381 occurrences).\n - **Meaning**: Continued significant connection issues.\n\n**2024-11-02T10:56:30.000Z**:\n- **`cartservice` Logs**:\n - Another \"bad entrypoint\" fatal error (1 additional occurrence).\n - **Meaning**: Persistent startup issues continue to cause service failures.\n\n**2024-11-02T10:56:42.498Z**:\n- **Alert Triggered**:\n - High error rate (98.78%) in `controller` for POST requests to `/api/cart`.\n - **Meaning**: The timing aligns closely with the observed spikes in connection errors and startup issues, indicating a direct correlation.\n\n**2024-11-02T10:57:00.000Z**:\n- **`frontend` Logs**:\n - Connection errors return to zero occurrences.\n - **Meaning**: The immediate spike in connection issues appears to have subsided.\n\n### Context and Reasoning\n\nThe investigations reveal a clear temporal correlation and a logical causal pathway leading from the critical errors in `cartservice` to the observed high error rates in the `controller` service. 
The startup and initialization issues in `cartservice` around `2024-11-02T10:55:30.000Z` prevented it from functioning correctly, leading to connection refusal errors when `frontend` tried to access it. In turn, these failures propagated upstream, causing the `controller` to report high error rates for POST requests to the `/api/cart` endpoint.\n\n### Next Steps and Conclusion\n\n**Conclusion**:\n- The root cause of the high error rates in the `controller` service is the startup failure and persistent initialization issues in the `cartservice`.\n- These failures led to widespread connection refusal errors in `frontend`, which directly affected the ability of the `controller` to process requests to the `/api/cart` endpoint.\n\n**Recommendation**:\n- Address the misconfiguration or code issues in `cartservice` that are causing the fatal startup errors.\n- Ensure `cartservice` can reliably start and maintain stable communication with `frontend`.\n\nHaving thoroughly investigated the involved services, identified the key issues, and established the root cause, there is no need to further investigate other entities at this time. 
Conclude the investigation upon resolving the identified startup and initialization issues in `cartservice`.", + "investigations": [ + { + "attachments": { + "alerts": [], + "slos": [], + "analysis": { + "total": 41440, + "sampled": 1000, + "fields": [ + "@timestamp:date - 964 distinct values", + "agent.name.text:text - 1 distinct values (`opentelemetry/dotnet/elastic-dotnet`)", + "agent.name:keyword - 1 distinct values (`opentelemetry/dotnet/elastic-dotnet`)", + "agent.version:keyword - 1 distinct values (`1.0.0-alpha.4`)", + "container.id:keyword - 1 distinct values (`577d6250ad96a5d0016a42ed87bd5c3a63b86edd2af0a0c04b54a1cec2a1a50a`)", + "data_stream.dataset:keyword - 1 distinct values (`apm`)", + "data_stream.namespace:keyword - 1 distinct values (`default`)", + "data_stream.type:keyword - 1 distinct values (`traces`)", + "destination.address:keyword - 2 distinct values (`my-otel-demo-flagd`, `my-otel-demo-valkey`)", + "destination.port:long - 2 distinct values (`8013`, `6379`)", + "event.outcome:keyword - 1 distinct values (`success`)", + "event.success_count:byte - 1 distinct values (`1`)", + "host.hostname:keyword - 1 distinct values (`minikube`)", + "host.name.text:text - 1 distinct values (`minikube`)", + "host.name:keyword - 1 distinct values (`minikube`)", + "http.request.method:keyword - 1 distinct values (`POST`)", + "http.response.status_code:long - 1 distinct values (`200`)", + "kubernetes.namespace:keyword - 1 distinct values (`default`)", + "kubernetes.node.name.text:text - 1 distinct values (`minikube`)", + "kubernetes.node.name:keyword - 1 distinct values (`minikube`)", + "kubernetes.pod.name.text:text - 1 distinct values (`my-otel-demo-cartservice-67575c6f84-vngzw`)", + "kubernetes.pod.name:keyword - 1 distinct values (`my-otel-demo-cartservice-67575c6f84-vngzw`)", + "kubernetes.pod.uid:keyword - 1 distinct values (`8e441d5c-4d52-42f2-937e-1f3b84a6baed`)", + "labels.app_product_id:keyword - 8 distinct values (`66VCHSJNUP`, `L9ECAV7KIM`, 
`OLJCESPC7Z`, `0PUK6V6EV0`, `9SIQT8TOJO`, `LS4PSXUNUM`, `HQTGWGPNH4`, `6E92ZMYYFZ`)", + "labels.app_user_id:keyword - 418 distinct values (`11c5f5d4-37b4-46d4-a04e-a97fd71b8597`, `9017f637-12af-4671-b18c-0f86a58a7bbe`, `349478d0-862e-4b14-9d54-3120dd57865f`, `2a55d360-40c5-4a32-844b-4182e8050ca3`, `b9944e91-18d6-430c-90a5-4969e1917a2d`, `1ffeffe2-4e4a-47c4-8b33-e691f116c8cc`, `92ac0a92-4a23-4aa7-866b-44cd6b5d6e26`, `76765dfc-80f8-49db-8377-249e6930dd35`, `d5c375cc-6e38-45f8-b121-30fcb3ae1001`, `bdc5ebd9-a2ee-4dbf-8dae-b88a2b2b3643`, 408 more values)", + "labels.db_redis_flags:keyword - 2 distinct values (`DemandMaster`, `None`)", + "labels.feature_flag_key:keyword - 1 distinct values (`cartServiceFailure`)", + "labels.feature_flag_provider_name:keyword - 1 distinct values (`flagd Provider`)", + "labels.feature_flag_variant:keyword - 1 distinct values (`off`)", + "labels.grpc_method:keyword - 3 distinct values (`/oteldemo.CartService/EmptyCart`, `/oteldemo.CartService/AddItem`, `/oteldemo.CartService/GetCart`)", + "labels.grpc_status_code:keyword - 1 distinct values (`0`)", + "labels.http_route:keyword - 3 distinct values (`/oteldemo.CartService/EmptyCart`, `/oteldemo.CartService/AddItem`, `/oteldemo.CartService/GetCart`)", + "labels.k8s_deployment_name:keyword - 1 distinct values (`my-otel-demo-cartservice`)", + "labels.k8s_pod_ip:ip - 1 distinct values (`10.244.0.61`)", + "labels.k8s_pod_start_time:keyword - 1 distinct values (`2024-11-02T10:23:09Z`)", + "labels.network_protocol_version:keyword - 1 distinct values (`2`)", + "labels.service_namespace:keyword - 1 distinct values (`opentelemetry-demo`)", + "labels.transaction_id:keyword - 995 distinct values (`6b71cc6a08847a01`, `796c855656f17eb0`, `aaa6049d68006044`, `60ad645003eda9b7`, `be3e883fd0dd1a58`, `47247987037e78b3`, `180228f348c3d599`, `587dca65bb2463f3`, `c438d3e7c15d9a4f`, `0a9f738aa8e11c7e`, 985 more values)", + "numeric_labels.app_cart_items_count:scaled_float - 10 distinct values (`8`, `1`, `16`, `2`, 
`20`, `5`, `4`, `10`, `3`, `0`)", + "numeric_labels.app_product_quantity:scaled_float - 6 distinct values (`5`, `1`, `2`, `10`, `3`, `4`)", + "numeric_labels.db_redis_database_index:scaled_float - 1 distinct values (`0`)", + "observer.hostname:keyword - 1 distinct values (`Darios-MacBook-Pro-2.local`)", + "observer.type:keyword - 1 distinct values (`apm-server`)", + "observer.version:keyword - 1 distinct values (`8.15.0`)", + "parent.id:keyword - 999 distinct values (`76a22c35a2b363fb`, `6e363f007dd857fc`, `499ee35b7e0dc791`, `f05ead7365bb815b`, `46709d74aeb1ac57`, `c1e7aeb4f3b66351`, `d6e80a03ce62827e`, `5eff08ed000af546`, `80be4180f1c8985e`, `9154a028a223da28`, 989 more values)", + "processor.event:keyword - 2 distinct values (`transaction`, `span`)", + "service.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "service.framework.name.text:text - 1 distinct values (`Microsoft.AspNetCore`)", + "service.framework.name:keyword - 1 distinct values (`Microsoft.AspNetCore`)", + "service.language.name.text:text - 1 distinct values (`dotnet`)", + "service.language.name:keyword - 1 distinct values (`dotnet`)", + "service.name.text:text - 1 distinct values (`cartservice`)", + "service.name:keyword - 1 distinct values (`cartservice`)", + "service.node.name.text:text - 1 distinct values (`8e441d5c-4d52-42f2-937e-1f3b84a6baed`)", + "service.node.name:keyword - 1 distinct values (`8e441d5c-4d52-42f2-937e-1f3b84a6baed`)", + "service.target.name.text:text - 3 distinct values (`my-otel-demo-flagd:8013`, `flagd.evaluation.v1.Service`, 1 more values)", + "service.target.name:keyword - 3 distinct values (`my-otel-demo-flagd:8013`, `flagd.evaluation.v1.Service`, `my-otel-demo-valkey:6379`)", + "service.target.type:keyword - 3 distinct values (`http`, `grpc`, `redis`)", + "span.db.statement:keyword - 455 distinct values (`HGET dd1f04f3-aaf1-4a76-85e4-8c64a930684e`, `EXPIRE 2538c162-9908-11ef-8217-d64fba5f23b1`, `HGET 958266ce-2169-4a2a-bcd5-090fd75cf50b`, `HGET 
ba92c7a8-6623-4267-8512-9cf0cb23bbf8`, `HGET d96a9e44-a9ed-4f64-8f46-5d4acd05a1fa`, `HGET 4a0d4f33-09ee-4008-a826-027dd6e889c6`, `HGET 5bc59fca-3c11-4d71-9496-c9c29b9d37d1`, `HGET 0497c8a8-2da2-4ff7-80a7-ce647916277c`, `HGET 87d37a8f-cc58-483a-aad0-fc0c91421544`, `HGET 4dea82bc-1c58-4f0a-97fd-3a1369544dc0`, 445 more values)", + "span.db.type:keyword - 1 distinct values (`redis`)", + "span.destination.service.name.text:text - 3 distinct values (`http://my-otel-demo-flagd:8013`, `my-otel-demo-flagd:8013`, 1 more values)", + "span.destination.service.name:keyword - 3 distinct values (`http://my-otel-demo-flagd:8013`, `my-otel-demo-flagd:8013`, `my-otel-demo-valkey:6379`)", + "span.destination.service.resource:keyword - 2 distinct values (`my-otel-demo-flagd:8013`, `my-otel-demo-valkey:6379`)", + "span.destination.service.type:keyword - 2 distinct values (`external`, `db`)", + "span.duration.us:long - 399 distinct values (`501`, `154`, `355`, `3766`, `818`, `657`, `203`, `127`, `715`, `1092`, 389 more values)", + "span.id:keyword - 1000 distinct values (`6b71cc6a08847a01`, `796c855656f17eb0`, `aaa6049d68006044`, `60ad645003eda9b7`, `be3e883fd0dd1a58`, `47247987037e78b3`, `180228f348c3d599`, `587dca65bb2463f3`, `c438d3e7c15d9a4f`, `0a9f738aa8e11c7e`, 990 more values)", + "span.name.text:text - 5 distinct values (`POST`, `flagd.evaluation.v1.Service/ResolveBoolean`, 3 more values)", + "span.name:keyword - 5 distinct values (`POST`, `flagd.evaluation.v1.Service/ResolveBoolean`, `HMSET`, `EXPIRE`, `HGET`)", + "span.representative_count:scaled_float - 1 distinct values (`1`)", + "span.subtype:keyword - 3 distinct values (`http`, `grpc`, `redis`)", + "span.type:keyword - 2 distinct values (`external`, `db`)", + "tags:keyword - 1 distinct values (`_geoip_database_unavailable_GeoLite2-City.mmdb`)", + "timestamp.us:long - 1000 distinct values (`1730544111451828`, `1730544606786118`, `1730544573583321`, `1730544068214001`, `1730543971566630`, `1730544035740835`, 
`1730543892242334`, `1730544845301548`, `1730543919324800`, `1730544697032712`, 990 more values)", + "trace.id:keyword - 920 distinct values (`de1ed71d39ca87650a8401ae46f2cac0`, `1283785f372e3335e30ab82c53008ea6`, `be3fc18a45763f00d43954ef09300042`, `bffb395c41d9a23aa7acc6b1993f64c6`, `68df2f10f7b09e787c688046ae76f6ff`, `3ba13e99d34a48133415c71b9b523321`, `6405704384b681e891e1067b1c1e6e68`, `b137359ec568679804d48c699dd49df5`, `cce045bbbdbb711a2f5cae192c337cb3`, `85743f477e557162900c88409231222a`, 910 more values)", + "transaction.duration.us:long - 441 distinct values (`2497`, `2659`, `610`, `458`, `3158`, `1484`, `436`, `536`, `2602`, `3472`, 431 more values)", + "transaction.id:keyword - 469 distinct values (`60ad645003eda9b7`, `be3e883fd0dd1a58`, `180228f348c3d599`, `0a9f738aa8e11c7e`, `66622c930742b62b`, `2493315b88140ec7`, `7950f85af72baaa0`, `0fda0e8c0af2a975`, `0504e94fae4dd7f8`, `25ec77d0c132ceef`, 459 more values)", + "transaction.name.text:text - 3 distinct values (`POST /oteldemo.CartService/EmptyCart`, `POST /oteldemo.CartService/AddItem`, 1 more values)", + "transaction.name:keyword - 3 distinct values (`POST /oteldemo.CartService/EmptyCart`, `POST /oteldemo.CartService/AddItem`, `POST /oteldemo.CartService/GetCart`)", + "transaction.representative_count:scaled_float - 1 distinct values (`1`)", + "transaction.result:keyword - 1 distinct values (`HTTP 2xx`)", + "transaction.sampled:boolean - 1 distinct values (`true`)", + "transaction.type:keyword - 1 distinct values (`request`)", + "url.domain:keyword - 1 distinct values (`my-otel-demo-cartservice`)", + "url.full.text:text - 3 distinct values (`http://my-otel-demo-cartservice:8080/oteldemo.CartService/EmptyCart`, `http://my-otel-demo-cartservice:8080/oteldemo.CartService/AddItem`, 1 more values)", + "url.full:keyword - 3 distinct values (`http://my-otel-demo-cartservice:8080/oteldemo.CartService/EmptyCart`, `http://my-otel-demo-cartservice:8080/oteldemo.CartService/AddItem`, 
`http://my-otel-demo-cartservice:8080/oteldemo.CartService/GetCart`)", + "url.original.text:text - 4 distinct values (`http://my-otel-demo-flagd:8013/flagd.evaluation.v1.Service/ResolveBoolean`, `/oteldemo.CartService/EmptyCart`, 2 more values)", + "url.original:keyword - 4 distinct values (`http://my-otel-demo-flagd:8013/flagd.evaluation.v1.Service/ResolveBoolean`, `/oteldemo.CartService/EmptyCart`, `/oteldemo.CartService/AddItem`, `/oteldemo.CartService/GetCart`)", + "url.path:keyword - 3 distinct values (`/oteldemo.CartService/EmptyCart`, `/oteldemo.CartService/AddItem`, `/oteldemo.CartService/GetCart`)", + "url.port:long - 1 distinct values (`8080`)", + "url.scheme:keyword - 1 distinct values (`http`)", + "user_agent.device.name.text:text - 1 distinct values (`Other`)", + "user_agent.device.name:keyword - 1 distinct values (`Other`)", + "user_agent.name.text:text - 1 distinct values (`Other`)", + "user_agent.name:keyword - 1 distinct values (`Other`)", + "user_agent.original.text:text - 2 distinct values (`grpc-go/1.64.0`, `grpc-node-js/1.10.11`)", + "user_agent.original:keyword - 2 distinct values (`grpc-go/1.64.0`, `grpc-node-js/1.10.11`)" + ] + }, + "ownPatterns": [ + { + "field": "message", + "count": 8414, + "pattern": "called with userId", + "regex": ".*?called.+?with.+?userId.*?", + "sample": "GetCartAsync called with userId=f96ee88a-4745-45f1-885a-16c3de0c668a", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "GetCartAsync called with userId=f96ee88a-4745-45f1-885a-16c3de0c668a" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.349Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 226 + }, + { + "x": 1730543820000, + "y": 194 + }, + { + "x": 1730543850000, + "y": 270 + }, + { + "x": 1730543880000, + "y": 200 + }, + { + "x": 1730543910000, + "y": 164 + }, + { + "x": 1730543940000, + "y": 198 + }, + { + "x": 1730543970000, + "y": 192 + }, + { + "x": 
1730544000000, + "y": 228 + }, + { + "x": 1730544030000, + "y": 230 + }, + { + "x": 1730544060000, + "y": 212 + }, + { + "x": 1730544090000, + "y": 222 + }, + { + "x": 1730544120000, + "y": 210 + }, + { + "x": 1730544150000, + "y": 240 + }, + { + "x": 1730544180000, + "y": 200 + }, + { + "x": 1730544210000, + "y": 204 + }, + { + "x": 1730544240000, + "y": 222 + }, + { + "x": 1730544270000, + "y": 216 + }, + { + "x": 1730544300000, + "y": 254 + }, + { + "x": 1730544330000, + "y": 218 + }, + { + "x": 1730544360000, + "y": 228 + }, + { + "x": 1730544390000, + "y": 214 + }, + { + "x": 1730544420000, + "y": 220 + }, + { + "x": 1730544450000, + "y": 218 + }, + { + "x": 1730544480000, + "y": 224 + }, + { + "x": 1730544510000, + "y": 228 + }, + { + "x": 1730544540000, + "y": 228 + }, + { + "x": 1730544570000, + "y": 232 + }, + { + "x": 1730544600000, + "y": 218 + }, + { + "x": 1730544630000, + "y": 232 + }, + { + "x": 1730544660000, + "y": 212 + }, + { + "x": 1730544690000, + "y": 236 + }, + { + "x": 1730544720000, + "y": 246 + }, + { + "x": 1730544750000, + "y": 218 + }, + { + "x": 1730544780000, + "y": 214 + }, + { + "x": 1730544810000, + "y": 210 + }, + { + "x": 1730544840000, + "y": 228 + }, + { + "x": 1730544870000, + "y": 228 + }, + { + "x": 1730544900000, + "y": 224 + }, + { + "x": 1730544930000, + "y": 56 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "bfeu", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 4835, + "pattern": "Enqueued", + "regex": ".*?Enqueued.*?", + "sample": "Enqueued", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "Enqueued" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.349Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 126 + }, + { + "x": 
1730543820000, + "y": 121 + }, + { + "x": 1730543850000, + "y": 171 + }, + { + "x": 1730543880000, + "y": 109 + }, + { + "x": 1730543910000, + "y": 96 + }, + { + "x": 1730543940000, + "y": 134 + }, + { + "x": 1730543970000, + "y": 99 + }, + { + "x": 1730544000000, + "y": 130 + }, + { + "x": 1730544030000, + "y": 132 + }, + { + "x": 1730544060000, + "y": 119 + }, + { + "x": 1730544090000, + "y": 126 + }, + { + "x": 1730544120000, + "y": 118 + }, + { + "x": 1730544150000, + "y": 133 + }, + { + "x": 1730544180000, + "y": 100 + }, + { + "x": 1730544210000, + "y": 125 + }, + { + "x": 1730544240000, + "y": 124 + }, + { + "x": 1730544270000, + "y": 124 + }, + { + "x": 1730544300000, + "y": 161 + }, + { + "x": 1730544330000, + "y": 126 + }, + { + "x": 1730544360000, + "y": 140 + }, + { + "x": 1730544390000, + "y": 107 + }, + { + "x": 1730544420000, + "y": 120 + }, + { + "x": 1730544450000, + "y": 125 + }, + { + "x": 1730544480000, + "y": 128 + }, + { + "x": 1730544510000, + "y": 120 + }, + { + "x": 1730544540000, + "y": 123 + }, + { + "x": 1730544570000, + "y": 135 + }, + { + "x": 1730544600000, + "y": 120 + }, + { + "x": 1730544630000, + "y": 139 + }, + { + "x": 1730544660000, + "y": 113 + }, + { + "x": 1730544690000, + "y": 154 + }, + { + "x": 1730544720000, + "y": 154 + }, + { + "x": 1730544750000, + "y": 115 + }, + { + "x": 1730544780000, + "y": 120 + }, + { + "x": 1730544810000, + "y": 114 + }, + { + "x": 1730544840000, + "y": 133 + }, + { + "x": 1730544870000, + "y": 127 + }, + { + "x": 1730544900000, + "y": 139 + }, + { + "x": 1730544930000, + "y": 35 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "oacl", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 4835, + "pattern": "ResponseReceived", + "regex": ".*?ResponseReceived.*?", + "sample": "ResponseReceived", + 
"highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "ResponseReceived" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.349Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 126 + }, + { + "x": 1730543820000, + "y": 121 + }, + { + "x": 1730543850000, + "y": 171 + }, + { + "x": 1730543880000, + "y": 109 + }, + { + "x": 1730543910000, + "y": 96 + }, + { + "x": 1730543940000, + "y": 134 + }, + { + "x": 1730543970000, + "y": 99 + }, + { + "x": 1730544000000, + "y": 130 + }, + { + "x": 1730544030000, + "y": 132 + }, + { + "x": 1730544060000, + "y": 119 + }, + { + "x": 1730544090000, + "y": 126 + }, + { + "x": 1730544120000, + "y": 118 + }, + { + "x": 1730544150000, + "y": 133 + }, + { + "x": 1730544180000, + "y": 100 + }, + { + "x": 1730544210000, + "y": 125 + }, + { + "x": 1730544240000, + "y": 124 + }, + { + "x": 1730544270000, + "y": 124 + }, + { + "x": 1730544300000, + "y": 161 + }, + { + "x": 1730544330000, + "y": 126 + }, + { + "x": 1730544360000, + "y": 140 + }, + { + "x": 1730544390000, + "y": 107 + }, + { + "x": 1730544420000, + "y": 120 + }, + { + "x": 1730544450000, + "y": 125 + }, + { + "x": 1730544480000, + "y": 128 + }, + { + "x": 1730544510000, + "y": 120 + }, + { + "x": 1730544540000, + "y": 123 + }, + { + "x": 1730544570000, + "y": 135 + }, + { + "x": 1730544600000, + "y": 120 + }, + { + "x": 1730544630000, + "y": 139 + }, + { + "x": 1730544660000, + "y": 113 + }, + { + "x": 1730544690000, + "y": 154 + }, + { + "x": 1730544720000, + "y": 154 + }, + { + "x": 1730544750000, + "y": 115 + }, + { + "x": 1730544780000, + "y": 120 + }, + { + "x": 1730544810000, + "y": 114 + }, + { + "x": 1730544840000, + "y": 133 + }, + { + "x": 1730544870000, + "y": 127 + }, + { + "x": 1730544900000, + "y": 139 + }, + { + "x": 1730544930000, + "y": 35 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + 
"change": { + "type": "stationary", + "significance": null + }, + "shortId": "hhud", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 4835, + "pattern": "Sent", + "regex": ".*?Sent.*?", + "sample": "Sent", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "Sent" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.349Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 126 + }, + { + "x": 1730543820000, + "y": 121 + }, + { + "x": 1730543850000, + "y": 171 + }, + { + "x": 1730543880000, + "y": 109 + }, + { + "x": 1730543910000, + "y": 96 + }, + { + "x": 1730543940000, + "y": 134 + }, + { + "x": 1730543970000, + "y": 99 + }, + { + "x": 1730544000000, + "y": 130 + }, + { + "x": 1730544030000, + "y": 132 + }, + { + "x": 1730544060000, + "y": 119 + }, + { + "x": 1730544090000, + "y": 126 + }, + { + "x": 1730544120000, + "y": 118 + }, + { + "x": 1730544150000, + "y": 133 + }, + { + "x": 1730544180000, + "y": 100 + }, + { + "x": 1730544210000, + "y": 125 + }, + { + "x": 1730544240000, + "y": 124 + }, + { + "x": 1730544270000, + "y": 124 + }, + { + "x": 1730544300000, + "y": 161 + }, + { + "x": 1730544330000, + "y": 126 + }, + { + "x": 1730544360000, + "y": 140 + }, + { + "x": 1730544390000, + "y": 107 + }, + { + "x": 1730544420000, + "y": 120 + }, + { + "x": 1730544450000, + "y": 125 + }, + { + "x": 1730544480000, + "y": 128 + }, + { + "x": 1730544510000, + "y": 120 + }, + { + "x": 1730544540000, + "y": 123 + }, + { + "x": 1730544570000, + "y": 135 + }, + { + "x": 1730544600000, + "y": 120 + }, + { + "x": 1730544630000, + "y": 139 + }, + { + "x": 1730544660000, + "y": 113 + }, + { + "x": 1730544690000, + "y": 154 + }, + { + "x": 1730544720000, + "y": 154 + }, + { + "x": 1730544750000, + "y": 115 + }, + { + "x": 1730544780000, + "y": 120 + }, + { + "x": 1730544810000, + "y": 114 + }, + { + "x": 1730544840000, + "y": 133 + }, + { + "x": 
1730544870000, + "y": 127 + }, + { + "x": 1730544900000, + "y": 139 + }, + { + "x": 1730544930000, + "y": 35 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "ttbh", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 4441, + "pattern": "info 0", + "regex": ".*?info.+?0.*?", + "sample": "info: cartservice.cartstore.ValkeyCartStore[0]\n", + "highlight": { + "service.name": [ + "cartservice" + ], + "resource.attributes.service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "info: cartservice.cartstore.ValkeyCartStore[0]\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.586Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 117 + }, + { + "x": 1730543820000, + "y": 104 + }, + { + "x": 1730543850000, + "y": 146 + }, + { + "x": 1730543880000, + "y": 103 + }, + { + "x": 1730543910000, + "y": 86 + }, + { + "x": 1730543940000, + "y": 110 + }, + { + "x": 1730543970000, + "y": 97 + }, + { + "x": 1730544000000, + "y": 119 + }, + { + "x": 1730544030000, + "y": 120 + }, + { + "x": 1730544060000, + "y": 110 + }, + { + "x": 1730544090000, + "y": 116 + }, + { + "x": 1730544120000, + "y": 109 + }, + { + "x": 1730544150000, + "y": 124 + }, + { + "x": 1730544180000, + "y": 100 + }, + { + "x": 1730544210000, + "y": 109 + }, + { + "x": 1730544240000, + "y": 115 + }, + { + "x": 1730544270000, + "y": 113 + }, + { + "x": 1730544300000, + "y": 137 + }, + { + "x": 1730544330000, + "y": 114 + }, + { + "x": 1730544360000, + "y": 122 + }, + { + "x": 1730544390000, + "y": 107 + }, + { + "x": 1730544420000, + "y": 113 + }, + { + "x": 1730544450000, + "y": 114 + }, + { + "x": 1730544480000, + "y": 117 + }, + { + "x": 1730544510000, + "y": 116 + }, + { + "x": 1730544540000, + "y": 117 + }, + { + "x": 1730544570000, + "y": 121 + }, 
+ { + "x": 1730544600000, + "y": 112 + }, + { + "x": 1730544630000, + "y": 123 + }, + { + "x": 1730544660000, + "y": 108 + }, + { + "x": 1730544690000, + "y": 129 + }, + { + "x": 1730544720000, + "y": 132 + }, + { + "x": 1730544750000, + "y": 111 + }, + { + "x": 1730544780000, + "y": 111 + }, + { + "x": 1730544810000, + "y": 108 + }, + { + "x": 1730544840000, + "y": 120 + }, + { + "x": 1730544870000, + "y": 118 + }, + { + "x": 1730544900000, + "y": 120 + }, + { + "x": 1730544930000, + "y": 73 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "eahn", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 4152, + "pattern": "Fetch cart", + "regex": ".*?Fetch.+?cart.*?", + "sample": "Fetch cart", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "Fetch cart" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.398Z", + "lastOccurrence": "2024-11-02T10:55:36.349Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 112 + }, + { + "x": 1730543820000, + "y": 94 + }, + { + "x": 1730543850000, + "y": 132 + }, + { + "x": 1730543880000, + "y": 100 + }, + { + "x": 1730543910000, + "y": 80 + }, + { + "x": 1730543940000, + "y": 97 + }, + { + "x": 1730543970000, + "y": 96 + }, + { + "x": 1730544000000, + "y": 113 + }, + { + "x": 1730544030000, + "y": 113 + }, + { + "x": 1730544060000, + "y": 105 + }, + { + "x": 1730544090000, + "y": 111 + }, + { + "x": 1730544120000, + "y": 104 + }, + { + "x": 1730544150000, + "y": 119 + }, + { + "x": 1730544180000, + "y": 100 + }, + { + "x": 1730544210000, + "y": 100 + }, + { + "x": 1730544240000, + "y": 110 + }, + { + "x": 1730544270000, + "y": 107 + }, + { + "x": 1730544300000, + "y": 123 + }, + { + "x": 1730544330000, + "y": 107 + }, + { + "x": 1730544360000, + "y": 112 + }, + { + "x": 1730544390000, + "y": 
107 + }, + { + "x": 1730544420000, + "y": 109 + }, + { + "x": 1730544450000, + "y": 108 + }, + { + "x": 1730544480000, + "y": 111 + }, + { + "x": 1730544510000, + "y": 114 + }, + { + "x": 1730544540000, + "y": 114 + }, + { + "x": 1730544570000, + "y": 112 + }, + { + "x": 1730544600000, + "y": 107 + }, + { + "x": 1730544630000, + "y": 114 + }, + { + "x": 1730544660000, + "y": 105 + }, + { + "x": 1730544690000, + "y": 115 + }, + { + "x": 1730544720000, + "y": 119 + }, + { + "x": 1730544750000, + "y": 109 + }, + { + "x": 1730544780000, + "y": 106 + }, + { + "x": 1730544810000, + "y": 105 + }, + { + "x": 1730544840000, + "y": 113 + }, + { + "x": 1730544870000, + "y": 113 + }, + { + "x": 1730544900000, + "y": 109 + }, + { + "x": 1730544930000, + "y": 27 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "xpjx", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 602, + "pattern": "called with userId 11ef 8217 d64fba5f23b1", + "regex": ".*?called.+?with.+?userId.+?11ef.+?8217.+?d64fba5f23b1.*?", + "sample": "GetCartAsync called with userId=fb462b14-9908-11ef-8217-d64fba5f23b1", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "GetCartAsync called with userId=fb462b14-9908-11ef-8217-d64fba5f23b1" + ] + }, + "firstOccurrence": "2024-11-02T10:36:34.219Z", + "lastOccurrence": "2024-11-02T10:55:34.364Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 12 + }, + { + "x": 1730543820000, + "y": 26 + }, + { + "x": 1730543850000, + "y": 34 + }, + { + "x": 1730543880000, + "y": 6 + }, + { + "x": 1730543910000, + "y": 16 + }, + { + "x": 1730543940000, + "y": 30 + }, + { + "x": 1730543970000, + "y": 2 + }, + { + "x": 1730544000000, + "y": 14 + }, + { + "x": 1730544030000, + "y": 18 + }, + { + "x": 1730544060000, + "y": 12 + }, + { + "x": 
1730544090000, + "y": 10 + }, + { + "x": 1730544120000, + "y": 12 + }, + { + "x": 1730544150000, + "y": 12 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 22 + }, + { + "x": 1730544240000, + "y": 12 + }, + { + "x": 1730544270000, + "y": 14 + }, + { + "x": 1730544300000, + "y": 36 + }, + { + "x": 1730544330000, + "y": 18 + }, + { + "x": 1730544360000, + "y": 24 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 10 + }, + { + "x": 1730544450000, + "y": 14 + }, + { + "x": 1730544480000, + "y": 14 + }, + { + "x": 1730544510000, + "y": 4 + }, + { + "x": 1730544540000, + "y": 6 + }, + { + "x": 1730544570000, + "y": 26 + }, + { + "x": 1730544600000, + "y": 14 + }, + { + "x": 1730544630000, + "y": 22 + }, + { + "x": 1730544660000, + "y": 8 + }, + { + "x": 1730544690000, + "y": 34 + }, + { + "x": 1730544720000, + "y": 34 + }, + { + "x": 1730544750000, + "y": 4 + }, + { + "x": 1730544780000, + "y": 12 + }, + { + "x": 1730544810000, + "y": 6 + }, + { + "x": 1730544840000, + "y": 16 + }, + { + "x": 1730544870000, + "y": 12 + }, + { + "x": 1730544900000, + "y": 28 + }, + { + "x": 1730544930000, + "y": 8 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "ckdv", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 382, + "pattern": "AddItemAsync called with userId 11ef 8217 d64fba5f23b1 productId quantity", + "regex": ".*?AddItemAsync.+?called.+?with.+?userId.+?11ef.+?8217.+?d64fba5f23b1.+?productId.+?quantity.*?", + "sample": "AddItemAsync called with userId=fc4e8196-9908-11ef-8217-d64fba5f23b1, productId=1YMWWN1N4O, quantity=10", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "AddItemAsync called with userId=fc4e8196-9908-11ef-8217-d64fba5f23b1, productId=1YMWWN1N4O, quantity=10" + ] 
+ }, + "firstOccurrence": "2024-11-02T10:36:34.216Z", + "lastOccurrence": "2024-11-02T10:55:34.363Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 8 + }, + { + "x": 1730543820000, + "y": 14 + }, + { + "x": 1730543850000, + "y": 22 + }, + { + "x": 1730543880000, + "y": 6 + }, + { + "x": 1730543910000, + "y": 8 + }, + { + "x": 1730543940000, + "y": 22 + }, + { + "x": 1730543970000, + "y": 2 + }, + { + "x": 1730544000000, + "y": 10 + }, + { + "x": 1730544030000, + "y": 10 + }, + { + "x": 1730544060000, + "y": 8 + }, + { + "x": 1730544090000, + "y": 10 + }, + { + "x": 1730544120000, + "y": 8 + }, + { + "x": 1730544150000, + "y": 8 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 14 + }, + { + "x": 1730544240000, + "y": 8 + }, + { + "x": 1730544270000, + "y": 10 + }, + { + "x": 1730544300000, + "y": 20 + }, + { + "x": 1730544330000, + "y": 10 + }, + { + "x": 1730544360000, + "y": 16 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 6 + }, + { + "x": 1730544450000, + "y": 10 + }, + { + "x": 1730544480000, + "y": 10 + }, + { + "x": 1730544510000, + "y": 4 + }, + { + "x": 1730544540000, + "y": 6 + }, + { + "x": 1730544570000, + "y": 10 + }, + { + "x": 1730544600000, + "y": 6 + }, + { + "x": 1730544630000, + "y": 14 + }, + { + "x": 1730544660000, + "y": 4 + }, + { + "x": 1730544690000, + "y": 22 + }, + { + "x": 1730544720000, + "y": 18 + }, + { + "x": 1730544750000, + "y": 4 + }, + { + "x": 1730544780000, + "y": 8 + }, + { + "x": 1730544810000, + "y": 6 + }, + { + "x": 1730544840000, + "y": 12 + }, + { + "x": 1730544870000, + "y": 8 + }, + { + "x": 1730544900000, + "y": 16 + }, + { + "x": 1730544930000, + "y": 4 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "habq", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 
216, + "pattern": "GetCartAsync called with userId", + "regex": ".*?GetCartAsync.+?called.+?with.+?userId.*?", + "sample": "GetCartAsync called with userId=", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "GetCartAsync called with userId=" + ] + }, + "firstOccurrence": "2024-11-02T10:36:36.912Z", + "lastOccurrence": "2024-11-02T10:55:33.022Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 10 + }, + { + "x": 1730543820000, + "y": 4 + }, + { + "x": 1730543850000, + "y": 6 + }, + { + "x": 1730543880000, + "y": 4 + }, + { + "x": 1730543910000, + "y": 6 + }, + { + "x": 1730543940000, + "y": 4 + }, + { + "x": 1730543970000, + "y": 2 + }, + { + "x": 1730544000000, + "y": 10 + }, + { + "x": 1730544030000, + "y": 10 + }, + { + "x": 1730544060000, + "y": 6 + }, + { + "x": 1730544090000, + "y": 2 + }, + { + "x": 1730544120000, + "y": 8 + }, + { + "x": 1730544150000, + "y": 8 + }, + { + "x": 1730544180000, + "y": 4 + }, + { + "x": 1730544210000, + "y": 6 + }, + { + "x": 1730544240000, + "y": 2 + }, + { + "x": 1730544270000, + "y": 4 + }, + { + "x": 1730544300000, + "y": 8 + }, + { + "x": 1730544330000, + "y": 4 + }, + { + "x": 1730544360000, + "y": 4 + }, + { + "x": 1730544390000, + "y": 2 + }, + { + "x": 1730544420000, + "y": 8 + }, + { + "x": 1730544450000, + "y": 4 + }, + { + "x": 1730544480000, + "y": 10 + }, + { + "x": 1730544510000, + "y": 2 + }, + { + "x": 1730544540000, + "y": 12 + }, + { + "x": 1730544570000, + "y": 8 + }, + { + "x": 1730544600000, + "y": 8 + }, + { + "x": 1730544630000, + "y": 6 + }, + { + "x": 1730544660000, + "y": 6 + }, + { + "x": 1730544690000, + "y": 2 + }, + { + "x": 1730544720000, + "y": 6 + }, + { + "x": 1730544750000, + "y": 6 + }, + { + "x": 1730544780000, + "y": 4 + }, + { + "x": 1730544810000, + "y": 2 + }, + { + "x": 1730544840000, + "y": 6 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 8 + }, + { + "x": 1730544930000, + "y": 4 + }, + { + "x": 
1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "tzfi", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 55, + "pattern": "Empty cart", + "regex": ".*?Empty.+?cart.*?", + "sample": "Empty cart", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "Empty cart" + ] + }, + "firstOccurrence": "2024-11-02T10:36:41.806Z", + "lastOccurrence": "2024-11-02T10:55:32.654Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 1 + }, + { + "x": 1730543820000, + "y": 3 + }, + { + "x": 1730543850000, + "y": 3 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 2 + }, + { + "x": 1730543940000, + "y": 2 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 1 + }, + { + "x": 1730544030000, + "y": 2 + }, + { + "x": 1730544060000, + "y": 1 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 1 + }, + { + "x": 1730544150000, + "y": 1 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 2 + }, + { + "x": 1730544240000, + "y": 1 + }, + { + "x": 1730544270000, + "y": 1 + }, + { + "x": 1730544300000, + "y": 4 + }, + { + "x": 1730544330000, + "y": 2 + }, + { + "x": 1730544360000, + "y": 2 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 1 + }, + { + "x": 1730544450000, + "y": 1 + }, + { + "x": 1730544480000, + "y": 1 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 4 + }, + { + "x": 1730544600000, + "y": 2 + }, + { + "x": 1730544630000, + "y": 2 + }, + { + "x": 1730544660000, + "y": 1 + }, + { + "x": 1730544690000, + "y": 3 + }, + { + "x": 1730544720000, + "y": 4 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 1 + }, + { + "x": 1730544810000, + 
"y": 0 + }, + { + "x": 1730544840000, + "y": 1 + }, + { + "x": 1730544870000, + "y": 1 + }, + { + "x": 1730544900000, + "y": 3 + }, + { + "x": 1730544930000, + "y": 1 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "ldwf", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 55, + "pattern": "feature_flag", + "regex": ".*?feature_flag.*?", + "sample": "feature_flag", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "feature_flag" + ] + }, + "firstOccurrence": "2024-11-02T10:36:41.809Z", + "lastOccurrence": "2024-11-02T10:55:32.655Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 1 + }, + { + "x": 1730543820000, + "y": 3 + }, + { + "x": 1730543850000, + "y": 3 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 2 + }, + { + "x": 1730543940000, + "y": 2 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 1 + }, + { + "x": 1730544030000, + "y": 2 + }, + { + "x": 1730544060000, + "y": 1 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 1 + }, + { + "x": 1730544150000, + "y": 1 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 2 + }, + { + "x": 1730544240000, + "y": 1 + }, + { + "x": 1730544270000, + "y": 1 + }, + { + "x": 1730544300000, + "y": 4 + }, + { + "x": 1730544330000, + "y": 2 + }, + { + "x": 1730544360000, + "y": 2 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 1 + }, + { + "x": 1730544450000, + "y": 1 + }, + { + "x": 1730544480000, + "y": 1 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 4 + }, + { + "x": 1730544600000, + "y": 2 + }, + { + "x": 1730544630000, + "y": 2 + }, + { + "x": 1730544660000, + "y": 1 + 
}, + { + "x": 1730544690000, + "y": 3 + }, + { + "x": 1730544720000, + "y": 4 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 1 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 1 + }, + { + "x": 1730544870000, + "y": 1 + }, + { + "x": 1730544900000, + "y": 3 + }, + { + "x": 1730544930000, + "y": 1 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "noer", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 2, + "pattern": "OTEL-SDK BatchActivityExportProcessor exporting to OtlpTraceExporter dropped items", + "regex": ".*?OTEL-SDK.+?BatchActivityExportProcessor.+?exporting.+?to.+?OtlpTraceExporter.+?dropped.+?items.*?", + "sample": "OTEL-SDK: [224] 'BatchActivityExportProcessor' exporting to 'OtlpTraceExporter' dropped '0' items.", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "OTEL-SDK: [224] 'BatchActivityExportProcessor' exporting to 'OtlpTraceExporter' dropped '0' items." 
+ ] + }, + "firstOccurrence": "2024-11-02T10:55:36.580Z", + "lastOccurrence": "2024-11-02T10:55:36.580Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "fiia", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 1, + 
"pattern": "my-otel-demo-valkey open", + "regex": ".*?my-otel-demo-valkey.+?open.*?", + "sample": "my-otel-demo-valkey (10.106.38.244:6379) open\n", + "highlight": { + "service.name": [ + "cartservice" + ], + "resource.attributes.service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "my-otel-demo-valkey (10.106.38.244:6379) open\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:34.993Z", + "lastOccurrence": "2024-11-02T10:55:34.993Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, 
+ { + "x": 1730544930000, + "y": 1 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "uzyf", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 78, + "pattern": "OTEL-SDK Instrument Meter has been deactivated", + "regex": ".*?OTEL-SDK.+?Instrument.+?Meter.+?has.+?been.+?deactivated.*?", + "sample": "OTEL-SDK: [224] Instrument 'process.runtime.dotnet.jit.compilation_time', Meter 'OpenTelemetry.Instrumentation.Runtime' has been deactivated.", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "OTEL-SDK: [224] Instrument 'process.runtime.dotnet.jit.compilation_time', Meter 'OpenTelemetry.Instrumentation.Runtime' has been deactivated." + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.585Z", + "lastOccurrence": "2024-11-02T10:55:36.586Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 
+ }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 78 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "lciw", + "relevance": "unusual", + "interesting": true + }, + { + "field": "message", + "count": 4, + "pattern": "OTEL-SDK Disposed", + "regex": ".*?OTEL-SDK.+?Disposed.*?", + "sample": "OTEL-SDK: [224] 'MeterProvider' Disposed.", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "OTEL-SDK: [224] 'MeterProvider' Disposed." 
+ ] + }, + "firstOccurrence": "2024-11-02T10:55:36.581Z", + "lastOccurrence": "2024-11-02T10:55:36.586Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 4 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "woqa", + "relevance": "unusual", + "interesting": true + }, + { + "field": "message", + "count": 3, + 
"pattern": "exiting", + "regex": ".*?exiting.*?", + "sample": "exiting...\n", + "highlight": { + "service.name": [ + "cartservice" + ], + "resource.attributes.service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "exiting...\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:55.622Z", + "lastOccurrence": "2024-11-02T10:56:51.618Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 1 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, 
+ "y": 1 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "ucuj", + "relevance": "unusual", + "interesting": true + }, + { + "field": "message", + "count": 2, + "pattern": "Application is shutting down", + "regex": ".*?Application.+?is.+?shutting.+?down.*?", + "sample": "Application is shutting down...", + "highlight": { + "service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "Application is shutting down..." + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.560Z", + "lastOccurrence": "2024-11-02T10:55:36.560Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + 
{ + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "cgip", + "relevance": "unusual", + "interesting": true + }, + { + "field": "message", + "count": 3, + "pattern": "FATAL Could not start entrypoint", + "regex": ".*?FATAL.+?Could.+?not.+?start.+?entrypoint.*?", + "sample": "FATAL: Could not start, bad entrypoint!\n", + "highlight": { + "service.name": [ + "cartservice" + ], + "resource.attributes.service.name": [ + "cartservice" + ] + }, + "metadata": { + "message": [ + "FATAL: Could not start, bad entrypoint!\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:35.618Z", + "lastOccurrence": "2024-11-02T10:56:31.617Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + 
"x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 0 + }, + { + "x": 1730544990000, + "y": 1 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "spike", + "significance": "high", + "change_point": 38, + "p_value": 1.8274885276877215e-20, + "timestamp": "2024-11-02T10:55:30.000Z" + }, + "shortId": "reqc", + "relevance": "critical", + "interesting": true + } + ], + "patternsFromOtherEntities": [ + { + "field": "message", + "count": 26, + "pattern": "I1102 replica_set.go Finished syncing logger replicaset-controller kind ReplicaSet key duration", + "regex": ".*?I1102.+?replica_set\\.go.+?Finished.+?syncing.+?logger.+?replicaset-controller.+?kind.+?ReplicaSet.+?key.+?duration.*?", + "sample": "I1102 10:55:33.013871 1 replica_set.go:679] \"Finished syncing\" logger=\"replicaset-controller\" kind=\"ReplicaSet\" key=\"default/my-otel-demo-cartservice-7b585f4fb7\" duration=\"101.333µs\"\n", + "highlight": { + "body.text": [ + "] \"Finished syncing\" logger=\"replicaset-controller\" kind=\"ReplicaSet\" key=\"default/my-otel-demo-cartservice" + ], + "message": [ + "] \"Finished syncing\" logger=\"replicaset-controller\" kind=\"ReplicaSet\" key=\"default/my-otel-demo-cartservice" + ] + }, + "metadata": { + "service.name": [ + "kube-controller-manager" + ], + "message": [ + "I1102 10:55:33.013871 1 replica_set.go:679] \"Finished syncing\" logger=\"replicaset-controller\" kind=\"ReplicaSet\" 
key=\"default/my-otel-demo-cartservice-7b585f4fb7\" duration=\"101.333µs\"\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:33.014Z", + "lastOccurrence": "2024-11-02T10:56:52.568Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 19 + }, + { + "x": 1730544960000, + "y": 2 + }, + { + "x": 1730544990000, + "y": 5 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "spike", + "significance": "high", + "change_point": 38, + "p_value": 
7.313742705200763e-147, + "timestamp": "2024-11-02T10:55:30.000Z" + }, + "shortId": "ofdx", + "relevance": "normal", + "interesting": true + }, + { + "field": "message", + "count": 21, + "pattern": "object apiVersion events.k8s.io/v1 deprecatedCount deprecatedFirstTimestamp deprecatedLastTimestamp deprecatedSource component eventTime null kind Event metadata creationTimestamp managedFields apiVersion v1 fieldsType FieldsV1 fieldsV1", + "regex": ".*?object.+?apiVersion.+?events\\.k8s\\.io/v1.+?deprecatedCount.+?deprecatedFirstTimestamp.+?deprecatedLastTimestamp.+?deprecatedSource.+?component.+?eventTime.+?null.+?kind.+?Event.+?metadata.+?creationTimestamp.+?managedFields.+?apiVersion.+?v1.+?fieldsType.+?FieldsV1.+?fieldsV1.*?", + "sample": "{\"object\":{\"apiVersion\":\"events.k8s.io/v1\",\"deprecatedCount\":2,\"deprecatedFirstTimestamp\":\"2024-11-02T10:56:17Z\",\"deprecatedLastTimestamp\":\"2024-11-02T10:56:52Z\",\"deprecatedSource\":{\"component\":\"kubelet\",\"host\":\"minikube\"},\"eventTime\":null,\"kind\":\"Event\",\"metadata\":{\"creationTimestamp\":\"2024-11-02T10:56:17Z\",\"managedFields\":[{\"apiVersion\":\"v1\",\"fieldsType\":\"FieldsV1\",\"fieldsV1\":{\"f:count\":{},\"f:firstTimestamp\":{},\"f:involvedObject\":{},\"f:lastTimestamp\":{},\"f:message\":{},\"f:reason\":{},\"f:reportingComponent\":{},\"f:reportingInstance\":{},\"f:source\":{\"f:component\":{},\"f:host\":{}},\"f:type\":{}},\"manager\":\"kubelet\",\"operation\":\"Update\",\"time\":\"2024-11-02T10:56:52Z\"}],\"name\":\"my-otel-demo-cartservice-7b585f4fb7-79ccz.1804217ce2cb3041\",\"namespace\":\"default\",\"resourceVersion\":\"375301\",\"uid\":\"cf211b63-a5e4-40c4-bed3-fe90aea76a38\"},\"note\":\"Back-off restarting failed container cartservice in pod 
my-otel-demo-cartservice-7b585f4fb7-79ccz_default(9580334a-7ef9-4ad5-baf6-0ad10ae49853)\",\"reason\":\"BackOff\",\"regarding\":{\"apiVersion\":\"v1\",\"fieldPath\":\"spec.containers{cartservice}\",\"kind\":\"Pod\",\"name\":\"my-otel-demo-cartservice-7b585f4fb7-79ccz\",\"namespace\":\"default\",\"resourceVersion\":\"375163\",\"uid\":\"9580334a-7ef9-4ad5-baf6-0ad10ae49853\"},\"reportingController\":\"kubelet\",\"reportingInstance\":\"minikube\",\"type\":\"Warning\"},\"type\":\"MODIFIED\"}", + "highlight": { + "message": [ + "},\"manager\":\"kubelet\",\"operation\":\"Update\",\"time\":\"2024-11-02T10:56:52Z\"}],\"name\":\"my-otel-demo-cartservice", + "375301\",\"uid\":\"cf211b63-a5e4-40c4-bed3-fe90aea76a38\"},\"note\":\"Back-off restarting failed container cartservice", + "in pod my-otel-demo-cartservice-7b585f4fb7-79ccz_default(9580334a-7ef9-4ad5-baf6-0ad10ae49853)\",\"reason", + "\":\"BackOff\",\"regarding\":{\"apiVersion\":\"v1\",\"fieldPath\":\"spec.containers{cartservice}\",\"kind\":\"Pod\",\"name", + "\":\"my-otel-demo-cartservice-7b585f4fb7-79ccz\",\"namespace\":\"default\",\"resourceVersion\":\"375163\",\"uid\":" + ] + }, + "metadata": { + "service.name": [ + "unknown" + ], + "message": [ + 
"{\"object\":{\"apiVersion\":\"events.k8s.io/v1\",\"deprecatedCount\":2,\"deprecatedFirstTimestamp\":\"2024-11-02T10:56:17Z\",\"deprecatedLastTimestamp\":\"2024-11-02T10:56:52Z\",\"deprecatedSource\":{\"component\":\"kubelet\",\"host\":\"minikube\"},\"eventTime\":null,\"kind\":\"Event\",\"metadata\":{\"creationTimestamp\":\"2024-11-02T10:56:17Z\",\"managedFields\":[{\"apiVersion\":\"v1\",\"fieldsType\":\"FieldsV1\",\"fieldsV1\":{\"f:count\":{},\"f:firstTimestamp\":{},\"f:involvedObject\":{},\"f:lastTimestamp\":{},\"f:message\":{},\"f:reason\":{},\"f:reportingComponent\":{},\"f:reportingInstance\":{},\"f:source\":{\"f:component\":{},\"f:host\":{}},\"f:type\":{}},\"manager\":\"kubelet\",\"operation\":\"Update\",\"time\":\"2024-11-02T10:56:52Z\"}],\"name\":\"my-otel-demo-cartservice-7b585f4fb7-79ccz.1804217ce2cb3041\",\"namespace\":\"default\",\"resourceVersion\":\"375301\",\"uid\":\"cf211b63-a5e4-40c4-bed3-fe90aea76a38\"},\"note\":\"Back-off restarting failed container cartservice in pod my-otel-demo-cartservice-7b585f4fb7-79ccz_default(9580334a-7ef9-4ad5-baf6-0ad10ae49853)\",\"reason\":\"BackOff\",\"regarding\":{\"apiVersion\":\"v1\",\"fieldPath\":\"spec.containers{cartservice}\",\"kind\":\"Pod\",\"name\":\"my-otel-demo-cartservice-7b585f4fb7-79ccz\",\"namespace\":\"default\",\"resourceVersion\":\"375163\",\"uid\":\"9580334a-7ef9-4ad5-baf6-0ad10ae49853\"},\"reportingController\":\"kubelet\",\"reportingInstance\":\"minikube\",\"type\":\"Warning\"},\"type\":\"MODIFIED\"}" + ] + }, + "firstOccurrence": "2024-11-02T10:55:33.051Z", + "lastOccurrence": "2024-11-02T10:56:52.563Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 
1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 16 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, + "y": 4 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "spike", + "significance": "high", + "change_point": 38, + "p_value": 2.9750218550484796e-168, + "timestamp": "2024-11-02T10:55:30.000Z" + }, + "shortId": "lqan", + "relevance": "critical", + "interesting": true + } + ], + "searches": [ + { + "fragments": [ + "AddItemAsync called with userId", + "productId", + "quantity" + ], + "appearsAs": "userId, productId, and quantity are referenced in the request handling log entries in cartservice." + }, + { + "fragments": [ + "GetCartAsync called with userId" + ], + "appearsAs": "userId is referenced in the request handling log entries in cartservice." 
+ }, + { + "fragments": [ + "Empty cart" + ], + "appearsAs": "Empty cart operation logs in cartservice." + }, + { + "fragments": [ + "cartServiceFailure" + ], + "appearsAs": "Feature flag 'cartServiceFailure' in cartservice." + }, + { + "fragments": [ + "10.244.0.61" + ], + "appearsAs": "IP address of the cartservice pod." + }, + { + "fragments": [ + "my-otel-demo-cartservice-67575c6f84-vngzw" + ], + "appearsAs": "Pod name of cartservice." + }, + { + "fragments": [ + "my-otel-demo-valkey:6379" + ], + "appearsAs": "Redis service used by cartservice." + }, + { + "fragments": [ + "/oteldemo.CartService/AddItem", + "/oteldemo.CartService/GetCart", + "/oteldemo.CartService/EmptyCart" + ], + "appearsAs": "Endpoints handled by cartservice." + }, + { + "fragments": [ + "flagd.evaluation.v1.Service" + ], + "appearsAs": "gRPC service involved in feature flag evaluation by cartservice." + }, + { + "fragments": [ + "HGET", + "EXPIRE", + "HMSET" + ], + "appearsAs": "Redis commands executed by cartservice." + } + ], + "relatedEntitiesSummaries": [ + "Based on the context provided, the following entities are potentially related to `service.name:cartservice` as either upstream or downstream dependencies. Below is a detailed analysis based on the indicators of relationships obtained from the observability data:\n\n### 1. Related Entity: `service.name:controller`\n- **Indicators**: \n - **Average Indicator**: The logs highlight high error rates (500 Internal Server Error) in the controller when POST requests to the `/api/cart` endpoint are made.\n - **Field values**: In controller logs, `message` contains references to the `/api/cart` endpoint, which is handled by `cartservice`.\n- **Reasoning**: \n - The `controller` service is responsible for managing /api/cart requests. 
Thus, it acts as a direct upstream service to `cartservice`.\n - It implies `controller` calls `cartservice` as part of handling requests, leading to the observed errors.\n- **Likeliness**: High\n - The `controller` service is likely a highly relevant entity since it directly interfaces with `cartservice`, with the errors surfacing in it when interacting with `cartservice`.\n\n### 2. Related Entity: `service.name:frontend`\n- **Indicators**:\n - **Strong Indicator**: The system architecture documentation shows that `frontend` interacts with various backend services, including those for user interactions such as adding items to the cart.\n - **Field values**: In the knowledge base, `frontend` receives HTTP traffic routed through `frontendproxy` and interacts with `cartservice`.\n- **Reasoning**:\n - `frontend` would act as a direct upstream service that communicates with `controller`, providing requests that eventually hit `cartservice`.\n- **Likeliness**: High\n - As `frontend` handles initial user interactions and routes requests to `cartservice`, it is essential in the request flow leading to `/api/cart` errors.\n\n### 3. Related Entity: `service.name:currencyservice`\n- **Indicators**:\n - **Strong Indicator**: The architecture description indicates `currencyservice` interacts with `cartservice` to handle currency conversions.\n - **Field values**: The entries show gRPC calls between `cartservice` and `currencyservice` for conversion operations.\n- **Reasoning**:\n - `currencyservice` may be an upstream dependency for `cartservice` for any currency-related manipulations on cart items.\n- **Likeliness**: Medium-High\n - While `currencyservice` interacts with `cartservice`, its exact influence on the direct issue might not be as strong as `controller` or `frontend`.\n\n### 4. 
Related Entity: `service.name:checkoutservice`\n- **Indicators**:\n - **Strong Indicator**: The architecture documentation lists `checkoutservice` as coordinating the checkout process involving interactions with `cartservice`.\n - **Field values**: Distributed tracing data could show `checkoutservice` making gRPC or HTTP calls to `cartservice`.\n- **Reasoning**:\n - `checkoutservice` is likely a downstream service that relies on the correct functioning of `cartservice` to gather cart data before proceeding to checkout operations.\n- **Likeliness**: Medium\n - Although primarily a downstream dependency, it processes important data from `cartservice`.\n\n### 5. Related Entity: `service.name:adservice`\n- **Indicators**:\n - **Weak Indicator**: `adservice` interfaces with `frontend` via gRPC to deliver ad content, it also might indirectly influence `cartservice` interactions if ads are part of the cart page.\n - **Field values**: Architectural details state `adservice` operates in the background to enhance user experience but no direct interaction with `cartservice`.\n- **Reasoning**:\n - While not directly connected, any latency in `adservice` may affect the overall user experience, causing indirect latency in calls that reach `cartservice`.\n- **Likeliness**: Low-Medium\n - Indirect connection and latent repercussions on user interactions with cart functionality.\n\n### 6. 
Related Entity: `service.name:loadgenerator`\n- **Indicators**:\n - **Average Indicator**: The `loadgenerator` service generates simulated traffic, handled by `frontendproxy` to stress-test services like `cartservice`.\n - **Field values**: Traffic from `loadgenerator` is routed through `frontendproxy` to reach core services, including `cartservice`.\n- **Reasoning**:\n - As an external input simulator, its traffic generation directly impacts `cartservice` when testing load resilience.\n- **Likeliness**: Medium\n - The errors can be a result of load tests conducted by `loadgenerator` surfacing bottlenecks in `cartservice`.\n\n### Summary\n\nTo conclude, the following entities are likely relevant to the issue in `service.name:cartservice`:\n\n1. **controller**: Direct upstream causing the 500 errors - High relevance.\n2. **frontend**: Upstream service initiating user interactions - High relevance.\n3. **currencyservice**: Upstream service for currency conversion - Medium-High relevance.\n4. **checkoutservice**: Downstream service relying on cart data - Medium relevance.\n5. **adservice**: Possible indirect impact due to latency - Low-Medium relevance.\n6. **loadgenerator**: Simulated traffic testing the system's resilience - Medium relevance.\n\nBy analyzing these entities, you can prioritize investigating `controller`, `frontend`, and `currencyservice` first, as they have the most direct and potentially impactful interactions with `cartservice`.", + "Based on the context provided, the architecture outlined in the knowledge base entry, the document analysis for the `cartservice`, and search keywords, several possible relationships with the `cartservice` can be identified. Below are the entities identified as highly relevant to the `cartservice`, along with the indicators, evidence, reasoning, and overall relevance.\n\n### 1. 
Entity: `service.name:frontend`\n\n#### Indicators and Evidence\n- **Average Indicator - URL Paths:**\n - `http://my-otel-demo-cartservice:8080/oteldemo.CartService/AddItem`\n - `http://my-otel-demo-cartservice:8080/oteldemo.CartService/GetCart`\n - `http://my-otel-demo-cartservice:8080/oteldemo.CartService/EmptyCart`\n- **Average Indicator - Logs:**\n - Observed in the search highlights for `service.name:frontend`: `@opentelemetry/api/build/src/api/context.js:60:46) at ServiceClientImpl.clientMethodTrace [as getCart]`\n- **Knowledge Base:**\n - The `frontend` is the core service for user interactions, receiving HTTP traffic and making requests to the `cartservice`.\n\n#### Relationship and Reasoning\nThe `frontend` service interacts directly with the `cartservice` by making requests to handle user shopping cart operations, such as adding items, retrieving the cart, and emptying the cart. Given the observed log entries and URL paths, it is reasonable to infer that the `frontend` service acts as a caller, while the `cartservice` acts as the callee. The high error rates observed in the context likely originate from the `frontend` making POST requests to the `/api/cart` endpoint handled by the `cartservice`.\n\n#### Likelihood of Relevance\n- **Very High:** The `frontend` service is highly likely to be relevant as it has a direct interaction with the `cartservice` and would be significantly impacted by any issues within the `cartservice`.\n\n### 2. Entity: `service.name:valkey`\n\n#### Indicators and Evidence\n- **Average Indicator - Redis Service:**\n - `my-otel-demo-valkey:6379`\n- **Average Indicator - Log Entries:**\n - Log patterns show Redis command execution by `cartservice`: `HGET`, `EXPIRE`, `HMSET`\n\n#### Relationship and Reasoning\nThe `valkey` service corresponds to the Redis instance that `cartservice` uses for data persistence. 
The `cartservice` makes frequent calls to Redis to perform operations such as fetching, expiring, and setting data, which is crucial for managing shopping cart state. As Redis is an upstream dependency, connection or data integrity issues here could directly contribute to the `500 Internal Server Errors` observed in the `cartservice`.\n\n#### Likelihood of Relevance\n- **High:** The `valkey` service is highly relevant because it is the data store for `cartservice`, directly affecting its behavior and performance.\n\n### 3. Entity: `service.name:currencyservice`\n\n#### Indicators and Evidence\n- **Average Indicator - gRPC Services:**\n - Mentioned in the architecture as handling currency conversions.\n- **Knowledge Base:**\n - Communicates with `cartservice` over gRPC to facilitate currency operations.\n\n#### Relationship and Reasoning\nThe `currencyservice` may interact with `cartservice` to handle price conversions for the items in the cart. Problems with currency conversion could cause unexpected data formats or communication failures, leading to `500 Internal Server Errors` in `cartservice`.\n\n#### Likelihood of Relevance\n- **Moderate:** While the interaction exists, whether it directly contributes to the 500 errors specifically observed at `/api/cart` endpoints would require further investigation.\n\n### 4. Entity: `service.name:checkoutservice`\n\n#### Indicators and Evidence\n- **Strong Indicator - gRPC Services:**\n - Interacts with `checkoutservice` during the checkout process.\n- **Knowledge Base:**\n - Handles interactions including calls to `cartservice` for finalizing orders.\n\n#### Relationship and Reasoning\nThe `checkoutservice` could potentially depend on `cartservice` to validate and finalize items in the user's cart as part of the checkout process. 
If the `cartservice` fails during these operations, it could propagate errors back up to the user level through `checkoutservice`.\n\n#### Likelihood of Relevance\n- **Moderate:** The relevance depends on whether the observed errors are occurring during checkout processes or general cart manipulations.\n\n### 5. Entity: `service.name:flagd`\n\n#### Indicators and Evidence\n- **Average Indicator - Feature Flag Evaluation:**\n - `flagd.evaluation.v1.Service`\n- **Log Metadata:**\n - `labels.feature_flag_key:keyword - cartServiceFailure`\n- **Trace Destination Service:**\n - `my-otel-demo-flagd:8013`\n\n#### Relationship and Reasoning\nThe `flagd` service is responsible for evaluating feature flags, such as the `cartServiceFailure`. If the feature flagging system is improperly toggled (e.g.: feature flag set to fail operations), it might contribute to the high error rates observed in `cartservice`.\n\n#### Likelihood of Relevance\n- **Moderate:** The feature flags might be causing or amplifying existing issues within the `cartservice`.\n\n### Summary\nFrom the observed data, the following entities are highly likely to be related to `cartservice` with respect to the given context:\n\n1. **`frontend`** - High relevance due to direct HTTP interaction with `cartservice`.\n2. **`valkey`** - High relevance as the Redis data store for `cartservice`.\n\nEntities with moderate likelihood:\n\n1. **`currencyservice`** - Related via gRPC for currency operations.\n2. **`checkoutservice`** - Potentially related during checkout processes.\n3. **`flagd`** - Related to feature flag evaluations involving `cartservice`.\n\nThese relationships cover both upstream dependencies affecting `cartservice` and downstream services impacted by `cartservice` issues, aiding in comprehensive issue diagnosis." 
+ ], + "kbEntries": [ + { + "id": "System architecture", + "text": "The architecture described here outlines a microservices-based system, where each service is implemented in a distinct programming language and communicates via gRPC, HTTP, or TCP. This system is designed to handle simulated user traffic, supported by a variety of interconnected services and components.\n\n### System Architecture\n\n1. **`loadgenerator`** - Simulates external user traffic by sending HTTP requests, which are managed by an Nginx ingress controller. This ingress directs traffic to the `frontendproxy` service.\n\n2. **`frontendproxy` (Envoy)** - Acts as a reverse proxy, routing incoming traffic from `loadgenerator` to `frontend`.\n\n3. **`frontend` (Node.js)** - The core service for user interactions, receiving HTTP traffic from `frontendproxy` and interfacing with various backend services to fulfill requests.\n\n4. **`frontend-web` (RUM)** - A Real User Monitoring (RUM) layer that runs in the user's browser, enabling insights into end-user experiences and frontend performance.\n\n5. **`adservice`** - Delivers advertisements to the `frontend` using gRPC, enhancing the user experience with relevant ad content.\n\n6. **`cartservice`** - Manages shopping cart data, including adding and removing items. It communicates over gRPC and leverages a Redis cache for data persistence.\n\n7. **`currencyservice`** - Handles currency conversions and facilitates interactions between `cartservice` and `checkoutservice` over gRPC.\n\n8. **`checkoutservice`** - Coordinates the checkout process, calling various services for payments, shipping, and emails. It utilizes both gRPC and HTTP protocols to aggregate the necessary information for order completion.\n\n9. **`emailservice`** - Sends order confirmation emails to users via gRPC, triggered by interactions with `checkoutservice`.\n\n10. 
**`productcatalogservice`** - Maintains the product catalog, storing details about available items and providing this data to other services via gRPC.\n\n11. **`recommendationservice`** - Generates personalized product recommendations, accessed by `frontend` over gRPC.\n\n12. **`shippingservice`** - Manages shipping information, providing essential data to `checkoutservice` over gRPC.\n\n13. **`quoteservice`** - Supplies shipping quotes over HTTP, which are accessed by `shippingservice` to estimate shipping costs.\n\n14. **`paymentservice`** - Processes payment transactions through gRPC, enabling secure and efficient payments for `checkoutservice`.\n\n15. **`accountingservice`** - Responsible for recording transactions, it connects to a Kafka queue and interacts over TCP.\n\n16. **`frauddetectionservice`** - Monitors orders for potential fraud, also interfacing with the Kafka queue over TCP to receive relevant transaction data.\n\n17. **`imageprovider` (Nginx)** - Serves static images for the frontend interface, accessible over HTTP.\n\n18. **`queue` (Kafka)** - Acts as a central message broker, facilitating communication between `accountingservice` and `frauddetectionservice`.\n\n### Key Components\nThe system is structured to enable robust, distributed communication across services, allowing each component to focus on a specific aspect of the user experience or business logic. 
The ingress controller, in particular, plays a critical role in directing traffic, ensuring that incoming requests from `loadgenerator` are correctly routed through `frontendproxy` and into the system’s core services.\n\nThis architecture ensures that the system can handle high traffic loads and provides a modular, flexible framework to meet user demands while monitoring for performance and security.\n", + "tokens": 733, + "score": 5 + } + ] + }, + "entity": { + "service.name": "cartservice" + }, + "relatedEntities": [ + { + "entity": { + "service.name": "controller" + }, + "reason": "The controller service is responsible for managing /api/cart requests. Thus, it acts as a direct upstream service to cartservice. High errors were observed in the controller when interacting with cartservice.", + "confidence": "high" + }, + { + "entity": { + "service.name": "frontend" + }, + "reason": "The frontend service interacts directly with the controller, handling user interactions that lead to /api/cart requests which are eventually processed by cartservice.", + "confidence": "high" + }, + { + "entity": { + "service.name": "currencyservice" + }, + "reason": "The currencyservice interacts with cartservice to handle currency conversions, acting as an upstream dependency for cartservice when performing currency-related operations on cart items.", + "confidence": "medium-high" + }, + { + "entity": { + "service.name": "checkoutservice" + }, + "reason": "The checkoutservice relies on cart data from cartservice during the checkout process, making it a downstream service dependent on the correct functioning of cartservice.", + "confidence": "medium" + }, + { + "entity": { + "service.name": "loadgenerator" + }, + "reason": "The loadgenerator service generates simulated traffic which impacts cartservice directly when testing the system's resilience.", + "confidence": "medium" + }, + { + "entity": { + "service.name": "adservice" + }, + "reason": "The adservice might indirectly impact 
cartservice through the frontend service by affecting the overall user experience and potentially causing indirect latency.", + "confidence": "low-medium" + }, + { + "entity": { + "service.name": "frontend" + }, + "reason": "The `frontend` service is highly likely to be relevant as it has a direct interaction with the `cartservice` and would be significantly impacted by any issues within the `cartservice`.", + "confidence": "very high" + }, + { + "entity": { + "service.name": "valkey" + }, + "reason": "The `valkey` service is highly relevant because it is the data store for `cartservice`, directly affecting its behavior and performance.", + "confidence": "high" + }, + { + "entity": { + "service.name": "currencyservice" + }, + "reason": "The `currencyservice` may interact with `cartservice` to handle price conversions for the items in the cart. Problems with currency conversion could cause unexpected data formats or communication failures, leading to `500 Internal Server Error` in `cartservice`.", + "confidence": "moderate" + }, + { + "entity": { + "service.name": "checkoutservice" + }, + "reason": "The `checkoutservice` could potentially depend on `cartservice` to validate and finalize items in the user's cart as part of the checkout process. If the `cartservice` fails during these operations, it could propagate errors back up to the user level through `checkoutservice`.", + "confidence": "moderate" + } + ], + "summary": "Based on the context provided, including the system architecture and data samples, the entity `service.name:cartservice` can be described as follows:\n\n### Infrastructure & Environment\n`cartservice` operates within a microservices-based architecture. It is containerized and runs within a Kubernetes environment. Specifically, the data sample indicates that the service is deployed in the `default` Kubernetes namespace and is running on a pod named `my-otel-demo-cartservice-67575c6f84-vngzw` with an IP address of `10.244.0.61`. 
The service environment is tagged as `opentelemetry-demo`, indicating the use of OpenTelemetry for observability purposes. The infrastructure includes a Redis cache used by the cart service for data persistence.\n\n### Communication Characteristics (Protocols and Endpoints)\n`cartservice` primarily communicates using gRPC. It handles shopping cart data including adding and removing items. The service interacts with Redis (as indicated by the `labels.db_redis_flags` attribute). Additionally, the service integrates with external feature flag providers as seen from the `feature_flag_key` and `feature_flag_provider_name` attributes, which suggests it evaluates feature flags, possibly targeting the feature key `cartServiceFailure`. \n\nThe communication endpoints and protocols from the sample data are:\n- gRPC methods: `/oteldemo.CartService/EmptyCart`, `/oteldemo.CartService/AddItem`, `/oteldemo.CartService/GetCart`\n- HTTP endpoint: `http://my-otel-demo-flagd:8013/flagd.evaluation.v1.Service/ResolveBoolean`\n- Redis database interactions: `HGET`, `HMSET`, `EXPIRE` commands targeting various identifiers like `my-otel-demo-valkey:6379`.\n\n### Context of Entity in Investigation\nThe cart service (`cartservice`) is being investigated as it is the downstream service handling `/api/cart` requests. Issues in `cartservice` are directly contributing to the error patterns in the controller layer for the `/api/cart` endpoint. A high error rate, culminating in `500 Internal Server Error` responses, started around `2024-11-02T10:56:42.498Z`. 
The context indicates that the cart service plays a crucial role in managing shopping cart operations.\n\nGiven the observed high error rates and the 500 errors, `cartservice` may be experiencing issues in processing gRPC calls, feature flag evaluations, or Redis caching, which could be causing failures that propagate upstream to the controller.\n\n### Related Entities and Previous Investigations\nPreviously, errors in the controller for POST requests to `/api/cart` have been observed. This investigation is a continuation, focusing on the downstream `cartservice` which is responsible for handling these requests. This service's interaction with other entities such as Redis (for caching cart data) and external feature flag services (potentially toggling functionality) suggest that any issues in these integrations could impact the `cartservice` performance and result in the observed errors upstream in the controller.\n\nThis detailed analysis moves us to further inspect specific logs, traces, and any interdependent service metrics that may provide clues about the root cause of the high error rate in `cartservice`.\n\n### Observations for service.name:cartservice\n\n#### Startup Messages\n1. **Pattern: \"OTEL-SDK: [224] Instrument 'process.runtime.dotnet.jit.compilation_time', Meter 'OpenTelemetry.Instrumentation.Runtime' has been deactivated.\"**\n - **Count:** 78\n - **Trend:** There was a spike at **2024-11-02T10:55:30.000Z** with 78 occurrences. Before this timestamp, the value was consistently 0.\n\n2. **Pattern: \"OTEL-SDK: [224] 'MeterProvider' Disposed.\"**\n - **Count:** 4\n - **Trend:** Occurrences were persistently recorded as 0 until a brief spike at **2024-11-02T10:55:30.000Z** with 4 instances.\n\n3. **Pattern: \"exiting...\"**\n - **Count:** 3\n - **Trend:** Consistently 0 until a minor rise at **2024-11-02T10:55:30.000Z**, with 3 instances recorded.\n\n4. 
**Pattern: \"Application is shutting down...\"**\n - **Count:** 2\n - **Trend:** Log entries were consistently at 0 with a brief spike at **2024-11-02T10:55:30.000Z** showing 2 instances.\n\n#### Fatal Errors\n5. **Pattern: \"FATAL: Could not start, bad entrypoint!\"**\n - **Count:** 3\n - **Trend:** This pattern shows a significant change at **2024-11-02T10:55:30.000Z** with 2 occurrences at the change point and one additional at **2024-11-02T10:56:30.000Z**. Before this period, the occurrence rate was consistently 0.\n\n### Conclusion\n\nThe patterns observed indicate that there were significant startup issues with the `cartservice` around the **2024-11-02T10:55:30.000Z** mark. Specifically, the `cartservice` encountered a fatal error (\"FATAL: Could not start, bad entrypoint!\") that aligns precisely with the rise in errors noted during this period. This error indicates that the service failed to start due to a misconfiguration or issues with the entry point. This corresponds with the sudden spike in errors and `500 Internal Server Error` responses observed in the downstream controller making POST requests to the `/api/cart` endpoint. 
Furthermore, logs related to the service's deactivation and shutdown support the perspective that the service was not running successfully or consistently after this time.\n\n### Timeline of significant events\n\n- **2024-11-02T10:55:30.000Z**: \n - **Log Message**: “OTEL-SDK: [224] Instrument 'process.runtime.dotnet.jit.compilation_time', Meter 'OpenTelemetry.Instrumentation.Runtime' has been deactivated.”\n - **Description**: Spike detected with 78 occurrences, suggesting a potential issue within the service's runtime environment.\n\n- **2024-11-02T10:55:30.000Z**: \n - **Log Message**: “OTEL-SDK: [224] 'MeterProvider' Disposed.”\n - **Description**: Sudden spike of 4 occurrences suggesting a significant change in the service’s telemetry setup or a potential abnormal shutdown initiation.\n\n- **2024-11-02T10:55:30.000Z**: \n - **Log Message**: “exiting...”\n - **Description**: Log entries rose to 3 incidences, signaling the cart service may be initiating an unexpected exit.\n\n- **2024-11-02T10:55:30.000Z**: \n - **Log Message**: “Application is shutting down...”\n - **Description**: 2 instances of this log message indicating the application is attempting to shut down around this time.\n\n- **2024-11-02T10:55:30.000Z**: \n - **Log Message**: “FATAL: Could not start, bad entrypoint!”\n - **Description**: Abrupt fatal error occurs 2 times, implying a critical misconfiguration or startup failure.\n\n- **2024-11-02T10:56:30.000Z**: \n - **Log Message**: “FATAL: Could not start, bad entrypoint!”\n - **Description**: Another occurrence of the critical fatal error, indicating the issue persisted beyond the initial spike and may have contributed to continued disruptions.\n\n- **2024-11-02T10:56:42.498Z**: \n - **Alert**: High error rate observed in the controller (98.78% error rate for POST requests to /api/cart).\n - **Description**: Starts the period of significantly high error rates contributing to 500 Internal Server Error responses. 
\n\n### Context and reasoning\n\nGiven the high error rate (98.78%) starting from **2024-11-02T10:56:42.498Z** for POST requests to `/api/cart`, the `cartservice` is a critical component for investigation as it handles these requests directly. From logs, there is a clear indication that significant problems began around **2024-11-02T10:55:30.000Z**, approximately one minute before the heightened error rate was observed in the upstream controller.\n\n**1. Startup Issues**: Multiple startup-related log patterns such as instruments deactivation, disposal of the MeterProvider, and application exit/shutdown messages indicate that cartservice experienced troubles initializing. Notably, the log entries “FATAL: Could not start, bad entrypoint!” directly show critical failures in service entry points likely causing the downstream failures.\n\n**2. Fatal Errors**: The fatal errors precisely correlate with the timeframe where errors spiked in the upstream service (controller). The error messages “FATAL: Could not start, bad entrypoint!” occurring multiple times reflect a misconfiguration or critical code issue that prevented the service from running properly.\n\nThus, the evidence from log patterns strongly suggests that startup and initialization problems in `cartservice`, resulting in its inconsistent availability and operational failures, is the root cause of the observed high error rates and 500 Internal Server Errors in the controller's POST requests to `/api/cart`." 
+ }, + { + "attachments": { + "alerts": [], + "slos": [], + "analysis": { + "total": 98017, + "sampled": 1000, + "fields": [ + "@timestamp:date - 327 distinct values", + "app.label.component:keyword - 1 distinct values (`frontend`)", + "attributes.log.file.path.text:text - 1 distinct values (`/var/log/pods/default_my-otel-demo-frontend-5bbf4d78bc-qtwdr_a9fcfff9-3524-411e-8ac9-4e4341c1a121/frontend/0.log`)", + "attributes.log.file.path:keyword - 1 distinct values (`/var/log/pods/default_my-otel-demo-frontend-5bbf4d78bc-qtwdr_a9fcfff9-3524-411e-8ac9-4e4341c1a121/frontend/0.log`)", + "attributes.log.iostream:keyword - 1 distinct values (`stderr`)", + "body.text:text - 36 distinct values (` code: 13,\n`, ` details: 'cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"',\n`, 34 more values)", + "data_stream.dataset:keyword - 1 distinct values (`generic.otel`)", + "data_stream.namespace:keyword - 1 distinct values (`default`)", + "data_stream.type:keyword - 1 distinct values (`logs`)", + "deployment.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "dropped_attributes_count:long - 1 distinct values (`0`)", + "event.dataset:keyword - 1 distinct values (`generic.otel`)", + "host.arch:keyword - 1 distinct values (`arm64`)", + "host.architecture:keyword - 1 distinct values (`arm64`)", + "host.cpu.cache.l2.size:long - 1 distinct values (`0`)", + "host.cpu.family:keyword - 1 distinct values (``)", + "host.cpu.model.id:keyword - 1 distinct values (`0x000`)", + "host.cpu.model.name:keyword - 1 distinct values (``)", + "host.cpu.stepping:keyword - 1 distinct values (`0`)", + "host.cpu.vendor.id:keyword - 1 distinct values (`Apple`)", + "host.ip:ip - 2 distinct values (`10.244.0.19`, `fe80::28ce:acff:fe42:368e`)", + "host.mac:keyword - 1 distinct values (`2A-CE-AC-42-36-8E`)", + "host.os.full:keyword - 1 
distinct values (`Ubuntu 24.04.1 LTS (Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "host.os.platform:keyword - 1 distinct values (`linux`)", + "k8s.container.name:keyword - 1 distinct values (`frontend`)", + "k8s.container.restart_count:keyword - 1 distinct values (`0`)", + "k8s.deployment.name:keyword - 1 distinct values (`my-otel-demo-frontend`)", + "k8s.namespace.name:keyword - 1 distinct values (`default`)", + "k8s.node.name:keyword - 1 distinct values (`minikube`)", + "k8s.pod.name:keyword - 1 distinct values (`my-otel-demo-frontend-5bbf4d78bc-qtwdr`)", + "k8s.pod.start_time:keyword - 1 distinct values (`2024-10-26T09:00:25Z`)", + "k8s.pod.uid:keyword - 1 distinct values (`a9fcfff9-3524-411e-8ac9-4e4341c1a121`)", + "kubernetes.deployment.name:keyword - 1 distinct values (`my-otel-demo-frontend`)", + "kubernetes.namespace:keyword - 1 distinct values (`default`)", + "kubernetes.node.name:keyword - 1 distinct values (`minikube`)", + "kubernetes.pod.name:keyword - 1 distinct values (`my-otel-demo-frontend-5bbf4d78bc-qtwdr`)", + "kubernetes.pod.uid:keyword - 1 distinct values (`a9fcfff9-3524-411e-8ac9-4e4341c1a121`)", + "log.file.path:keyword - 1 distinct values (`/var/log/pods/default_my-otel-demo-frontend-5bbf4d78bc-qtwdr_a9fcfff9-3524-411e-8ac9-4e4341c1a121/frontend/0.log`)", + "log.iostream:keyword - 1 distinct values (`stderr`)", + "message:text - 36 distinct values (` code: 13,\n`, ` details: 'cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"',\n`, 34 more values)", + "observed_timestamp:date_nanos - 1000 distinct values (`2024-11-02T10:56:50.564274379Z`, `2024-11-02T10:55:46.563453752Z`, `2024-11-02T10:56:52.76272763Z`, `2024-11-02T10:56:14.764728626Z`, `2024-11-02T10:55:51.964080879Z`, 
`2024-11-02T10:55:51.764216463Z`, `2024-11-02T10:56:50.564063629Z`, `2024-11-02T10:55:38.563141804Z`, `2024-11-02T10:55:53.963540672Z`, `2024-11-02T10:56:32.762493176Z`, 990 more values)", + "os.description:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS (Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "os.type:keyword - 1 distinct values (`linux`)", + "resource.attributes.app.label.component:keyword - 1 distinct values (`frontend`)", + "resource.attributes.deployment.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "resource.attributes.host.arch:keyword - 1 distinct values (`arm64`)", + "resource.attributes.host.cpu.cache.l2.size:long - 1 distinct values (`0`)", + "resource.attributes.host.cpu.family:keyword - 1 distinct values (``)", + "resource.attributes.host.cpu.model.id:keyword - 1 distinct values (`0x000`)", + "resource.attributes.host.cpu.model.name.text:text - 1 distinct values (``)", + "resource.attributes.host.cpu.model.name:keyword - 1 distinct values (``)", + "resource.attributes.host.cpu.stepping:keyword - 1 distinct values (`0`)", + "resource.attributes.host.cpu.vendor.id:keyword - 1 distinct values (`Apple`)", + "resource.attributes.host.ip:ip - 2 distinct values (`10.244.0.19`, `fe80::28ce:acff:fe42:368e`)", + "resource.attributes.host.mac:keyword - 1 distinct values (`2A-CE-AC-42-36-8E`)", + "resource.attributes.host.name:keyword - 1 distinct values (`otel-daemonset-opentelemetry-collector-agent-7jlpk`)", + "resource.attributes.k8s.container.name.text:text - 1 distinct values (`frontend`)", + "resource.attributes.k8s.container.name:keyword - 1 distinct values (`frontend`)", + "resource.attributes.k8s.container.restart_count:keyword - 1 distinct values (`0`)", + "resource.attributes.k8s.deployment.name:keyword - 1 distinct values (`my-otel-demo-frontend`)", + "resource.attributes.k8s.namespace.name:keyword - 1 distinct values (`default`)", + 
"resource.attributes.k8s.node.name:keyword - 1 distinct values (`minikube`)", + "resource.attributes.k8s.pod.name:keyword - 1 distinct values (`my-otel-demo-frontend-5bbf4d78bc-qtwdr`)", + "resource.attributes.k8s.pod.start_time:keyword - 1 distinct values (`2024-10-26T09:00:25Z`)", + "resource.attributes.k8s.pod.uid:keyword - 1 distinct values (`a9fcfff9-3524-411e-8ac9-4e4341c1a121`)", + "resource.attributes.os.description:keyword - 1 distinct values (`Ubuntu 24.04.1 LTS (Noble Numbat) (Linux otel-daemonset-opentelemetry-collector-agent-7jlpk 6.10.4-linuxkit #1 SMP Wed Oct 2 16:38:00 UTC 2024 aarch64)`)", + "resource.attributes.os.type:keyword - 1 distinct values (`linux`)", + "resource.attributes.service.name.text:text - 1 distinct values (`frontend`)", + "resource.attributes.service.name:keyword - 1 distinct values (`frontend`)", + "resource.dropped_attributes_count:long - 1 distinct values (`0`)", + "resource.schema_url:keyword - 1 distinct values (`https://opentelemetry.io/schemas/1.6.1`)", + "scope.dropped_attributes_count:long - 1 distinct values (`0`)", + "service.environment:keyword - 1 distinct values (`opentelemetry-demo`)", + "service.name:keyword - 1 distinct values (`frontend`)", + "severity_number:byte - 1 distinct values (`0`)" + ] + }, + "ownPatterns": [ + { + "field": "message", + "count": 3756, + "pattern": "at", + "regex": ".*?at.*?", + "sample": " at /app/.next/server/pages/api/cart.js:1:1025\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at /app/.next/server/pages/api/cart.js:1:1025\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 
0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 1103 + }, + { + "x": 1730544960000, + "y": 1411 + }, + { + "x": 1730544990000, + "y": 1242 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "qlqu", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 2366, + "pattern": "at app node_modules grpc grpc js build src", + "regex": ".*?at.+?app.+?node_modules.+?grpc.+?grpc.+?js.+?build.+?src.*?", + "sample": " at Object.onReceiveStatus (/app/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:360:141)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + 
"frontend" + ] + }, + "metadata": { + "message": [ + " at Object.onReceiveStatus (/app/node_modules/@grpc/grpc-js/build/src/client-interceptors.js:360:141)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 693 + }, + { + "x": 1730544960000, + "y": 896 + }, + { + "x": 1730544990000, + "y": 777 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": 
"stationary", + "significance": null + }, + "shortId": "roms", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 1014, + "pattern": "at app node_modules opentelemetry instrumentation grpc build src", + "regex": ".*?at.+?app.+?node_modules.+?opentelemetry.+?instrumentation.+?grpc.+?build.+?src.*?", + "sample": " at ServiceClientImpl.clientMethodTrace [as getCart] (/app/node_modules/@opentelemetry/instrumentation-grpc/build/src/instrumentation.js:211:42)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at ServiceClientImpl.clientMethodTrace [as getCart] (/app/node_modules/@opentelemetry/instrumentation-grpc/build/src/instrumentation.js:211:42)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 
+ }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 297 + }, + { + "x": 1730544960000, + "y": 384 + }, + { + "x": 1730544990000, + "y": 333 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "eyqk", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 351, + "pattern": "at node:internal process task_queues", + "regex": ".*?at.+?node:internal.+?process.+?task_queues.*?", + "sample": " at process.processTicksAndRejections (node:internal/process/task_queues:77:11)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at process.processTicksAndRejections (node:internal/process/task_queues:77:11)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + 
"y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 104 + }, + { + "x": 1730544960000, + "y": 129 + }, + { + "x": 1730544990000, + "y": 118 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "mpyi", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at AsyncLocalStorage.run node:async_hooks 346 14", + "regex": ".*?at.+?AsyncLocalStorage\\.run.+?node:async_hooks.+?346.+?14.*?", + "sample": " at AsyncLocalStorage.run (node:async_hooks:346:14)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at AsyncLocalStorage.run (node:async_hooks:346:14)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 
1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "jsoq", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at AsyncLocalStorageContextManager.with app node_modules opentelemetry context async hooks build src AsyncLocalStorageContextManager.js 33 40", + "regex": 
".*?at.+?AsyncLocalStorageContextManager\\.with.+?app.+?node_modules.+?opentelemetry.+?context.+?async.+?hooks.+?build.+?src.+?AsyncLocalStorageContextManager\\.js.+?33.+?40.*?", + "sample": " at AsyncLocalStorageContextManager.with (/app/node_modules/@opentelemetry/context-async-hooks/build/src/AsyncLocalStorageContextManager.js:33:40)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at AsyncLocalStorageContextManager.with (/app/node_modules/@opentelemetry/context-async-hooks/build/src/AsyncLocalStorageContextManager.js:33:40)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + 
"y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "viwd", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at ContextAPI.with app node_modules opentelemetry api build src api context.js 60 46", + "regex": ".*?at.+?ContextAPI\\.with.+?app.+?node_modules.+?opentelemetry.+?api.+?build.+?src.+?api.+?context\\.js.+?60.+?46.*?", + "sample": " at ContextAPI.with (/app/node_modules/@opentelemetry/api/build/src/api/context.js:60:46)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at ContextAPI.with (/app/node_modules/@opentelemetry/api/build/src/api/context.js:60:46)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + 
"x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "agsa", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at new ZoneAwarePromise app node_modules zone.js bundles zone.umd.js 1340 33", + "regex": ".*?at.+?new.+?ZoneAwarePromise.+?app.+?node_modules.+?zone\\.js.+?bundles.+?zone\\.umd\\.js.+?1340.+?33.*?", + "sample": " at new ZoneAwarePromise (/app/node_modules/zone.js/bundles/zone.umd.js:1340:33) {\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at new ZoneAwarePromise (/app/node_modules/zone.js/bundles/zone.umd.js:1340:33) {\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 
1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "dhes", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "code", + "regex": ".*?code.*?", + "sample": " code: 14,\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": 
[ + " code: 14,\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "feod", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + 
"count": 338, + "pattern": "for call at", + "regex": ".*?for.+?call.+?at.*?", + "sample": "for call at\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + "for call at\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, 
+ { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "enzg", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at ServiceClientImpl anonymous /app/node_modules/@grpc/grpc-js/build/src/make-client.js", + "regex": ".*?at.+?ServiceClientImpl.+?anonymous.+?/app/node_modules/@grpc/grpc-js/build/src/make-client\\.js.*?", + "sample": " at ServiceClientImpl. (/app/node_modules/@grpc/grpc-js/build/src/make-client.js:105:19)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at ServiceClientImpl. (/app/node_modules/@grpc/grpc-js/build/src/make-client.js:105:19)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + 
}, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "iook", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 338, + "pattern": "at ServiceClientImpl.makeUnaryRequest /app/node_modules/@grpc/grpc-js/build/src/client.js", + "regex": ".*?at.+?ServiceClientImpl\\.makeUnaryRequest.+?/app/node_modules/@grpc/grpc-js/build/src/client\\.js.*?", + "sample": " at ServiceClientImpl.makeUnaryRequest (/app/node_modules/@grpc/grpc-js/build/src/client.js:161:32)\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " at ServiceClientImpl.makeUnaryRequest (/app/node_modules/@grpc/grpc-js/build/src/client.js:161:32)\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + 
"x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 99 + }, + { + "x": 1730544960000, + "y": 128 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "jvlx", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 335, + "pattern": "metadata Metadata internalRepr Map 0 options", + "regex": ".*?metadata.+?Metadata.+?internalRepr.+?Map.+?0.+?options.*?", + "sample": " metadata: Metadata { internalRepr: Map(0) {}, options: {} }\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " metadata: Metadata { internalRepr: Map(0) {}, options: {} }\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.678Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 
1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 97 + }, + { + "x": 1730544960000, + "y": 127 + }, + { + "x": 1730544990000, + "y": 111 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "wczz", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 3, + "pattern": "internalRepr Map content-type Array", + "regex": ".*?internalRepr.+?Map.+?content-type.+?Array.*?", + "sample": " internalRepr: 
Map(1) { 'content-type' => [Array] },\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " internalRepr: Map(1) { 'content-type' => [Array] },\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:43.710Z", + "lastOccurrence": "2024-11-02T10:56:15.039Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, 
+ "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "pcyb", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 3, + "pattern": "metadata Metadata", + "regex": ".*?metadata.+?Metadata.*?", + "sample": " metadata: Metadata {\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " metadata: Metadata {\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:43.710Z", + "lastOccurrence": "2024-11-02T10:56:15.039Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + 
"y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "tcak", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 3, + "pattern": "options", + "regex": ".*?options.*?", + "sample": " options: {}\n", + "highlight": { + "service.name": [ + "frontend" + ], + "resource.attributes.service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + " options: {}\n" + ] + }, + "firstOccurrence": "2024-11-02T10:55:43.710Z", + "lastOccurrence": "2024-11-02T10:56:15.039Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + 
"y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "xbsw", + "relevance": "normal", + "interesting": false + }, + { + "field": "message", + "count": 1005, + "pattern": "No connection established Last error connect ECONNREFUSED 10.110.191.164 8080 2024 11 02T10", + "regex": ".*?No.+?connection.+?established.+?Last.+?error.+?connect.+?ECONNREFUSED.+?10\\.110\\.191\\.164.+?8080.+?2024.+?11.+?02T10.*?", + "sample": "14 UNAVAILABLE: No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080 (2024-11-02T10:56:12.143Z)", + "highlight": { + "service.name": [ + "frontend" + ] + }, + "metadata": { + "message": [ + "14 UNAVAILABLE: No connection established. 
Last error: connect ECONNREFUSED 10.110.191.164:8080 (2024-11-02T10:56:12.143Z)" + ] + }, + "firstOccurrence": "2024-11-02T10:55:36.676Z", + "lastOccurrence": "2024-11-02T10:56:59.370Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 + }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 291 + }, + { + "x": 1730544960000, + "y": 381 + }, + { + "x": 1730544990000, + "y": 333 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "pssf", + "relevance": 
"critical", + "interesting": true + }, + { + "field": "error.exception.message", + "count": 3, + "pattern": "INTERNAL cart failure failed to get user cart during checkout rpc error code Unavailable desc connection error desc transport Error while dialing dial tcp connect connection refused", + "regex": ".*?INTERNAL.+?cart.+?failure.+?failed.+?to.+?get.+?user.+?cart.+?during.+?checkout.+?rpc.+?error.+?code.+?Unavailable.+?desc.+?connection.+?error.+?desc.+?transport.+?Error.+?while.+?dialing.+?dial.+?tcp.+?connect.+?connection.+?refused.*?", + "sample": "13 INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"", + "highlight": { + "service.name": [ + "frontend" + ] + }, + "metadata": { + "error.exception.message": [ + "13 INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"" + ] + }, + "firstOccurrence": "2024-11-02T10:55:43.710Z", + "lastOccurrence": "2024-11-02T10:56:15.038Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 0 + }, + { + "x": 1730543820000, + "y": 0 + }, + { + "x": 1730543850000, + "y": 0 + }, + { + "x": 1730543880000, + "y": 0 + }, + { + "x": 1730543910000, + "y": 0 + }, + { + "x": 1730543940000, + "y": 0 + }, + { + "x": 1730543970000, + "y": 0 + }, + { + "x": 1730544000000, + "y": 0 + }, + { + "x": 1730544030000, + "y": 0 + }, + { + "x": 1730544060000, + "y": 0 + }, + { + "x": 1730544090000, + "y": 0 + }, + { + "x": 1730544120000, + "y": 0 + }, + { + "x": 1730544150000, + "y": 0 + }, + { + "x": 1730544180000, + "y": 0 + }, + { + "x": 1730544210000, + "y": 0 + }, + { + "x": 1730544240000, + "y": 0 + }, + { + "x": 1730544270000, + "y": 0 + }, + { + "x": 1730544300000, + "y": 0 + }, + { + "x": 1730544330000, + "y": 0 
+ }, + { + "x": 1730544360000, + "y": 0 + }, + { + "x": 1730544390000, + "y": 0 + }, + { + "x": 1730544420000, + "y": 0 + }, + { + "x": 1730544450000, + "y": 0 + }, + { + "x": 1730544480000, + "y": 0 + }, + { + "x": 1730544510000, + "y": 0 + }, + { + "x": 1730544540000, + "y": 0 + }, + { + "x": 1730544570000, + "y": 0 + }, + { + "x": 1730544600000, + "y": 0 + }, + { + "x": 1730544630000, + "y": 0 + }, + { + "x": 1730544660000, + "y": 0 + }, + { + "x": 1730544690000, + "y": 0 + }, + { + "x": 1730544720000, + "y": 0 + }, + { + "x": 1730544750000, + "y": 0 + }, + { + "x": 1730544780000, + "y": 0 + }, + { + "x": 1730544810000, + "y": 0 + }, + { + "x": 1730544840000, + "y": 0 + }, + { + "x": 1730544870000, + "y": 0 + }, + { + "x": 1730544900000, + "y": 0 + }, + { + "x": 1730544930000, + "y": 2 + }, + { + "x": 1730544960000, + "y": 1 + }, + { + "x": 1730544990000, + "y": 0 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "crai", + "relevance": "critical", + "interesting": true + } + ], + "patternsFromOtherEntities": [ + { + "field": "message", + "count": 246, + "pattern": "INFO 10.244.0.26 AAAA IN my otel demo frontend udp 39 false 512 NXDOMAIN qr rd ra 39", + "regex": ".*?INFO.+?10\\.244\\.0\\.26.+?AAAA.+?IN.+?my.+?otel.+?demo.+?frontend.+?udp.+?39.+?false.+?512.+?NXDOMAIN.+?qr.+?rd.+?ra.+?39.*?", + "sample": "[INFO] 10.244.0.26:59273 - 48617 \"AAAA IN my-otel-demo-frontend. udp 39 false 512\" NXDOMAIN qr,rd,ra 39 0.003716084s\n", + "highlight": { + "body.text": [ + "[INFO] 10.244.0.26:59273 - 48617 \"AAAA IN my-otel-demo-frontend. udp 39 false 512\" NXDOMAIN qr,rd,ra" + ], + "message": [ + "[INFO] 10.244.0.26:59273 - 48617 \"AAAA IN my-otel-demo-frontend. udp 39 false 512\" NXDOMAIN qr,rd,ra" + ] + }, + "metadata": { + "service.name": [ + "coredns" + ], + "message": [ + "[INFO] 10.244.0.26:59273 - 48617 \"AAAA IN my-otel-demo-frontend. 
udp 39 false 512\" NXDOMAIN qr,rd,ra 39 0.003716084s\n" + ] + }, + "firstOccurrence": "2024-11-02T10:36:30.917Z", + "lastOccurrence": "2024-11-02T10:56:58.646Z", + "timeseries": [ + { + "x": 1730543790000, + "y": 6 + }, + { + "x": 1730543820000, + "y": 6 + }, + { + "x": 1730543850000, + "y": 6 + }, + { + "x": 1730543880000, + "y": 6 + }, + { + "x": 1730543910000, + "y": 6 + }, + { + "x": 1730543940000, + "y": 6 + }, + { + "x": 1730543970000, + "y": 6 + }, + { + "x": 1730544000000, + "y": 6 + }, + { + "x": 1730544030000, + "y": 6 + }, + { + "x": 1730544060000, + "y": 6 + }, + { + "x": 1730544090000, + "y": 6 + }, + { + "x": 1730544120000, + "y": 6 + }, + { + "x": 1730544150000, + "y": 6 + }, + { + "x": 1730544180000, + "y": 6 + }, + { + "x": 1730544210000, + "y": 6 + }, + { + "x": 1730544240000, + "y": 6 + }, + { + "x": 1730544270000, + "y": 6 + }, + { + "x": 1730544300000, + "y": 6 + }, + { + "x": 1730544330000, + "y": 6 + }, + { + "x": 1730544360000, + "y": 6 + }, + { + "x": 1730544390000, + "y": 6 + }, + { + "x": 1730544420000, + "y": 6 + }, + { + "x": 1730544450000, + "y": 6 + }, + { + "x": 1730544480000, + "y": 6 + }, + { + "x": 1730544510000, + "y": 6 + }, + { + "x": 1730544540000, + "y": 6 + }, + { + "x": 1730544570000, + "y": 6 + }, + { + "x": 1730544600000, + "y": 6 + }, + { + "x": 1730544630000, + "y": 6 + }, + { + "x": 1730544660000, + "y": 6 + }, + { + "x": 1730544690000, + "y": 6 + }, + { + "x": 1730544720000, + "y": 6 + }, + { + "x": 1730544750000, + "y": 6 + }, + { + "x": 1730544780000, + "y": 6 + }, + { + "x": 1730544810000, + "y": 6 + }, + { + "x": 1730544840000, + "y": 6 + }, + { + "x": 1730544870000, + "y": 6 + }, + { + "x": 1730544900000, + "y": 6 + }, + { + "x": 1730544930000, + "y": 6 + }, + { + "x": 1730544960000, + "y": 6 + }, + { + "x": 1730544990000, + "y": 6 + }, + { + "x": 1730545020000, + "y": 0 + } + ], + "change": { + "type": "stationary", + "significance": null + }, + "shortId": "rlzf", + "relevance": "normal", + "interesting": false 
+ } + ], + "searches": [ + { + "fragments": [ + "10.110.191.164:8080", + "10.110.191.164", + "8080" + ], + "appearsAs": "This IP address and port are referenced in the investigated entity 'frontend'." + }, + { + "fragments": [ + "/api/cart", + "cartservice", + "/api" + ], + "appearsAs": "These URL fragments appear as attributes.request.url in the investigated entity 'frontend'. They could be related to 'cartservice'." + }, + { + "fragments": [ + "000aa", + "000bbb" + ], + "appearsAs": "These ids appear as span.id and parent.id in the investigated entity 'frontend'. They could be referring to spans found on upstream or downstream services" + } + ], + "relatedEntitiesSummaries": [ + "## Possible Relationships to `service.name:frontend`\n\n### 1. Entity: `service.name:controller`\n- **Indicators:**\n - #### Average Indicator:\n - **IP Address and Port:**\n - `frontend`: `10.110.191.164:8080`\n - `controller`: `10.244.0.26:8080`\n - **URL Fragment:**\n - `frontend`: `/api/cart`\n - `controller`: `/api/cart?session`\n- **Relationship Reasoning:**\n - The `controller` service is highly likely to be making requests to, or routing through, the `frontend` service as evidenced by the URL fragments referring to the `/api/cart` endpoint seen in both entities. Additionally, they share common IP address and port patterns indicating network interaction.\n- **Relevance Assessment:**\n - **Very High**: Given that the `controller` service handles endpoints like `/api/cart`, which directly correspond to transactions failing with a high error rate, this entity is a critical part of the interaction chain and should be closely examined.\n\n### 2. Entity: `service.name:cartservice`\n- **Indicators:**\n - #### Strong Indicator:\n - **Direct Call References:**\n - `frontend`: `details: 'cart failure: failed to get user cart during checkout: rpc error: ... 
desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"'\n - `cartservice`: `url.full.text:\"http://my-otel-demo-cartservice:8080/oteldemo.CartService/GetCart\"`\n- **Relationship Reasoning:**\n - The `frontend` service attempts to interact with the `cartservice` as observed by the endpoint `/oteldemo.CartService/GetCart`. Connection errors from `frontend` indicate it is calling `cartservice` and facing issues getting responses, potentially causing the 500 errors.\n- **Relevance Assessment:**\n - **Very High**: Since `cartservice` manages user cart data and the connection errors reported in `frontend` logs directly correspond to attempts to communicate with `cartservice`, this entity is a probable cause of the service failures being observed.\n\n### 3. Entity: `service.name:frontend-web`\n- **Indicators:**\n - #### Average Indicator:\n - **URL Fragment:**\n - `frontend`: `/api/cart`\n - `frontend-web`: `/api/cart`\n - #### Weak Indicator:\n - **RUM Layer Mentions:**\n - `frontend`: `frontend-web (RUM)`\n - `frontend-web`: Described in System Architecture.\n- **Relationship Reasoning:**\n - `frontend-web` captures real user interactions with the `/api/cart` endpoint providing insights into the `frontend` service which might help trace the errors and delays externally perceived by users.\n- **Relevance Assessment:**\n - **Moderate**: While `frontend-web` can show the surface errors users are experiencing, it is more of an observability layer rather than directly causing the 500 errors or impacting the service interactions upstream.\n\n### 4. 
Entity: `service.name:frontendproxy`\n- **Indicators:**\n - #### Average Indicator:\n - **Network Routing Indicator:**\n - `frontend`: Receives traffic from `frontendproxy`\n - `frontendproxy`: Directs traffic to `frontend`\n - #### IP Address Mention:\n - Both services reference `8080`, indicating shared routing or proxying.\n- **Relationship Reasoning:**\n - `frontendproxy` routes traffic from external sources, including simulated user requests, to `frontend`. Any issues in this proxy layer could impact the incoming traffic to `frontend`, causing errors.\n- **Relevance Assessment:**\n - **High**: Considering `frontendproxy` handles traffic routing, any misconfiguration or failures here could lead to the issues observed on the `frontend`.\n\n### 5. Entity: `service.name:coredns`\n- **Indicators:**\n - #### Weak Indicators:\n - **DNS Resolution Events:**\n - Logs showing DNS queries involving `frontend`.\n - **IP Address and Port:**\n - Shared network interactions over common ports like `8080`.\n- **Relationship Reasoning:**\n - While not directly related to application logic, `coredns` is responsible for DNS resolution within the cluster, and issues here could result in connectivity problems seen in `frontend` when trying to reach `cartservice`.\n- **Relevance Assessment:**\n - **Moderate**: While less likely to be the root cause, DNS misconfigurations or failures could indirectly contribute to connectivity issues within services like `frontend`.\n\n### Summary\n\n1. **service.name:controller** - Very High relevance due to direct endpoint interaction.\n2. **service.name:cartservice** - Very High relevance for being the direct interaction target with observed failures.\n3. **service.name:frontend-web** - Moderate relevance in monitoring layer but not direct interaction.\n4. **service.name:frontendproxy** - High relevance for routing traffic to `frontend`.\n5. **service.name:coredns** - Moderate relevance for possible DNS resolution issues." 
+ ], + "kbEntries": [ + { + "id": "System architecture", + "text": "The architecture described here outlines a microservices-based system, where each service is implemented in a distinct programming language and communicates via gRPC, HTTP, or TCP. This system is designed to handle simulated user traffic, supported by a variety of interconnected services and components.\n\n### System Architecture\n\n1. **`loadgenerator`** - Simulates external user traffic by sending HTTP requests, which are managed by an Nginx ingress controller. This ingress directs traffic to the `frontendproxy` service.\n\n2. **`frontendproxy` (Envoy)** - Acts as a reverse proxy, routing incoming traffic from `loadgenerator` to `frontend`.\n\n3. **`frontend` (Node.js)** - The core service for user interactions, receiving HTTP traffic from `frontendproxy` and interfacing with various backend services to fulfill requests.\n\n4. **`frontend-web` (RUM)** - A Real User Monitoring (RUM) layer that runs in the user's browser, enabling insights into end-user experiences and frontend performance.\n\n5. **`adservice`** - Delivers advertisements to the `frontend` using gRPC, enhancing the user experience with relevant ad content.\n\n6. **`cartservice`** - Manages shopping cart data, including adding and removing items. It communicates over gRPC and leverages a Redis cache for data persistence.\n\n7. **`currencyservice`** - Handles currency conversions and facilitates interactions between `cartservice` and `checkoutservice` over gRPC.\n\n8. **`checkoutservice`** - Coordinates the checkout process, calling various services for payments, shipping, and emails. It utilizes both gRPC and HTTP protocols to aggregate the necessary information for order completion.\n\n9. **`emailservice`** - Sends order confirmation emails to users via gRPC, triggered by interactions with `checkoutservice`.\n\n10. 
**`productcatalogservice`** - Maintains the product catalog, storing details about available items and providing this data to other services via gRPC.\n\n11. **`recommendationservice`** - Generates personalized product recommendations, accessed by `frontend` over gRPC.\n\n12. **`shippingservice`** - Manages shipping information, providing essential data to `checkoutservice` over gRPC.\n\n13. **`quoteservice`** - Supplies shipping quotes over HTTP, which are accessed by `shippingservice` to estimate shipping costs.\n\n14. **`paymentservice`** - Processes payment transactions through gRPC, enabling secure and efficient payments for `checkoutservice`.\n\n15. **`accountingservice`** - Responsible for recording transactions, it connects to a Kafka queue and interacts over TCP.\n\n16. **`frauddetectionservice`** - Monitors orders for potential fraud, also interfacing with the Kafka queue over TCP to receive relevant transaction data.\n\n17. **`imageprovider` (Nginx)** - Serves static images for the frontend interface, accessible over HTTP.\n\n18. **`queue` (Kafka)** - Acts as a central message broker, facilitating communication between `accountingservice` and `frauddetectionservice`.\n\n### Key Components\nThe system is structured to enable robust, distributed communication across services, allowing each component to focus on a specific aspect of the user experience or business logic. 
The ingress controller, in particular, plays a critical role in directing traffic, ensuring that incoming requests from `loadgenerator` are correctly routed through `frontendproxy` and into the system’s core services.\n\nThis architecture ensures that the system can handle high traffic loads and provides a modular, flexible framework to meet user demands while monitoring for performance and security.\n", + "tokens": 733, + "score": 5 + } + ] + }, + "entity": { + "service.name": "frontend" + }, + "relatedEntities": [ + { + "entity": { + "service.name": "controller" + }, + "reason": "`controller` service handles endpoints like `/api/cart`, which corresponds to failed transactions. They share common IP address and port patterns, indicating network interaction.", + "confidence": "very high" + }, + { + "entity": { + "service.name": "cartservice" + }, + "reason": "`frontend` service attempts to interact with `cartservice`. Connection errors from `frontend` indicate it is calling `cartservice` and facing issues getting responses, potentially causing the 500 errors.", + "confidence": "very high" + }, + { + "entity": { + "service.name": "frontend-web" + }, + "reason": "`frontend-web` captures real user interactions with the `/api/cart` endpoint. Helps trace the errors and delays externally perceived by users.", + "confidence": "moderate" + }, + { + "entity": { + "service.name": "coredns" + }, + "reason": "`coredns` is responsible for DNS resolution within the cluster and issues here could result in connectivity problems seen in `frontend`.", + "confidence": "moderate" + } + ], + "summary": "### Investigation of Entity `service.name:frontend`\n\n#### Context of Investigation\n\nThe `frontend` service is critical in this investigation due to its role in the overall request handling and the observed high error rate (98.78%) in the `controller` service for POST requests to the `/api/cart` endpoint. This issue began at `2024-11-02T10:56:42.498Z`. 
The `frontend` service routes requests to `cartservice`, a key interaction in this error occurrence.\n\n#### Infrastructure & Environment\n\n- **Environment**: The `frontend` operates within the `opentelemetry-demo` deployment environment.\n- **Deployment**: It is deployed on Kubernetes (k8s), specifically within the `default` namespace. \n- **Pod Information**: The pod name is `my-otel-demo-frontend-5bbf4d78bc-qtwdr`, on the `minikube` node. Each pod has a unique UID (`a9fcfff9-3524-411e-8ac9-4e4341c1a121`), with logs stored at `/var/log/pods/`.\n- **Resource Attributes**: Hosts `Apple` Arm64 architecture processor (`host.cpu.vendor.id: Apple`), running `Ubuntu 24.04.1 LTS (Noble Numbat)`, indicating a Linux-based system.\n\n#### Communication Characteristics \n\n- **Protocols**: \n - The `frontend` service communicates primarily using HTTP for inbound requests and gRPC for backend service interactions. \n - gRPC is specifically used when the `frontend` communicates with `cartservice`, highlighted by recorded RPC errors in the logs.\n- **Endpoints**:\n - The `frontend` gets requests from `frontendproxy` (Envoy), acting as the reverse proxy.\n - The service then routes these requests to different backend services, including `cartservice`, via gRPC.\n - The critical IP noted in logs is `10.110.191.164:8080`, pointing to a potential endpoint for the `cartservice`.\n\n#### Context of Entity in Investigation\n\n- **Reason for Investigation**: The `frontend` service is under scrutiny due to its pivotal role in processing and routing incoming user requests. 
Any disruption in its communication or processing mechanism directly affects the functionality of connected services, notably `cartservice`, leading to high error rates observed in the `controller`.\n- **Relation to Other Entities**:\n - **Previous Entity**: `controller` - The `controller` service logs indicate a high number of 500 errors were triggered during POST requests to `/api/cart`.\n - **Dependencies**: `frontend` relies on successful communication with `cartservice` to fulfill these cart-related requests.\n- **Architectural Role**: The `frontend` is the nucleus for user interactions within this microservices architecture. Any network or communication issue in `frontend` can cascade, influencing its backend dependencies like `cartservice` and leading to broader service disruptions.\n\nThis comprehensive analysis of the `frontend` service’s infrastructure, communication protocols, and its investigative context emphasizes its critical position in the service chain responsible for the reported errors. Further scrutiny of the communication logs, particularly focusing on the gRPC interactions with `cartservice`, is essential to identify and mitigate the root cause of these failures.\n\nObservations for service `frontend`.\n\n### Connection Issues to Upstream Dependency\n\n1. **Pattern**: \"No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080\"\n - **Sample**: \"14 UNAVAILABLE: No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080 (2024-11-02T10:56:12.143Z)\"\n - **Timeseries Observations**: \n - No occurrences before 2024-11-02T10:55:30.000Z.\n - Sharp increase at 2024-11-02T10:55:30.000Z (291 occurrences).\n - Further increase at 2024-11-02T10:56:00.000Z (381 occurrences).\n - Next timestamp at 2024-11-02T10:57:00.000Z showing a return to 0 occurrences.\n\n2. 
**Pattern**: \"INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"\n - **Sample**: \"13 INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp 10.110.191.164:8080: connect: connection refused\"\n - **Timeseries Observations**:\n - No occurrences before 2024-11-02T10:55:30.000Z.\n - Slight increase at 2024-11-02T10:55:30.000Z (2 occurrences).\n - One more occurrence at 2024-11-02T10:56:00.000Z (1 occurrence).\n - No occurrences after 2024-11-02T10:56:30.000Z.\n\nThe significant spike in connection errors starting from 2024-11-02T10:55:30.000Z aligns with the high error rate in the `controller` service, suggesting a strong correlation between these established connection issues in the `frontend` service and the 500 errors seen in the `controller` service for POST requests to the `/api/cart` endpoint around the same time.\n\n### Possibly Relevant Log Patterns from coredns\n\n1. **Pattern**: \"INFO 10.244.0.26 AAAA IN my-otel-demo-frontend udp 39 false 512 NXDOMAIN qr,rd,ra 39\"\n - **Sample**: \"[INFO] 10.244.0.26:59273 - 48617 \"AAAA IN my-otel-demo-frontend. udp 39 false 512\" NXDOMAIN qr,rd,ra 39 0.003716084s\"\n - **Timeseries Observations**:\n - Frequent steady occurrences of 6 events every 30 seconds.\n - No notable changes or spikes coinciding with the connection issues observed in the `frontend` service.\n\nFrom these observations, it appears that the connection refusal errors in the `frontend` service are likely caused by issues in communication with the `cartservice`, potentially as a result of intermittent network disruptions or dependency service failures at the IP 10.110.191.164. 
Further investigation should focus on the health and connectivity status of the `cartservice` around the specified time period.\n\n### Timeline of Significant Events\n\n1. **2024-11-02T10:55:30.000Z**\n - **Event**: Sharp increase in connection error logs.\n - **Log Pattern**: \"No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080\"\n - **Details**: 291 occurrences reported.\n\n2. **2024-11-02T10:55:30.000Z**\n - **Event**: Appearance of an internal error in accessing cart services.\n - **Log Pattern**: \"INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error\"\n - **Details**: 2 occurrences reported.\n\n3. **2024-11-02T10:56:00.000Z**\n - **Event**: Further increase in connection error logs.\n - **Log Pattern**: \"No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080\"\n - **Details**: 381 occurrences reported.\n\n4. **2024-11-02T10:56:00.000Z**\n - **Event**: Additional occurrence of an internal error in accessing cart services.\n - **Log Pattern**: \"INTERNAL: cart failure: failed to get user cart during checkout: rpc error: code = Unavailable desc = connection error\"\n - **Details**: 1 occurrence reported.\n\n5. **2024-11-02T10:56:42.498Z**\n - **Event**: High error rate (98.78%) in the `controller` service for POST requests to the `/api/cart` endpoint begins.\n - **Details**: Corresponds to the timing of observed spikes in connection errors in the `frontend` service.\n\n6. **2024-11-02T10:57:00.000Z**\n - **Event**: Connection error logs return to zero occurrences.\n - **Log Pattern**: \"No connection established. Last error: connect ECONNREFUSED 10.110.191.164:8080\"\n - **Details**: 0 occurrences reported.\n\n### Context and Reasoning\n\nThe `frontend` service is critical in this investigation due to its role in routing requests to the `cartservice`. 
The high error rates observed in the `controller` service's POST requests to the `/api/cart` endpoint started precisely at 2024-11-02T10:56:42.498Z, which is within the timeframe when the `frontend` service experienced significant connection issues with the `cartservice`.\n\nUpon analyzing log patterns, it is evident that the communication disruptions between the `frontend` service and the `cartservice` began at 2024-11-02T10:55:30.000Z with a sharp increase in connection errors, and these errors persisted until approximately 2024-11-02T10:57:00.000Z. During this period, numerous `ECONNREFUSED` errors were logged, indicating the `frontend` service's inability to establish a connection to the `cartservice` (IP 10.110.191.164:8080). This aligns with the timeline of the high error rates in the `controller` service, suggesting a direct correlation.\n\nFurthermore, the internal errors logged by the `frontend` service related to RPC errors and failed attempts to get the user cart during checkout reaffirm these connection problems. These errors underscore an impaired communication pathway to the `cartservice`, affecting the `controller` service's ability to process POST requests to the `/api/cart` endpoint.\n\nTherefore, the evidence strongly indicates that network or connection failures between the `frontend` service and its upstream dependency, `cartservice`, are the principal cause of the cascading 500 errors seen in the `controller` service. This highlights the critical position of the `frontend` service in ensuring stable communication pathways within the microservice architecture." 
+ } + ] + }, + "role": "tool", + "toolCallId": "call_KJwmI3xgY1gcstI4MVd4Olcf" + }, + { + "role": "assistant", + "content": "", + "toolCalls": [ + { + "toolCallId": "call_1Y45Vw3lRUFLsImFilvrXPos", + "function": { + "name": "endProcessAndWriteReport", + "arguments": { + "endProcess": true + } + } + } + ] + }, + { + "name": "endProcessAndWriteReport", + "role": "tool", + "toolCallId": "call_1Y45Vw3lRUFLsImFilvrXPos", + "response": { + "report": "# Root Cause Analysis (RCA) Report\n\n## 1. Introduction\n\n### Overview\nThis RCA report investigates the elevated error rates in the `controller` service and its downstream dependencies. The incident was first detected by an alert indicating an unusually high number of 500 Internal Server Errors on the `/api/cart` endpoint, with an error rate of 98.78%, significantly exceeding the 1% threshold over a 1-minute period. The primary entities investigated were the `controller`, `cartservice`, and `frontend` services.\n\n### Initial Hypothesis\nThe initial hypothesis proposed that either a deployment, a critical configuration change, or a dependency failure caused the high error rate in the `controller` service.\n\n## 2. Investigation Summary\n\n### Proposed Hypotheses\n1. **Resource exhaustion in `controller`**:\n - **Evidence**:\n - **Strong:** High error rate specifically for POST requests to the `/api/cart` endpoint.\n - **Weak:** No indications of resource limits being exceeded in the `controller` service.\n2. **Upstream latency or failure from `frontendproxy`**:\n - **Evidence**:\n - **Strong:** Error patterns suggested connectivity issues starting from the `frontendproxy`.\n - **Weak:** Eventually ruled out as communication problems upstream of the `controller`.\n3. 
**Backend service issue or configuration error in `cartservice`**:\n - **Evidence**:\n - **Strong:** Identified startup issues and fatal errors in `cartservice`.\n - **Weak:** None.\n\n### Discarded Hypotheses\n- **Resource exhaustion in `controller`**: Ruled out due to stable resource usage.\n- **Upstream latency or failure from `frontendproxy`**: Connectivity issues traced further downstream to `cartservice`.\n\n### Investigated Entities\n- `controller`\n- `frontend`\n- `cartservice`\n\n## 3. Cause and Effect\n\n### Cause\nThe root cause was identified as a startup failure in the `cartservice`, with fatal \"bad entrypoint\" errors preventing the service from starting correctly.\n\n### Effect\nThe startup issues in `cartservice` caused connection failures, leading to `frontend` being unable to establish connections. This resulted in `controller` reporting a high number of 500 errors for POST requests to the `/api/cart` endpoint, thus impacting user-facing services relying on this endpoint.\n\n## 4. Evidence for Root Cause\n\n### Log Patterns\n- **`cartservice` Logs**: Repeated \"FATAL: Could not start, bad entrypoint!\" errors indicating startup failures.\n- **`frontend` Logs**: \"ECONNREFUSED\" errors and spikes in \"No connection established\" errors to `cartservice`.\n- **`controller` Logs**: High frequency of 500 errors for HTTP POST requests to `/api/cart`.\n\n### Alerts and SLOs\n- An alert was triggered at `2024-11-02T10:56:42.498Z` due to a high error rate of 98.78% for the `/api/cart` endpoint.\n\n### Data Analysis\n- **Memory and CPU usage** of `controller` remained stable, ruling out resource exhaustion.\n- **Network traffic** analysis showed no abnormalities at the `controller` level but issues at the connectivity to `cartservice`.\n\n## 5. 
Proposed Impact\n\n### Affected Services\n- `controller`\n- `frontend`\n- `cartservice`\n\n### Performance Degradation\n- High error rate and degraded performance for the `/api/cart` endpoint.\n- Impact on downstream services and user-facing operations reliant on `cartservice`.\n\n### User Impact\n- Elevated latency and failed transactions affecting users trying to access and modify shopping cart data.\n\n## 6. Timeline of Significant Events\n\n- **2024-11-02T10:55:30.000Z**:\n - `cartservice` logs indicate a series of startup issues, marked by \"FATAL: Could not start, bad entrypoint!\" errors.\n - `frontend` logs show initial \"No connection established\" errors to `cartservice`.\n\n- **2024-11-02T10:56:00.000Z**:\n - `frontend` logs continue to show increased \"No connection established\" errors.\n\n- **2024-11-02T10:56:30.000Z**:\n - Additional \"bad entrypoint\" errors recorded in `cartservice` logs.\n\n- **2024-11-02T10:56:42.498Z**:\n - Alert triggered due to a high error rate of 98.78% for `/api/cart` in `controller`.\n\n- **2024-11-02T10:57:00.000Z**:\n - `frontend` logs show a return to zero connection errors as the startup issue in `cartservice` stabilizes.\n\n## 7. Conclusion and Next Steps\n\n### Conclusion\nThe investigation concludes that the root cause of the incident was the startup failure and persistent initialization issues in `cartservice`. This prevented `cartservice` from functioning correctly, causing connection refusal errors in `frontend`, which in turn resulted in the `controller` reporting high error rates for POST requests to the `/api/cart` endpoint.\n\n### Next Steps\n1. **Fixes or Mitigations**:\n - Address and rectify the misconfiguration or code issues in `cartservice` causing fatal startup errors.\n - Ensure proper configuration and reliable startup for `cartservice` moving forward.\n\n2. 
**Monitoring Improvements**:\n - Implement more granular alerts and monitoring for `cartservice` startup and initialization errors.\n - Enhance network error monitoring between `frontend` and `cartservice`.\n\n3. **Further Investigations**:\n - Monitor `cartservice` and observe for any recurrent patterns indicating startup issues.\n - Investigate the potential for automated failover or redundancy mechanisms to handle startup failures in critical services.\n\nThis concluded RCA encapsulates the findings from the investigations and provides clear guidance on addressing identified issues.\n\n---\n\n**Prepared by:**\nElastic Observability Support Team", + "timeline": { + "events": [ + { + "@timestamp": "2024-11-02T10:55:30.000Z", + "severity": "unusual", + "type": "event", + "description": "`cartservice` logs indicate a series of startup issues, marked by 'FATAL: Could not start, bad entrypoint!' errors." + }, + { + "@timestamp": "2024-11-02T10:55:30.000Z", + "severity": "unusual", + "type": "event", + "description": "`frontend` logs show initial 'No connection established' errors to `cartservice`." + }, + { + "@timestamp": "2024-11-02T10:56:00.000Z", + "severity": "unusual", + "type": "event", + "description": "`frontend` logs continue to show increased 'No connection established' errors." + }, + { + "@timestamp": "2024-11-02T10:56:30.000Z", + "severity": "unusual", + "type": "event", + "description": "Additional 'bad entrypoint' errors recorded in `cartservice` logs." + }, + { + "@timestamp": "2024-11-02T10:56:42.498Z", + "severity": "critical", + "type": "alert", + "description": "Alert triggered due to a high error rate of 98.78% for `/api/cart` in `controller`." + }, + { + "@timestamp": "2024-11-02T10:57:00.000Z", + "severity": "info", + "type": "event", + "description": "`frontend` logs show a return to zero connection errors as the startup issue in `cartservice` stabilizes." 
+ } + ] + } + } + } +] diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/mock/index.ts b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/mock/index.ts new file mode 100644 index 0000000000000..5068c8d5055fb --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/mock/index.ts @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { + InvestigateEntityToolMessage, + RootCauseAnalysisEvent, +} from '@kbn/observability-ai-server/root_cause_analysis'; +import { RCA_INVESTIGATE_ENTITY_TOOL_NAME } from '@kbn/observability-ai-common/root_cause_analysis'; +import { MessageRole } from '@kbn/inference-common'; +import { Required } from 'utility-types'; +// @ts-ignore +import completeRootCauseAnalysisJson from './complete_root_cause_analysis.json'; + +export const completeRootCauseAnalysis = completeRootCauseAnalysisJson as RootCauseAnalysisEvent[]; + +export const controllerEntityHealthAnalysis = completeRootCauseAnalysis.find( + (event) => + 'role' in event && + event.role === MessageRole.Tool && + event.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME && + event.response.entity['service.name'] === 'cartservice' +) as Required; diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_callout/index.stories.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_callout/index.stories.tsx new file mode 100644 index 0000000000000..6328835af1f63 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_callout/index.stories.tsx @@ -0,0 +1,37 @@ +/* + * 
Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { Meta, Story } from '@storybook/react'; +import React from 'react'; +import { RootCauseAnalysisCallout } from '.'; + +const stories: Meta<{}> = { + title: 'RCA/Callout', + component: RootCauseAnalysisCallout, +}; + +export default stories; + +export const Default: Story<{}> = () => { + return ( + {}} + onCompleteInBackgroundClick={() => {}} + completeInBackground + /> + ); +}; + +export const CompleteInBackgroundDisabled: Story<{}> = () => { + return ( + {}} + onCompleteInBackgroundClick={() => {}} + completeInBackground={false} + /> + ); +}; diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_callout/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_callout/index.tsx new file mode 100644 index 0000000000000..0487172aca6a6 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_callout/index.tsx @@ -0,0 +1,106 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { + EuiButton, + EuiCallOut, + EuiCheckbox, + EuiFlexGroup, + EuiFlexItem, + EuiFormLabel, + EuiPanel, + EuiText, + EuiTitle, + useGeneratedHtmlId, +} from '@elastic/eui'; +import React from 'react'; +import { i18n } from '@kbn/i18n'; +import { AssistantAvatar } from '@kbn/observability-ai-assistant-plugin/public'; +import { css } from '@emotion/css'; + +export function RootCauseAnalysisCallout({ + onClick, + onCompleteInBackgroundClick, + completeInBackground, +}: { + onClick: () => void; + onCompleteInBackgroundClick: () => void; + completeInBackground: boolean; +}) { + const checkboxId = useGeneratedHtmlId(); + + return ( + + + + + + + + +

+ {i18n.translate('xpack.observabilityAiAssistant.rca.calloutTitle', { + defaultMessage: 'AI-assisted root cause analysis', + })} +

+
+
+
+ + + {i18n.translate('xpack.observabilityAiAssistant.rca.calloutText', { + defaultMessage: `Start an automated investigation that will analyze + log patterns, SLOs and alerts for entities and provide an + evidence-based root cause analysis of issues in your system.`, + })} + + + + + { + onCompleteInBackgroundClick(); + }} + checked={completeInBackground} + /> + + {i18n.translate( + 'xpack.observabilityAiAssistant.rootCauseAnalysisCallout.keepAnalysisRunningInFormLabelLabel', + { defaultMessage: 'Keep analysis running in background' } + )} + + + + {i18n.translate('xpack.observabilityAiAssistant.rca.startAnalysisButtonLabel', { + defaultMessage: 'Start analysis', + })} + + 
+
+
+
+ ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_collapsible_panel/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_collapsible_panel/index.tsx new file mode 100644 index 0000000000000..8cdf518f84abe --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_collapsible_panel/index.tsx @@ -0,0 +1,31 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import { EuiAccordion, EuiPanel, EuiSpacer, useGeneratedHtmlId } from '@elastic/eui'; +import React from 'react'; +import { RootCauseAnalysisPanel } from '../rca_panel'; + +export function RootCauseAnalysisCollapsiblePanel({ + title, + content, + color, + isDisabled, +}: { + title: React.ReactNode; + content: React.ReactNode; + color?: React.ComponentProps['color']; + isDisabled?: boolean; +}) { + const htmlId = useGeneratedHtmlId(); + return ( + + + + {content} + + + ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_container/index.stories.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_container/index.stories.tsx new file mode 100644 index 0000000000000..492d6ecc59386 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_container/index.stories.tsx @@ -0,0 +1,59 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { Meta, Story } from '@storybook/react'; +import React from 'react'; +import { noop } from 'lodash'; +import { RootCauseAnalysisContainer } from '.'; +// @ts-ignore +import fullAnalysis from '../mock/complete_root_cause_analysis.json'; + +const stories: Meta<{}> = { + title: 'RCA/Container', + component: RootCauseAnalysisContainer, +}; + +export default stories; + +const handlers = { + onStartAnalysisClick: noop, + onStopAnalysisClick: noop, + onResetAnalysisClick: noop, + onCompleteInBackgroundClick: noop, + onClearAnalysisClick: noop, +}; + +export const Empty: Story<{}> = () => { + return ; +}; + +export const Loading: Story<{}> = () => { + return ; +}; + +export const LoadingWithoutCompleteInBackground: Story<{}> = () => { + return ; +}; + +const error = new Error('Failed to load analysis'); + +export const WithError: Story<{}> = () => { + return ( + + ); +}; + +export const Completed: Story<{}> = () => { + return ( + + ); +}; diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_container/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_container/index.tsx new file mode 100644 index 0000000000000..1651f039bdbf5 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_container/index.tsx @@ -0,0 +1,338 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ +import { + RCA_END_PROCESS_TOOL_NAME, + RCA_INVESTIGATE_ENTITY_TOOL_NAME, + RCA_OBSERVE_TOOL_NAME, +} from '@kbn/observability-ai-common/root_cause_analysis'; +import { EuiButton, EuiFlexGroup, EuiFlexItem, EuiText, useEuiTheme } from '@elastic/eui'; +import { i18n } from '@kbn/i18n'; +import { AssistantMessage, MessageRole, ToolMessage } from '@kbn/inference-common'; +import type { + RootCauseAnalysisEvent, + RootCauseAnalysisToolMessage, + RootCauseAnalysisToolRequest, + ToolErrorMessage, +} from '@kbn/observability-ai-server/root_cause_analysis'; +import { findLast } from 'lodash'; +import React from 'react'; +import { css } from '@emotion/css'; +import { EntityBadge } from '../entity_badge'; +import { RootCauseAnalysisCallout } from '../rca_callout'; +import { RootCauseAnalysisEntityInvestigation } from '../rca_entity_investigation'; +import { RootCauseAnalysisObservationPanel } from '../rca_observation_panel'; +import { RootCauseAnalysisReport } from '../rca_report'; +import { RootCauseAnalysisStepItem } from '../rca_step'; +import { RootCauseAnalysisStopButton } from '../rca_stop_button'; + +export function RootCauseAnalysisContainer({ + events, + completeInBackground, + onStartAnalysisClick, + onStopAnalysisClick, + onResetAnalysisClick, + onClearAnalysisClick, + onCompleteInBackgroundClick, + loading, + error, +}: { + events?: RootCauseAnalysisEvent[]; + completeInBackground: boolean; + onStartAnalysisClick: () => void; + onStopAnalysisClick: () => void; + onResetAnalysisClick: () => void; + onClearAnalysisClick: () => void; + onCompleteInBackgroundClick: () => void; + loading: boolean; + error?: Error; +}) { + const theme = useEuiTheme(); + + if (!events?.length && !loading && !error) { + return ( + + ); + } + + const elements: React.ReactElement[] = []; + + const toolResponsesById = new Map( + events + ?.filter( + (event): event is Extract => + event.role === MessageRole.Tool + ) + .map((event) => [event.toolCallId, event]) + ); + + 
events?.forEach((event) => { + if (event.role === MessageRole.Assistant) { + event.toolCalls.forEach((toolCall) => { + switch (toolCall.function.name) { + case RCA_OBSERVE_TOOL_NAME: + elements.push( + getObservationItem( + toolCall.function.arguments.title, + toolResponsesById.get(toolCall.toolCallId) + ) + ); + break; + + case RCA_INVESTIGATE_ENTITY_TOOL_NAME: + case RCA_END_PROCESS_TOOL_NAME: + const response = toolResponsesById.get(toolCall.toolCallId); + const element = response ? getToolResponseItem(response) : undefined; + if (element) { + elements.push(element); + } + break; + } + }); + } + }); + + const clearButton = ( + { + onClearAnalysisClick(); + }} + iconType="crossInCircle" + > + {i18n.translate('xpack.observabilityAiAssistant.rca.clearButtonLabel', { + defaultMessage: 'Clear', + })} + + ); + + const restartButton = ( + { + onResetAnalysisClick(); + }} + iconType="refresh" + > + {i18n.translate('xpack.observabilityAiAssistant.rca.restartButtonLabel', { + defaultMessage: 'Restart', + })} + + ); + + if (loading) { + const label = getLoadingLabel(events); + elements.push( + { + onStopAnalysisClick(); + }} + /> + ) + } + loading + /> + ); + } else if (error) { + elements.push( + + {clearButton} + {restartButton} + + } + /> + ); + } else { + // completed + elements.push( + + {i18n.translate('xpack.observabilityAiAssistant.rca.analysisCompleted', { + defaultMessage: 'Completed analysis', + })} + + } + iconType="checkInCircleFilled" + color="primary" + button={ + + {clearButton} + {restartButton} + + } + /> + ); + } + + return ( + + {elements.map((element, index) => { + return React.cloneElement(element, { key: index }); + })} + + ); +} + +function getLoadingLabel(events?: RootCauseAnalysisEvent[]) { + const lastAssistantMessage = findLast( + events, + (event): event is Extract => + event.role === MessageRole.Assistant + ); + + if (lastAssistantMessage) { + const toolResponsesByToolCallId = new Map( + events + ?.filter( + (event): event is Extract => + 
event.role === MessageRole.Tool + ) + .map((event) => [event.toolCallId, event]) + ); + const pendingToolCalls = lastAssistantMessage.toolCalls.filter((event) => { + const response = toolResponsesByToolCallId.get(event.toolCallId); + + return !response; + }); + + const allInvestigateEntityToolCalls = pendingToolCalls.filter( + (event): event is RootCauseAnalysisToolRequest => + event.function.name === RCA_INVESTIGATE_ENTITY_TOOL_NAME + ); + + if (allInvestigateEntityToolCalls.length) { + return ( + + + {i18n.translate('xpack.observabilityAiAssistant.rca.investigatingEntitiesTextLabel', { + defaultMessage: 'Investigating entities', + })} + + + + {allInvestigateEntityToolCalls.map((toolCall) => { + return ( + + + + ); + })} + + + + ); + } + + if (pendingToolCalls[0]?.function.name === RCA_END_PROCESS_TOOL_NAME) { + return i18n.translate('xpack.observabilityAiAssistant.rca.finalizingReport', { + defaultMessage: 'Finalizing report', + }); + } + } + + return i18n.translate('xpack.observabilityAiAssistant.rca.analysisLoadingLabel', { + defaultMessage: 'Thinking...', + }); +} + +function getToolResponseErrorItem(response: ToolErrorMessage['response']) { + return ( + + ); +} + +function getToolResponseItem( + message: RootCauseAnalysisToolMessage | ToolErrorMessage +): React.ReactElement | null { + if (message.name === 'observe') { + return null; + } + + if (message.name === 'error') { + return getToolResponseErrorItem(message.response); + } + + if (message.name === 'investigateEntity') { + return ( + + ); + } + + return ( + + ); +} + +function getObservationItem( + title: React.ReactNode, + toolResponse?: RootCauseAnalysisToolMessage | ToolErrorMessage +) { + let content: string | undefined; + switch (toolResponse?.name) { + case 'observe': + content = toolResponse.response.content; + break; + + case 'error': + content = i18n.translate('xpack.observabilityAiAssistant.rca.failedObservation', { + defaultMessage: 'Failed to generate observations', + }); + break; + } + + 
return ( + + ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_investigation/index.stories.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_investigation/index.stories.tsx new file mode 100644 index 0000000000000..0601a0cdf1f6f --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_investigation/index.stories.tsx @@ -0,0 +1,31 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { Meta, Story } from '@storybook/react'; +import React from 'react'; +import { RootCauseAnalysisEntityInvestigation } from '.'; +import { controllerEntityHealthAnalysis } from '../mock'; + +const stories: Meta<{}> = { + title: 'RCA/EntityInvestigation', + component: RootCauseAnalysisEntityInvestigation, +}; + +export default stories; + +export const Default: Story<{}> = () => { + return ( + + ); +}; diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_investigation/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_investigation/index.tsx new file mode 100644 index 0000000000000..b74f5969fa08f --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_investigation/index.tsx @@ -0,0 +1,57 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ +import { EuiFlexGroup, EuiFlexItem, EuiMarkdownFormat, EuiPanel, EuiTitle } from '@elastic/eui'; +import { i18n } from '@kbn/i18n'; +import type { EntityInvestigation } from '@kbn/observability-ai-server/root_cause_analysis/tasks/investigate_entity/types'; +import React from 'react'; +import { EntityBadge } from '../entity_badge'; +import { RootCauseAnalysisCollapsiblePanel } from '../rca_collapsible_panel'; +import { RootCauseAnalysisEntityLogPatternTable } from '../rca_entity_log_pattern_table'; + +export function RootCauseAnalysisEntityInvestigation({ + summary, + entity, + ownPatterns, + patternsFromOtherEntities, +}: { + summary: string; + entity: Record; +} & Pick) { + return ( + + +

+ {i18n.translate( + 'xpack.observabilityAiAssistant.rootCauseAnalysisEntityInvestigation.title', + { + defaultMessage: 'Investigation', + } + )} +

+
+ + + + + } + content={ + + + {summary} + + + + } + /> + ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_log_pattern_table/index.stories.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_log_pattern_table/index.stories.tsx new file mode 100644 index 0000000000000..16596832eff89 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_log_pattern_table/index.stories.tsx @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { Meta, Story } from '@storybook/react'; +import React from 'react'; +import { RootCauseAnalysisEntityLogPatternTable } from '.'; +import { controllerEntityHealthAnalysis } from '../mock'; + +const stories: Meta<{}> = { + title: 'RCA/EntityLogPatternTable', + component: RootCauseAnalysisEntityLogPatternTable, +}; + +export default stories; + +export const Default: Story<{}> = () => { + return ( + + ); +}; diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_log_pattern_table/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_log_pattern_table/index.tsx new file mode 100644 index 0000000000000..10d78eef6d475 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_entity_log_pattern_table/index.tsx @@ -0,0 +1,299 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import { + EuiBadge, + EuiBasicTable, + EuiBasicTableColumn, + EuiCheckbox, + EuiFlexGroup, + EuiFlexItem, + EuiText, + EuiThemeComputed, +} from '@elastic/eui'; +import { css } from '@emotion/css'; +import { i18n } from '@kbn/i18n'; +import { formatInteger } from '@kbn/observability-utils-common/format/integer'; +import { highlightPatternFromRegex } from '@kbn/observability-utils-common/llm/log_analysis/highlight_patterns_from_regex'; +import type { EntityInvestigation } from '@kbn/observability-ai-server/root_cause_analysis/tasks/investigate_entity/types'; +import React, { useMemo, useState } from 'react'; +import { orderBy } from 'lodash'; +import type { AnalyzedLogPattern } from '@kbn/observability-ai-server/root_cause_analysis/tasks/analyze_log_patterns'; +import { useTheme } from '../../../hooks/use_theme'; +import { SparkPlot } from '../../charts/spark_plot'; + +const badgeClassName = css` + width: 100%; + .euiBadge__content { + justify-content: center; + } +`; + +const PER_PAGE = 5; + +export function RootCauseAnalysisEntityLogPatternTable({ + entity, + ownPatterns, + patternsFromOtherEntities, +}: Pick & { + entity: Record; +}) { + const theme = useTheme(); + + const [showUsualPatterns, setShowUsualPatterns] = useState(false); + + const [pageIndex, setPageIndex] = useState(0); + + const columns = useMemo((): Array> => { + return [ + { + field: 'relevance', + name: '', + width: '128px', + render: (_, { relevance, metadata }) => { + const color = getRelevanceColor(relevance); + + return ( + + {relevance} + + ); + }, + }, + { + field: 'pattern', + name: i18n.translate( + 'xpack.observabilityAiAssistant.rca.logPatternTable.messageColumnTitle', + { defaultMessage: 'Message' } + ), + render: (_, { regex, sample }) => { + return ( + + ); + }, + }, + { + field: 'count', + name: i18n.translate( + 
'xpack.observabilityAiAssistant.rca.logPatternTable.countColumnTitle', + { defaultMessage: 'Count' } + ), + width: '96px', + render: (_, { count }) => { + return ( + + {formatInteger(count)} + + ); + }, + }, + { + field: 'change', + name: i18n.translate( + 'xpack.observabilityAiAssistant.rca.logPatternTable.changeColumnTitle', + { defaultMessage: 'Change' } + ), + width: '128px', + render: (_, { change }) => { + return getChangeBadge(change); + }, + }, + { + field: 'timeseries', + width: '128px', + name: i18n.translate( + 'xpack.observabilityAiAssistant.rca.logPatternTable.trendColumnTitle', + { defaultMessage: 'Trend' } + ), + render: (_, { timeseries, change }) => { + return ( + + ); + }, + }, + ]; + }, [theme]); + + const allPatterns = useMemo(() => { + return [...ownPatterns, ...patternsFromOtherEntities]; + }, [ownPatterns, patternsFromOtherEntities]); + + const items = useMemo(() => { + return allPatterns.filter((pattern) => { + if (!showUsualPatterns) { + return pattern.relevance !== 'normal'; + } + return pattern; + }); + }, [allPatterns, showUsualPatterns]); + + const visibleItems = useMemo(() => { + const start = pageIndex * PER_PAGE; + return orderBy(items, (item) => relevanceToInt(item.relevance), 'desc').slice( + start, + start + PER_PAGE + ); + }, [pageIndex, items]); + + const paginationOptions = useMemo(() => { + return { + pageIndex, + totalItemCount: items.length, + pageSize: PER_PAGE, + }; + }, [pageIndex, items.length]); + + return ( + + + + + {i18n.translate( + 'xpack.observabilityAiAssistant.rootCauseAnalysisEntityInvestigation.logPatternsTableTitle', + { + defaultMessage: 'Showing {count} of {total} log patterns', + values: { + total: items.length, + count: visibleItems.length, + }, + } + )} + + + + + { + setShowUsualPatterns((prev) => !prev); + }} + /> + + {i18n.translate( + 'xpack.observabilityAiAssistant.rca.logPatternTable.showUsualPatternsCheckbox', + { + defaultMessage: 'Show unremarkable patterns', + } + )} + + + + + { + 
setPageIndex(criteria.page.index); + }} + /> + + ); +} + +function getRelevanceColor(relevance: 'normal' | 'unusual' | 'warning' | 'critical') { + switch (relevance) { + case 'normal': + return 'plain'; + + case 'critical': + return 'danger'; + + case 'warning': + return 'warning'; + + case 'unusual': + return 'primary'; + } +} + +function getSignificanceColor(significance: 'high' | 'medium' | 'low' | null) { + switch (significance) { + case 'high': + return 'danger'; + + case 'medium': + return 'warning'; + + case 'low': + case null: + return 'plain'; + } +} + +function relevanceToInt(relevance: 'normal' | 'unusual' | 'warning' | 'critical') { + switch (relevance) { + case 'normal': + return 0; + case 'unusual': + return 1; + case 'warning': + return 2; + case 'critical': + return 3; + } +} + +function getAnnotationsFromChangePoint({ + change, + theme, + timeseries, +}: { + change: AnalyzedLogPattern['change']; + theme: EuiThemeComputed<{}>; + timeseries: Array<{ x: number; y: number }>; +}): Required['annotations']> { + if (!change.change_point || !change.type) { + return []; + } + + const color = getSignificanceColor(change.significance); + + return [ + { + color: color === 'plain' ? theme.colors.subduedText : theme.colors[color], + id: '1', + icon: '*', + label: {change.type}, + x: timeseries[change.change_point].x, + }, + ]; +} + +export function getChangeBadge(change: AnalyzedLogPattern['change']) { + return ( + + {change.significance ?? 
'No change'} + + ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_observation_panel/index.stories.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_observation_panel/index.stories.tsx new file mode 100644 index 0000000000000..5ec46729e7c74 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_observation_panel/index.stories.tsx @@ -0,0 +1,36 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { Meta, Story } from '@storybook/react'; +import React from 'react'; +import { RootCauseAnalysisObservationPanel } from '.'; + +const stories: Meta<{}> = { + title: 'RCA/ObservationPanel', + component: RootCauseAnalysisObservationPanel, +}; + +const content = + 'The high rate of HTTP 500 errors in the controller service for the /api/cart endpoint is likely due to issues with the upstream service default-my-otel-demo-frontendproxy-8080, as indicated by logs showing upstream prematurely closed connections. 
The next step is to investigate the health and performance of the upstream service default-my-otel-demo-frontendproxy-8080.'; + +export default stories; + +export const Default: Story<{}> = () => { + return ( + + ); +}; + +export const Loading: Story<{}> = () => { + return ( + + ); +}; diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_observation_panel/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_observation_panel/index.tsx new file mode 100644 index 0000000000000..ee00f568ca688 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_observation_panel/index.tsx @@ -0,0 +1,64 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { + EuiFlexGroup, + EuiFlexItem, + EuiLoadingSpinner, + EuiMarkdownFormat, + EuiText, + EuiTitle, + useEuiTheme, +} from '@elastic/eui'; +import { i18n } from '@kbn/i18n'; +import React from 'react'; +import { css } from '@emotion/css'; +import { RootCauseAnalysisCollapsiblePanel } from '../rca_collapsible_panel'; + +export function RootCauseAnalysisObservationPanel({ + content, + title, + loading, +}: { + content?: string; + title: React.ReactNode; + loading?: boolean; +}) { + const theme = useEuiTheme().euiTheme; + return ( + + +

+ {i18n.translate('xpack.observabilityAiAssistant.rca.observationPanelTitle', { + defaultMessage: 'Observations', + })} +

+
+ + + + {title} + + {loading ? : null} + + + + } + color="success" + content={content ? {content} : null} + /> + ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_panel/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_panel/index.tsx new file mode 100644 index 0000000000000..2ad1225a10f71 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_panel/index.tsx @@ -0,0 +1,36 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { EuiPanel } from '@elastic/eui'; +import { css } from '@emotion/css'; +import { rgba } from 'polished'; +import React from 'react'; +import { useTheme } from '../../../hooks/use_theme'; + +export function RootCauseAnalysisPanel({ + children, + color, +}: { + children: React.ReactElement; + color?: React.ComponentProps['color']; +}) { + const theme = useTheme(); + + const panelClassName = + color && color !== 'transparent' && color !== 'plain' && color !== 'subdued' + ? css` + border: 1px solid; + border-color: ${rgba(theme.colors[color], 0.25)}; + ` + : undefined; + + return ( + + {children} + + ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_report/index.stories.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_report/index.stories.tsx new file mode 100644 index 0000000000000..cb32631fc7a73 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_report/index.stories.tsx @@ -0,0 +1,31 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { Meta, Story } from '@storybook/react'; +import React from 'react'; +import { RootCauseAnalysisReport } from '.'; + +const report = + '## 1. Understanding the Context and Hypothesis\n\n### Context:\n- **Issue:** High rate of HTTP 500 errors for the `/api/cart` endpoint in the `controller` service.\n- **Objective:** Investigate internal issues or anomalies that could explain the alert.\n\n### Hypothesis:\n- The high rate of HTTP 500 errors could be due to internal issues within the `controller` service, such as resource exhaustion, misconfigurations, or upstream service failures.\n\n## 2. Entity Overview\n\n### Entity: `controller`\n- **Environment:** Kubernetes pod, running in `opentelemetry-demo`\n- **Language:** Not specified\n- **Communication:** HTTP\n- **Cloud Provider:** Not specified, but running on `minikube`\n\n## 3. Identifying Related Entities\n\n### Upstream Dependencies:\n- **Upstream Service:** `default-my-otel-demo-frontendproxy-8080`\n - **Evidence:** Log entries show frequent API calls to `default-my-otel-demo-frontendproxy-8080` for various endpoints, including `/api/cart`.\n\n### Downstream Dependencies:\n- **Downstream Service:** Not explicitly mentioned, but the `controller` service is likely serving multiple endpoints, including `/api/cart`.\n\n### Infrastructure:\n- **Pod:** `ingress-nginx-controller-bc57996ff-qrd25`\n - **Evidence:** Logs and metadata indicate the `controller` service is running in this pod.\n\n## 4. 
Health Status Assessment\n\n### Active Alerts:\n- **Alert:** Custom threshold alert for HTTP 500 errors on `/api/cart` endpoint.\n - **Reason:** Custom equation is 100, above the threshold of 5.\n - **Duration:** 1 min\n - **Data View:** logs-*\n - **Group:** `controller`, `/api/cart`\n - **Evaluation Values:** [100]\n - **Threshold:** [5]\n - **Status:** Active\n - **Start Time:** 2024-10-21T08:51:51.846Z\n\n### SLO Performance:\n- **No SLOs** specified for this entity.\n\n### Log Patterns and Anomalies:\n- **Normal Logs:**\n - Logs show normal GET and POST requests with HTTP 200 responses.\n- **Critical Logs:**\n - Logs indicate upstream prematurely closed connection while reading upstream, leading to HTTP 500 errors.\n - Example: `2024/10/21 08:37:11 [error] 36#36: *3518505 upstream prematurely closed connection while reading upstream, client: 10.244.0.117, server: otel-demo.internal, request: "POST /flagservice/flagd.evaluation.v1.Service/EventStream HTTP/1.1", upstream: "http://10.244.0.119:8080/flagservice/flagd.evaluation.v1.Service/EventStream", host: "ingress-nginx-controller.ingress-nginx.svc.cluster.local", referrer: "http://ingress-nginx-controller.ingress-nginx.svc.cluster.local/cart"`\n\n## 5. Relevance of Entity to the Investigation\n\n### Relevance:\n- The `controller` service is directly related to the investigation due to the elevated HTTP 500 error rate on the `/api/cart` endpoint.\n- The critical log entries suggest that the issue might be related to upstream service failures, specifically the `default-my-otel-demo-frontendproxy-8080` service.\n\n### Signs Aligning with Hypothesis:\n- **Resource Exhaustion:** Not explicitly indicated in the logs, but the high rate of HTTP 500 errors could be a symptom.\n- **Upstream Service Failures:** Logs indicate upstream prematurely closed connections, which aligns with the hypothesis of upstream service issues.\n\n## 6. Timeline of Significant Events\n\n### Timeline:\n1. 
**2024-10-21T08:37:11.050Z:** Critical log entry indicating upstream prematurely closed connection while reading upstream.\n2. **2024-10-21T08:51:51.846Z:** Alert for high rate of HTTP 500 errors on `/api/cart` endpoint becomes active.\n3. **2024-10-21T09:34:52.720Z:** Alert status remains active with a custom equation value of 100, above the threshold of 5.\n\n## 7. Next Steps or Root Cause Identification\n\n### Next Steps:\n1. **Investigate Upstream Service (`default-my-otel-demo-frontendproxy-8080`):**\n - Check the health and performance of the upstream service.\n - Look for any signs of resource exhaustion, connection issues, or misconfigurations.\n2. **Review Recent Changes:**\n - Check for any recent deployments or configuration changes in the `controller` service and its upstream dependencies.\n3. **Analyze Resource Usage:**\n - Monitor CPU, memory, and network usage for the `controller` service to identify any resource-related issues.\n\n### Potential Root Cause:\n- **Upstream Service Failures:** The critical log entries indicating upstream prematurely closed connections suggest that the root cause might be related to issues in the upstream service (`default-my-otel-demo-frontendproxy-8080`), leading to HTTP 500 errors in the `controller` service.\n\nBy following these steps, we can further narrow down the root cause and take appropriate actions to resolve the issue.'; + +const stories: Meta<{}> = { + title: 'RCA/Report', + component: RootCauseAnalysisReport, +}; + +export default stories; + +export const Default: Story<{}> = () => { + return ( + + ); +}; diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_report/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_report/index.tsx new file mode 100644 index 0000000000000..cd793786eb6cb --- /dev/null +++ 
b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_report/index.tsx @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import React from 'react'; +import { i18n } from '@kbn/i18n'; +import { + EuiFlexGroup, + EuiHorizontalRule, + EuiMarkdownFormat, + EuiPanel, + EuiTitle, +} from '@elastic/eui'; +import type { SignificantEventsTimeline } from '@kbn/observability-ai-server/root_cause_analysis'; + +export function RootCauseAnalysisReport({ + report, + timeline, +}: { + report: string; + timeline: SignificantEventsTimeline; +}) { + return ( + + + + +

+ {i18n.translate('xpack.observabilityAiAssistant.rootCauseAnalysisReport.title', { + defaultMessage: 'Report', + })} +

+
+
+
+ + {report} +
+ ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_step/index.stories.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_step/index.stories.tsx new file mode 100644 index 0000000000000..fa5b184800216 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_step/index.stories.tsx @@ -0,0 +1,60 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { Meta, Story } from '@storybook/react'; +import React from 'react'; +import { EuiBadge, EuiFlexGroup, EuiFlexItem } from '@elastic/eui'; +import { RootCauseAnalysisStepItem } from '.'; + +const stories: Meta<{}> = { + title: 'RCA/StepItem', + component: RootCauseAnalysisStepItem, +}; + +export default stories; + +export const Default: Story<{}> = () => { + return ( + + Investigating + service.name:controller + + } + /> + ); +}; + +export const Loading: Story<{}> = () => { + return ( + + Investigating + service.name:controller + + } + /> + ); +}; + +export const WithColor: Story<{}> = () => { + return ( + + Investigating + service.name:controller + + } + /> + ); +}; diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_step/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_step/index.tsx new file mode 100644 index 0000000000000..d887c75c1b4a5 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_step/index.tsx @@ -0,0 +1,73 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { + EuiFlexGroup, + EuiFlexItem, + EuiIcon, + EuiLoadingSpinner, + EuiPanel, + EuiText, +} from '@elastic/eui'; +import { css } from '@emotion/css'; +import React from 'react'; +import { RootCauseAnalysisPanel } from '../rca_panel'; + +export interface RootCauseAnalysisStepItemProps { + label: React.ReactNode; + loading?: boolean; + color?: React.ComponentProps['color']; + button?: React.ReactElement; + iconType?: React.ComponentProps['type']; +} + +export function RootCauseAnalysisStepItem({ + label, + loading, + color, + iconType, + button, +}: RootCauseAnalysisStepItemProps) { + return ( + + + + + {loading ? ( + + ) : ( + + )} + + + + + {label} + + + {button ? {button} : null} + + + ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_stop_button/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_stop_button/index.tsx new file mode 100644 index 0000000000000..7ef0b175b636d --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_stop_button/index.tsx @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ +import React from 'react'; +import { i18n } from '@kbn/i18n'; +import { EuiButton } from '@elastic/eui'; + +export function RootCauseAnalysisStopButton({ onClick }: { onClick: () => void }) { + return ( + { + onClick(); + }} + iconType="stopFilled" + color="text" + > + {i18n.translate('xpack.observabilityAiAssistant.rca.stopAnalysisButtonLabel', { + defaultMessage: 'Stop', + })} + + ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_task_step/index.stories.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_task_step/index.stories.tsx new file mode 100644 index 0000000000000..4bc31dcf7cc4f --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_task_step/index.stories.tsx @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { Meta, Story } from '@storybook/react'; +import React from 'react'; +import { EuiBadge, EuiFlexGroup, EuiFlexItem } from '@elastic/eui'; +import { RootCauseAnalysisTaskStepItem } from '.'; +import { EntityBadge } from '../entity_badge'; + +const stories: Meta<{}> = { + title: 'RCA/TaskStepItem', + component: RootCauseAnalysisTaskStepItem, +}; + +export default stories; + +export const Pending: Story<{}> = () => { + return ( + + Investigating + + + + + } + /> + ); +}; + +export const Completed: Story<{}> = () => { + return ( + + Completed investigation + + + + + } + /> + ); +}; + +export const Failure: Story<{}> = () => { + return ( + + Failed investigating + service.name:controller + + } + /> + ); +}; diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_task_step/index.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_task_step/index.tsx new file mode 100644 index 0000000000000..b29d8a834745d --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/components/rca/rca_task_step/index.tsx @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import React from 'react'; +import { RootCauseAnalysisStepItem } from '../rca_step'; + +export function RootCauseAnalysisTaskStepItem({ + label, + status, +}: { + label: React.ReactNode; + status: 'pending' | 'completed' | 'failure'; +}) { + let color: React.ComponentProps['color']; + let iconType: React.ComponentProps['iconType']; + + let loading: boolean | undefined; + + if (status === 'failure') { + color = 'danger'; + iconType = 'alert'; + } else if (status === 'completed') { + color = 'success'; + iconType = 'checkInCircleFilled'; + } else { + color = 'primary'; + loading = true; + } + + return ( + + ); +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/index.ts b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/index.ts index 5de1c30de7c4c..9fd8b9158f4c8 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/index.ts +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/index.ts @@ -14,6 +14,8 @@ import type { ObservabilityAIAssistantAppPublicStart, } from './types'; +export type { ObservabilityAIAssistantAppPublicSetup, ObservabilityAIAssistantAppPublicStart }; + export const plugin: PluginInitializer< ObservabilityAIAssistantAppPublicSetup, ObservabilityAIAssistantAppPublicStart, diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/plugin.tsx b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/plugin.tsx index cd1285b0017ce..218b663525dbd 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/plugin.tsx +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/plugin.tsx @@ -4,7 +4,7 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. 
*/ -import React from 'react'; +import React, { lazy } from 'react'; import ReactDOM from 'react-dom'; import { type AppMountParameters, @@ -18,6 +18,7 @@ import type { Logger } from '@kbn/logging'; import { i18n } from '@kbn/i18n'; import { AI_ASSISTANT_APP_ID } from '@kbn/deeplinks-observability'; import { createAppService, AIAssistantAppService } from '@kbn/ai-assistant'; +import { withSuspense } from '@kbn/shared-ux-utility'; import type { ObservabilityAIAssistantAppPluginSetupDependencies, ObservabilityAIAssistantAppPluginStartDependencies, @@ -26,6 +27,7 @@ import type { } from './types'; import { getObsAIAssistantConnectorType } from './rule_connector'; import { NavControlInitiator } from './components/nav_control/lazy_nav_control'; +import { SharedProviders } from './utils/shared_providers'; // eslint-disable-next-line @typescript-eslint/no-empty-interface export interface ConfigSchema {} @@ -135,9 +137,33 @@ export class ObservabilityAIAssistantAppPlugin await registerFunctions({ pluginsStart, registerRenderFunction }); }); + const withProviders =

(Component: React.ComponentType

) => + React.forwardRef((props: P, ref: React.Ref) => ( + + + + )); + + const LazilyLoadedRootCauseAnalysisContainer = withSuspense( + withProviders( + lazy(() => + import('./components/rca/rca_container').then((m) => ({ + default: m.RootCauseAnalysisContainer, + })) + ) + ) + ); + pluginsStart.triggersActionsUi.actionTypeRegistry.register( getObsAIAssistantConnectorType(service) ); - return {}; + return { + RootCauseAnalysisContainer: LazilyLoadedRootCauseAnalysisContainer, + }; } } diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/types.ts b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/types.ts index 398ef5f3afe4c..22bdf2b12236e 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/types.ts +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/public/types.ts @@ -31,9 +31,13 @@ import type { AIAssistantManagementSelectionPluginPublicStart, AIAssistantManagementSelectionPluginPublicSetup, } from '@kbn/ai-assistant-management-plugin/public'; +import type { RootCauseAnalysisContainer } from './components/rca/rca_container'; -// eslint-disable-next-line @typescript-eslint/no-empty-interface -export interface ObservabilityAIAssistantAppPublicStart {} +export interface ObservabilityAIAssistantAppPublicStart { + RootCauseAnalysisContainer: React.ExoticComponent< + React.ComponentProps + >; +} // eslint-disable-next-line @typescript-eslint/no-empty-interface export interface ObservabilityAIAssistantAppPublicSetup {} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/clients/create_observability_ai_assistant_app_es_client.ts b/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/clients/create_observability_ai_assistant_app_es_client.ts new file mode 100644 index 0000000000000..babb6c1273370 --- /dev/null +++ 
b/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/clients/create_observability_ai_assistant_app_es_client.ts @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { createObservabilityEsClient } from '@kbn/observability-utils-server/es/client/create_observability_es_client'; +import { ElasticsearchClient, Logger } from '@kbn/core/server'; + +export async function createObservabilityAIAssistantAppEsClient({ + client, + logger, +}: { + client: ElasticsearchClient; + logger: Logger; +}) { + const esClient = createObservabilityEsClient({ + client, + logger, + plugin: 'observabilityAIAssistantApp', + }); + + return esClient; +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/types.ts b/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/types.ts index 0a3fc6d9dc12d..cd9f578d99093 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/types.ts +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/types.ts @@ -35,6 +35,8 @@ import type { ObservabilityPluginSetup } from '@kbn/observability-plugin/server' import type { InferenceServerStart, InferenceServerSetup } from '@kbn/inference-plugin/server'; import type { LogsDataAccessPluginStart } from '@kbn/logs-data-access-plugin/server'; import type { LlmTasksPluginStart } from '@kbn/llm-tasks-plugin/server'; +import type { SLOServerStart, SLOServerSetup } from '@kbn/slo-plugin/server'; +import type { SpacesPluginStart, SpacesPluginSetup } from '@kbn/spaces-plugin/server'; // eslint-disable-next-line @typescript-eslint/no-empty-interface export interface ObservabilityAIAssistantAppServerStart {} @@ -55,6 +57,8 @@ export interface 
ObservabilityAIAssistantAppPluginStartDependencies { serverless?: ServerlessPluginStart; inference: InferenceServerStart; logsDataAccess: LogsDataAccessPluginStart; + slo: SLOServerStart; + spaces: SpacesPluginStart; llmTasks: LlmTasksPluginStart; } @@ -72,4 +76,6 @@ export interface ObservabilityAIAssistantAppPluginSetupDependencies { cloud?: CloudSetup; serverless?: ServerlessPluginSetup; inference: InferenceServerSetup; + slo: SLOServerSetup; + spaces: SpacesPluginSetup; } diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/util/get_log_sources.ts b/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/util/get_log_sources.ts new file mode 100644 index 0000000000000..6532e686a2eb2 --- /dev/null +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/server/util/get_log_sources.ts @@ -0,0 +1,13 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { IUiSettingsClient } from '@kbn/core/server'; +import { OBSERVABILITY_LOGS_DATA_ACCESS_LOG_SOURCES_ID } from '@kbn/management-settings-ids'; + +export function getLogSources({ uiSettings }: { uiSettings: IUiSettingsClient }) { + return uiSettings.get(OBSERVABILITY_LOGS_DATA_ACCESS_LOG_SOURCES_ID) as Promise; +} diff --git a/x-pack/plugins/observability_solution/observability_ai_assistant_app/tsconfig.json b/x-pack/plugins/observability_solution/observability_ai_assistant_app/tsconfig.json index e0a520fb574c7..cc104cddc7288 100644 --- a/x-pack/plugins/observability_solution/observability_ai_assistant_app/tsconfig.json +++ b/x-pack/plugins/observability_solution/observability_ai_assistant_app/tsconfig.json @@ -10,7 +10,8 @@ "scripts/**/*", "typings/**/*", "public/**/*.json", - "server/**/*" + "server/**/*", + ".storybook/**/*.tsx" ], "kbn_references": [ "@kbn/es-types", @@ -69,7 +70,14 @@ "@kbn/cloud-plugin", "@kbn/logs-data-access-plugin", "@kbn/ai-assistant-common", + "@kbn/observability-utils-server", + "@kbn/observability-utils-common", + "@kbn/slo-plugin", + "@kbn/spaces-plugin", + "@kbn/data-service", "@kbn/inference-common", + "@kbn/observability-ai-server", + "@kbn/observability-ai-common", "@kbn/llm-tasks-plugin", "@kbn/product-doc-common", ], diff --git a/x-pack/plugins/observability_solution/slo/server/client/index.ts b/x-pack/plugins/observability_solution/slo/server/client/index.ts new file mode 100644 index 0000000000000..277aeb71399b1 --- /dev/null +++ b/x-pack/plugins/observability_solution/slo/server/client/index.ts @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ +import type { + ElasticsearchClient, + KibanaRequest, + SavedObjectsClientContract, +} from '@kbn/core/server'; +import { castArray, once } from 'lodash'; +import { getListOfSummaryIndices, getSloSettings } from '../services/slo_settings'; + +export interface SloClient { + getSummaryIndices(): Promise; +} + +export function getSloClientWithRequest({ + esClient, + soClient, +}: { + request: KibanaRequest; + esClient: ElasticsearchClient; + soClient: SavedObjectsClientContract; +}): SloClient { + const getListOfSummaryIndicesOnce = once(async () => { + const settings = await getSloSettings(soClient); + + const { indices } = await getListOfSummaryIndices(esClient, settings); + + return castArray(indices); + }); + + return { + getSummaryIndices: async () => { + return await getListOfSummaryIndicesOnce(); + }, + }; +} diff --git a/x-pack/plugins/observability_solution/slo/server/index.ts b/x-pack/plugins/observability_solution/slo/server/index.ts index 76806ac4d16cd..b38956141137d 100644 --- a/x-pack/plugins/observability_solution/slo/server/index.ts +++ b/x-pack/plugins/observability_solution/slo/server/index.ts @@ -16,6 +16,10 @@ export async function plugin(ctx: PluginInitializerContext) { return new SLOPlugin(ctx); } +export type { SloClient } from './client'; + +export type { SLOServerSetup, SLOServerStart } from './types'; + export const config: PluginConfigDescriptor = { schema: configSchema, exposeToBrowser: { diff --git a/x-pack/plugins/observability_solution/slo/server/plugin.ts b/x-pack/plugins/observability_solution/slo/server/plugin.ts index 99cd4a2230a94..e7b25fe06ef32 100644 --- a/x-pack/plugins/observability_solution/slo/server/plugin.ts +++ b/x-pack/plugins/observability_solution/slo/server/plugin.ts @@ -9,6 +9,7 @@ import { CoreSetup, CoreStart, DEFAULT_APP_CATEGORIES, + KibanaRequest, Logger, Plugin, PluginInitializerContext, @@ -36,6 +37,7 @@ import type { SLOServerSetup, SLOServerStart, } from './types'; +import { getSloClientWithRequest } 
from './client'; const sloRuleTypes = [SLO_BURN_RATE_RULE_TYPE_ID]; @@ -179,6 +181,14 @@ export class SLOPlugin ?.start(plugins.taskManager, internalSoClient, internalEsClient) .catch(() => {}); - return {}; + return { + getSloClientWithRequest: (request: KibanaRequest) => { + return getSloClientWithRequest({ + request, + soClient: core.savedObjects.getScopedClient(request), + esClient: internalEsClient, + }); + }, + }; } } diff --git a/x-pack/plugins/observability_solution/slo/server/types.ts b/x-pack/plugins/observability_solution/slo/server/types.ts index 9a40547820182..5ec6e2c51e902 100644 --- a/x-pack/plugins/observability_solution/slo/server/types.ts +++ b/x-pack/plugins/observability_solution/slo/server/types.ts @@ -21,14 +21,17 @@ import { TaskManagerStartContract, } from '@kbn/task-manager-plugin/server'; import { UsageCollectionSetup } from '@kbn/usage-collection-plugin/server'; +import type { KibanaRequest } from '@kbn/core/server'; +import type { SloClient } from './client'; export type { SLOConfig } from '../common/config'; // eslint-disable-next-line @typescript-eslint/no-empty-interface export interface SLOServerSetup {} -// eslint-disable-next-line @typescript-eslint/no-empty-interface -export interface SLOServerStart {} +export interface SLOServerStart { + getSloClientWithRequest: (request: KibanaRequest) => SloClient; +} export interface SLOPluginSetupDependencies { alerting: AlertingServerSetup; diff --git a/yarn.lock b/yarn.lock index 47aad7866ae0c..b16576e2fb0f0 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6460,6 +6460,14 @@ version "0.0.0" uid "" +"@kbn/observability-ai-common@link:x-pack/packages/observability/observability_ai/observability_ai_common": + version "0.0.0" + uid "" + +"@kbn/observability-ai-server@link:x-pack/packages/observability/observability_ai/observability_ai_server": + version "0.0.0" + uid "" + "@kbn/observability-alert-details@link:x-pack/packages/observability/alert_details": version "0.0.0" uid "" @@ -19586,6 +19594,11 
@@ functions-have-names@^1.2.3: resolved "https://registry.yarnpkg.com/functions-have-names/-/functions-have-names-1.2.3.tgz#0404fe4ee2ba2f607f0e0ec3c80bae994133b834" integrity sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ== +fuse.js@^7.0.0: + version "7.0.0" + resolved "https://registry.yarnpkg.com/fuse.js/-/fuse.js-7.0.0.tgz#6573c9fcd4c8268e403b4fc7d7131ffcf99a9eb2" + integrity sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q== + gauge@^3.0.0: version "3.0.2" resolved "https://registry.yarnpkg.com/gauge/-/gauge-3.0.2.tgz#03bf4441c044383908bcfa0656ad91803259b395"