From 967c5c147825d21691f0a572dfdb069cb2e3cccf Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Tue, 18 Jun 2024 09:49:50 +0200 Subject: [PATCH] [ML] AIOps Log Rate Analysis: Fix text field selection (#186176) If we analyse all detected text fields, we might run into performance issues with the `categorize_text` aggregation. Until this is resolved, we will rely on a predefined white list of supported text fields, for now set to `message` and `error.message`. (cherry picked from commit d3b81237ee3138e7a62ae34c19f65109b751f0b3) --- .../queries/__mocks__/field_caps_pgbench.ts | 10 ++++++++++ .../queries/fetch_index_info.ts | 8 +++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/x-pack/packages/ml/aiops_log_rate_analysis/queries/__mocks__/field_caps_pgbench.ts b/x-pack/packages/ml/aiops_log_rate_analysis/queries/__mocks__/field_caps_pgbench.ts index a4d85d8673971..bc3aec796ebe6 100644 --- a/x-pack/packages/ml/aiops_log_rate_analysis/queries/__mocks__/field_caps_pgbench.ts +++ b/x-pack/packages/ml/aiops_log_rate_analysis/queries/__mocks__/field_caps_pgbench.ts @@ -8,6 +8,16 @@ export const fieldCapsPgBenchMock = { indices: ['.ds-filebeat-8.2.0-2022.06.07-000082'], fields: { + // The next two fields are not in the original field caps response, + // but are added here to test the logic to ignore fields that are not + // in the white list. It's based on a real world example where the mapping + // included a double mapping of text+integer. + ignore_this_text_field: { + text: { type: 'text', metadata_field: false, searchable: true, aggregatable: false }, + }, + 'ignore_this_text_field.int': { + integer: { type: 'integer', metadata_field: false, searchable: true, aggregatable: true }, + }, 'kubernetes.node.uid': { keyword: { type: 'keyword', metadata_field: false, searchable: true, aggregatable: true }, }, diff --git a/x-pack/packages/ml/aiops_log_rate_analysis/queries/fetch_index_info.ts b/x-pack/packages/ml/aiops_log_rate_analysis/queries/fetch_index_info.ts index 1bb5b701fdd17..458fc630b5009 100644 --- a/x-pack/packages/ml/aiops_log_rate_analysis/queries/fetch_index_info.ts +++ b/x-pack/packages/ml/aiops_log_rate_analysis/queries/fetch_index_info.ts @@ -17,6 +17,12 @@ import { getTotalDocCountRequest } from './get_total_doc_count_request'; // TODO Consolidate with duplicate `fetchPValues` in // `x-pack/plugins/observability_solution/apm/server/routes/correlations/queries/fetch_duration_field_candidates.ts` +// Supported field names for text fields for log rate analysis. +// If we analyse all detected text fields, we might run into performance +// issues with the `categorize_text` aggregation. Until this is resolved, we +// rely on a predefined white list of supported text fields. +const TEXT_FIELD_WHITE_LIST = ['message', 'error.message']; + const SUPPORTED_ES_FIELD_TYPES = [ ES_FIELD_TYPES.KEYWORD, ES_FIELD_TYPES.IP, @@ -76,7 +82,7 @@ export const fetchIndexInfo = async ( acceptableFields.add(key); } - if (isTextField) { + if (isTextField && TEXT_FIELD_WHITE_LIST.includes(key)) { acceptableTextFields.add(key); }