From 4566ef71eca07bf0b6e386e277d6364bfe8cd4fa Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Mon, 12 Feb 2024 16:33:17 +0100 Subject: [PATCH] [ML] AIOps: Use `ml_standard` tokenizer for log rate analysis. (#176587) ## Summary Fixes #176387. The `standard` analyser for log pattern analysis introduced in #172188 might return patterns that interfere with identifying significant patterns across time ranges, for example if a pattern matches different parts of a date or time. This adds an option that allows setting the analyser for log rate analysis to `ml_standard` while keeping `standard` for log pattern analysis. ### Checklist - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios - [x] [Flaky Test Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was used on any tests changed - [x] This was checked for breaking API changes and was [labeled appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process) --- .../create_category_request.ts | 5 +- .../queries/fetch_categories.test.ts | 55 ------------------- .../queries/fetch_categories.ts | 5 +- .../apps/aiops/log_rate_analysis.ts | 3 +- 4 files changed, 8 insertions(+), 60 deletions(-) diff --git a/x-pack/plugins/aiops/common/api/log_categorization/create_category_request.ts b/x-pack/plugins/aiops/common/api/log_categorization/create_category_request.ts index 1c5a4745064b5..a5a3efae6f450 100644 --- a/x-pack/plugins/aiops/common/api/log_categorization/create_category_request.ts +++ b/x-pack/plugins/aiops/common/api/log_categorization/create_category_request.ts @@ -31,7 +31,8 @@ export function createCategoryRequest( queryIn: QueryDslQueryContainer, wrap: ReturnType['wrap'], intervalMs?: number, - additionalFilter?: CategorizationAdditionalFilter + additionalFilter?: CategorizationAdditionalFilter, + useStandardTokenizer: boolean = true ) { const query 
= createCategorizeQuery(queryIn, timeField, timeRange); const aggs = { @@ -39,7 +40,7 @@ export function createCategoryRequest( categorize_text: { field, size: CATEGORY_LIMIT, - categorization_analyzer: categorizationAnalyzer, + ...(useStandardTokenizer ? { categorization_analyzer: categorizationAnalyzer } : {}), }, aggs: { examples: { diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_categories.test.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_categories.test.ts index e6e79108435e2..280ffd9ed907f 100644 --- a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_categories.test.ts +++ b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_categories.test.ts @@ -85,61 +85,6 @@ describe('getCategoryRequest', () => { aggs: { categories: { categorize_text: { - categorization_analyzer: { - char_filter: ['first_line_with_letters'], - tokenizer: 'standard', - filter: [ - { - type: 'stop', - stopwords: [ - 'Monday', - 'Tuesday', - 'Wednesday', - 'Thursday', - 'Friday', - 'Saturday', - 'Sunday', - 'Mon', - 'Tue', - 'Wed', - 'Thu', - 'Fri', - 'Sat', - 'Sun', - 'January', - 'February', - 'March', - 'April', - 'May', - 'June', - 'July', - 'August', - 'September', - 'October', - 'November', - 'December', - 'Jan', - 'Feb', - 'Mar', - 'Apr', - 'May', - 'Jun', - 'Jul', - 'Aug', - 'Sep', - 'Oct', - 'Nov', - 'Dec', - 'GMT', - 'UTC', - ], - }, - { - type: 'limit', - max_token_count: '100', - }, - ], - }, field: 'the-field-name', size: 1000, }, diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_categories.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_categories.ts index bbb64bc95cd30..20fd551873b1c 100644 --- a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_categories.ts +++ b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_categories.ts @@ -76,7 +76,10 @@ export const getCategoryRequest = ( timeFieldName, 
undefined, query, - wrap + wrap, + undefined, + undefined, + false ); // In this case we're only interested in the aggregation which diff --git a/x-pack/test/functional/apps/aiops/log_rate_analysis.ts b/x-pack/test/functional/apps/aiops/log_rate_analysis.ts index 9799d4418d729..45fef76fa7170 100644 --- a/x-pack/test/functional/apps/aiops/log_rate_analysis.ts +++ b/x-pack/test/functional/apps/aiops/log_rate_analysis.ts @@ -36,8 +36,7 @@ export default function ({ getPageObjects, getService }: FtrProviderContext) { await ml.jobSourceSelection.selectSourceForLogRateAnalysis(testData.sourceIndexOrSavedSearch); }); - // FLAKY: https://github.com/elastic/kibana/issues/176387 - it.skip(`${testData.suiteTitle} displays index details`, async () => { + it(`${testData.suiteTitle} displays index details`, async () => { await ml.testExecution.logTestStep(`${testData.suiteTitle} displays the time range step`); await aiops.logRateAnalysisPage.assertTimeRangeSelectorSectionExists();