elastic · jgowdyelastic · Dec 5, 2023 · Nov 29, 2023 · Nov 30, 2023 · Nov 30, 2023
diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/significant_log_patterns.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/significant_log_patterns.ts
@@ -18,8 +18,8 @@ export const significantLogPatterns: SignificantItem[] = [
     normalizedScore: 0,
     pValue: 0.000001,
     score: -13.815510557964274,
-    total_bg_count: 1975,
-    total_doc_count: 4669,
+    total_bg_count: 2528,
+    total_doc_count: 6650,
     type: 'log_pattern',
   },
 ];
diff --git a/x-pack/plugins/aiops/common/api/log_categorization/create_category_request.ts b/x-pack/plugins/aiops/common/api/log_categorization/create_category_request.ts
@@ -5,7 +5,10 @@
  * 2.0.
  */
 
-import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types';
+import type {
+  QueryDslQueryContainer,
+  AggregationsCustomCategorizeTextAnalyzer,
+} from '@elastic/elasticsearch/lib/api/types';
 
 import { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils';
 
@@ -30,6 +33,7 @@ export function createCategoryRequest(
       categorize_text: {
         field,
         size: CATEGORY_LIMIT,
+        categorization_analyzer: categorizationAnalyzer,
       },
       aggs: {
         hit: {
@@ -64,3 +68,61 @@ export function createCategoryRequest(
     },
   };
 }
+
+const categorizationAnalyzer: AggregationsCustomCategorizeTextAnalyzer = { // sent as categorize_text.categorization_analyzer by createCategoryRequest
+  char_filter: ['first_line_with_letters'], // NOTE(review): presumably reduces input to its first line containing letters — confirm in ES docs
+  tokenizer: 'standard',
+  filter: [ // two inline token filter definitions: 'stop', then 'limit'
+    // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
+    {
+      type: 'stop', // stop-word filter configured with the date/time words listed below
+      stopwords: [
+        'Monday', // full weekday names
+        'Tuesday',
+        'Wednesday',
+        'Thursday',
+        'Friday',
+        'Saturday',
+        'Sunday',
+        'Mon', // abbreviated weekday names
+        'Tue',
+        'Wed',
+        'Thu',
+        'Fri',
+        'Sat',
+        'Sun',
+        'January', // full month names
+        'February',
+        'March',
+        'April',
+        'May',
+        'June',
+        'July',
+        'August',
+        'September',
+        'October',
+        'November',
+        'December',
+        'Jan', // abbreviated month names
+        'Feb',
+        'Mar',
+        'Apr',
+        'May', // repeats the full-month entry above (identical spelling)
+        'Jun',
+        'Jul',
+        'Aug',
+        'Sep',
+        'Oct',
+        'Nov',
+        'Dec',
+        'GMT', // time zone labels
+        'UTC',
+      ],
+    },
+    // @ts-expect-error filter type in AggregationsCustomCategorizeTextAnalyzer is incorrect
+    {
+      type: 'limit', // NOTE(review): presumably caps how many tokens are considered per value — confirm
+      max_token_count: '100', // given as a string, not a number
+    },
+  ],
+};
diff --git a/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_categories.test.ts b/x-pack/plugins/aiops/server/routes/log_rate_analysis/queries/fetch_categories.test.ts
@@ -84,7 +84,65 @@ describe('getCategoryRequest', () => {
             random_sampler: { probability: 0.1, seed: 1234 },
             aggs: {
               categories: {
-                categorize_text: { field: 'the-field-name', size: 1000 },
+                categorize_text: {
+                  categorization_analyzer: {
+                    char_filter: ['first_line_with_letters'],
+                    tokenizer: 'standard',
+                    filter: [
+                      {
+                        type: 'stop',
+                        stopwords: [
+                          'Monday',
+                          'Tuesday',
+                          'Wednesday',
+                          'Thursday',
+                          'Friday',
+                          'Saturday',
+                          'Sunday',
+                          'Mon',
+                          'Tue',
+                          'Wed',
+                          'Thu',
+                          'Fri',
+                          'Sat',
+                          'Sun',
+                          'January',
+                          'February',
+                          'March',
+                          'April',
+                          'May',
+                          'June',
+                          'July',
+                          'August',
+                          'September',
+                          'October',
+                          'November',
+                          'December',
+                          'Jan',
+                          'Feb',
+                          'Mar',
+                          'Apr',
+                          'May',
+                          'Jun',
+                          'Jul',
+                          'Aug',
+                          'Sep',
+                          'Oct',
+                          'Nov',
+                          'Dec',
+                          'GMT',
+                          'UTC',
+                        ],
+                      },
+                      {
+                        type: 'limit',
+                        max_token_count: '100',
+                      },
+                    ],
+                  },
+                  field: 'the-field-name',
+                  size: 1000,
+                },
                 aggs: {
                   hit: {
                     top_hits: { size: 1, sort: ['the-time-field-name'], _source: 'the-field-name' },