[8.13] [ML] AIOps: Fix grouping for fields with large arrays. (#177438)…

… (#177765) # Backport This will backport the following commits from `main` to `8.13`: - [[ML] AIOps: Fix grouping for fields with large arrays. (#177438)](#177438)  ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport)  Co-authored-by: Walter Rafelsberger <[email protected]>
elastic · Feb 24, 2024 · 1a61d3c · 1a61d3c
1 parent 4e4bca2
commit 1a61d3c
Show file tree

Hide file tree

Showing 22 changed files with 2,780 additions and 199 deletions.
diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_item_sets.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/filtered_frequent_item_sets.ts
@@ -9,15 +9,21 @@ import type { ItemSet } from '../../types';
 
 export const filteredFrequentItemSets: ItemSet[] = [
   {
-    set: { response_code: '500', url: 'home.php' },
+    set: [
+      { fieldName: 'response_code', fieldValue: '500' },
+      { fieldName: 'url', fieldValue: 'home.php' },
+    ],
     size: 2,
     maxPValue: 0.010770456205312423,
     doc_count: 792,
     support: 0.5262458471760797,
     total_doc_count: 1505,
   },
   {
-    set: { user: 'Peter', url: 'home.php' },
+    set: [
+      { fieldName: 'user', fieldValue: 'Peter' },
+      { fieldName: 'url', fieldValue: 'home.php' },
+    ],
     size: 2,
     maxPValue: 0.010770456205312423,
     doc_count: 634,

diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_significant_item_groups.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/final_significant_item_groups.ts
@@ -9,103 +9,103 @@ import type { SignificantItemGroup } from '@kbn/ml-agg-utils';
 
 export const finalSignificantItemGroups: SignificantItemGroup[] = [
   {
-    docCount: 632,
-    group: [
-      {
-        key: 'url:login.php',
-        type: 'keyword',
-        fieldName: 'url',
-        fieldValue: 'login.php',
-        docCount: 790,
-        duplicate: 2,
-        pValue: 0.012783309213417932,
-      },
-      {
-        key: 'user:Peter',
-        type: 'keyword',
-        fieldName: 'user',
-        fieldValue: 'Peter',
-        docCount: 632,
-        duplicate: 2,
-        pValue: 0.012783309213417932,
-      },
-    ],
-    id: '1937394803',
-    pValue: 0.012783309213417932,
-  },
-  {
-    docCount: 792,
+    id: '2675980076',
     group: [
       {
         key: 'response_code:500',
         type: 'keyword',
         fieldName: 'response_code',
         fieldValue: '500',
         docCount: 792,
-        duplicate: 2,
         pValue: 0.012783309213417932,
+        duplicate: 2,
       },
       {
         key: 'url:home.php',
         type: 'keyword',
         fieldName: 'url',
         fieldValue: 'home.php',
         docCount: 792,
-        duplicate: 2,
         pValue: 0.00974308761016614,
+        duplicate: 2,
       },
     ],
-    id: '2675980076',
+    docCount: 792,
     pValue: 0.00974308761016614,
   },
   {
-    docCount: 790,
+    id: '3819687732',
     group: [
       {
         key: 'response_code:500',
         type: 'keyword',
         fieldName: 'response_code',
         fieldValue: '500',
         docCount: 792,
-        duplicate: 2,
         pValue: 0.012783309213417932,
+        duplicate: 2,
       },
       {
         key: 'url:login.php',
         type: 'keyword',
         fieldName: 'url',
         fieldValue: 'login.php',
         docCount: 790,
-        duplicate: 2,
         pValue: 0.012783309213417932,
+        duplicate: 2,
       },
     ],
-    id: '3819687732',
+    docCount: 790,
     pValue: 0.012783309213417932,
   },
   {
-    docCount: 636,
+    id: '2091742187',
     group: [
       {
         key: 'url:home.php',
         type: 'keyword',
         fieldName: 'url',
         fieldValue: 'home.php',
         docCount: 792,
-        duplicate: 2,
         pValue: 0.00974308761016614,
+        duplicate: 2,
       },
       {
         key: 'user:Peter',
         type: 'keyword',
         fieldName: 'user',
         fieldValue: 'Peter',
         docCount: 636,
-        duplicate: 2,
         pValue: 0.00974308761016614,
+        duplicate: 2,
       },
     ],
-    id: '2091742187',
+    docCount: 636,
     pValue: 0.00974308761016614,
   },
+  {
+    id: '1937394803',
+    group: [
+      {
+        key: 'url:login.php',
+        type: 'keyword',
+        fieldName: 'url',
+        fieldValue: 'login.php',
+        docCount: 790,
+        pValue: 0.012783309213417932,
+        duplicate: 2,
+      },
+      {
+        key: 'user:Peter',
+        type: 'keyword',
+        fieldName: 'user',
+        fieldValue: 'Peter',
+        docCount: 632,
+        pValue: 0.012783309213417932,
+        duplicate: 2,
+      },
+    ],
+    docCount: 632,
+    pValue: 0.012783309213417932,
+  },
 ];
diff --git a/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_item_sets.ts b/x-pack/plugins/aiops/common/__mocks__/artificial_logs/frequent_item_sets.ts
@@ -9,39 +9,54 @@ import type { ItemSet } from '../../types';
 
 export const frequentItemSets: ItemSet[] = [
   {
-    set: { response_code: '500', url: 'home.php' },
+    set: [
+      { fieldName: 'response_code', fieldValue: '500' },
+      { fieldName: 'url', fieldValue: 'home.php' },
+    ],
     size: 2,
     maxPValue: 0.00974308761016614,
     doc_count: 792,
     support: 0.2703994537384773,
     total_doc_count: 2929,
   },
   {
-    set: { response_code: '500', url: 'login.php' },
+    set: [
+      { fieldName: 'response_code', fieldValue: '500' },
+      { fieldName: 'url', fieldValue: 'login.php' },
+    ],
     size: 2,
     maxPValue: 0.012783309213417932,
     doc_count: 790,
     support: 0.2697166268350973,
     total_doc_count: 2929,
   },
   {
-    set: { user: 'Peter', url: 'home.php' },
+    set: [
+      { fieldName: 'user', fieldValue: 'Peter' },
+      { fieldName: 'url', fieldValue: 'home.php' },
+    ],
     size: 2,
     maxPValue: 0.00974308761016614,
     doc_count: 636,
     support: 0.21713895527483784,
     total_doc_count: 2929,
   },
   {
-    set: { user: 'Peter', url: 'login.php' },
+    set: [
+      { fieldName: 'user', fieldValue: 'Peter' },
+      { fieldName: 'url', fieldValue: 'login.php' },
+    ],
     size: 2,
     maxPValue: 0.012783309213417932,
     doc_count: 632,
     support: 0.21577330146807785,
     total_doc_count: 2929,
   },
   {
-    set: { response_code: '500', user: 'Peter' },
+    set: [
+      { fieldName: 'response_code', fieldValue: '500' },
+      { fieldName: 'user', fieldValue: 'Peter' },
+    ],
     size: 2,
     maxPValue: 3.6085657805889595e-12,
     doc_count: 79,

diff --git a/x-pack/plugins/aiops/common/constants.ts b/x-pack/plugins/aiops/common/constants.ts
@@ -14,6 +14,14 @@ export const LOG_RATE_ANALYSIS_SETTINGS = {
    * The minimum support value to be used for the frequent item sets aggregation.
    */
   FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT: 0.001,
+  /**
+   * The maximum number of values per field to be used for the frequent item sets aggregation.
+   */
+  FREQUENT_ITEMS_SETS_FIELD_VALUE_LIMIT: 50,
+  /**
+   * The number of terms by field to fetch for the zero docs fallback analysis.
+   */
+  TOP_TERMS_FALLBACK_SIZE: 100,
 } as const;
 
 /**

diff --git a/x-pack/plugins/aiops/common/types.ts b/x-pack/plugins/aiops/common/types.ts
@@ -15,7 +15,7 @@ export interface SignificantItemDuplicateGroup {
 export type FieldValuePairCounts = Record<string, Record<string, number>>;
 
 export interface ItemSet {
-  set: Record<FieldValuePair['fieldName'], FieldValuePair['fieldValue']>;
+  set: FieldValuePair[];
   size: number;
   maxPValue: number;
   doc_count: number;