Skip to content

Commit

Permalink
[ML] AIOps: Fix grouping for fields with large arrays. (elastic#177438)
Browse files Browse the repository at this point in the history
## Summary

Fixes edge cases for datasets with large arrays within single fields:

- Deduplicates groups as a final step of creating groups.
- Limits the number of values used per field for the
`frequent_item_sets` aggregations to 50.
- Fixes the `should` clauses of the query for `frequent_item_sets`. The
previous version of the query could be too narrow for fields with arrays
and return no results.
- For the fallback analysis when either deviation or baseline returns no
docs, increases the limit from 10 to 100 docs.
- It turned out the grouping for array values of fields had another bug:
Because we treated the field/values of a group as a dictionary/record-like
structure, a group could not hold multiple values for a single field. The
code was changed in this PR to use an array of field/value pairs, which
now supports multiple values per field.
- On the client side, fixes unique keys for the group item badges if
there are multiple items for the same field.

Adds API integration tests for a dataset with large arrays. This dataset
also triggers slowness of the `frequent_item_sets` agg and can be used
for a performance journey in a follow-up. Without the new limit for how
many values per field to use, these new tests would fail because the agg
causes a timeout. The assertions for chunk and action lengths were
removed because they are flaky for longer running requests (because of
how we implemented the flush fix and keep-alive behavior).

Dataset to test behavior:
[aiops-lra-frequent-items-array.ndjson.zip](https://github.com/elastic/kibana/files/14362105/aiops-lra-frequent-items-array.ndjson.zip)

Without this PR, the dataset would cause the grouping part of log rate
analysis to time out. With this PR, it's still slow for just 18 docs,
but it is able to return results.

Video to replicate the test: Upload via ML File Upload, Adjust the date
picker in Log Rate Analysis, then run the analysis.


[aiops-log-rate-analysis-arrays-0001.webm](https://github.com/elastic/kibana/assets/230104/5d5ce34b-37ef-4e9f-81ae-f8002c194f88)


### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [x] [Flaky Test
Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was
used on any tests changed
- [x] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
  • Loading branch information
walterra authored Feb 23, 2024
1 parent 36ebbb2 commit 0d19e5e
Show file tree
Hide file tree
Showing 22 changed files with 2,780 additions and 199 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,21 @@ import type { ItemSet } from '../../types';

export const filteredFrequentItemSets: ItemSet[] = [
{
set: { response_code: '500', url: 'home.php' },
set: [
{ fieldName: 'response_code', fieldValue: '500' },
{ fieldName: 'url', fieldValue: 'home.php' },
],
size: 2,
maxPValue: 0.010770456205312423,
doc_count: 792,
support: 0.5262458471760797,
total_doc_count: 1505,
},
{
set: { user: 'Peter', url: 'home.php' },
set: [
{ fieldName: 'user', fieldValue: 'Peter' },
{ fieldName: 'url', fieldValue: 'home.php' },
],
size: 2,
maxPValue: 0.010770456205312423,
doc_count: 634,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,103 +9,103 @@ import type { SignificantItemGroup } from '@kbn/ml-agg-utils';

export const finalSignificantItemGroups: SignificantItemGroup[] = [
{
docCount: 632,
group: [
{
key: 'url:login.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'login.php',
docCount: 790,
duplicate: 2,
pValue: 0.012783309213417932,
},
{
key: 'user:Peter',
type: 'keyword',
fieldName: 'user',
fieldValue: 'Peter',
docCount: 632,
duplicate: 2,
pValue: 0.012783309213417932,
},
],
id: '1937394803',
pValue: 0.012783309213417932,
},
{
docCount: 792,
id: '2675980076',
group: [
{
key: 'response_code:500',
type: 'keyword',
fieldName: 'response_code',
fieldValue: '500',
docCount: 792,
duplicate: 2,
pValue: 0.012783309213417932,
duplicate: 2,
},
{
key: 'url:home.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'home.php',
docCount: 792,
duplicate: 2,
pValue: 0.00974308761016614,
duplicate: 2,
},
],
id: '2675980076',
docCount: 792,
pValue: 0.00974308761016614,
},
{
docCount: 790,
id: '3819687732',
group: [
{
key: 'response_code:500',
type: 'keyword',
fieldName: 'response_code',
fieldValue: '500',
docCount: 792,
duplicate: 2,
pValue: 0.012783309213417932,
duplicate: 2,
},
{
key: 'url:login.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'login.php',
docCount: 790,
duplicate: 2,
pValue: 0.012783309213417932,
duplicate: 2,
},
],
id: '3819687732',
docCount: 790,
pValue: 0.012783309213417932,
},
{
docCount: 636,
id: '2091742187',
group: [
{
key: 'url:home.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'home.php',
docCount: 792,
duplicate: 2,
pValue: 0.00974308761016614,
duplicate: 2,
},
{
key: 'user:Peter',
type: 'keyword',
fieldName: 'user',
fieldValue: 'Peter',
docCount: 636,
duplicate: 2,
pValue: 0.00974308761016614,
duplicate: 2,
},
],
id: '2091742187',
docCount: 636,
pValue: 0.00974308761016614,
},
{
id: '1937394803',
group: [
{
key: 'url:login.php',
type: 'keyword',
fieldName: 'url',
fieldValue: 'login.php',
docCount: 790,
pValue: 0.012783309213417932,
duplicate: 2,
},
{
key: 'user:Peter',
type: 'keyword',
fieldName: 'user',
fieldValue: 'Peter',
docCount: 632,
pValue: 0.012783309213417932,
duplicate: 2,
},
],
docCount: 632,
pValue: 0.012783309213417932,
},
];
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,54 @@ import type { ItemSet } from '../../types';

export const frequentItemSets: ItemSet[] = [
{
set: { response_code: '500', url: 'home.php' },
set: [
{ fieldName: 'response_code', fieldValue: '500' },
{ fieldName: 'url', fieldValue: 'home.php' },
],
size: 2,
maxPValue: 0.00974308761016614,
doc_count: 792,
support: 0.2703994537384773,
total_doc_count: 2929,
},
{
set: { response_code: '500', url: 'login.php' },
set: [
{ fieldName: 'response_code', fieldValue: '500' },
{ fieldName: 'url', fieldValue: 'login.php' },
],
size: 2,
maxPValue: 0.012783309213417932,
doc_count: 790,
support: 0.2697166268350973,
total_doc_count: 2929,
},
{
set: { user: 'Peter', url: 'home.php' },
set: [
{ fieldName: 'user', fieldValue: 'Peter' },
{ fieldName: 'url', fieldValue: 'home.php' },
],
size: 2,
maxPValue: 0.00974308761016614,
doc_count: 636,
support: 0.21713895527483784,
total_doc_count: 2929,
},
{
set: { user: 'Peter', url: 'login.php' },
set: [
{ fieldName: 'user', fieldValue: 'Peter' },
{ fieldName: 'url', fieldValue: 'login.php' },
],
size: 2,
maxPValue: 0.012783309213417932,
doc_count: 632,
support: 0.21577330146807785,
total_doc_count: 2929,
},
{
set: { response_code: '500', user: 'Peter' },
set: [
{ fieldName: 'response_code', fieldValue: '500' },
{ fieldName: 'user', fieldValue: 'Peter' },
],
size: 2,
maxPValue: 3.6085657805889595e-12,
doc_count: 79,
Expand Down
8 changes: 8 additions & 0 deletions x-pack/plugins/aiops/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ export const LOG_RATE_ANALYSIS_SETTINGS = {
* The minimum support value to be used for the frequent item sets aggregation.
*/
FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT: 0.001,
/**
* The maximum values per field to be used for the frequent item sets aggregation.
*/
FREQUENT_ITEMS_SETS_FIELD_VALUE_LIMIT: 50,
/**
* The number of terms by field to fetch for the zero docs fallback analysis.
*/
TOP_TERMS_FALLBACK_SIZE: 100,
} as const;

/**
Expand Down
2 changes: 1 addition & 1 deletion x-pack/plugins/aiops/common/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export interface SignificantItemDuplicateGroup {
export type FieldValuePairCounts = Record<string, Record<string, number>>;

export interface ItemSet {
set: Record<FieldValuePair['fieldName'], FieldValuePair['fieldValue']>;
set: FieldValuePair[];
size: number;
maxPValue: number;
doc_count: number;
Expand Down
Loading

0 comments on commit 0d19e5e

Please sign in to comment.