From 302c3cb7393c88ff3d200a386aef240f7578a291 Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Fri, 27 Jul 2018 12:14:57 +0200 Subject: [PATCH] [ML] Improves bucket span estimator stability. (#21282) (#21330) - Fixes the bucket span estimator when median is selected as a detector function. agg.type.name is median and therefore not usable for an Elasticsearch aggregation. agg.type.dslName is percentile and is the correct mapping. .dslName is also used for the aggregations used for the preview charts. - 7.0 will introduce a search.max_buckets setting which defaults to 10000. This could lead to failing bucket estimations because the values used for creating the required aggregations could result in more buckets. This PR fixes it by taking search.max_buckets into account when calculating the time range used for the bucket estimation. (The setting has been available since 6.2, so this is being backported to the current unreleased minor releases 6.4 and 6.5.) --- .../bucket_span_estimator_directive.js | 4 +- .../__tests__/bucket_span_estimator.js | 2 +- .../bucket_span_estimator.js | 87 ++++++++++++------- 3 files changed, 59 insertions(+), 34 deletions(-) diff --git a/x-pack/plugins/ml/public/jobs/new_job/simple/components/bucket_span_estimator/bucket_span_estimator_directive.js b/x-pack/plugins/ml/public/jobs/new_job/simple/components/bucket_span_estimator/bucket_span_estimator_directive.js index dbdb857238dae..a4fa1979a62a9 100644 --- a/x-pack/plugins/ml/public/jobs/new_job/simple/components/bucket_span_estimator/bucket_span_estimator_directive.js +++ b/x-pack/plugins/ml/public/jobs/new_job/simple/components/bucket_span_estimator/bucket_span_estimator_directive.js @@ -66,14 +66,14 @@ module.directive('mlBucketSpanEstimator', function () { // single metric config const fieldName = ($scope.formConfig.field === null) ? 
null : $scope.formConfig.field.name; data.fields.push(fieldName); - data.aggTypes.push($scope.formConfig.agg.type.name); + data.aggTypes.push($scope.formConfig.agg.type.dslName); } else { // multi metric config Object.keys($scope.formConfig.fields).map((id) => { const field = $scope.formConfig.fields[id]; const fieldName = (field.id === EVENT_RATE_COUNT_FIELD) ? null : field.name; data.fields.push(fieldName); - data.aggTypes.push(field.agg.type.name); + data.aggTypes.push(field.agg.type.dslName); }); } diff --git a/x-pack/plugins/ml/server/models/bucket_span_estimator/__tests__/bucket_span_estimator.js b/x-pack/plugins/ml/server/models/bucket_span_estimator/__tests__/bucket_span_estimator.js index 97c8245908b9e..5c11cab9224db 100644 --- a/x-pack/plugins/ml/server/models/bucket_span_estimator/__tests__/bucket_span_estimator.js +++ b/x-pack/plugins/ml/server/models/bucket_span_estimator/__tests__/bucket_span_estimator.js @@ -40,7 +40,7 @@ describe('ML - BucketSpanEstimator', () => { } } }).catch((catchData) => { - expect(catchData).to.be('BucketSpanEstimator: run has stopped because no checks returned a valid interval'); + expect(catchData).to.be('Unable to retrieve cluster setting search.max_buckets'); done(); }); diff --git a/x-pack/plugins/ml/server/models/bucket_span_estimator/bucket_span_estimator.js b/x-pack/plugins/ml/server/models/bucket_span_estimator/bucket_span_estimator.js index 26b01e044e7e6..0f345f2826695 100644 --- a/x-pack/plugins/ml/server/models/bucket_span_estimator/bucket_span_estimator.js +++ b/x-pack/plugins/ml/server/models/bucket_span_estimator/bucket_span_estimator.js @@ -17,7 +17,7 @@ export function estimateBucketSpanFactory(callWithRequest) { const SingleSeriesChecker = singleSeriesCheckerFactory(callWithRequest); class BucketSpanEstimator { - constructor({ index, timeField, aggTypes, fields, duration, query, splitField }, splitFieldValues) { + constructor({ index, timeField, aggTypes, fields, duration, query, splitField }, 
splitFieldValues, maxBuckets) { this.index = index; this.timeField = timeField; this.aggTypes = aggTypes; @@ -32,9 +32,15 @@ export function estimateBucketSpanFactory(callWithRequest) { minimumBucketSpanMS: 0 }; - // only run the tests over the last 250 hours of data + // determine durations for bucket span estimation + // taking into account the clusters' search.max_buckets settings + // the polled_data_checker uses an aggregation interval of 1 minute + // so that's the smallest interval we have to check for not to + // exceed search.max_buckets. + const ONE_MINUTE_MS = 60000; const ONE_HOUR_MS = 3600000; - const HOUR_MULTIPLIER = 250; + // only run the tests over the last 250 hours of data at max + const HOUR_MULTIPLIER = Math.min(250, Math.floor((maxBuckets * ONE_MINUTE_MS) / ONE_HOUR_MS)); const timePickerDurationLength = (this.duration.end - this.duration.start); const multiplierDurationLength = (ONE_HOUR_MS * HOUR_MULTIPLIER); @@ -315,35 +321,54 @@ export function estimateBucketSpanFactory(callWithRequest) { } return new Promise((resolve, reject) => { - const runEstimator = (splitFieldValues = []) => { - const bucketSpanEstimator = new BucketSpanEstimator( - formConfig, - splitFieldValues - ); - - bucketSpanEstimator.run() - .then((resp) => { - resolve(resp); - }) - .catch((resp) => { - reject(resp); - }); - }; + // fetch the `search.max_buckets` cluster setting so we're able to + // adjust aggregations to not exceed that limit. + callWithRequest('cluster.getSettings', { + flatSettings: true, + includeDefaults: true, + filterPath: '*.*max_buckets' + }) + .then((settings) => { + if (typeof settings !== 'object' || typeof settings.defaults !== 'object') { + reject('Unable to retrieve cluster setting search.max_buckets'); + } - // a partition has been selected, so we need to load some field values to use in the - // bucket span tests. 
- if (formConfig.splitField !== undefined) { - getRandomFieldValues(formConfig.index, formConfig.splitField, formConfig.query) - .then((splitFieldValues) => { - runEstimator(splitFieldValues); - }) - .catch((resp) => { - reject(resp); - }); - } else { - // no partition field selected or we're in the single metric config - runEstimator(); - } + const maxBuckets = parseInt(settings.defaults['search.max_buckets']); + + const runEstimator = (splitFieldValues = []) => { + const bucketSpanEstimator = new BucketSpanEstimator( + formConfig, + splitFieldValues, + maxBuckets + ); + + bucketSpanEstimator.run() + .then((resp) => { + resolve(resp); + }) + .catch((resp) => { + reject(resp); + }); + }; + + // a partition has been selected, so we need to load some field values to use in the + // bucket span tests. + if (formConfig.splitField !== undefined) { + getRandomFieldValues(formConfig.index, formConfig.splitField, formConfig.query) + .then((splitFieldValues) => { + runEstimator(splitFieldValues); + }) + .catch((resp) => { + reject(resp); + }); + } else { + // no partition field selected or we're in the single metric config + runEstimator(); + } + }) + .catch((resp) => { + reject(resp); + }); }); };