Skip to content

Commit

Permalink
[ML] Improves bucket span estimator stability. (elastic#21282) (elast…
Browse files Browse the repository at this point in the history
…ic#21330)

- Fixes the bucket span estimator when median is selected as a detector function. agg.type.name is median and therefore not usable for an Elasticsearch aggregation. agg.type.dslName is percentile and is the correct mapping. .dslName is also used for the aggregations used for the preview charts.
- 7.0 will introduce a search.max_buckets setting which defaults to 10000. This could lead to failing bucket span estimations, because the values used for creating the required aggregations could result in more buckets than that limit allows. This PR fixes it by taking search.max_buckets into account when calculating the time range used for the bucket span estimation. (The setting has been available since 6.2, so this fix is backported to the current unreleased minor releases 6.4 and 6.5.)
  • Loading branch information
walterra authored Jul 27, 2018
1 parent 6cf0032 commit 302c3cb
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@ module.directive('mlBucketSpanEstimator', function () {
// single metric config
const fieldName = ($scope.formConfig.field === null) ? null : $scope.formConfig.field.name;
data.fields.push(fieldName);
data.aggTypes.push($scope.formConfig.agg.type.name);
data.aggTypes.push($scope.formConfig.agg.type.dslName);
} else {
// multi metric config
Object.keys($scope.formConfig.fields).map((id) => {
const field = $scope.formConfig.fields[id];
const fieldName = (field.id === EVENT_RATE_COUNT_FIELD) ? null : field.name;
data.fields.push(fieldName);
data.aggTypes.push(field.agg.type.name);
data.aggTypes.push(field.agg.type.dslName);
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ describe('ML - BucketSpanEstimator', () => {
}
}
}).catch((catchData) => {
expect(catchData).to.be('BucketSpanEstimator: run has stopped because no checks returned a valid interval');
expect(catchData).to.be('Unable to retrieve cluster setting search.max_buckets');
done();
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ export function estimateBucketSpanFactory(callWithRequest) {
const SingleSeriesChecker = singleSeriesCheckerFactory(callWithRequest);

class BucketSpanEstimator {
constructor({ index, timeField, aggTypes, fields, duration, query, splitField }, splitFieldValues) {
constructor({ index, timeField, aggTypes, fields, duration, query, splitField }, splitFieldValues, maxBuckets) {
this.index = index;
this.timeField = timeField;
this.aggTypes = aggTypes;
Expand All @@ -32,9 +32,15 @@ export function estimateBucketSpanFactory(callWithRequest) {
minimumBucketSpanMS: 0
};

// only run the tests over the last 250 hours of data
// determine durations for bucket span estimation,
// taking into account the cluster's search.max_buckets setting.
// the polled_data_checker uses an aggregation interval of 1 minute,
// so that's the smallest interval we have to account for in order
// not to exceed search.max_buckets.
const ONE_MINUTE_MS = 60000;
const ONE_HOUR_MS = 3600000;
const HOUR_MULTIPLIER = 250;
// only run the tests over the last 250 hours of data at most
const HOUR_MULTIPLIER = Math.min(250, Math.floor((maxBuckets * ONE_MINUTE_MS) / ONE_HOUR_MS));
const timePickerDurationLength = (this.duration.end - this.duration.start);
const multiplierDurationLength = (ONE_HOUR_MS * HOUR_MULTIPLIER);

Expand Down Expand Up @@ -315,35 +321,54 @@ export function estimateBucketSpanFactory(callWithRequest) {
}

return new Promise((resolve, reject) => {
const runEstimator = (splitFieldValues = []) => {
const bucketSpanEstimator = new BucketSpanEstimator(
formConfig,
splitFieldValues
);

bucketSpanEstimator.run()
.then((resp) => {
resolve(resp);
})
.catch((resp) => {
reject(resp);
});
};
// fetch the `search.max_buckets` cluster setting so we're able to
// adjust aggregations to not exceed that limit.
callWithRequest('cluster.getSettings', {
flatSettings: true,
includeDefaults: true,
filterPath: '*.*max_buckets'
})
.then((settings) => {
if (typeof settings !== 'object' || typeof settings.defaults !== 'object') {
reject('Unable to retrieve cluster setting search.max_buckets');
}

// a partition has been selected, so we need to load some field values to use in the
// bucket span tests.
if (formConfig.splitField !== undefined) {
getRandomFieldValues(formConfig.index, formConfig.splitField, formConfig.query)
.then((splitFieldValues) => {
runEstimator(splitFieldValues);
})
.catch((resp) => {
reject(resp);
});
} else {
// no partition field selected or we're in the single metric config
runEstimator();
}
const maxBuckets = parseInt(settings.defaults['search.max_buckets']);

const runEstimator = (splitFieldValues = []) => {
const bucketSpanEstimator = new BucketSpanEstimator(
formConfig,
splitFieldValues,
maxBuckets
);

bucketSpanEstimator.run()
.then((resp) => {
resolve(resp);
})
.catch((resp) => {
reject(resp);
});
};

// a partition has been selected, so we need to load some field values to use in the
// bucket span tests.
if (formConfig.splitField !== undefined) {
getRandomFieldValues(formConfig.index, formConfig.splitField, formConfig.query)
.then((splitFieldValues) => {
runEstimator(splitFieldValues);
})
.catch((resp) => {
reject(resp);
});
} else {
// no partition field selected or we're in the single metric config
runEstimator();
}
})
.catch((resp) => {
reject(resp);
});

});
};
Expand Down

0 comments on commit 302c3cb

Please sign in to comment.