Skip to content

Commit

Permalink
[ML] Add option for per-partition categorization to categorization job wizard (elastic#75061)
Browse files: browse the repository at this point in the history
  • Loading branch information
qn895 committed Aug 19, 2020
1 parent cf94a56 commit 993d485
Show file tree
Hide file tree
Showing 33 changed files with 1,230 additions and 34 deletions.
2 changes: 2 additions & 0 deletions x-pack/plugins/ml/common/constants/anomalies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ export enum ANOMALY_THRESHOLD {
}

/** Readonly tuple of the entity field kinds; its element type is derived via `typeof PARTITION_FIELDS[number]`. */
export const PARTITION_FIELDS = [
  'partition_field',
  'over_field',
  'by_field',
] as const;

/** Key string `job_id`. */
export const JOB_ID = 'job_id';

/** Key string `partition_field_value`. */
export const PARTITION_FIELD_VALUE = 'partition_field_value';
28 changes: 28 additions & 0 deletions x-pack/plugins/ml/common/constants/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,34 @@ export const getMessages = once(() => {
const createJobsDocsUrl = `https://www.elastic.co/guide/en/machine-learning/{{version}}/create-jobs.html`;

return {
categorizer_detector_missing_per_partition_field: {
status: VALIDATION_STATUS.ERROR,
text: i18n.translate(
'xpack.ml.models.jobValidation.messages.categorizerMissingPerPartitionFieldMessage',
{
defaultMessage:
'Partition field must be set for detectors that reference "mlcategory" when per-partition categorization is enabled.',
}
),
url:
'https://www.elastic.co/guide/en/machine-learning/{{version}}/ml-configuring-categories.html',
},
categorizer_varying_per_partition_fields: {
status: VALIDATION_STATUS.ERROR,
text: i18n.translate(
'xpack.ml.models.jobValidation.messages.categorizerVaryingPerPartitionFieldNamesMessage',
{
defaultMessage:
'Detectors with keyword "mlcategory" cannot have different partition_field_name when per-partition categorization is enabled. Found [{fields}].',

values: {
fields: '"{{fields}}"',
},
}
),
url:
'https://www.elastic.co/guide/en/machine-learning/{{version}}/ml-configuring-categories.html',
},
field_not_aggregatable: {
status: VALIDATION_STATUS.ERROR,
text: i18n.translate('xpack.ml.models.jobValidation.messages.fieldNotAggregatableMessage', {
Expand Down
17 changes: 17 additions & 0 deletions x-pack/plugins/ml/common/types/anomalies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,20 @@ export interface AnomaliesTableRecord {
}

export type PartitionFieldsType = typeof PARTITION_FIELDS[number];

/**
 * Shape of a result document whose `result_type` is `'categorizer_stats'`.
 *
 * The index signature allows additional, untyped properties alongside the
 * declared ones.
 */
export interface AnomalyCategorizerStatsDoc {
  // Identification
  job_id: string;
  result_type: 'categorizer_stats';

  // Partition the stats apply to; both are optional.
  partition_field_name?: string;
  partition_field_value?: string;

  // Overall status, restricted to the two literals below.
  categorization_status: 'ok' | 'warn';

  // Counters
  categorized_doc_count: number;
  total_category_count: number;
  frequent_category_count: number;
  rare_category_count: number;
  dead_category_count: number;
  failed_category_count: number;

  // Times (presumably epoch values; TODO confirm the unit against the producer).
  log_time: number;
  timestamp: number;

  // Any other property is permitted and is typed as `any`.
  [key: string]: any;
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,6 @@ export interface CustomRule {
}

/**
 * Per-partition categorization settings of a job's analysis config.
 *
 * Both flags are optional, so the object may be created empty and have its
 * flags filled in afterwards. `enabled` is declared once, as optional; a
 * second, required declaration of the same property is a duplicate
 * identifier and would reject an empty object.
 */
export interface PerPartitionCategorization {
  /** Whether per-partition categorization is switched on. */
  enabled?: boolean;
  /** Whether the `stop_on_warn` option is switched on. */
  stop_on_warn?: boolean;
}
9 changes: 9 additions & 0 deletions x-pack/plugins/ml/common/types/results.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

/**
 * Result carrying the jobs that have stopped partitions.
 */
export interface GetStoppedPartitionResult {
  /**
   * Either a flat list of strings, or a map from a string key to a list of
   * strings (presumably job ids and their stopped partition values; TODO
   * confirm against the producing endpoint).
   */
  jobs: Record<string, string[]> | string[];
}
56 changes: 51 additions & 5 deletions x-pack/plugins/ml/common/util/job_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { EntityField } from './anomaly_utils';
import { MlServerLimits } from '../types/ml_server_info';
import { JobValidationMessage, JobValidationMessageId } from '../constants/messages';
import { ES_AGGREGATION, ML_JOB_AGGREGATION } from '../constants/aggregation_types';
import { MLCATEGORY } from '../constants/field_types';

export interface ValidationResults {
valid: boolean;
Expand Down Expand Up @@ -86,9 +87,9 @@ export function isSourceDataChartableForDetector(job: CombinedJob, detectorIndex
// whereas the 'function_description' field holds an ML-built display hint for function e.g. 'count'.
isSourceDataChartable =
mlFunctionToESAggregation(functionName) !== null &&
dtr.by_field_name !== 'mlcategory' &&
dtr.partition_field_name !== 'mlcategory' &&
dtr.over_field_name !== 'mlcategory';
dtr.by_field_name !== MLCATEGORY &&
dtr.partition_field_name !== MLCATEGORY &&
dtr.over_field_name !== MLCATEGORY;

// If the datafeed uses script fields, we can only plot the time series if
// model plot is enabled. Without model plot it will be very difficult or impossible
Expand Down Expand Up @@ -380,27 +381,72 @@ export function basicJobValidation(
valid = false;
}
}

let categorizerDetectorMissingPartitionField = false;
if (job.analysis_config.detectors.length === 0) {
messages.push({ id: 'detectors_empty' });
valid = false;
} else {
let v = true;

each(job.analysis_config.detectors, (d) => {
if (isEmpty(d.function)) {
v = false;
}
// if detector has an ml category, check if the partition_field is missing
const needToHavePartitionFieldName =
job.analysis_config.per_partition_categorization?.enabled === true &&
(d.by_field_name === MLCATEGORY || d.over_field_name === MLCATEGORY);

if (needToHavePartitionFieldName && d.partition_field_name === undefined) {
categorizerDetectorMissingPartitionField = true;
}
});
if (v) {
messages.push({ id: 'detectors_function_not_empty' });
} else {
messages.push({ id: 'detectors_function_empty' });
valid = false;
}
if (categorizerDetectorMissingPartitionField) {
messages.push({ id: 'categorizer_detector_missing_per_partition_field' });
valid = false;
}
}

// check for duplicate detectors
if (job.analysis_config.detectors.length >= 2) {
// check if the detectors with mlcategory might have different per_partition_field values
// if per_partition_categorization is enabled
if (job.analysis_config.per_partition_categorization !== undefined) {
if (
job.analysis_config.per_partition_categorization.enabled ||
(job.analysis_config.per_partition_categorization.stop_on_warn &&
Array.isArray(job.analysis_config.detectors) &&
job.analysis_config.detectors.length >= 2)
) {
const categorizationDetectors = job.analysis_config.detectors.filter(
(d) =>
d.by_field_name === MLCATEGORY ||
d.over_field_name === MLCATEGORY ||
d.partition_field_name === MLCATEGORY
);
const uniqPartitions = [
...new Set(
categorizationDetectors
.map((d) => d.partition_field_name)
.filter((name) => name !== undefined)
),
];
if (uniqPartitions.length > 1) {
valid = false;
messages.push({
id: 'categorizer_varying_per_partition_fields',
fields: uniqPartitions.join(', '),
});
}
}
}

// check for duplicate detectors
// create an array of objects with a subset of the attributes
// where we want to make sure they are not the same across detectors
const compareSubSet = job.analysis_config.detectors.map((d) =>
Expand Down
19 changes: 18 additions & 1 deletion x-pack/plugins/ml/public/application/explorer/explorer.js
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ export class Explorer extends React.Component {
updateLanguage = (language) => this.setState({ language });

render() {
const { showCharts, severity } = this.props;
const { showCharts, severity, stoppedPartitions } = this.props;

const {
annotations,
Expand Down Expand Up @@ -298,6 +298,23 @@ export class Explorer extends React.Component {

<div className={mainColumnClasses}>
<EuiSpacer size="m" />

{stoppedPartitions && (
<EuiCallOut
size={'s'}
title={
<FormattedMessage
id="xpack.ml.explorer.stoppedPartitionsExistCallout"
defaultMessage="There may be fewer results than there could have been because stop_on_warn is turned on. Both categorization and subsequent anomaly detection have stopped for some partitions in {jobsWithStoppedPartitions, plural, one {job} other {jobs}} [{stoppedPartitions}] where the categorization status has changed to warn."
values={{
jobsWithStoppedPartitions: stoppedPartitions.length,
stoppedPartitions: stoppedPartitions.join(', '),
}}
/>
}
/>
)}

<AnomalyTimeline
explorerState={this.props.explorerState}
setSelectedCells={this.props.setSelectedCells}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ export class CategorizationJobCreator extends JobCreator {
ML_JOB_AGGREGATION.COUNT;
private _categorizationAnalyzer: CategorizationAnalyzer = {};
private _defaultCategorizationAnalyzer: CategorizationAnalyzer;
private _partitionFieldName: string | null = null;

constructor(
indexPattern: IndexPattern,
Expand Down Expand Up @@ -75,6 +76,11 @@ export class CategorizationJobCreator extends JobCreator {
/**
 * Builds a detector for the given aggregation and field, sets its by field
 * to `mlCategory.id`, and registers it through `_addDetector`.
 *
 * When per-partition categorization is enabled and a partition field has
 * been chosen, that field is also set as the detector's
 * `partition_field_name`.
 */
private _createDetector(agg: Aggregation, field: Field) {
  const detector: Detector = createBasicDetector(agg, field);
  detector.by_field_name = mlCategory.id;

  // The partition field is only consulted when per-partition categorization is on.
  const partitionField = this.perPartitionCategorization
    ? this.categorizationPerPartitionField
    : null;
  if (partitionField !== null) {
    detector.partition_field_name = partitionField;
  }

  this._addDetector(detector, agg, mlCategory);
}

Expand Down Expand Up @@ -173,4 +179,29 @@ export class CategorizationJobCreator extends JobCreator {
this.bucketSpan = bs;
}
}

/** The partition field currently used for per-partition categorization, or `null` when none is set. */
public get categorizationPerPartitionField() {
  return this._partitionFieldName;
}

/**
 * Sets or clears the per-partition categorization field.
 *
 * - `null`: removes `partition_field_name` from every detector, removes the
 *   previously stored field (if any) from the influencers and stores `null`.
 * - a field name different from the stored one: swaps the influencer
 *   (old one removed, new one added), stores the name and writes it to every
 *   detector's `partition_field_name`.
 * - the same field name as the stored one: no-op.
 */
public set categorizationPerPartitionField(fieldName: string | null) {
  const previousField = this._partitionFieldName;

  if (fieldName === null) {
    this._detectors.forEach((dtr) => {
      delete dtr.partition_field_name;
    });
    if (previousField !== null) {
      this.removeInfluencer(previousField);
    }
    this._partitionFieldName = null;
    return;
  }

  // Nothing to do when the selection has not changed.
  if (previousField === fieldName) {
    return;
  }

  // Replace the old influencer with the new one before updating the detectors.
  if (previousField !== null) {
    this.removeInfluencer(previousField);
  }
  this.addInfluencer(fieldName);
  this._partitionFieldName = fieldName;
  this._detectors.forEach((dtr) => {
    dtr.partition_field_name = fieldName;
  });
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,36 @@ export class JobCreator {
return JSON.stringify(this._datafeed_config, null, 2);
}

/**
 * Makes sure `analysis_config.per_partition_categorization` exists and that
 * both of its flags have a value: a missing object is created, and an
 * undefined `enabled` or `stop_on_warn` is set to `false`. Values that are
 * already set are left as they are.
 */
private _initPerPartitionCategorization() {
  const analysisConfig = this._job_config.analysis_config;
  if (analysisConfig.per_partition_categorization === undefined) {
    analysisConfig.per_partition_categorization = {};
  }

  const settings = analysisConfig.per_partition_categorization;
  if (settings.enabled === undefined) {
    settings.enabled = false;
  }
  if (settings.stop_on_warn === undefined) {
    settings.stop_on_warn = false;
  }
}

/** `true` only when the job config's per-partition categorization `enabled` flag is exactly `true`. */
public get perPartitionCategorization() {
  const settings = this._job_config.analysis_config.per_partition_categorization;
  return settings != null && settings.enabled === true;
}

/** Writes the `enabled` flag, first making sure the settings object and its defaults exist. */
public set perPartitionCategorization(enabled: boolean) {
  this._initPerPartitionCategorization();
  const settings = this._job_config.analysis_config.per_partition_categorization!;
  settings.enabled = enabled;
}

/** `true` only when the job config's per-partition categorization `stop_on_warn` flag is exactly `true`. */
public get perPartitionStopOnWarn() {
  const settings = this._job_config.analysis_config.per_partition_categorization;
  return settings != null && settings.stop_on_warn === true;
}

/** Writes the `stop_on_warn` flag, first making sure the settings object and its defaults exist. */
public set perPartitionStopOnWarn(enabled: boolean) {
  this._initPerPartitionCategorization();
  const settings = this._job_config.analysis_config.per_partition_categorization!;
  settings.stop_on_warn = enabled;
}

protected _overrideConfigs(job: Job, datafeed: Datafeed) {
this._job_config = job;
this._datafeed_config = datafeed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ export interface BasicValidations {
queryDelay: Validation;
frequency: Validation;
scrollSize: Validation;
categorizerMissingPerPartition: Validation;
categorizerVaryingPerPartitionField: Validation;
}

export interface AdvancedValidations {
Expand All @@ -76,6 +78,8 @@ export class JobValidator {
queryDelay: { valid: true },
frequency: { valid: true },
scrollSize: { valid: true },
categorizerMissingPerPartition: { valid: true },
categorizerVaryingPerPartitionField: { valid: true },
};
private _advancedValidations: AdvancedValidations = {
categorizationFieldValid: { valid: true },
Expand Down Expand Up @@ -273,6 +277,14 @@ export class JobValidator {
this._advancedValidations.categorizationFieldValid.valid = valid;
}

/** Basic-validation entry recording whether a categorization detector lacks a partition field. */
public get categorizerMissingPerPartition() {
  const { categorizerMissingPerPartition } = this._basicValidations;
  return categorizerMissingPerPartition;
}

/** Basic-validation entry recording whether categorization detectors use differing partition fields. */
public get categorizerVaryingPerPartitionField() {
  const { categorizerVaryingPerPartitionField } = this._basicValidations;
  return categorizerVaryingPerPartitionField;
}

/**
* Indicates if the Pick Fields step has a valid input
*/
Expand All @@ -283,6 +295,8 @@ export class JobValidator {
(this._jobCreator.type === JOB_TYPE.ADVANCED && this.modelMemoryLimit.valid)) &&
this.bucketSpan.valid &&
this.duplicateDetectors.valid &&
this.categorizerMissingPerPartition.valid &&
this.categorizerVaryingPerPartitionField.valid &&
!this.validating &&
(this._jobCreator.type !== JOB_TYPE.CATEGORIZATION ||
(this._jobCreator.type === JOB_TYPE.CATEGORIZATION && this.categorizationField))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,29 @@ export function populateValidationMessages(
basicValidations.duplicateDetectors.message = msg;
}

if (validationResults.contains('categorizer_detector_missing_per_partition_field')) {
basicValidations.categorizerMissingPerPartition.valid = false;
const msg = i18n.translate(
'xpack.ml.newJob.wizard.validateJob.categorizerMissingPerPartitionFieldMessage',
{
defaultMessage:
'Partition field must be set for detectors that reference "mlcategory" when per-partition categorization is enabled.',
}
);
basicValidations.categorizerMissingPerPartition.message = msg;
}
if (validationResults.contains('categorizer_varying_per_partition_fields')) {
basicValidations.categorizerVaryingPerPartitionField.valid = false;
const msg = i18n.translate(
'xpack.ml.newJob.wizard.validateJob.categorizerVaryingPerPartitionFieldNamesMessage',
{
defaultMessage:
'Detectors with keyword "mlcategory" cannot have different partition_field_name when per-partition categorization is enabled.',
}
);
basicValidations.categorizerVaryingPerPartitionField.message = msg;
}

if (validationResults.contains('bucket_span_empty')) {
basicValidations.bucketSpan.valid = false;
const msg = i18n.translate(
Expand Down
Loading

0 comments on commit 993d485

Please sign in to comment.