Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Data Frame Analytics: Fix race condition and support for feature influence legacy format. #81123

Merged
merged 7 commits into from
Oct 21, 2020
4 changes: 3 additions & 1 deletion x-pack/plugins/ml/common/types/data_frame_analytics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ export type DataFrameAnalyticsId = string;
export interface OutlierAnalysis {
[key: string]: {};

outlier_detection: {};
outlier_detection: {
compute_feature_influence?: boolean;
};
}

interface Regression {
Expand Down
3 changes: 3 additions & 0 deletions x-pack/plugins/ml/common/util/analytics_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,19 @@ import {
import { ANALYSIS_CONFIG_TYPE } from '../../common/constants/data_frame_analytics';

/**
 * Type guard for an outlier detection analysis config.
 *
 * @param arg - Value to inspect; may be anything, including `null` or a primitive.
 * @returns `true` only when `arg` is a non-null object whose single own
 * enumerable key equals `ANALYSIS_CONFIG_TYPE.OUTLIER_DETECTION`.
 */
export const isOutlierAnalysis = (arg: any): arg is OutlierAnalysis => {
  // `typeof null` is 'object', so null is rejected explicitly alongside primitives.
  if (arg === null || typeof arg !== 'object') {
    return false;
  }

  const ownKeys = Object.keys(arg);
  if (ownKeys.length !== 1) {
    return false;
  }

  const [onlyKey] = ownKeys;
  return onlyKey === ANALYSIS_CONFIG_TYPE.OUTLIER_DETECTION;
};

/**
 * Type guard for a regression analysis config.
 *
 * @param arg - Value to inspect; may be anything, including `null` or a primitive.
 * @returns `true` only when `arg` is a non-null object whose single own
 * enumerable key equals `ANALYSIS_CONFIG_TYPE.REGRESSION`.
 */
export const isRegressionAnalysis = (arg: any): arg is RegressionAnalysis => {
  // `typeof null` is 'object', so null is rejected explicitly alongside primitives.
  if (arg === null || typeof arg !== 'object') {
    return false;
  }

  const ownKeys = Object.keys(arg);
  if (ownKeys.length !== 1) {
    return false;
  }

  const [onlyKey] = ownKeys;
  return onlyKey === ANALYSIS_CONFIG_TYPE.REGRESSION;
};

/**
 * Type guard for a classification analysis config.
 *
 * @param arg - Value to inspect; may be anything, including `null` or a primitive.
 * @returns `true` only when `arg` is a non-null object whose single own
 * enumerable key equals `ANALYSIS_CONFIG_TYPE.CLASSIFICATION`.
 */
export const isClassificationAnalysis = (arg: any): arg is ClassificationAnalysis => {
  // `typeof null` is 'object', so null is rejected explicitly alongside primitives.
  if (arg === null || typeof arg !== 'object') {
    return false;
  }

  const ownKeys = Object.keys(arg);
  if (ownKeys.length !== 1) {
    return false;
  }

  const [onlyKey] = ownKeys;
  return onlyKey === ANALYSIS_CONFIG_TYPE.CLASSIFICATION;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import {

import {
FEATURE_IMPORTANCE,
FEATURE_INFLUENCE,
OUTLIER_SCORE,
TOP_CLASSES,
} from '../../data_frame_analytics/common/constants';
Expand Down Expand Up @@ -112,10 +111,7 @@ export const getDataGridSchemasFromFieldTypes = (fieldTypes: FieldTypes, results
schema = NON_AGGREGATABLE;
}

if (
field === `${resultsField}.${OUTLIER_SCORE}` ||
field.includes(`${resultsField}.${FEATURE_INFLUENCE}`)
) {
if (field === `${resultsField}.${OUTLIER_SCORE}`) {
schema = 'numeric';
}

Expand Down Expand Up @@ -203,11 +199,6 @@ export const useRenderCellValue = (
}

function getCellValue(cId: string) {
if (cId.includes(`.${FEATURE_INFLUENCE}.`) && resultsField !== undefined) {
const results = getNestedProperty(tableItems[adjustedRowIndex], resultsField, null);
return results[cId.replace(`${resultsField}.`, '')];
}

if (tableItems.hasOwnProperty(adjustedRowIndex)) {
const item = tableItems[adjustedRowIndex];

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import {
import { ES_FIELD_TYPES, KBN_FIELD_TYPES } from '../../../../../../../src/plugins/data/public';
import { newJobCapsService } from '../../services/new_job_capabilities_service';

import { FEATURE_IMPORTANCE, FEATURE_INFLUENCE, OUTLIER_SCORE, TOP_CLASSES } from './constants';
import { FEATURE_IMPORTANCE, OUTLIER_SCORE, TOP_CLASSES } from './constants';
import { DataFrameAnalyticsConfig } from '../../../../common/types/data_frame_analytics';

export type EsId = string;
Expand Down Expand Up @@ -179,7 +179,6 @@ export const getDefaultFieldsFromJobCaps = (
const resultsField = jobConfig.dest.results_field;

const featureImportanceFields = [];
const featureInfluenceFields = [];
const topClassesFields = [];
const allFields: any = [];
let type: ES_FIELD_TYPES | undefined;
Expand All @@ -193,16 +192,6 @@ export const getDefaultFieldsFromJobCaps = (
name: `${resultsField}.${OUTLIER_SCORE}`,
type: KBN_FIELD_TYPES.NUMBER,
});

featureInfluenceFields.push(
...fields
.filter((d) => !jobConfig.analyzed_fields.excludes.includes(d.id))
.map((d) => ({
id: `${resultsField}.${FEATURE_INFLUENCE}.${d.id}`,
name: `${resultsField}.${FEATURE_INFLUENCE}.${d.name}`,
type: KBN_FIELD_TYPES.NUMBER,
}))
);
}
}

Expand Down Expand Up @@ -247,12 +236,7 @@ export const getDefaultFieldsFromJobCaps = (
}
}

allFields.push(
...fields,
...featureImportanceFields,
...featureInfluenceFields,
...topClassesFields
);
allFields.push(...fields, ...featureImportanceFields, ...topClassesFields);
allFields.sort(({ name: a }: { name: string }, { name: b }: { name: string }) =>
sortExplorationResultsFields(a, b, jobConfig)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ import { DataFrameAnalyticsConfig } from '../../../../common/types/data_frame_an
export const getIndexData = async (
jobConfig: DataFrameAnalyticsConfig | undefined,
dataGrid: UseDataGridReturnType,
searchQuery: SavedSearchQuery
searchQuery: SavedSearchQuery,
options: { didCancel: boolean }
) => {
if (jobConfig !== undefined) {
const {
Expand Down Expand Up @@ -52,22 +53,19 @@ export const getIndexData = async (
index: jobConfig.dest.index,
body: {
fields: ['*'],
_source: jobConfig.dest.results_field,
_source: [],
query: searchQuery,
from: pageIndex * pageSize,
size: pageSize,
...(Object.keys(sort).length > 0 ? { sort } : {}),
},
});

setRowCount(resp.hits.total.value);
const docs = resp.hits.hits.map((d) => ({
...getProcessedFields(d.fields),
[jobConfig.dest.results_field]: d._source[jobConfig.dest.results_field],
}));

setTableItems(docs);
setStatus(INDEX_STATUS.LOADED);
if (!options.didCancel) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For regression and classification jobs, the results grid is failing to display for me:

image

setRowCount(resp.hits.total.value);
setTableItems(resp.hits.hits.map((d) => getProcessedFields(d.fields)));
setStatus(INDEX_STATUS.LOADED);
}
} catch (e) {
setErrorMessage(extractErrorMessage(e));
setStatus(INDEX_STATUS.ERROR);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,15 @@ export const useExplorationResults = (
dataGrid.resetPagination();
}, [JSON.stringify(searchQuery)]);

// The `didCancel` pattern lets us discard the response of an out-of-date remote request.
// We wrap `didCancel` in an object so we can mutate the value after it has been
// passed on to `getIndexData`.
useEffect(() => {
getIndexData(jobConfig, dataGrid, searchQuery);
const options = { didCancel: false };
getIndexData(jobConfig, dataGrid, searchQuery, options);
return () => {
options.didCancel = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we create a custom hook (call it `useSafeEffect`, for instance) that wraps the callback with the `didCancel` check under the hood?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, that's something we could do in a follow-up for 7.11. For this 7.10 fix I wanted to touch as little code as possible, which is why I went for duplicate code and no new utils. I'd also like to refactor `getIndexData` a bit so we don't have to use the mutable object but just a plain variable.

};
// custom comparison
}, [jobConfig && jobConfig.id, dataGrid.pagination, searchQuery, dataGrid.sortingColumns]);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,8 @@ export const getFeatureCount = (resultsField: string, tableItems: DataGridItem[]

const fullItem = tableItems[0];

if (
fullItem[resultsField] !== undefined &&
Array.isArray(fullItem[resultsField][FEATURE_INFLUENCE])
) {
return fullItem[resultsField][FEATURE_INFLUENCE].length;
if (Array.isArray(fullItem[`${resultsField}.${FEATURE_INFLUENCE}.feature_name`])) {
return fullItem[`${resultsField}.${FEATURE_INFLUENCE}.feature_name`].length;
}

return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

import React, { useState, FC } from 'react';

import { EuiSpacer, EuiText } from '@elastic/eui';
import { EuiCallOut, EuiSpacer, EuiText } from '@elastic/eui';

import { i18n } from '@kbn/i18n';

import {
useColorRange,
Expand All @@ -15,7 +17,8 @@ import {
} from '../../../../../components/color_range_legend';
import { SavedSearchQuery } from '../../../../../contexts/ml';

import { defaultSearchQuery, useResultsViewConfig } from '../../../../common';
import { defaultSearchQuery, isOutlierAnalysis, useResultsViewConfig } from '../../../../common';
import { FEATURE_INFLUENCE } from '../../../../common/constants';

import { ExpandableSectionAnalytics, ExpandableSectionResults } from '../expandable_section';
import { ExplorationQueryBar } from '../exploration_query_bar';
Expand All @@ -36,11 +39,26 @@ export const OutlierExploration: FC<ExplorationProps> = React.memo(({ jobId }) =

const { columnsWithCharts, tableItems } = outlierData;

const colorRange = useColorRange(
COLOR_RANGE.BLUE,
COLOR_RANGE_SCALE.INFLUENCER,
jobConfig !== undefined ? getFeatureCount(jobConfig.dest.results_field, tableItems) : 1
);
const featureCount = getFeatureCount(jobConfig?.dest?.results_field || '', tableItems);
const colorRange = useColorRange(COLOR_RANGE.BLUE, COLOR_RANGE_SCALE.INFLUENCER, featureCount);

// Show the color range only if feature influence is enabled and there's more than 0 features.
const showColorRange =
featureCount > 0 &&
isOutlierAnalysis(jobConfig?.analysis) &&
jobConfig?.analysis.outlier_detection.compute_feature_influence === true;

const resultsField = jobConfig?.dest.results_field ?? '';

// Identify if the results index has a legacy feature influence format.
// If feature influence was enabled for the legacy job we'll show a callout
// with some additional information for a workaround.
const showLegacyFeatureInfluenceFormatCallout =
isOutlierAnalysis(jobConfig?.analysis) &&
jobConfig?.analysis.outlier_detection.compute_feature_influence === true &&
columnsWithCharts.findIndex(
(d) => d.id === `${resultsField}.${FEATURE_INFLUENCE}.feature_name`
) === -1;

return (
<>
Expand All @@ -58,8 +76,26 @@ export const OutlierExploration: FC<ExplorationProps> = React.memo(({ jobId }) =
</>
)}
{typeof jobConfig?.id === 'string' && <ExpandableSectionAnalytics jobId={jobConfig?.id} />}
{showLegacyFeatureInfluenceFormatCallout && (
<>
<EuiCallOut
size="s"
title={i18n.translate(
'xpack.ml.dataframe.analytics.outlierExploration.legacyFeatureInfluenceFormatCalloutTitle',
{
defaultMessage:
'Color coded table cells based on feature influence are not available because the results index uses an unsupported legacy format. Please clone and rerun the job.',
}
)}
iconType="pin"
/>
<EuiSpacer size="m" />
</>
)}
<ExpandableSectionResults
colorRange={colorRange}
colorRange={
showColorRange && !showLegacyFeatureInfluenceFormatCallout ? colorRange : undefined
}
indexData={outlierData}
indexPattern={indexPattern}
jobConfig={jobConfig}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,6 @@ import {

import { getFeatureCount, getOutlierScoreFieldName } from './common';

interface FeatureInfluence {
feature_name: string;
influence: number;
}

export const useOutlierData = (
indexPattern: IndexPattern | undefined,
jobConfig: DataFrameAnalyticsConfig | undefined,
Expand Down Expand Up @@ -88,8 +83,15 @@ export const useOutlierData = (
}
}, [jobConfig && jobConfig.id]);

// The `didCancel` pattern lets us discard the response of an out-of-date remote request.
// We wrap `didCancel` in an object so we can mutate the value after it has been
// passed on to `getIndexData`.
useEffect(() => {
getIndexData(jobConfig, dataGrid, searchQuery);
const options = { didCancel: false };
getIndexData(jobConfig, dataGrid, searchQuery, options);
return () => {
options.didCancel = true;
};
// custom comparison
}, [jobConfig && jobConfig.id, dataGrid.pagination, searchQuery, dataGrid.sortingColumns]);

Expand Down Expand Up @@ -151,19 +153,17 @@ export const useOutlierData = (
const split = columnId.split('.');
let backgroundColor;

const featureNames = fullItem[`${resultsField}.${FEATURE_INFLUENCE}.feature_name`];

// column with feature values get color coded by its corresponding influencer value
if (
fullItem[resultsField] !== undefined &&
fullItem[resultsField][FEATURE_INFLUENCE] !== undefined &&
fullItem[resultsField][FEATURE_INFLUENCE].find(
(d: FeatureInfluence) => d.feature_name === columnId
) !== undefined
) {
backgroundColor = colorRange(
fullItem[resultsField][FEATURE_INFLUENCE].find(
(d: FeatureInfluence) => d.feature_name === columnId
).influence
);
if (Array.isArray(featureNames)) {
const featureIndex = featureNames.indexOf(columnId);

if (featureIndex > -1) {
backgroundColor = colorRange(
fullItem[`${resultsField}.${FEATURE_INFLUENCE}.influence`][featureIndex]
);
}
}

// column with influencer values get color coded by its own value
Expand Down