Skip to content

Commit

Permalink
[ML] Histogram API endpoint for transforms.
Browse files Browse the repository at this point in the history
  • Loading branch information
walterra committed Jul 13, 2020
1 parent 84203df commit a117e43
Show file tree
Hide file tree
Showing 6 changed files with 248 additions and 158 deletions.
329 changes: 172 additions & 157 deletions x-pack/plugins/ml/server/models/data_visualizer/data_visualizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* you may not use this file except in compliance with the Elastic License.
*/

import { LegacyCallAPIOptions, LegacyAPICaller } from 'kibana/server';
import { LegacyAPICaller } from 'kibana/server';
import _ from 'lodash';
import { KBN_FIELD_TYPES } from '../../../../../../src/plugins/data/server';
import { ML_JOB_FIELD_TYPES } from '../../../common/constants/field_types';
Expand Down Expand Up @@ -179,12 +179,176 @@ type BatchStats =
| DocumentCountStats
| FieldExamples;

/**
 * Determines a histogram bucket interval for each number/date field.
 *
 * Issues one `search` request containing a `stats` aggregation per number or
 * date field (each keyed by `stringHash(fieldName)`), then derives the interval
 * from the returned min/max so that a chart has roughly `MAX_CHART_COLUMNS`
 * columns.
 *
 * @param callAsCurrentUser - Elasticsearch caller used to run the search.
 * @param indexPatternTitle - Index (pattern) passed as `index` to the search.
 * @param query - Query placed in the search body.
 * @param fields - Fields to inspect; only number and date fields are considered.
 * @param samplerShardSize - Passed to `buildSamplerAggregation` and
 *   `getSamplerAggregationsResponsePath` to wrap/unwrap the aggregations.
 * @returns Map from hashed field name to `{ interval, min, max }`. Fields whose
 *   min or max is missing or null are omitted. An empty map is returned when
 *   there are no number/date fields or the response holds no aggregations.
 */
const getAggIntervals = async (
  callAsCurrentUser: LegacyAPICaller,
  indexPatternTitle: string,
  query: any,
  fields: HistogramField[],
  samplerShardSize: number
): Promise<NumericColumnStatsMap> => {
  const numericColumns = fields.filter(
    (field) => field.type === KBN_FIELD_TYPES.NUMBER || field.type === KBN_FIELD_TYPES.DATE
  );

  if (numericColumns.length === 0) {
    return {};
  }

  const minMaxAggs = numericColumns.reduce((aggs, c) => {
    aggs[stringHash(c.fieldName)] = {
      stats: {
        field: c.fieldName,
      },
    };
    return aggs;
  }, {} as Record<string, object>);

  const respStats = await callAsCurrentUser('search', {
    index: indexPatternTitle,
    size: 0,
    body: {
      query,
      aggs: buildSamplerAggregation(minMaxAggs, samplerShardSize),
      size: 0,
    },
  });

  const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
  const aggregations =
    aggsPath.length > 0 ? _.get(respStats.aggregations, aggsPath) : respStats.aggregations;

  // The response may carry no aggregations at the expected path; without this
  // guard `Object.keys` would throw a TypeError.
  if (aggregations === undefined || aggregations === null) {
    return {};
  }

  return Object.keys(aggregations).reduce((p, aggName) => {
    const min = _.get(aggregations, [aggName, 'min']);
    const max = _.get(aggregations, [aggName, 'max']);

    // Skip entries without usable bounds. `null` means the stats aggregation
    // saw no values; `undefined` means the key is not a stats result at all
    // (e.g. a `doc_count` sibling reported by a wrapping sampler aggregation),
    // which previously produced a bogus `{ interval: 1, min: undefined, ... }`.
    if (min === null || min === undefined || max === null || max === undefined) {
      return p;
    }

    const delta = max - min;

    // Default to unit-wide buckets; rescale when the range would give too many
    // columns or is too narrow for an interval of 1 to be meaningful. A delta
    // of 0 yields an interval of 0 here.
    let aggInterval = 1;
    if (delta > MAX_CHART_COLUMNS || delta <= 1) {
      aggInterval = delta / (MAX_CHART_COLUMNS - 1);
    }

    p[aggName] = { interval: aggInterval, min, max };
    return p;
  }, {} as NumericColumnStatsMap);
};

/**
 * Obtains chart data for the supplied fields. Exported for re-use by the
 * transforms plugin.
 *
 * The data returned for each field depends on its type:
 * - number/date: histogram buckets using the interval from `getAggIntervals`
 *   (type `'numeric'`);
 * - string: a terms aggregation limited to `MAX_CHART_COLUMNS` plus a
 *   cardinality aggregation (type `'ordinal'`);
 * - boolean: a terms aggregation with a fixed cardinality of 2 (type `'boolean'`);
 * - anything else: `{ type: 'unsupported' }`.
 *
 * @param callAsCurrentUser - Elasticsearch caller used to run the searches.
 * @param indexPatternTitle - Index (pattern) passed as `index` to the search.
 * @param query - Query placed in the search body.
 * @param fields - Fields to build chart data for.
 * @param samplerShardSize - Passed to `buildSamplerAggregation` and
 *   `getSamplerAggregationsResponsePath` to wrap/unwrap the aggregations.
 * @returns One entry per input field, in input order. An empty array is
 *   returned when no aggregation could be built for any field.
 */
export const getHistogramsForFields = async (
  callAsCurrentUser: LegacyAPICaller,
  indexPatternTitle: string,
  query: any,
  fields: HistogramField[],
  samplerShardSize: number
): Promise<ChartData[]> => {
  const aggIntervals = await getAggIntervals(
    callAsCurrentUser,
    indexPatternTitle,
    query,
    fields,
    samplerShardSize
  );

  const chartDataAggs = fields.reduce((aggs, field) => {
    const fieldName = field.fieldName;
    const fieldType = field.type;
    const id = stringHash(fieldName);
    if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) {
      // Numeric fields without min/max stats get no histogram aggregation.
      if (aggIntervals[id] !== undefined) {
        aggs[`${id}_histogram`] = {
          histogram: {
            field: fieldName,
            // Fall back to 1 when min === max produced an interval of 0.
            interval: aggIntervals[id].interval !== 0 ? aggIntervals[id].interval : 1,
          },
        };
      }
    } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) {
      if (fieldType === KBN_FIELD_TYPES.STRING) {
        aggs[`${id}_cardinality`] = {
          cardinality: {
            field: fieldName,
          },
        };
      }
      aggs[`${id}_terms`] = {
        terms: {
          field: fieldName,
          size: MAX_CHART_COLUMNS,
        },
      };
    }
    return aggs;
  }, {} as Record<string, ChartRequestAgg>);

  if (Object.keys(chartDataAggs).length === 0) {
    return [];
  }

  const respChartsData = await callAsCurrentUser('search', {
    index: indexPatternTitle,
    size: 0,
    body: {
      query,
      aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize),
      size: 0,
    },
  });

  const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
  const aggregations =
    aggsPath.length > 0
      ? _.get(respChartsData.aggregations, aggsPath)
      : respChartsData.aggregations;

  // `_.get` with a default is used below so that a response lacking the
  // aggregations section (or a single aggregation) yields empty chart data
  // instead of a TypeError on `aggregations[...]`.
  return fields.map(
    (field): ChartData => {
      const fieldName = field.fieldName;
      const fieldType = field.type;
      const id = stringHash(field.fieldName);

      if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) {
        if (aggIntervals[id] === undefined) {
          return {
            type: 'numeric',
            data: [],
            interval: 0,
            stats: [0, 0],
            id: fieldName,
          };
        }

        return {
          data: _.get(aggregations, [`${id}_histogram`, 'buckets'], []),
          interval: aggIntervals[id].interval,
          stats: [aggIntervals[id].min, aggIntervals[id].max],
          type: 'numeric',
          id: fieldName,
        };
      } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) {
        return {
          type: fieldType === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean',
          cardinality:
            fieldType === KBN_FIELD_TYPES.STRING
              ? _.get(aggregations, [`${id}_cardinality`, 'value'], 0)
              : 2,
          data: _.get(aggregations, [`${id}_terms`, 'buckets'], []),
          id: fieldName,
        };
      }

      return {
        type: 'unsupported',
        id: fieldName,
      };
    }
  );
};

export class DataVisualizer {
callAsCurrentUser: (
endpoint: string,
clientParams: Record<string, any>,
options?: LegacyCallAPIOptions
) => Promise<any>;
callAsCurrentUser: LegacyAPICaller;

constructor(callAsCurrentUser: LegacyAPICaller) {
this.callAsCurrentUser = callAsCurrentUser;
Expand Down Expand Up @@ -273,62 +437,6 @@ export class DataVisualizer {
return stats;
}

/**
 * Determines a histogram bucket interval for each number/date field.
 *
 * Issues one `search` request containing a `stats` aggregation per number or
 * date field (each keyed by `stringHash(fieldName)`), then derives the interval
 * from the returned min/max so that a chart has roughly `MAX_CHART_COLUMNS`
 * columns.
 *
 * @param indexPatternTitle - Index (pattern) passed as `index` to the search.
 * @param query - Query placed in the search body.
 * @param fields - Fields to inspect; only number and date fields are considered.
 * @param samplerShardSize - Passed to `buildSamplerAggregation` and
 *   `getSamplerAggregationsResponsePath` to wrap/unwrap the aggregations.
 * @returns Map from hashed field name to `{ interval, min, max }`. Fields whose
 *   min or max is missing or null are omitted. An empty map is returned when
 *   there are no number/date fields or the response holds no aggregations.
 */
async getAggIntervals(
  indexPatternTitle: string,
  query: any,
  fields: HistogramField[],
  samplerShardSize: number
): Promise<NumericColumnStatsMap> {
  const numericColumns = fields.filter(
    (field) => field.type === KBN_FIELD_TYPES.NUMBER || field.type === KBN_FIELD_TYPES.DATE
  );

  if (numericColumns.length === 0) {
    return {};
  }

  const minMaxAggs = numericColumns.reduce((aggs, c) => {
    aggs[stringHash(c.fieldName)] = {
      stats: {
        field: c.fieldName,
      },
    };
    return aggs;
  }, {} as Record<string, object>);

  const respStats = await this.callAsCurrentUser('search', {
    index: indexPatternTitle,
    size: 0,
    body: {
      query,
      aggs: buildSamplerAggregation(minMaxAggs, samplerShardSize),
      size: 0,
    },
  });

  const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
  const aggregations =
    aggsPath.length > 0 ? _.get(respStats.aggregations, aggsPath) : respStats.aggregations;

  // The response may carry no aggregations at the expected path; without this
  // guard `Object.keys` would throw a TypeError.
  if (aggregations === undefined || aggregations === null) {
    return {};
  }

  return Object.keys(aggregations).reduce((p, aggName) => {
    const min = _.get(aggregations, [aggName, 'min']);
    const max = _.get(aggregations, [aggName, 'max']);

    // Skip entries without usable bounds. `null` means the stats aggregation
    // saw no values; `undefined` means the key is not a stats result at all
    // (e.g. a `doc_count` sibling reported by a wrapping sampler aggregation),
    // which previously produced a bogus `{ interval: 1, min: undefined, ... }`.
    if (min === null || min === undefined || max === null || max === undefined) {
      return p;
    }

    const delta = max - min;

    // Default to unit-wide buckets; rescale when the range would give too many
    // columns or is too narrow for an interval of 1 to be meaningful. A delta
    // of 0 yields an interval of 0 here.
    let aggInterval = 1;
    if (delta > MAX_CHART_COLUMNS || delta <= 1) {
      aggInterval = delta / (MAX_CHART_COLUMNS - 1);
    }

    p[aggName] = { interval: aggInterval, min, max };
    return p;
  }, {} as NumericColumnStatsMap);
}

// Obtains binned histograms for supplied list of fields. The statistics for each field in the
// returned array depend on the type of the field (keyword, number, date etc).
// Sampling will be used if supplied samplerShardSize > 0.
Expand All @@ -338,106 +446,13 @@ export class DataVisualizer {
fields: HistogramField[],
samplerShardSize: number
): Promise<any> {
const aggIntervals = await this.getAggIntervals(
return await getHistogramsForFields(
this.callAsCurrentUser,
indexPatternTitle,
query,
fields,
samplerShardSize
);

const chartDataAggs = fields.reduce((aggs, field) => {
const fieldName = field.fieldName;
const fieldType = field.type;
const id = stringHash(fieldName);
if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) {
if (aggIntervals[id] !== undefined) {
aggs[`${id}_histogram`] = {
histogram: {
field: fieldName,
interval: aggIntervals[id].interval !== 0 ? aggIntervals[id].interval : 1,
},
};
}
} else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) {
if (fieldType === KBN_FIELD_TYPES.STRING) {
aggs[`${id}_cardinality`] = {
cardinality: {
field: fieldName,
},
};
}
aggs[`${id}_terms`] = {
terms: {
field: fieldName,
size: MAX_CHART_COLUMNS,
},
};
}
return aggs;
}, {} as Record<string, ChartRequestAgg>);

if (Object.keys(chartDataAggs).length === 0) {
return [];
}

const respChartsData = await this.callAsCurrentUser('search', {
index: indexPatternTitle,
size: 0,
body: {
query,
aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize),
size: 0,
},
});

const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
const aggregations =
aggsPath.length > 0
? _.get(respChartsData.aggregations, aggsPath)
: respChartsData.aggregations;

const chartsData: ChartData[] = fields.map(
(field): ChartData => {
const fieldName = field.fieldName;
const fieldType = field.type;
const id = stringHash(field.fieldName);

if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) {
if (aggIntervals[id] === undefined) {
return {
type: 'numeric',
data: [],
interval: 0,
stats: [0, 0],
id: fieldName,
};
}

return {
data: aggregations[`${id}_histogram`].buckets,
interval: aggIntervals[id].interval,
stats: [aggIntervals[id].min, aggIntervals[id].max],
type: 'numeric',
id: fieldName,
};
} else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) {
return {
type: fieldType === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean',
cardinality:
fieldType === KBN_FIELD_TYPES.STRING ? aggregations[`${id}_cardinality`].value : 2,
data: aggregations[`${id}_terms`].buckets,
id: fieldName,
};
}

return {
type: 'unsupported',
id: fieldName,
};
}
);

return chartsData;
}

// Obtains statistics for supplied list of fields. The statistics for each field in the
Expand Down
2 changes: 1 addition & 1 deletion x-pack/plugins/ml/server/models/data_visualizer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
* you may not use this file except in compliance with the Elastic License.
*/

export { DataVisualizer } from './data_visualizer';
export { getHistogramsForFields, DataVisualizer } from './data_visualizer';
1 change: 1 addition & 0 deletions x-pack/plugins/ml/server/shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ export * from '../common/types/anomalies';
export * from '../common/types/anomaly_detection_jobs';
export * from './lib/capabilities/errors';
export { ModuleSetupPayload } from './shared_services/providers/modules';
export { getHistogramsForFields } from './models/data_visualizer/';
Loading

0 comments on commit a117e43

Please sign in to comment.