diff --git a/x-pack/plugins/data_visualizer/common/types/field_request_config.ts b/x-pack/plugins/data_visualizer/common/types/field_request_config.ts index 84a24c8bcce39..43ba81eccd784 100644 --- a/x-pack/plugins/data_visualizer/common/types/field_request_config.ts +++ b/x-pack/plugins/data_visualizer/common/types/field_request_config.ts @@ -66,6 +66,12 @@ export interface FieldVisStats { max?: number; median?: number; min?: number; + sampledValues?: Array<{ + key: number | string; + doc_count: number; + percent: number; + key_as_string?: string; + }>; topValues?: Array<{ key: number | string; doc_count: number; diff --git a/x-pack/plugins/data_visualizer/common/types/field_stats.ts b/x-pack/plugins/data_visualizer/common/types/field_stats.ts index 15141888e8e32..61babb72dae70 100644 --- a/x-pack/plugins/data_visualizer/common/types/field_stats.ts +++ b/x-pack/plugins/data_visualizer/common/types/field_stats.ts @@ -93,6 +93,7 @@ export interface StringFieldStats { fieldName: string; isTopValuesSampled: boolean; topValues: Bucket[]; + sampledValues?: Bucket[]; topValuesSampleSize?: number; topValuesSamplerShardSize?: number; } diff --git a/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/keyword_content.tsx b/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/keyword_content.tsx index 0aee7715838ff..5a7b99befff64 100644 --- a/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/keyword_content.tsx +++ b/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/keyword_content.tsx @@ -55,6 +55,16 @@ export const KeywordContent: FC = ({ config, onAddFilter }) = barColor="success" onAddFilter={onAddFilter} /> + {config.stats?.sampledValues && fieldName !== undefined ? ( + + ) : null} + {EMSSuggestion && stats && } ); diff --git a/x-pack/plugins/data_visualizer/public/application/common/components/top_values/top_values.tsx b/x-pack/plugins/data_visualizer/public/application/common/components/top_values/top_values.tsx index 32ad9a2f2e295..fa1e1fbddd654 100644 --- a/x-pack/plugins/data_visualizer/public/application/common/components/top_values/top_values.tsx +++ b/x-pack/plugins/data_visualizer/public/application/common/components/top_values/top_values.tsx @@ -37,6 +37,7 @@ interface Props { barColor?: 'primary' | 'success' | 'danger' | 'subdued' | 'accent'; compressed?: boolean; onAddFilter?: (field: DataViewField | string, value: string, type: '+' | '-') => void; + showSampledValues?: boolean; } function getPercentLabel(percent: number): string { @@ -47,7 +48,20 @@ function getPercentLabel(percent: number): string { } } -export const TopValues: FC = ({ stats, fieldFormat, barColor, compressed, onAddFilter }) => { +export const TopValues: FC = ({ + stats, + fieldFormat, + barColor, + compressed, + onAddFilter, + /** Top values by default show % of time a value exist in sampled records/rows (i.e. value A exists in 10% of sampled records) + * showSampledValues: true shows % of times a value exist in all arrays of values that have been flattened + * Example for 4 records: ["a", "a", "b"], ["b", "b", "c"], "d", "e" + * "a" exists in 1/4 records (50% - showSampledValues: false), + * "a" exists in 2/8 sampled values (25% - showSampledValues: true). + */ + showSampledValues = false, +}) => { const { services: { data: { fieldFormats }, @@ -55,41 +69,34 @@ export const TopValues: FC = ({ stats, fieldFormat, barColor, compressed, } = useDataVisualizerKibana(); if (stats === undefined || !stats.topValues) return null; - const { topValues: originalTopValues, fieldName, sampleCount, topValuesSampleSize } = stats; - if (originalTopValues?.length === 0) return null; - const totalDocuments = Math.min(sampleCount ?? 0, stats.totalDocuments ?? 0); - - const topValues = originalTopValues.map((bucket) => ({ - ...bucket, - percent: - typeof bucket.percent === 'number' - ? bucket.percent - : bucket.doc_count / (topValuesSampleSize ?? totalDocuments), - })); + const { fieldName, sampleCount } = stats; - const topValuesOtherCountPercent = - 1 - (topValues ? topValues.reduce((acc, bucket) => acc + bucket.percent, 0) : 0); - const topValuesOtherCount = Math.floor(topValuesOtherCountPercent * (sampleCount ?? 0)); + const originalTopValues = (showSampledValues ? stats.sampledValues : stats.topValues) ?? []; + if (originalTopValues?.length === 0) return null; + const totalDocuments = showSampledValues + ? stats.topValuesSampleSize ?? 0 + : Math.min(sampleCount ?? 0, stats.totalDocuments ?? 0); const getMessage = () => { - if (topValuesSampleSize !== undefined) { + if (showSampledValues && stats.topValuesSampleSize !== undefined) { return ( {fieldFormats .getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER]) - .convert(topValuesSampleSize)} + .convert(stats.topValuesSampleSize)} ), }} /> ); } + return totalDocuments > (sampleCount ?? 0) ? ( = ({ stats, fieldFormat, barColor, compressed, ); + const topValues = originalTopValues.map((bucket) => ({ + ...bucket, + percent: + typeof bucket.percent === 'number' ? bucket.percent : bucket.doc_count / totalDocuments, + })); + + const topValuesOtherCountPercent = + 1 - (topValues ? topValues.reduce((acc, bucket) => acc + bucket.percent, 0) : 0); + const topValuesOtherCount = Math.floor(topValuesOtherCountPercent * (sampleCount ?? 0)); + return ( - + {showSampledValues ? ( + + ) : ( + + )}
{ const query = esqlBaseQuery + - `| STATS ${getSafeESQLName(`${field.name}_terms`)} = count(${getSafeESQLName( + `| STATS ${getSafeESQLName(`${field.name}_in_records`)} = count(MV_MIN(${getSafeESQLName( field.name - )}) BY ${getSafeESQLName(field.name)} - | SORT ${getSafeESQLName(`${field.name}_terms`)} DESC + )})), ${getSafeESQLName(`${field.name}_in_values`)} = count(${getSafeESQLName(field.name)}) + BY ${getSafeESQLName(field.name)} + | SORT ${getSafeESQLName(`${field.name}_in_records`)} DESC | LIMIT 10`; - return { field, request: { @@ -64,21 +64,29 @@ export const getESQLKeywordFieldStats = async ({ if (!resp) return; if (isFulfilled(resp)) { - const results = resp.value?.rawResponse?.values as Array<[BucketCount, BucketTerm]>; + const results = resp.value?.rawResponse?.values as Array< + [BucketCount, BucketCount, BucketTerm] + >; if (results) { const topValuesSampleSize = results.reduce((acc, row) => { - return row[0] + acc; + return row[1] + acc; }, 0); + const sampledValues = results.map((row) => ({ + key: row[2], + doc_count: row[1], + })); + const terms = results.map((row) => ({ - key: row[1], + key: row[2], doc_count: row[0], })); return { fieldName: field.name, topValues: terms, + sampledValues, isTopValuesSampled: true, topValuesSampleSize, } as StringFieldStats;