Skip to content

Commit

Permalink
Add new top values equivalent to show % in rows, % in all flattened v…
Browse files Browse the repository at this point in the history
…alues
  • Loading branch information
qn895 committed Apr 13, 2024
1 parent 65de7f5 commit 593ed2d
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ export interface FieldVisStats {
max?: number;
median?: number;
min?: number;
sampledValues?: Array<{
key: number | string;
doc_count: number;
percent: number;
key_as_string?: string;
}>;
topValues?: Array<{
key: number | string;
doc_count: number;
Expand Down
1 change: 1 addition & 0 deletions x-pack/plugins/data_visualizer/common/types/field_stats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ export interface StringFieldStats {
fieldName: string;
isTopValuesSampled: boolean;
topValues: Bucket[];
sampledValues?: Bucket[];
topValuesSampleSize?: number;
topValuesSamplerShardSize?: number;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ export const KeywordContent: FC<FieldDataRowProps> = ({ config, onAddFilter }) =
barColor="success"
onAddFilter={onAddFilter}
/>
{config.stats?.sampledValues && fieldName !== undefined ? (
<TopValues
stats={stats}
fieldFormat={fieldFormat}
barColor="success"
onAddFilter={onAddFilter}
showSampledValues={true}
/>
) : null}

{EMSSuggestion && stats && <ChoroplethMap stats={stats} suggestion={EMSSuggestion} />}
</ExpandedRowContent>
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ interface Props {
barColor?: 'primary' | 'success' | 'danger' | 'subdued' | 'accent';
compressed?: boolean;
onAddFilter?: (field: DataViewField | string, value: string, type: '+' | '-') => void;
showSampledValues?: boolean;
}

function getPercentLabel(percent: number): string {
Expand All @@ -47,49 +48,55 @@ function getPercentLabel(percent: number): string {
}
}

export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed, onAddFilter }) => {
export const TopValues: FC<Props> = ({
stats,
fieldFormat,
barColor,
compressed,
onAddFilter,
/** Top values by default show the % of sampled records/rows in which a value exists (i.e. value A exists in 10% of sampled records)
* showSampledValues: true shows the % of times a value exists among all values once arrays of values have been flattened
* Example for 4 records: ["a", "a", "b"], ["b", "b", "c"], "d", "e"
* "a" exists in 1/4 records (25% - showSampledValues: false),
* "a" exists in 2/8 sampled values (25% - showSampledValues: true).
* "b" exists in 2/4 records (50% - showSampledValues: false) but in 3/8 sampled values (37.5% - showSampledValues: true).
*/
showSampledValues = false,
}) => {
const {
services: {
data: { fieldFormats },
},
} = useDataVisualizerKibana();

if (stats === undefined || !stats.topValues) return null;
const { topValues: originalTopValues, fieldName, sampleCount, topValuesSampleSize } = stats;
if (originalTopValues?.length === 0) return null;
const totalDocuments = Math.min(sampleCount ?? 0, stats.totalDocuments ?? 0);

const topValues = originalTopValues.map((bucket) => ({
...bucket,
percent:
typeof bucket.percent === 'number'
? bucket.percent
: bucket.doc_count / (topValuesSampleSize ?? totalDocuments),
}));
const { fieldName, sampleCount } = stats;

const topValuesOtherCountPercent =
1 - (topValues ? topValues.reduce((acc, bucket) => acc + bucket.percent, 0) : 0);
const topValuesOtherCount = Math.floor(topValuesOtherCountPercent * (sampleCount ?? 0));
const originalTopValues = (showSampledValues ? stats.sampledValues : stats.topValues) ?? [];
if (originalTopValues?.length === 0) return null;
const totalDocuments = showSampledValues
? stats.topValuesSampleSize ?? 0
: Math.min(sampleCount ?? 0, stats.totalDocuments ?? 0);

const getMessage = () => {
if (topValuesSampleSize !== undefined) {
if (showSampledValues && stats.topValuesSampleSize !== undefined) {
return (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleValuesLabel"
defaultMessage="Calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {value} other {values}}."
values={{
sampledDocuments: topValuesSampleSize,
sampledDocuments: stats.topValuesSampleSize,
sampledDocumentsFormatted: (
<strong>
{fieldFormats
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
.convert(topValuesSampleSize)}
.convert(stats.topValuesSampleSize)}
</strong>
),
}}
/>
);
}

return totalDocuments > (sampleCount ?? 0) ? (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleRecordsLabel"
Expand Down Expand Up @@ -128,16 +135,33 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
</EuiText>
);

const topValues = originalTopValues.map((bucket) => ({
...bucket,
percent:
typeof bucket.percent === 'number' ? bucket.percent : bucket.doc_count / totalDocuments,
}));

const topValuesOtherCountPercent =
1 - (topValues ? topValues.reduce((acc, bucket) => acc + bucket.percent, 0) : 0);
const topValuesOtherCount = Math.floor(topValuesOtherCountPercent * (sampleCount ?? 0));

return (
<ExpandedRowPanel
dataTestSubj={'dataVisualizerFieldDataTopValues'}
className={classNames('dvPanel__wrapper', compressed ? 'dvPanel--compressed' : undefined)}
>
<ExpandedRowFieldHeader>
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.field.topValuesLabel"
defaultMessage="Top values"
/>
{showSampledValues ? (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.field.topSampledValuesLabel"
defaultMessage="Top Sampled values"
/>
) : (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.field.topValuesLabel"
defaultMessage="Top values"
/>
)}
</ExpandedRowFieldHeader>

<div
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ export const getESQLKeywordFieldStats = async ({
const keywordFields = columns.map((field) => {
const query =
esqlBaseQuery +
`| STATS ${getSafeESQLName(`${field.name}_terms`)} = count(${getSafeESQLName(
`| STATS ${getSafeESQLName(`${field.name}_in_records`)} = count(MV_MIN(${getSafeESQLName(
field.name
)}) BY ${getSafeESQLName(field.name)}
| SORT ${getSafeESQLName(`${field.name}_terms`)} DESC
)})), ${getSafeESQLName(`${field.name}_in_values`)} = count(${getSafeESQLName(field.name)})
BY ${getSafeESQLName(field.name)}
| SORT ${getSafeESQLName(`${field.name}_in_records`)} DESC
| LIMIT 10`;

return {
field,
request: {
Expand All @@ -64,21 +64,29 @@ export const getESQLKeywordFieldStats = async ({
if (!resp) return;

if (isFulfilled(resp)) {
const results = resp.value?.rawResponse?.values as Array<[BucketCount, BucketTerm]>;
const results = resp.value?.rawResponse?.values as Array<
[BucketCount, BucketCount, BucketTerm]
>;

if (results) {
const topValuesSampleSize = results.reduce((acc, row) => {
return row[0] + acc;
return row[1] + acc;
}, 0);

const sampledValues = results.map((row) => ({
key: row[2],
doc_count: row[1],
}));

const terms = results.map((row) => ({
key: row[1],
key: row[2],
doc_count: row[0],
}));

return {
fieldName: field.name,
topValues: terms,
sampledValues,
isTopValuesSampled: true,
topValuesSampleSize,
} as StringFieldStats;
Expand Down

0 comments on commit 593ed2d

Please sign in to comment.