-
Notifications
You must be signed in to change notification settings - Fork 8.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ML] Adds sampled % of documents & cardinality for text fields for Data visualizer/Field stats & fix missing bucket in doc count chart #172378
Changes from 9 commits
f4ce944
ed88d3b
b767c85
bbed315
40cfce9
00f1eab
12bc9f7
1940769
adb6ae0
e6facd0
fbe174b
6e88086
075d5a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -5,21 +5,67 @@ | |||||
* 2.0. | ||||||
*/ | ||||||
|
||||||
import { EuiIcon, EuiText } from '@elastic/eui'; | ||||||
import { EuiIcon, EuiText, EuiToolTip } from '@elastic/eui'; | ||||||
|
||||||
import React from 'react'; | ||||||
import { FormattedMessage } from '@kbn/i18n-react'; | ||||||
import { ES_FIELD_TYPES, KBN_FIELD_TYPES } from '@kbn/field-types'; | ||||||
import { SUPPORTED_FIELD_TYPES } from '../../../../../../../common/constants'; | ||||||
import { useDataVisualizerKibana } from '../../../../../kibana_context'; | ||||||
import { FieldDataRowProps } from '../../types'; | ||||||
|
||||||
interface Props { | ||||||
cardinality?: number; | ||||||
interface Props extends FieldDataRowProps { | ||||||
showIcon?: boolean; | ||||||
} | ||||||
|
||||||
export const DistinctValues = ({ cardinality, showIcon }: Props) => { | ||||||
if (cardinality === undefined) return null; | ||||||
export const DistinctValues = ({ showIcon, config }: Props) => { | ||||||
const { stats, type } = config; | ||||||
const { | ||||||
services: { | ||||||
data: { fieldFormats }, | ||||||
}, | ||||||
} = useDataVisualizerKibana(); | ||||||
|
||||||
const cardinality = config?.stats?.cardinality; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated here 6e88086 |
||||||
|
||||||
if (cardinality === undefined || stats === undefined) return null; | ||||||
|
||||||
const { sampleCount } = stats; | ||||||
|
||||||
const tooltipContent = | ||||||
type === SUPPORTED_FIELD_TYPES.TEXT ? ( | ||||||
<FormattedMessage | ||||||
id="xpack.dataVisualizer.sampledCardinalityForTextFieldsMsg" | ||||||
defaultMessage="The cardinality for text fields is sampled and calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}." | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As above.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated here e6facd0 |
||||||
values={{ | ||||||
sampledDocuments: sampleCount, | ||||||
sampledDocumentsFormatted: ( | ||||||
<strong> | ||||||
{fieldFormats | ||||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER]) | ||||||
.convert(sampleCount)} | ||||||
</strong> | ||||||
), | ||||||
}} | ||||||
/> | ||||||
) : null; | ||||||
|
||||||
const icon = showIcon ? ( | ||||||
type === SUPPORTED_FIELD_TYPES.TEXT ? ( | ||||||
<EuiToolTip content={tooltipContent}> | ||||||
<EuiIcon type="partial" size={'m'} className={'columnHeader__icon'} /> | ||||||
</EuiToolTip> | ||||||
) : ( | ||||||
<EuiIcon type="database" size={'m'} className={'columnHeader__icon'} /> | ||||||
) | ||||||
) : null; | ||||||
|
||||||
const content = <EuiText size={'xs'}>{cardinality}</EuiText>; | ||||||
|
||||||
return ( | ||||||
<> | ||||||
{showIcon ? <EuiIcon type="database" size={'m'} className={'columnHeader__icon'} /> : null} | ||||||
<EuiText size={'xs'}>{cardinality}</EuiText> | ||||||
{icon} | ||||||
<EuiToolTip content={tooltipContent}>{content}</EuiToolTip> | ||||||
</> | ||||||
); | ||||||
}; |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -5,11 +5,13 @@ | |||||
* 2.0. | ||||||
*/ | ||||||
|
||||||
import { EuiIcon, EuiText } from '@elastic/eui'; | ||||||
import { EuiIcon, EuiText, EuiToolTip } from '@elastic/eui'; | ||||||
|
||||||
import React from 'react'; | ||||||
import { ES_FIELD_TYPES, KBN_FIELD_TYPES } from '@kbn/field-types'; | ||||||
import { roundToDecimalPlace } from '@kbn/ml-number-utils'; | ||||||
import { FormattedMessage } from '@kbn/i18n-react'; | ||||||
import { SUPPORTED_FIELD_TYPES } from '../../../../../../../common/constants'; | ||||||
import { useDataVisualizerKibana } from '../../../../../kibana_context'; | ||||||
import { isIndexBasedFieldVisConfig } from '../../../../../../../common/types/field_vis_config'; | ||||||
import type { FieldDataRowProps } from '../../types/field_data_row'; | ||||||
|
@@ -19,7 +21,7 @@ interface Props extends FieldDataRowProps { | |||||
totalCount?: number; | ||||||
} | ||||||
export const DocumentStat = ({ config, showIcon, totalCount }: Props) => { | ||||||
const { stats } = config; | ||||||
const { stats, type } = config; | ||||||
const { | ||||||
services: { | ||||||
data: { fieldFormats }, | ||||||
|
@@ -40,15 +42,47 @@ export const DocumentStat = ({ config, showIcon, totalCount }: Props) => { | |||||
? `(${roundToDecimalPlace((valueCount / total) * 100)}%)` | ||||||
: null; | ||||||
|
||||||
const content = ( | ||||||
<EuiText size={'xs'}> | ||||||
{fieldFormats | ||||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER]) | ||||||
.convert(valueCount)}{' '} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is the space char `{' '}` needed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For this one, yes, since it's separating two different values. |
||||||
{docsPercent} | ||||||
</EuiText> | ||||||
); | ||||||
|
||||||
const tooltipContent = | ||||||
type === SUPPORTED_FIELD_TYPES.TEXT ? ( | ||||||
<FormattedMessage | ||||||
id="xpack.dataVisualizer.sampledPercentageForTextFieldsMsg" | ||||||
defaultMessage="The % of documents for text fields is sampled and calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}." | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it could be a bit simpler.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated here e6facd0 |
||||||
values={{ | ||||||
sampledDocuments: sampleCount, | ||||||
sampledDocumentsFormatted: ( | ||||||
<strong> | ||||||
{fieldFormats | ||||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER]) | ||||||
.convert(sampleCount)} | ||||||
</strong> | ||||||
), | ||||||
}} | ||||||
/> | ||||||
) : null; | ||||||
|
||||||
const icon = showIcon ? ( | ||||||
type === SUPPORTED_FIELD_TYPES.TEXT ? ( | ||||||
<EuiToolTip content={tooltipContent}> | ||||||
<EuiIcon type="partial" size={'m'} className={'columnHeader__icon'} /> | ||||||
</EuiToolTip> | ||||||
) : ( | ||||||
<EuiIcon type="document" size={'m'} className={'columnHeader__icon'} /> | ||||||
) | ||||||
) : null; | ||||||
|
||||||
return valueCount !== undefined ? ( | ||||||
<> | ||||||
{showIcon ? <EuiIcon type="document" size={'m'} className={'columnHeader__icon'} /> : null} | ||||||
<EuiText size={'xs'}> | ||||||
{fieldFormats | ||||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER]) | ||||||
.convert(valueCount)}{' '} | ||||||
{docsPercent} | ||||||
</EuiText> | ||||||
{icon} | ||||||
<EuiToolTip content={tooltipContent}>{content}</EuiToolTip> | ||||||
</> | ||||||
) : null; | ||||||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,13 +15,16 @@ import type { | |
ISearchOptions, | ||
} from '@kbn/data-plugin/common'; | ||
import { extractErrorProperties } from '@kbn/ml-error-utils'; | ||
import { getProcessedFields } from '@kbn/ml-data-grid'; | ||
import { useDataVisualizerKibana } from '../../kibana_context'; | ||
import { | ||
AggregatableFieldOverallStats, | ||
checkAggregatableFieldsExistRequest, | ||
checkNonAggregatableFieldExistsRequest, | ||
getSampleOfDocumentsForNonAggregatableFields, | ||
isAggregatableFieldOverallStats, | ||
isNonAggregatableFieldOverallStats, | ||
isNonAggregatableSampledDocs, | ||
NonAggregatableFieldOverallStats, | ||
processAggregatableFieldsExistResponse, | ||
processNonAggregatableFieldsExistResponse, | ||
|
@@ -128,6 +131,26 @@ export function useOverallStats<TParams extends OverallStatsSearchStrategyParams | |
probability | ||
); | ||
|
||
const nonAggregatableFieldsExamplesObs = data.search | ||
.search<IKibanaSearchRequest, IKibanaSearchResponse>( | ||
{ | ||
params: getSampleOfDocumentsForNonAggregatableFields( | ||
nonAggregatableFields, | ||
index, | ||
searchQuery, | ||
timeFieldName, | ||
earliest, | ||
latest, | ||
runtimeFieldMap | ||
), | ||
}, | ||
searchOptions | ||
) | ||
.pipe( | ||
map((resp) => { | ||
return resp as IKibanaSearchResponse; | ||
}) | ||
); | ||
const nonAggregatableFieldsObs = nonAggregatableFields.map((fieldName: string) => | ||
data.search | ||
.search<IKibanaSearchRequest, IKibanaSearchResponse>( | ||
|
@@ -190,14 +213,29 @@ export function useOverallStats<TParams extends OverallStatsSearchStrategyParams | |
|
||
const sub = rateLimitingForkJoin< | ||
AggregatableFieldOverallStats | NonAggregatableFieldOverallStats | undefined | ||
>([...aggregatableOverallStatsObs, ...nonAggregatableFieldsObs], MAX_CONCURRENT_REQUESTS); | ||
>( | ||
[ | ||
nonAggregatableFieldsExamplesObs, | ||
...aggregatableOverallStatsObs, | ||
...nonAggregatableFieldsObs, | ||
], | ||
MAX_CONCURRENT_REQUESTS | ||
); | ||
|
||
searchSubscription$.current = sub.subscribe({ | ||
next: (value) => { | ||
const aggregatableOverallStatsResp: AggregatableFieldOverallStats[] = []; | ||
const nonAggregatableOverallStatsResp: NonAggregatableFieldOverallStats[] = []; | ||
|
||
let sampledNonAggregatableFieldsExamples: Array<{ [key: string]: string }> | undefined; | ||
value.forEach((resp, idx) => { | ||
if (idx === 0 && isNonAggregatableSampledDocs(resp)) { | ||
const docs = resp.rawResponse.hits.hits.map((d) => | ||
getProcessedFields(d.fields ?? {}) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Rather than calling `getProcessedFields` with an empty object, consider: `d.fields ? getProcessedFields(d.fields) : {}` There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated here 6e88086 |
||
); | ||
|
||
sampledNonAggregatableFieldsExamples = docs; | ||
} | ||
if (isAggregatableFieldOverallStats(resp)) { | ||
aggregatableOverallStatsResp.push(resp); | ||
} | ||
|
@@ -214,9 +252,27 @@ export function useOverallStats<TParams extends OverallStatsSearchStrategyParams | |
aggregatableFields | ||
); | ||
|
||
const nonAggregatableFieldsCount: number[] = new Array(nonAggregatableFields.length).fill( | ||
0 | ||
); | ||
const nonAggregatableFieldsUniqueCount = nonAggregatableFields.map( | ||
() => new Set<string>() | ||
); | ||
if (sampledNonAggregatableFieldsExamples) { | ||
sampledNonAggregatableFieldsExamples.forEach((doc) => { | ||
nonAggregatableFields.forEach((field, fieldIdx) => { | ||
if (doc.hasOwnProperty(field)) { | ||
nonAggregatableFieldsCount[fieldIdx] += 1; | ||
nonAggregatableFieldsUniqueCount[fieldIdx].add(doc[field]!); | ||
} | ||
}); | ||
}); | ||
} | ||
const nonAggregatableOverallStats = processNonAggregatableFieldsExistResponse( | ||
nonAggregatableOverallStatsResp, | ||
nonAggregatableFields | ||
nonAggregatableFields, | ||
nonAggregatableFieldsCount, | ||
nonAggregatableFieldsUniqueCount | ||
); | ||
|
||
setOverallStats({ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated here 6e88086