-
Notifications
You must be signed in to change notification settings - Fork 8.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ML] Adds grok highlighting to the file data visualizer (#175913)
Adds grok pattern highlighting to the file data visualizer for semi-structured text files. The first 5 lines of the file are displayed with inline highlighting. Hovering the mouse over a highlighted field displays a tooltip with the field name and type. ![image](https://github.com/elastic/kibana/assets/22172091/7b50aeca-0255-4413-93ef-e44976e798f4) If for whatever reason the highlighting fails, we switch back to the raw text. @szabosteve and @peteharverson I'm not 100% happy with the labels on the tabs, `Highlighted text` and `Raw text`, so suggestions are welcome. Relates to elastic/elasticsearch#104394 --------- Co-authored-by: kibanamachine <[email protected]> Co-authored-by: István Zoltán Szabó <[email protected]>
- Loading branch information
1 parent
8612ddc
commit 0bca3c0
Showing
15 changed files
with
521 additions
and
53 deletions.
There are no files selected for viewing
13 changes: 13 additions & 0 deletions
13
x-pack/plugins/data_visualizer/common/types/test_grok_pattern.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
/**
 * One occurrence of a grok field captured within a line of text.
 */
interface GrokFieldMatch {
  /** The captured text. */
  match: string;
  /** Start position of the capture within the line (consumers use it as a substring index). */
  offset: number;
  /** Length of the capture; `offset + length` is where the capture ends. */
  length: number;
}

/**
 * Result of testing the grok pattern against a single line.
 */
interface GrokLineMatch {
  /** Whether the grok pattern matched the line. */
  matched: boolean;
  /** Captured occurrences, keyed by field name. */
  fields: Record<string, GrokFieldMatch[]>;
}

/**
 * Response body of the test grok pattern endpoint: one entry per tested line.
 */
export interface TestGrokPatternResponse {
  matches: GrokLineMatch[];
}
62 changes: 62 additions & 0 deletions
62
...sualizer/public/application/file_data_visualizer/components/file_contents/field_badge.tsx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import React, { FC } from 'react'; | ||
import { EuiBadge, EuiFlexGroup, EuiFlexItem, EuiToolTip } from '@elastic/eui'; | ||
import { FieldIcon } from '@kbn/react-field'; | ||
import { i18n } from '@kbn/i18n'; | ||
import { getSupportedFieldType } from '../../../common/components/fields_stats_grid/get_field_names'; | ||
import { useCurrentEuiTheme } from '../../../common/hooks/use_current_eui_theme'; | ||
|
||
/**
 * Props accepted by the `FieldBadge` component.
 */
interface Props {
  /** Field name; shown as the tooltip title. */
  name: string;
  /** Text rendered inside the badge. */
  value: string;
  /** Field type, or `undefined` when no type is known for the field. */
  type: string | undefined;
}
|
||
/**
 * Inline badge showing a matched field value next to an icon for its type.
 *
 * The badge is wrapped in a tooltip whose title is the field name. When a
 * `type` is supplied, the tooltip body shows the supported field type derived
 * from it; otherwise the tooltip has no body content.
 */
export const FieldBadge: FC<Props> = (props) => {
  const { type, value, name } = props;
  const theme = useCurrentEuiTheme();
  // When no type is supplied the icon is looked up using 'unknown'.
  const iconType = getSupportedFieldType(type ?? 'unknown');
  // The same border is applied to both the badge and the icon inside it.
  const lightBorder = `1px solid ${theme.euiColorLightShade}`;

  let tooltipContent: string | undefined;
  if (type) {
    tooltipContent = i18n.translate('xpack.dataVisualizer.file.fileContents.fieldBadge.tooltip', {
      defaultMessage: 'Type: {type}',
      values: { type: iconType },
    });
  }

  return (
    <EuiToolTip title={name} content={tooltipContent}>
      <EuiBadge
        data-test-subj="dataVisualizerFieldBadge"
        css={{
          // magic numbers to align the badges with the text
          // and to align the icon correctly inside the badge.
          marginRight: '2px',
          marginTop: '-4px',
          padding: '0px 4px',
          cursor: 'pointer',
          pointerEvents: 'none',
          border: lightBorder,
          backgroundColor: theme.euiColorLightestShade,
        }}
      >
        <EuiFlexGroup gutterSize="none">
          <EuiFlexItem grow={false}>
            <FieldIcon
              type={iconType}
              css={{
                marginRight: '4px',
                marginTop: '1px',
                border: lightBorder,
              }}
            />
          </EuiFlexItem>
          <EuiFlexItem>{value}</EuiFlexItem>
        </EuiFlexGroup>
      </EuiBadge>
    </EuiToolTip>
  );
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
103 changes: 103 additions & 0 deletions
103
...izer/public/application/file_data_visualizer/components/file_contents/grok_highlighter.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { MessageImporter } from '@kbn/file-upload-plugin/public'; | ||
import type { HttpSetup } from '@kbn/core/public'; | ||
import type { ImportFactoryOptions } from '@kbn/file-upload-plugin/public/importer'; | ||
import type { FindFileStructureResponse } from '@kbn/file-upload-plugin/common'; | ||
import type { TestGrokPatternResponse } from '../../../../../common/types/test_grok_pattern'; | ||
|
||
// Limit handed to `_createDocs` by `GrokHighlighter.createLines`; presumably caps how many | ||
// lines of the file are turned into docs and sent for grok pattern testing (TODO confirm). | ||
export const LINE_LIMIT = 5; | ||
|
||
/** Describes the grok field that captured a segment of a line. */
interface HighlightedField {
  type: string;
  name: string;
}

/**
 * A contiguous piece of a line. `field` is present only when the piece was
 * captured by a grok field; plain text between captures has no `field`.
 */
interface HighlightedSegment {
  word: string;
  field?: HighlightedField;
}

/** A line of text broken into an ordered list of segments. */
type HighlightedLine = HighlightedSegment[];
|
||
/**
 * Splits the first lines of a text file into plain and field-matched segments
 * by testing a grok pattern against them on the server.
 */
export class GrokHighlighter extends MessageImporter {
  constructor(options: ImportFactoryOptions, private readonly http: HttpSetup) {
    super(options);
  }

  /**
   * Builds highlighted lines for the start of `text`.
   *
   * @param text Raw file contents.
   * @param grokPattern Grok pattern to test against each line.
   * @param mappings Mappings used to look up the type of each matched field.
   * @param ecsCompatibility ECS compatibility mode forwarded to the server, if any.
   * @returns One entry per line. A line that did not match, or for which the
   *   server returned no result, is returned as a single plain segment.
   */
  public async createLines(
    text: string,
    grokPattern: string,
    mappings: FindFileStructureResponse['mappings'],
    ecsCompatibility: string | undefined
  ): Promise<HighlightedLine[]> {
    const docs = this._createDocs(text, false, LINE_LIMIT);
    const lines = docs.docs.map((doc) => doc.message);
    const matches = await this.testGrokPattern(lines, grokPattern, ecsCompatibility);

    // The response may contain fewer entries than lines were sent; a missing
    // entry is treated like a non-matching line instead of throwing.
    return lines.map((line, index) => this.highlightLine(line, matches?.[index], mappings));
  }

  /** Converts one line and its grok test result into an ordered list of segments. */
  private highlightLine(
    line: string,
    result: TestGrokPatternResponse['matches'][number] | undefined,
    mappings: FindFileStructureResponse['mappings']
  ): HighlightedLine {
    if (result === undefined || result.matched === false) {
      return [{ word: line }];
    }

    const sortedFields = Object.entries(result.fields ?? {})
      .flatMap(([fieldName, occurrences]) => {
        // Only the first occurrence of each field is highlighted; a field
        // with no occurrences is ignored rather than crashing the destructure.
        const first = occurrences?.[0];
        if (first === undefined) {
          return [];
        }
        let type = mappings.properties[fieldName]?.type;
        if (type === undefined && fieldName === 'timestamp') {
          // it's possible that the timestamp field is not mapped as `timestamp`
          // but instead as `@timestamp`
          type = mappings.properties['@timestamp']?.type;
        }
        return [
          {
            name: fieldName,
            match: first.match,
            offset: first.offset,
            length: first.length,
            type,
          },
        ];
      })
      .sort((a, b) => a.offset - b.offset);

    let offset = 0;
    const highlightedLine: HighlightedLine = [];
    for (const field of sortedFields) {
      if (field.offset < offset) {
        // This field overlaps text that has already been emitted. Calling
        // `substring(offset, field.offset)` with start > end would swap the
        // arguments and duplicate text, so the overlapping field is skipped.
        continue;
      }
      // Plain text between the previous field (or line start) and this field.
      highlightedLine.push({ word: line.substring(offset, field.offset) });
      highlightedLine.push({
        word: field.match,
        field: {
          type: field.type,
          name: field.name,
        },
      });
      offset = field.offset + field.length;
    }
    // Whatever remains after the last field.
    highlightedLine.push({ word: line.substring(offset) });
    return highlightedLine;
  }

  /**
   * Posts the lines and grok pattern to the internal test endpoint and
   * returns the `matches` array from the response.
   */
  private async testGrokPattern(
    lines: string[],
    grokPattern: string,
    ecsCompatibility: string | undefined
  ) {
    const { matches } = await this.http.fetch<TestGrokPatternResponse>(
      '/internal/data_visualizer/test_grok_pattern',
      {
        method: 'POST',
        version: '1',
        body: JSON.stringify({
          grokPattern,
          text: lines,
          ecsCompatibility,
        }),
      }
    );
    return matches;
  }
}
Oops, something went wrong.