Skip to content

Commit

Permalink
[ML] Adds grok highlighting to the file data visualizer (#175913)
Browse files Browse the repository at this point in the history
Adds grok pattern highlighting to the file data visualizer for
semi-structured text files.
The first 5 lines of the file are displayed with inline highlighting.
Hovering the mouse over a highlighted field displays a tooltip with the field name and type.


![image](https://github.com/elastic/kibana/assets/22172091/7b50aeca-0255-4413-93ef-e44976e798f4)


If for whatever reason the highlighting fails, we switch back to the raw
text.

@szabosteve and @peteharverson I'm not 100% happy with the labels on the
tabs, `Highlighted text` and `Raw text`. So suggestions are welcome.



Relates to elastic/elasticsearch#104394

---------

Co-authored-by: kibanamachine <[email protected]>
Co-authored-by: István Zoltán Szabó <[email protected]>
  • Loading branch information
3 people authored Feb 8, 2024
1 parent 8612ddc commit 0bca3c0
Show file tree
Hide file tree
Showing 15 changed files with 521 additions and 53 deletions.
13 changes: 13 additions & 0 deletions x-pack/plugins/data_visualizer/common/types/test_grok_pattern.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/** A single captured occurrence of a grok field within one tested line. */
interface GrokFieldOccurrence {
  /** The text captured for the field. */
  match: string;
  /** Start position of the captured text within the line. */
  offset: number;
  /** Length of the captured text. */
  length: number;
}

/** Outcome of testing the grok pattern against one line of text. */
interface GrokLineResult {
  /** Whether the grok pattern matched the line. */
  matched: boolean;
  /** Captured occurrences, keyed by field name. */
  fields: Record<string, GrokFieldOccurrence[]>;
}

/**
 * Response body of the test grok pattern endpoint.
 */
export interface TestGrokPatternResponse {
  /** Per-line results; the visible consumer reads them by line index. */
  matches: GrokLineResult[];
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import React, { FC } from 'react';
import { EuiBadge, EuiFlexGroup, EuiFlexItem, EuiToolTip } from '@elastic/eui';
import { FieldIcon } from '@kbn/react-field';
import { i18n } from '@kbn/i18n';
import { getSupportedFieldType } from '../../../common/components/fields_stats_grid/get_field_names';
import { useCurrentEuiTheme } from '../../../common/hooks/use_current_eui_theme';

/** Props accepted by the FieldBadge component. */
interface Props {
  /** Field type; when missing, the icon falls back to 'unknown' and no type tooltip text is shown. */
  type: string | undefined;
  /** Text rendered inside the badge. */
  value: string;
  /** Field name, used as the tooltip title. */
  name: string;
}

/**
 * Inline badge showing a field value next to its field type icon,
 * wrapped in a tooltip titled with the field name.
 */
export const FieldBadge: FC<Props> = ({ type, value, name }) => {
  const theme = useCurrentEuiTheme();
  // The same border is applied to both the badge and the icon inside it.
  const border = `1px solid ${theme.euiColorLightShade}`;
  const supportedType = getSupportedFieldType(type ?? 'unknown');

  // Only describe the type in the tooltip when one was actually supplied.
  let tooltip: string | undefined;
  if (type) {
    tooltip = i18n.translate('xpack.dataVisualizer.file.fileContents.fieldBadge.tooltip', {
      defaultMessage: 'Type: {type}',
      values: { type: supportedType },
    });
  }

  const icon = (
    <FieldIcon
      type={supportedType}
      css={{
        marginRight: '4px',
        marginTop: '1px',
        border,
      }}
    />
  );

  return (
    <EuiToolTip title={name} content={tooltip}>
      <EuiBadge
        data-test-subj="dataVisualizerFieldBadge"
        css={{
          // magic numbers to align the badges with the text
          // and to align the icon correctly inside the badge.
          marginRight: '2px',
          marginTop: '-4px',
          padding: '0px 4px',
          cursor: 'pointer',
          pointerEvents: 'none',
          border,
          backgroundColor: theme.euiColorLightestShade,
        }}
      >
        <EuiFlexGroup gutterSize="none">
          <EuiFlexItem grow={false}>{icon}</EuiFlexItem>
          <EuiFlexItem>{value}</EuiFlexItem>
        </EuiFlexGroup>
      </EuiBadge>
    </EuiToolTip>
  );
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,52 +5,150 @@
* 2.0.
*/

import React, { FC, useEffect, useState, useMemo } from 'react';
import { FormattedMessage } from '@kbn/i18n-react';
import React, { FC } from 'react';

import { EuiTitle, EuiSpacer } from '@elastic/eui';
import {
EuiTitle,
EuiSpacer,
EuiHorizontalRule,
EuiFlexGroup,
EuiFlexItem,
EuiSwitch,
} from '@elastic/eui';

import { JsonEditor, EDITOR_MODE } from '../json_editor';
import type { FindFileStructureResponse } from '@kbn/file-upload-plugin/common';
import useMountedState from 'react-use/lib/useMountedState';
import { i18n } from '@kbn/i18n';
import { EDITOR_MODE, JsonEditor } from '../json_editor';
import { useGrokHighlighter } from './use_text_parser';
import { LINE_LIMIT } from './grok_highlighter';

/** Props accepted by the FileContents component. */
interface Props {
/** Text of the file; trimmed with limitByNumberOfLines for the editor and passed to the grok highlighter. */
data: string;
/** File format; compared against EDITOR_MODE.JSON to choose the editor mode. */
format: string;
/** Number of lines used to limit the displayed text and reported in the "First N lines" message. */
numberOfLines: number;
/** Grok-related details for semi-structured text; null (or missing patterns) disables the highlighting switch. */
semiStructureTextData: SemiStructureTextData | null;
}

export const FileContents: FC<Props> = ({ data, format, numberOfLines }) => {
/**
 * Details needed to highlight a semi-structured text file with its grok pattern.
 */
interface SemiStructureTextData {
  /** Grok pattern to test the lines against; required for highlighting. */
  grokPattern?: string;
  /** Pattern marking the start of a multi-line message; required for highlighting. */
  multilineStartPattern?: string;
  /** Optional pattern forwarded to the grok highlighter alongside the other settings. */
  excludeLinesPattern?: string;
  sampleStart: string;
  /** Field mappings, forwarded to the grok highlighter. */
  mappings: FindFileStructureResponse['mappings'];
  /** Optional ECS compatibility setting forwarded to the grok highlighter. */
  ecsCompatibility?: string;
}

/**
 * Checks whether grok highlighting can be attempted for the supplied data.
 *
 * @param semiStructureTextData the candidate data, or null when none is available
 * @returns true only when the data is non-null and both `grokPattern` and
 * `multilineStartPattern` are defined
 */
function semiStructureTextDataGuard(
  semiStructureTextData: SemiStructureTextData | null
): semiStructureTextData is SemiStructureTextData {
  if (semiStructureTextData === null) {
    return false;
  }

  const { grokPattern, multilineStartPattern } = semiStructureTextData;
  return grokPattern !== undefined && multilineStartPattern !== undefined;
}

export const FileContents: FC<Props> = ({ data, format, numberOfLines, semiStructureTextData }) => {
let mode = EDITOR_MODE.TEXT;
if (format === EDITOR_MODE.JSON) {
mode = EDITOR_MODE.JSON;
}
const isMounted = useMountedState();
const grokHighlighter = useGrokHighlighter();

const [isSemiStructureTextData, setIsSemiStructureTextData] = useState(
semiStructureTextDataGuard(semiStructureTextData)
);
const formattedData = useMemo(
() => limitByNumberOfLines(data, numberOfLines),
[data, numberOfLines]
);

const [highlightedLines, setHighlightedLines] = useState<JSX.Element[] | null>(null);
const [showHighlights, setShowHighlights] = useState<boolean>(isSemiStructureTextData);

useEffect(() => {
if (isSemiStructureTextData === false) {
return;
}
const { grokPattern, multilineStartPattern, excludeLinesPattern, mappings, ecsCompatibility } =
semiStructureTextData!;

const formattedData = limitByNumberOfLines(data, numberOfLines);
grokHighlighter(
data,
grokPattern!,
mappings,
ecsCompatibility,
multilineStartPattern!,
excludeLinesPattern
)
.then((docs) => {
if (isMounted()) {
setHighlightedLines(docs);
}
})
.catch((e) => {
if (isMounted()) {
setHighlightedLines(null);
setIsSemiStructureTextData(false);
}
});
}, [data, semiStructureTextData, grokHighlighter, isSemiStructureTextData, isMounted]);

return (
<React.Fragment>
<EuiTitle size="s">
<h2>
<FormattedMessage
id="xpack.dataVisualizer.file.fileContents.fileContentsTitle"
defaultMessage="File contents"
/>
</h2>
</EuiTitle>

<div>
<FormattedMessage
id="xpack.dataVisualizer.file.fileContents.firstLinesDescription"
defaultMessage="First {numberOfLines, plural, zero {# line} one {# line} other {# lines}}"
values={{
numberOfLines,
}}
/>
</div>
<>
<EuiFlexGroup>
<EuiFlexItem>
<EuiTitle size="s">
<h2>
<FormattedMessage
id="xpack.dataVisualizer.file.fileContents.fileContentsTitle"
defaultMessage="File contents"
/>
</h2>
</EuiTitle>
</EuiFlexItem>
{isSemiStructureTextData ? (
<EuiFlexItem grow={false} data-test-subj="dataVisualizerFileContentsHighlightingSwitch">
<EuiSwitch
label={i18n.translate('xpack.dataVisualizer.file.fileContents.highlightSwitch', {
defaultMessage: 'Grok pattern highlighting',
})}
compressed
checked={showHighlights}
onChange={() => setShowHighlights(!showHighlights)}
/>
</EuiFlexItem>
) : null}
</EuiFlexGroup>

<EuiSpacer size="s" />

<FormattedMessage
id="xpack.dataVisualizer.file.fileContents.firstLinesDescription"
defaultMessage="First {numberOfLines, plural, zero {# line} one {# line} other {# lines}}"
values={{
numberOfLines: showHighlights ? LINE_LIMIT : numberOfLines,
}}
/>

<EuiSpacer size="s" />

<JsonEditor mode={mode} readOnly={true} value={formattedData} height="200px" />
</React.Fragment>
{highlightedLines === null || showHighlights === false ? (
<JsonEditor mode={mode} readOnly={true} value={formattedData} height="200px" />
) : (
<>
{highlightedLines.map((line, i) => (
<>
{line}
{i === highlightedLines.length - 1 ? null : <EuiHorizontalRule margin="s" />}
</>
))}
</>
)}
</>
);
};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { MessageImporter } from '@kbn/file-upload-plugin/public';
import type { HttpSetup } from '@kbn/core/public';
import type { ImportFactoryOptions } from '@kbn/file-upload-plugin/public/importer';
import type { FindFileStructureResponse } from '@kbn/file-upload-plugin/common';
import type { TestGrokPatternResponse } from '../../../../../common/types/test_grok_pattern';

/**
 * Line count used for grok highlighting: passed to `_createDocs` when building
 * the lines to test, and shown as the number of displayed lines while
 * highlighting is switched on.
 */
export const LINE_LIMIT = 5;

/** One segment of a line: either plain text or text captured for a field. */
interface HighlightedWord {
  /** The text of this segment. */
  word: string;
  /** Present only when the segment was captured for a field. */
  field?: {
    type: string;
    name: string;
  };
}

/** A line split into consecutive plain and field segments, in reading order. */
type HighlightedLine = HighlightedWord[];

/**
 * Splits the first lines of a text file into plain and field segments by
 * testing them against a grok pattern through the data visualizer's
 * test_grok_pattern endpoint.
 */
export class GrokHighlighter extends MessageImporter {
  constructor(options: ImportFactoryOptions, private http: HttpSetup) {
    super(options);
  }

  /**
   * Builds the highlighted segments for up to LINE_LIMIT messages of `text`.
   *
   * @param text raw file text
   * @param grokPattern grok pattern to test each message against
   * @param mappings field mappings used to look up each field's type
   * @param ecsCompatibility optional ECS compatibility setting forwarded to the endpoint
   * @returns one segment list per message; messages without a match result, or
   * that did not match, are returned as a single plain segment
   * @throws whatever the HTTP request throws; the error is not caught here
   */
  public async createLines(
    text: string,
    grokPattern: string,
    mappings: FindFileStructureResponse['mappings'],
    ecsCompatibility: string | undefined
  ): Promise<HighlightedLine[]> {
    const docs = this._createDocs(text, false, LINE_LIMIT);
    const lines = docs.docs.map((doc) => doc.message);
    const matches = await this.testGrokPattern(lines, grokPattern, ecsCompatibility);

    return lines.map((line, index) => this.highlightLine(line, matches[index], mappings));
  }

  /** Splits a single line into plain and field segments using its grok match result. */
  private highlightLine(
    line: string,
    result: TestGrokPatternResponse['matches'][number] | undefined,
    mappings: FindFileStructureResponse['mappings']
  ): HighlightedLine {
    // A missing result (response shorter than the list of lines) is treated
    // the same as an unmatched line instead of throwing on destructuring.
    if (result === undefined || result.matched === false) {
      return [{ word: line }];
    }

    const sortedFields = Object.entries(result.fields ?? {})
      .flatMap(([fieldName, occurrences]) => {
        // Only the first occurrence of each field is highlighted.
        const first = occurrences?.[0];
        if (first === undefined) {
          // No occurrence reported for this field: nothing to highlight.
          return [];
        }
        let type = mappings.properties[fieldName]?.type;
        if (type === undefined && fieldName === 'timestamp') {
          // it's possible that the timestamp field is not mapped as `timestamp`
          // but instead as `@timestamp`
          type = mappings.properties['@timestamp']?.type;
        }
        return [
          {
            name: fieldName,
            match: first.match,
            offset: first.offset,
            length: first.length,
            type,
          },
        ];
      })
      .sort((a, b) => a.offset - b.offset);

    let offset = 0;
    const highlightedLine: HighlightedLine = [];
    for (const field of sortedFields) {
      if (field.offset < offset) {
        // This field starts inside text that has already been emitted
        // (overlapping or nested capture). `substring` would swap its
        // arguments and output that text a second time, so skip the field.
        continue;
      }
      highlightedLine.push({ word: line.substring(offset, field.offset) });
      highlightedLine.push({
        word: field.match,
        field: {
          type: field.type,
          name: field.name,
        },
      });
      offset = field.offset + field.length;
    }
    // Whatever follows the last field is plain text.
    highlightedLine.push({ word: line.substring(offset) });
    return highlightedLine;
  }

  /**
   * Posts the lines and grok pattern to the test_grok_pattern endpoint and
   * returns the `matches` array from its response.
   */
  private async testGrokPattern(
    lines: string[],
    grokPattern: string,
    ecsCompatibility: string | undefined
  ) {
    const { matches } = await this.http.fetch<TestGrokPatternResponse>(
      '/internal/data_visualizer/test_grok_pattern',
      {
        method: 'POST',
        version: '1',
        body: JSON.stringify({
          grokPattern,
          text: lines,
          ecsCompatibility,
        }),
      }
    );
    return matches;
  }
}
Loading

0 comments on commit 0bca3c0

Please sign in to comment.