Skip to content

Commit

Permalink
[ML] improve query string parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
darnautov committed Sep 14, 2020
1 parent 9047528 commit 57fa5a0
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ describe('ML - custom URL utils', () => {
);
});

/** FIXME */
test.skip('returns expected URL for a Kibana Discover type URL when record field contains special characters', () => {
expect(getUrlForRecord(TEST_DISCOVER_URL, TEST_RECORD_SPECIAL_CHARS)).toBe(
"discover#/?_g=(time:(from:'2017-02-09T15:10:00.000Z',mode:absolute,to:'2017-02-09T17:15:00.000Z'))&_a=(index:bf6e5860-9404-11e8-8d4c-593f69c47267,query:(language:kuery,query:'airline:\"%3C%3E%3A%3B%5B%7D%5C%22)\" and odd:field,name:>:&12<''))"
Expand Down Expand Up @@ -459,7 +458,7 @@ describe('ML - custom URL utils', () => {
};

expect(getUrlForRecord(urlConfig, testRecords)).toBe(
"security/hosts/ml-hosts?_g=()&query=(query:'process.name:\"seq\"',language:kuery)&timerange=(global:(linkTo:!(timeline),timerange:(from:'2019-02-01T16:00:00.000Z',kind:absolute,to:'2019-02-01T18:59:59.999Z')),timeline:(linkTo:!(global),timerange:(from:'2019-02-01T16%3A00%3A00.000Z',kind:absolute,to:'2019-02-01T18%3A59%3A59.999Z')))"
"security/hosts/ml-hosts?_g=()&query=(language:kuery,query:'process.name:\"seq\"')&timerange=(global:(linkTo:!(timeline),timerange:(from:'2019-02-01T16:00:00.000Z',kind:absolute,to:'2019-02-01T18:59:59.999Z')),timeline:(linkTo:!(global),timerange:(from:'2019-02-01T16%3A00%3A00.000Z',kind:absolute,to:'2019-02-01T18%3A59%3A59.999Z')))"
);
});

Expand Down
133 changes: 79 additions & 54 deletions x-pack/plugins/ml/public/application/util/custom_url_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import { get, flow } from 'lodash';
import moment from 'moment';
import rison, { RisonObject, RisonValue } from 'rison-node';

import { parseInterval } from '../../../common/util/parse_interval';
import { escapeForElasticsearchQuery, replaceStringTokens } from './string_utils';
Expand Down Expand Up @@ -131,13 +132,70 @@ function escapeForKQL(value: string | number): string {

type GetResultTokenValue = (v: string) => string;

export const isRisonObject = (value: RisonValue): value is RisonObject => {
return value !== null && typeof value === 'object';
};

const getQueryStringResultProvider = (
record: CustomUrlAnomalyRecordDoc,
getResultTokenValue: GetResultTokenValue
) => (resultPrefix: string, queryString: string, resultPostfix: string): string => {
const URL_LENGTH_LIMIT = 2000;

let availableCharactersLeft = URL_LENGTH_LIMIT - resultPrefix.length - resultPostfix.length;

// URL template might contain encoded characters
const queryFields = queryString
// Split query string by AND operator.
.split(/\sand\s/i)
// Get property name from `influencerField:$influencerField$` string.
.map((v) => String(v.split(/:(.+)?\$/)[0]).trim());

const queryParts: string[] = [];
const joinOperator = ' AND ';

fieldsLoop: for (let i = 0; i < queryFields.length; i++) {
const field = queryFields[i];
// Use lodash get to allow nested JSON fields to be retrieved.
let tokenValues: string[] | string | null = get(record, field) || null;
if (tokenValues === null) {
continue;
}
tokenValues = Array.isArray(tokenValues) ? tokenValues : [tokenValues];

// Create a pair `influencerField:value`.
// In cases where there are multiple influencer field values for an anomaly
// combine values with OR operator e.g. `(influencerField:value or influencerField:another_value)`.
let result = '';
for (let j = 0; j < tokenValues.length; j++) {
const part = `${j > 0 ? ' OR ' : ''}${field}:"${getResultTokenValue(tokenValues[j])}"`;

// Build up a URL string which is not longer than the allowed length and isn't corrupted by invalid query.
if (availableCharactersLeft < part.length) {
if (result.length > 0) {
queryParts.push(j > 0 ? `(${result})` : result);
}
break fieldsLoop;
}

result += part;

availableCharactersLeft -= result.length;
}

if (result.length > 0) {
queryParts.push(tokenValues.length > 1 ? `(${result})` : result);
}
}
return queryParts.join(joinOperator);
};

/**
* Builds a Kibana dashboard or Discover URL from the supplied config, with any
* dollar delimited tokens substituted from the supplied anomaly record.
*/
function buildKibanaUrl(urlConfig: UrlConfig, record: CustomUrlAnomalyRecordDoc) {
const urlValue = urlConfig.url_value;
const URL_LENGTH_LIMIT = 2000;

const isLuceneQueryLanguage = urlValue.includes('language:lucene');

Expand Down Expand Up @@ -172,67 +230,34 @@ function buildKibanaUrl(urlConfig: UrlConfig, record: CustomUrlAnomalyRecordDoc)
return flow(
(str: string) => str.replace('$earliest$', record.earliest).replace('$latest$', record.latest),
// Process query string content of the URL
decodeURIComponent,
(str: string) => {
const getResultTokenValue: GetResultTokenValue = flow(
queryLanguageEscapeCallback,
commonEscapeCallback
);

const getQueryStringResult = getQueryStringResultProvider(record, getResultTokenValue);

const match = str.match(/(.+)(\(.*\blanguage:(?:lucene|kuery)\b.*?\))(.+)/);

if (match !== null && match[2] !== undefined) {
const [, prefix, queryDef, postfix] = match;

const q = rison.decode(queryDef);

if (isRisonObject(q) && q.hasOwnProperty('query')) {
const [resultPrefix, resultPostfix] = [prefix, postfix].map(replaceSingleTokenValues);
const resultQuery = getQueryStringResult(resultPrefix, q.query as string, resultPostfix);
return `${resultPrefix}${rison.encode({ ...q, query: resultQuery })}${resultPostfix}`;
}
}

return str.replace(
/(.+query:'|.+&kuery=)([^']*)(['&].+)/,
/(.+&kuery=)(.*?)[^!](&.+)/,
(fullMatch, prefix: string, queryString: string, postfix: string) => {
const [resultPrefix, resultPostfix] = [prefix, postfix].map(replaceSingleTokenValues);

let availableCharactersLeft =
URL_LENGTH_LIMIT - resultPrefix.length - resultPostfix.length;

// URL template might contain encoded characters
const queryFields = decodeURIComponent(queryString)
// Split query string by AND operator.
.split(/\sand\s/i)
// Get property name from `influencerField:$influencerField$` string.
.map((v) => String(v.split(/:(.+)?\$/)[0]).trim());

const queryParts: string[] = [];
const joinOperator = ' AND ';

fieldsLoop: for (let i = 0; i < queryFields.length; i++) {
const field = queryFields[i];
// Use lodash get to allow nested JSON fields to be retrieved.
let tokenValues: string[] | string | null = get(record, field) || null;
if (tokenValues === null) {
continue;
}
tokenValues = Array.isArray(tokenValues) ? tokenValues : [tokenValues];

// Create a pair `influencerField:value`.
// In cases where there are multiple influencer field values for an anomaly
// combine values with OR operator e.g. `(influencerField:value or influencerField:another_value)`.
let result = '';
for (let j = 0; j < tokenValues.length; j++) {
const part = `${j > 0 ? ' OR ' : ''}${field}:"${getResultTokenValue(
tokenValues[j]
)}"`;

// Build up a URL string which is not longer than the allowed length and isn't corrupted by invalid query.
if (availableCharactersLeft < part.length) {
if (result.length > 0) {
queryParts.push(j > 0 ? `(${result})` : result);
}
break fieldsLoop;
}

result += part;

availableCharactersLeft -= result.length;
}

if (result.length > 0) {
queryParts.push(tokenValues.length > 1 ? `(${result})` : result);
}
}

const resultQuery = queryParts.join(joinOperator);

const resultQuery = getQueryStringResult(resultPrefix, queryString, resultPostfix);
return `${resultPrefix}${resultQuery}${resultPostfix}`;
}
);
Expand Down

0 comments on commit 57fa5a0

Please sign in to comment.