diff --git a/x-pack/test/api_integration/apis/ml/categorization_field_examples.ts b/x-pack/test/api_integration/apis/ml/categorization_field_examples.ts
new file mode 100644
index 0000000000000..0884cf05e523e
--- /dev/null
+++ b/x-pack/test/api_integration/apis/ml/categorization_field_examples.ts
@@ -0,0 +1,297 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+import expect from '@kbn/expect';
+
+import { FtrProviderContext } from '../../ftr_provider_context';
+
+// Headers sent with every request in this suite.
+const COMMON_HEADERS = {
+  'kbn-xsrf': 'some-xsrf-token',
+};
+
+// Time range sent as start/end in every request body (presumably epoch milliseconds).
+const start = 1554463535770;
+const end = 1574316073914;
+// Default analyzer: ml_classic tokenizer with a stop filter for day/month names, GMT and UTC.
+const analyzer = {
+  tokenizer: 'ml_classic',
+  filter: [
+    {
+      type: 'stop',
+      stopwords: [
+        'Monday',
+        'Tuesday',
+        'Wednesday',
+        'Thursday',
+        'Friday',
+        'Saturday',
+        'Sunday',
+        'Mon',
+        'Tue',
+        'Wed',
+        'Thu',
+        'Fri',
+        'Sat',
+        'Sun',
+        'January',
+        'February',
+        'March',
+        'April',
+        'May',
+        'June',
+        'July',
+        'August',
+        'September',
+        'October',
+        'November',
+        'December',
+        'Jan',
+        'Feb',
+        'Mar',
+        'Apr',
+        'May',
+        'Jun',
+        'Jul',
+        'Aug',
+        'Sep',
+        'Oct',
+        'Nov',
+        'Dec',
+        'GMT',
+        'UTC',
+      ],
+    },
+  ],
+};
+// Request body shared by every test case; each case adds a field and may override properties.
+const defaultRequestBody = {
+  indexPatternTitle: 'categorization_functional_test',
+  query: { bool: { must: [{ match_all: {} }] } },
+  size: 5,
+  timeField: '@timestamp',
+  start,
+  end,
+  analyzer,
+};
+
+// One entry per test: the request body to send and the response values expected back
+// (status code, overall status, sample size, number of examples and validation checks).
+const testDataList = [
+  {
+    title: 'valid with good number of tokens',
+    requestBody: {
+      ...defaultRequestBody,
+      field: 'field1',
+    },
+    expected: {
+      responseCode: 200,
+      overallValidStatus: 'valid',
+      sampleSize: 1000,
+      exampleLength: 5,
+      validationChecks: [
+        {
+          id: 0,
+          valid: 'valid',
+          message: '1000 field values analyzed, 95% contain 3 or more tokens.',
+        },
+      ],
+    },
+  },
+  {
+    title: 'invalid, too many tokens.',
+    requestBody: {
+      ...defaultRequestBody,
+      field: 'field2',
+    },
+    expected: {
+      responseCode: 200,
+      overallValidStatus: 'invalid',
+      sampleSize: 500,
+      exampleLength: 5,
+      validationChecks: [
+        {
+          id: 1,
+          valid: 'partially_valid',
+          message: 'The median length for the field values analyzed is over 400 characters.',
+        },
+        {
+          id: 4,
+          valid: 'invalid',
+          message:
+            'Tokenization of field value examples has failed due to more than 10000 tokens being found in a sample of 50 values.',
+        },
+      ],
+    },
+  },
+  {
+    title: 'partially valid, more than 75% are null',
+    requestBody: {
+      ...defaultRequestBody,
+      field: 'field3',
+    },
+    expected: {
+      responseCode: 200,
+      overallValidStatus: 'partially_valid',
+      sampleSize: 250,
+      exampleLength: 5,
+      validationChecks: [
+        {
+          id: 0,
+          valid: 'valid',
+          message: '250 field values analyzed, 95% contain 3 or more tokens.',
+        },
+        {
+          id: 2,
+          valid: 'partially_valid',
+          message: 'More than 75% of field values are null.',
+        },
+      ],
+    },
+  },
+  {
+    title: 'partially valid, median length is over 400 characters',
+    requestBody: {
+      ...defaultRequestBody,
+      field: 'field4',
+    },
+    expected: {
+      responseCode: 200,
+      overallValidStatus: 'partially_valid',
+      sampleSize: 500,
+      exampleLength: 5,
+      validationChecks: [
+        {
+          id: 0,
+          valid: 'valid',
+          message: '500 field values analyzed, 100% contain 3 or more tokens.',
+        },
+        {
+          id: 1,
+          valid: 'partially_valid',
+          message: 'The median length for the field values analyzed is over 400 characters.',
+        },
+      ],
+    },
+  },
+  {
+    title: 'invalid, no values in any doc',
+    requestBody: {
+      ...defaultRequestBody,
+      field: 'field5',
+    },
+    expected: {
+      responseCode: 200,
+      overallValidStatus: 'invalid',
+      sampleSize: 0,
+      exampleLength: 0,
+      validationChecks: [
+        {
+          id: 3,
+          valid: 'invalid',
+          message:
+            'No examples for this field could be found. Please ensure the selected date range contains data.',
+        },
+      ],
+    },
+  },
+  {
+    title: 'invalid, mostly made up of stop words, so no matched tokens',
+    requestBody: {
+      ...defaultRequestBody,
+      field: 'field6',
+    },
+    expected: {
+      responseCode: 200,
+      overallValidStatus: 'invalid',
+      sampleSize: 1000,
+      exampleLength: 5,
+      validationChecks: [
+        {
+          id: 0,
+          valid: 'invalid',
+          message: '1000 field values analyzed, 0% contain 3 or more tokens.',
+        },
+      ],
+    },
+  },
+  {
+    title: 'valid, mostly made up of stop words, but analyser has no stop words. so it is ok.',
+    requestBody: {
+      ...defaultRequestBody,
+      field: 'field6',
+      analyzer: {
+        tokenizer: 'ml_classic',
+      },
+    },
+    expected: {
+      responseCode: 200,
+      overallValidStatus: 'valid',
+      sampleSize: 1000,
+      exampleLength: 5,
+      validationChecks: [
+        {
+          id: 0,
+          valid: 'valid',
+          message: '1000 field values analyzed, 100% contain 3 or more tokens.',
+        },
+      ],
+    },
+  },
+  {
+    title: 'partially valid, half the docs are stop words.',
+    requestBody: {
+      ...defaultRequestBody,
+      field: 'field7',
+    },
+    expected: {
+      responseCode: 200,
+      overallValidStatus: 'partially_valid',
+      sampleSize: 1000,
+      exampleLength: 5,
+      validationChecks: [
+        {
+          id: 0,
+          valid: 'partially_valid',
+          message: '1000 field values analyzed, 50% contain 3 or more tokens.',
+        },
+      ],
+    },
+  },
+];
+
+// eslint-disable-next-line import/no-default-export
+export default ({ getService }: FtrProviderContext) => {
+  const esArchiver = getService('esArchiver');
+  const supertest = getService('supertest');
+
+  describe('Categorization example endpoint - ', function() {
+    // The 'ml/categorization' archive is loaded before the tests and unloaded after them.
+    before(async () => {
+      await esArchiver.load('ml/categorization');
+    });
+
+    after(async () => {
+      await esArchiver.unload('ml/categorization');
+    });
+
+    // Generate one test per entry in testDataList.
+    for (const testData of testDataList) {
+      it(testData.title, async () => {
+        const { body } = await supertest
+          .post('/api/ml/jobs/categorization_field_examples')
+          .set(COMMON_HEADERS)
+          .send(testData.requestBody)
+          .expect(testData.expected.responseCode);
+
+        expect(body.overallValidStatus).to.eql(testData.expected.overallValidStatus);
+        expect(body.examples.length).to.eql(testData.expected.exampleLength);
+        expect(body.sampleSize).to.eql(testData.expected.sampleSize);
+        expect(body.validationChecks).to.eql(testData.expected.validationChecks);
+      });
+    }
+  });
+};
diff --git a/x-pack/test/api_integration/apis/ml/index.ts b/x-pack/test/api_integration/apis/ml/index.ts
index 9fff4ca8436b0..1df5dfe2941ce 100644
--- a/x-pack/test/api_integration/apis/ml/index.ts
+++ b/x-pack/test/api_integration/apis/ml/index.ts
@@ -12,5 +12,6 @@ export default function({ loadTestFile }: FtrProviderContext) {
 
     loadTestFile(require.resolve('./bucket_span_estimator'));
     loadTestFile(require.resolve('./calculate_model_memory_limit'));
+    loadTestFile(require.resolve('./categorization_field_examples'));
   });
 }