diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/build.gradle b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/build.gradle index d102490820a07..b19fa4ab5f185 100644 --- a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/build.gradle +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/build.gradle @@ -24,7 +24,7 @@ def remoteCluster = testClusters.register('remote-cluster') { testDistribution = 'DEFAULT' versions = [ccsCompatVersion.toString(), project.version] numberOfNodes = 2 - setting 'node.roles', '[data,ingest,master]' + setting 'node.roles', '[data,ingest,master,ml]' setting 'xpack.security.enabled', 'true' setting 'xpack.watcher.enabled', 'false' setting 'xpack.license.self_generated.type', 'trial' @@ -35,7 +35,7 @@ def remoteCluster = testClusters.register('remote-cluster') { testClusters.register('mixed-cluster') { testDistribution = 'DEFAULT' numberOfNodes = 2 - setting 'node.roles', '[data,ingest,master]' + setting 'node.roles', '[data,ingest,master,ml]' setting 'xpack.security.enabled', 'true' setting 'xpack.watcher.enabled', 'false' setting 'xpack.license.self_generated.type', 'trial' diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/40_text_expansion.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/40_text_expansion.yml new file mode 100644 index 0000000000000..3ac52c2d45d0d --- /dev/null +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/multi_cluster/40_text_expansion.yml @@ -0,0 +1,324 @@ +# This test tests cases covered by ML's text_expansion.yml +--- +setup: + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + indices.create: + index: index-with-sparse-vector + body: + mappings: + properties: + source_text: + type: keyword + ml.tokens: + type: sparse_vector + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + indices.create: + index: unrelated + body: + mappings: + properties: + source_text: + type: keyword + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + ml.put_trained_model: + model_id: "text_expansion_model" + body: > + { + "description": "simple model for testing", + "model_type": "pytorch", + "inference_config": { + "text_expansion": { + "tokenization": { + "bert": { + "with_special_tokens": false + } + } + } + } + } + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + ml.put_trained_model_vocabulary: + model_id: "text_expansion_model" + body: > + { "vocabulary": ["[PAD]", "[UNK]", "these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"] } + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + ml.put_trained_model_definition_part: + model_id: "text_expansion_model" + part: 0 + body: > + { + "total_definition_length":2078, + "definition": "UEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VGV4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxlbW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVctJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdFZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26QuZy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/eUuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNLFgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vLqbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLktgNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjghg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0YgbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSRrhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRglXIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAABkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgIAAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAAAACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLAQIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAAAAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAAAAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAAAAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA=", + "total_parts": 1 + } + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + Content-Type: application/json + bulk: + index: index-with-sparse-vector + refresh: true + body: | + {"index": {}} + {"source_text": "my words comforter", "ml.tokens":{"my":1.0, "words":1.0,"comforter":1.0}} + {"index": {}} + {"source_text": "the machine is leaking", "ml.tokens":{"the":1.0,"machine":1.0,"is":1.0,"leaking":1.0}} + {"index": {}} + {"source_text": "these are my words", "ml.tokens":{"these":1.0,"are":1.0,"my":1.0,"words":1.0}} + {"index": {}} + {"source_text": "the octopus comforter smells", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"smells":1.0}} + {"index": {}} + {"source_text": "the octopus comforter is leaking", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"is":1.0,"leaking":1.0}} + {"index": {}} + {"source_text": "washing machine smells", "ml.tokens":{"washing":1.0,"machine":1.0,"smells":1.0}} + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + Content-Type: application/json + ml.start_trained_model_deployment: + model_id: text_expansion_model + wait_for: started + + +--- +teardown: + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + indices.delete: + index: index-with-sparse-vector + ignore: 404 + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + indices.delete: + index: unrelated + ignore: 404 + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + ml.stop_trained_model_deployment: + model_id: text_expansion_model + ignore: 404 + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + ml.delete_trained_model: + model_id: "text_expansion_model" + ignore: 404 + +--- +"Test text expansion search": + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + - match: { hits.total.value: 4 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Test text expansion search with pruning config": + - skip: + version: " - 8.12.99" + reason: "pruning introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + pruning_config: + tokens_freq_ratio_threshold: 4 + tokens_weight_threshold: 0.4 + - match: { hits.total.value: 4 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Test named, boosted text expansion search with pruning config": + - skip: + version: " - 8.12.99" + reason: "pruning introduced in 8.13.0" + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + pruning_config: + tokens_freq_ratio_threshold: 4 + tokens_weight_threshold: 0.4 + - match: { hits.total.value: 4 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + - match: { hits.hits.0._score: 3.0 } + + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + pruning_config: + tokens_freq_ratio_threshold: 4 + tokens_weight_threshold: 0.4 + _name: i-like-naming-my-queries + boost: 100.0 + - match: { hits.total.value: 4 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + - match: { hits.hits.0.matched_queries: ["i-like-naming-my-queries"] } + - match: { hits.hits.0._score: 300.0 } + +--- +"Test text expansion search with default pruning config": + - skip: + version: " - 8.12.99" + reason: "pruning introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + pruning_config: {} + - match: { hits.total.value: 4 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Test text expansion search with weighted tokens rescoring only pruned tokens": + - skip: + version: " - 8.12.99" + reason: "pruning introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + pruning_config: + tokens_freq_ratio_threshold: 4 + tokens_weight_threshold: 0.4 + only_score_pruned_tokens: true + - match: { hits.total.value: 0 } + +--- +"Test weighted tokens search": + - skip: + version: " - 8.12.99" + reason: "weighted token search introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + weighted_tokens: + ml.tokens: + tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}] + pruning_config: + tokens_freq_ratio_threshold: 1 + tokens_weight_threshold: 0.4 + only_score_pruned_tokens: false + - match: { hits.total.value: 5 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Test weighted tokens search with default pruning config": + - skip: + version: " - 8.12.99" + reason: "weighted token search introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + weighted_tokens: + ml.tokens: + tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}] + pruning_config: {} + - match: { hits.total.value: 5 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Test weighted tokens search only scoring pruned tokens": + - skip: + version: " - 8.12.99" + reason: "weighted token search introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + weighted_tokens: + ml.tokens: + tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}] + pruning_config: + tokens_freq_ratio_threshold: 4 + tokens_weight_threshold: 0.4 + only_score_pruned_tokens: true + - match: { hits.total.value: 0 } + +--- +"Test weighted tokens search that prunes tokens based on frequency": + - skip: + version: " - 8.12.99" + reason: "weighted token search introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + weighted_tokens: + ml.tokens: + tokens: [{"the": 1.0}, {"octopus":1.0}, {"comforter":1.0}, {"is": 1.0}, {"the": 1.0}, {"best": 1.0}, {"of": 1.0}, {"the": 1.0}, {"bunch": 1.0}] + pruning_config: + tokens_freq_ratio_threshold: 3 + tokens_weight_threshold: 0.4 + only_score_pruned_tokens: true + - match: { hits.total.value: 0 } diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/40_text_expansion.yml b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/40_text_expansion.yml new file mode 100644 index 0000000000000..3ac52c2d45d0d --- /dev/null +++ b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/src/test/resources/rest-api-spec/test/remote_cluster/40_text_expansion.yml @@ -0,0 +1,324 @@ +# This test tests cases covered by ML's text_expansion.yml +--- +setup: + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials + indices.create: + index: index-with-sparse-vector + body: + mappings: + properties: + source_text: + type: keyword + ml.tokens: + type: sparse_vector + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + indices.create: + index: unrelated + body: + mappings: + properties: + source_text: + type: keyword + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + ml.put_trained_model: + model_id: "text_expansion_model" + body: > + { + "description": "simple model for testing", + "model_type": "pytorch", + "inference_config": { + "text_expansion": { + "tokenization": { + "bert": { + "with_special_tokens": false + } + } + } + } + } + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + ml.put_trained_model_vocabulary: + model_id: "text_expansion_model" + body: > + { "vocabulary": ["[PAD]", "[UNK]", "these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"] } + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + ml.put_trained_model_definition_part: + model_id: "text_expansion_model" + part: 0 + body: > + { + "total_definition_length":2078, + "definition": "UEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VGV4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxlbW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVctJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdFZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26QuZy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/eUuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNLFgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vLqbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLktgNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjghg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0YgbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSRrhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRglXIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAABkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgIAAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAAAACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLAQIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAAAAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAAAAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAAAAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA=", + "total_parts": 1 + } + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + Content-Type: application/json + bulk: + index: index-with-sparse-vector + refresh: true + body: | + {"index": {}} + {"source_text": "my words comforter", "ml.tokens":{"my":1.0, "words":1.0,"comforter":1.0}} + {"index": {}} + {"source_text": "the machine is leaking", "ml.tokens":{"the":1.0,"machine":1.0,"is":1.0,"leaking":1.0}} + {"index": {}} + {"source_text": "these are my words", "ml.tokens":{"these":1.0,"are":1.0,"my":1.0,"words":1.0}} + {"index": {}} + {"source_text": "the octopus comforter smells", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"smells":1.0}} + {"index": {}} + {"source_text": "the octopus comforter is leaking", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"is":1.0,"leaking":1.0}} + {"index": {}} + {"source_text": "washing machine smells", "ml.tokens":{"washing":1.0,"machine":1.0,"smells":1.0}} + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + Content-Type: application/json + ml.start_trained_model_deployment: + model_id: text_expansion_model + wait_for: started + + +--- +teardown: + - skip: + features: headers + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + indices.delete: + index: index-with-sparse-vector + ignore: 404 + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + indices.delete: + index: unrelated + ignore: 404 + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + ml.stop_trained_model_deployment: + model_id: text_expansion_model + ignore: 404 + + - do: + headers: + Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" + ml.delete_trained_model: + model_id: "text_expansion_model" + ignore: 404 + +--- +"Test text expansion search": + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + - match: { hits.total.value: 4 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Test text expansion search with pruning config": + - skip: + version: " - 8.12.99" + reason: "pruning introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + pruning_config: + tokens_freq_ratio_threshold: 4 + tokens_weight_threshold: 0.4 + - match: { hits.total.value: 4 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Test named, boosted text expansion search with pruning config": + - skip: + version: " - 8.12.99" + reason: "pruning introduced in 8.13.0" + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + pruning_config: + tokens_freq_ratio_threshold: 4 + tokens_weight_threshold: 0.4 + - match: { hits.total.value: 4 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + - match: { hits.hits.0._score: 3.0 } + + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + pruning_config: + tokens_freq_ratio_threshold: 4 + tokens_weight_threshold: 0.4 + _name: i-like-naming-my-queries + boost: 100.0 + - match: { hits.total.value: 4 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + - match: { hits.hits.0.matched_queries: ["i-like-naming-my-queries"] } + - match: { hits.hits.0._score: 300.0 } + +--- +"Test text expansion search with default pruning config": + - skip: + version: " - 8.12.99" + reason: "pruning introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + pruning_config: {} + - match: { hits.total.value: 4 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Test text expansion search with weighted tokens rescoring only pruned tokens": + - skip: + version: " - 8.12.99" + reason: "pruning introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + pruning_config: + tokens_freq_ratio_threshold: 4 + tokens_weight_threshold: 0.4 + only_score_pruned_tokens: true + - match: { hits.total.value: 0 } + +--- +"Test weighted tokens search": + - skip: + version: " - 8.12.99" + reason: "weighted token search introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + weighted_tokens: + ml.tokens: + tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}] + pruning_config: + tokens_freq_ratio_threshold: 1 + tokens_weight_threshold: 0.4 + only_score_pruned_tokens: false + - match: { hits.total.value: 5 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Test weighted tokens search with default pruning config": + - skip: + version: " - 8.12.99" + reason: "weighted token search introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + weighted_tokens: + ml.tokens: + tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}] + pruning_config: {} + - match: { hits.total.value: 5 } + - match: { hits.hits.0._source.source_text: "the octopus comforter smells" } + +--- +"Test weighted tokens search only scoring pruned tokens": + - skip: + version: " - 8.12.99" + reason: "weighted token search introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + weighted_tokens: + ml.tokens: + tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}] + pruning_config: + tokens_freq_ratio_threshold: 4 + tokens_weight_threshold: 0.4 + only_score_pruned_tokens: true + - match: { hits.total.value: 0 } + +--- +"Test weighted tokens search that prunes tokens based on frequency": + - skip: + version: " - 8.12.99" + reason: "weighted token search introduced in 8.13.0" + + - do: + search: + index: index-with-sparse-vector + body: + query: + weighted_tokens: + ml.tokens: + tokens: [{"the": 1.0}, {"octopus":1.0}, {"comforter":1.0}, {"is": 1.0}, {"the": 1.0}, {"best": 1.0}, {"of": 1.0}, {"the": 1.0}, {"bunch": 1.0}] + pruning_config: + tokens_freq_ratio_threshold: 3 + tokens_weight_threshold: 0.4 + only_score_pruned_tokens: true + - match: { hits.total.value: 0 }