Add multi cluster text expansion tests

kderusso · Mar 14, 2024 · 122f165 · 122f165
1 parent 41a0210
commit 122f165
Show file tree

Hide file tree

Showing 3 changed files with 650 additions and 2 deletions.
diff --git a/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/build.gradle b/x-pack/plugin/ml/qa/multi-cluster-tests-with-security/build.gradle
@@ -24,7 +24,7 @@ def remoteCluster = testClusters.register('remote-cluster') {
     testDistribution = 'DEFAULT'
     versions = [ccsCompatVersion.toString(), project.version]
     numberOfNodes = 2
-    setting 'node.roles', '[data,ingest,master]'
+    setting 'node.roles', '[data,ingest,master,ml]'
     setting 'xpack.security.enabled', 'true'
     setting 'xpack.watcher.enabled', 'false'
     setting 'xpack.license.self_generated.type', 'trial'
@@ -35,7 +35,7 @@ def remoteCluster = testClusters.register('remote-cluster') {
 testClusters.register('mixed-cluster') {
     testDistribution = 'DEFAULT'
     numberOfNodes = 2
-    setting 'node.roles', '[data,ingest,master]'
+    setting 'node.roles', '[data,ingest,master,ml]'
     setting 'xpack.security.enabled', 'true'
     setting 'xpack.watcher.enabled', 'false'
     setting 'xpack.license.self_generated.type', 'trial'

diff --git a/...s-with-security/src/test/resources/rest-api-spec/test/multi_cluster/40_text_expansion.yml b/...s-with-security/src/test/resources/rest-api-spec/test/multi_cluster/40_text_expansion.yml
@@ -0,0 +1,324 @@
+# This test tests cases covered by ML's text_expansion.yml
+---
+setup:
+  - skip:
+      features: headers
+
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials
+      indices.create:
+        index: index-with-sparse-vector
+        body:
+          mappings:
+            properties:
+              source_text:
+                type: keyword
+              ml.tokens:
+                type: sparse_vector
+
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
+      indices.create:
+        index: unrelated
+        body:
+          mappings:
+            properties:
+              source_text:
+                type: keyword
+
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
+      ml.put_trained_model:
+        model_id: "text_expansion_model"
+        body: >
+          {
+            "description": "simple model for testing",
+            "model_type": "pytorch",
+            "inference_config": {
+              "text_expansion": {
+                "tokenization": {
+                  "bert": {
+                    "with_special_tokens": false
+                  }
+                }
+              }
+            }
+          }
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
+      ml.put_trained_model_vocabulary:
+        model_id: "text_expansion_model"
+        body: >
+          { "vocabulary": ["[PAD]", "[UNK]", "these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"] }
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
+      ml.put_trained_model_definition_part:
+        model_id: "text_expansion_model"
+        part: 0
+        body: >
+          {
+            "total_definition_length":2078,
+            "definition": "UEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VGV4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxlbW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVctJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdFZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26QuZy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/eUuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNLFgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vLqbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLktgNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjghg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0YgbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSRrhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRglXIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAABkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgIAAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAAAACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLAQIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAAAAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAAAAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAAAAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA=",
+            "total_parts": 1
+          }
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
+        Content-Type: application/json
+      bulk:
+        index: index-with-sparse-vector
+        refresh: true
+        body: |
+          {"index": {}}
+          {"source_text": "my words comforter", "ml.tokens":{"my":1.0, "words":1.0,"comforter":1.0}}
+          {"index": {}}
+          {"source_text": "the machine is leaking", "ml.tokens":{"the":1.0,"machine":1.0,"is":1.0,"leaking":1.0}}
+          {"index": {}}
+          {"source_text": "these are my words", "ml.tokens":{"these":1.0,"are":1.0,"my":1.0,"words":1.0}}
+          {"index": {}}
+          {"source_text": "the octopus comforter smells", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"smells":1.0}}
+          {"index": {}}
+          {"source_text": "the octopus comforter is leaking", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"is":1.0,"leaking":1.0}}
+          {"index": {}}
+          {"source_text": "washing machine smells", "ml.tokens":{"washing":1.0,"machine":1.0,"smells":1.0}}
+
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
+        Content-Type: application/json
+      ml.start_trained_model_deployment:
+        model_id: text_expansion_model
+        wait_for: started
+
+
+---
+teardown:
+  - skip:
+      features: headers
+
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
+      indices.delete:
+        index: index-with-sparse-vector
+        ignore: 404
+
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
+      indices.delete:
+        index: unrelated
+        ignore: 404
+
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
+      ml.stop_trained_model_deployment:
+        model_id: text_expansion_model
+        ignore: 404
+
+  - do:
+      headers:
+        Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
+      ml.delete_trained_model:
+        model_id: "text_expansion_model"
+        ignore: 404
+
+---
+"Test text expansion search":
+  - do:
+      search:
+        index: index-with-sparse-vector
+        body:
+          query:
+            text_expansion:
+              ml.tokens:
+                model_id: text_expansion_model
+                model_text: "octopus comforter smells"
+  - match: { hits.total.value: 4 }
+  - match: { hits.hits.0._source.source_text: "the octopus comforter smells" }
+
+---
+"Test text expansion search with pruning config":
+  - skip:
+      version: " - 8.12.99"
+      reason: "pruning introduced in 8.13.0"
+
+  - do:
+      search:
+        index: index-with-sparse-vector
+        body:
+          query:
+            text_expansion:
+              ml.tokens:
+                model_id: text_expansion_model
+                model_text: "octopus comforter smells"
+                pruning_config:
+                  tokens_freq_ratio_threshold: 4
+                  tokens_weight_threshold: 0.4
+  - match: { hits.total.value: 4 }
+  - match: { hits.hits.0._source.source_text: "the octopus comforter smells" }
+
+---
+"Test named, boosted text expansion search with pruning config":
+  - skip:
+      version: " - 8.12.99"
+      reason: "pruning introduced in 8.13.0"
+  - do:
+      search:
+        index: index-with-sparse-vector
+        body:
+          query:
+            text_expansion:
+              ml.tokens:
+                model_id: text_expansion_model
+                model_text: "octopus comforter smells"
+                pruning_config:
+                  tokens_freq_ratio_threshold: 4
+                  tokens_weight_threshold: 0.4
+  - match: { hits.total.value: 4 }
+  - match: { hits.hits.0._source.source_text: "the octopus comforter smells" }
+  - match: { hits.hits.0._score: 3.0 }
+
+  - do:
+      search:
+        index: index-with-sparse-vector
+        body:
+          query:
+            text_expansion:
+              ml.tokens:
+                model_id: text_expansion_model
+                model_text: "octopus comforter smells"
+                pruning_config:
+                  tokens_freq_ratio_threshold: 4
+                  tokens_weight_threshold: 0.4
+                _name: i-like-naming-my-queries
+                boost: 100.0
+  - match: { hits.total.value: 4 }
+  - match: { hits.hits.0._source.source_text: "the octopus comforter smells" }
+  - match: { hits.hits.0.matched_queries: ["i-like-naming-my-queries"] }
+  - match: { hits.hits.0._score: 300.0 }
+
+---
+"Test text expansion search with default pruning config":
+  - skip:
+      version: " - 8.12.99"
+      reason: "pruning introduced in 8.13.0"
+
+  - do:
+      search:
+        index: index-with-sparse-vector
+        body:
+          query:
+            text_expansion:
+              ml.tokens:
+                model_id: text_expansion_model
+                model_text: "octopus comforter smells"
+                pruning_config: {}
+  - match: { hits.total.value: 4 }
+  - match: { hits.hits.0._source.source_text: "the octopus comforter smells" }
+
+---
+"Test text expansion search with weighted tokens rescoring only pruned tokens":
+  - skip:
+      version: " - 8.12.99"
+      reason: "pruning introduced in 8.13.0"
+
+  - do:
+      search:
+        index: index-with-sparse-vector
+        body:
+          query:
+            text_expansion:
+              ml.tokens:
+                model_id: text_expansion_model
+                model_text: "octopus comforter smells"
+                pruning_config:
+                  tokens_freq_ratio_threshold: 4
+                  tokens_weight_threshold: 0.4
+                  only_score_pruned_tokens: true
+  - match: { hits.total.value: 0 }
+
+---
+"Test weighted tokens search":
+  - skip:
+      version: " - 8.12.99"
+      reason: "weighted token search introduced in 8.13.0"
+
+  - do:
+      search:
+        index: index-with-sparse-vector
+        body:
+          query:
+            weighted_tokens:
+              ml.tokens:
+                tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}]
+                pruning_config:
+                  tokens_freq_ratio_threshold: 1
+                  tokens_weight_threshold: 0.4
+                  only_score_pruned_tokens: false
+  - match: { hits.total.value: 5 }
+  - match: { hits.hits.0._source.source_text: "the octopus comforter smells" }
+
+---
+"Test weighted tokens search with default pruning config":
+  - skip:
+      version: " - 8.12.99"
+      reason: "weighted token search introduced in 8.13.0"
+
+  - do:
+      search:
+        index: index-with-sparse-vector
+        body:
+          query:
+            weighted_tokens:
+              ml.tokens:
+                tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}]
+                pruning_config: {}
+  - match: { hits.total.value: 5 }
+  - match: { hits.hits.0._source.source_text: "the octopus comforter smells" }
+
+---
+"Test weighted tokens search only scoring pruned tokens":
+  - skip:
+      version: " - 8.12.99"
+      reason: "weighted token search introduced in 8.13.0"
+
+  - do:
+      search:
+        index: index-with-sparse-vector
+        body:
+          query:
+            weighted_tokens:
+              ml.tokens:
+                tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}]
+                pruning_config:
+                  tokens_freq_ratio_threshold: 4
+                  tokens_weight_threshold: 0.4
+                  only_score_pruned_tokens: true
+  - match: { hits.total.value: 0 }
+
+---
+"Test weighted tokens search that prunes tokens based on frequency":
+  - skip:
+      version: " - 8.12.99"
+      reason: "weighted token search introduced in 8.13.0"
+
+  - do:
+      search:
+        index: index-with-sparse-vector
+        body:
+          query:
+            weighted_tokens:
+              ml.tokens:
+                tokens: [{"the": 1.0}, {"octopus":1.0}, {"comforter":1.0}, {"is": 1.0}, {"the": 1.0}, {"best": 1.0}, {"of": 1.0}, {"the": 1.0}, {"bunch": 1.0}]
+                pruning_config:
+                  tokens_freq_ratio_threshold: 3
+                  tokens_weight_threshold: 0.4
+                  only_score_pruned_tokens: true
+  - match: { hits.total.value: 0 }