Skip to content

Commit

Permalink
Add multi cluster text expansion tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kderusso committed Mar 14, 2024
1 parent 41a0210 commit 122f165
Show file tree
Hide file tree
Showing 3 changed files with 650 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def remoteCluster = testClusters.register('remote-cluster') {
testDistribution = 'DEFAULT'
versions = [ccsCompatVersion.toString(), project.version]
numberOfNodes = 2
setting 'node.roles', '[data,ingest,master]'
setting 'node.roles', '[data,ingest,master,ml]'
setting 'xpack.security.enabled', 'true'
setting 'xpack.watcher.enabled', 'false'
setting 'xpack.license.self_generated.type', 'trial'
Expand All @@ -35,7 +35,7 @@ def remoteCluster = testClusters.register('remote-cluster') {
testClusters.register('mixed-cluster') {
testDistribution = 'DEFAULT'
numberOfNodes = 2
setting 'node.roles', '[data,ingest,master]'
setting 'node.roles', '[data,ingest,master,ml]'
setting 'xpack.security.enabled', 'true'
setting 'xpack.watcher.enabled', 'false'
setting 'xpack.license.self_generated.type', 'trial'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,324 @@
# This test tests cases covered by ML's text_expansion.yml
---
setup:
- skip:
features: headers

- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials
indices.create:
index: index-with-sparse-vector
body:
mappings:
properties:
source_text:
type: keyword
ml.tokens:
type: sparse_vector

- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
indices.create:
index: unrelated
body:
mappings:
properties:
source_text:
type: keyword

- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
ml.put_trained_model:
model_id: "text_expansion_model"
body: >
{
"description": "simple model for testing",
"model_type": "pytorch",
"inference_config": {
"text_expansion": {
"tokenization": {
"bert": {
"with_special_tokens": false
}
}
}
}
}
- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
ml.put_trained_model_vocabulary:
model_id: "text_expansion_model"
body: >
{ "vocabulary": ["[PAD]", "[UNK]", "these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"] }
- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
ml.put_trained_model_definition_part:
model_id: "text_expansion_model"
part: 0
body: >
{
"total_definition_length":2078,
"definition": "UEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VGV4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxlbW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVctJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdFZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26QuZy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/eUuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNLFgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vLqbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLktgNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjghg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0YgbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSRrhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRglXIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAABkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgIAAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAAAACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLAQIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAAAAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAAAAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAAAAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA=",
"total_parts": 1
}
- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
Content-Type: application/json
bulk:
index: index-with-sparse-vector
refresh: true
body: |
{"index": {}}
{"source_text": "my words comforter", "ml.tokens":{"my":1.0, "words":1.0,"comforter":1.0}}
{"index": {}}
{"source_text": "the machine is leaking", "ml.tokens":{"the":1.0,"machine":1.0,"is":1.0,"leaking":1.0}}
{"index": {}}
{"source_text": "these are my words", "ml.tokens":{"these":1.0,"are":1.0,"my":1.0,"words":1.0}}
{"index": {}}
{"source_text": "the octopus comforter smells", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"smells":1.0}}
{"index": {}}
{"source_text": "the octopus comforter is leaking", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"is":1.0,"leaking":1.0}}
{"index": {}}
{"source_text": "washing machine smells", "ml.tokens":{"washing":1.0,"machine":1.0,"smells":1.0}}
- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
Content-Type: application/json
ml.start_trained_model_deployment:
model_id: text_expansion_model
wait_for: started


---
teardown:
- skip:
features: headers

- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
indices.delete:
index: index-with-sparse-vector
ignore: 404

- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
indices.delete:
index: unrelated
ignore: 404

- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
ml.stop_trained_model_deployment:
model_id: text_expansion_model
ignore: 404

- do:
headers:
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk"
ml.delete_trained_model:
model_id: "text_expansion_model"
ignore: 404

---
"Test text expansion search":
- do:
search:
index: index-with-sparse-vector
body:
query:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
- match: { hits.total.value: 4 }
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" }

---
"Test text expansion search with pruning config":
- skip:
version: " - 8.12.99"
reason: "pruning introduced in 8.13.0"

- do:
search:
index: index-with-sparse-vector
body:
query:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
pruning_config:
tokens_freq_ratio_threshold: 4
tokens_weight_threshold: 0.4
- match: { hits.total.value: 4 }
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" }

---
"Test named, boosted text expansion search with pruning config":
- skip:
version: " - 8.12.99"
reason: "pruning introduced in 8.13.0"
- do:
search:
index: index-with-sparse-vector
body:
query:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
pruning_config:
tokens_freq_ratio_threshold: 4
tokens_weight_threshold: 0.4
- match: { hits.total.value: 4 }
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" }
- match: { hits.hits.0._score: 3.0 }

- do:
search:
index: index-with-sparse-vector
body:
query:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
pruning_config:
tokens_freq_ratio_threshold: 4
tokens_weight_threshold: 0.4
_name: i-like-naming-my-queries
boost: 100.0
- match: { hits.total.value: 4 }
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" }
- match: { hits.hits.0.matched_queries: ["i-like-naming-my-queries"] }
- match: { hits.hits.0._score: 300.0 }

---
"Test text expansion search with default pruning config":
- skip:
version: " - 8.12.99"
reason: "pruning introduced in 8.13.0"

- do:
search:
index: index-with-sparse-vector
body:
query:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
pruning_config: {}
- match: { hits.total.value: 4 }
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" }

---
"Test text expansion search with weighted tokens rescoring only pruned tokens":
- skip:
version: " - 8.12.99"
reason: "pruning introduced in 8.13.0"

- do:
search:
index: index-with-sparse-vector
body:
query:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
pruning_config:
tokens_freq_ratio_threshold: 4
tokens_weight_threshold: 0.4
only_score_pruned_tokens: true
- match: { hits.total.value: 0 }

---
"Test weighted tokens search":
- skip:
version: " - 8.12.99"
reason: "weighted token search introduced in 8.13.0"

- do:
search:
index: index-with-sparse-vector
body:
query:
weighted_tokens:
ml.tokens:
tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}]
pruning_config:
tokens_freq_ratio_threshold: 1
tokens_weight_threshold: 0.4
only_score_pruned_tokens: false
- match: { hits.total.value: 5 }
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" }

---
"Test weighted tokens search with default pruning config":
- skip:
version: " - 8.12.99"
reason: "weighted token search introduced in 8.13.0"

- do:
search:
index: index-with-sparse-vector
body:
query:
weighted_tokens:
ml.tokens:
tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}]
pruning_config: {}
- match: { hits.total.value: 5 }
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" }

---
"Test weighted tokens search only scoring pruned tokens":
- skip:
version: " - 8.12.99"
reason: "weighted token search introduced in 8.13.0"

- do:
search:
index: index-with-sparse-vector
body:
query:
weighted_tokens:
ml.tokens:
tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}]
pruning_config:
tokens_freq_ratio_threshold: 4
tokens_weight_threshold: 0.4
only_score_pruned_tokens: true
- match: { hits.total.value: 0 }

---
"Test weighted tokens search that prunes tokens based on frequency":
- skip:
version: " - 8.12.99"
reason: "weighted token search introduced in 8.13.0"

- do:
search:
index: index-with-sparse-vector
body:
query:
weighted_tokens:
ml.tokens:
tokens: [{"the": 1.0}, {"octopus":1.0}, {"comforter":1.0}, {"is": 1.0}, {"the": 1.0}, {"best": 1.0}, {"of": 1.0}, {"the": 1.0}, {"bunch": 1.0}]
pruning_config:
tokens_freq_ratio_threshold: 3
tokens_weight_threshold: 0.4
only_score_pruned_tokens: true
- match: { hits.total.value: 0 }
Loading

0 comments on commit 122f165

Please sign in to comment.