forked from elastic/elasticsearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add multi cluster text expansion tests
- Loading branch information
Showing
3 changed files
with
650 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
324 changes: 324 additions & 0 deletions
324
...s-with-security/src/test/resources/rest-api-spec/test/multi_cluster/40_text_expansion.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,324 @@ | ||
# This test tests cases covered by ML's text_expansion.yml | ||
--- | ||
setup: | ||
- skip: | ||
features: headers | ||
|
||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" #test_user credentials | ||
indices.create: | ||
index: index-with-sparse-vector | ||
body: | ||
mappings: | ||
properties: | ||
source_text: | ||
type: keyword | ||
ml.tokens: | ||
type: sparse_vector | ||
|
||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" | ||
indices.create: | ||
index: unrelated | ||
body: | ||
mappings: | ||
properties: | ||
source_text: | ||
type: keyword | ||
|
||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" | ||
ml.put_trained_model: | ||
model_id: "text_expansion_model" | ||
body: > | ||
{ | ||
"description": "simple model for testing", | ||
"model_type": "pytorch", | ||
"inference_config": { | ||
"text_expansion": { | ||
"tokenization": { | ||
"bert": { | ||
"with_special_tokens": false | ||
} | ||
} | ||
} | ||
} | ||
} | ||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" | ||
ml.put_trained_model_vocabulary: | ||
model_id: "text_expansion_model" | ||
body: > | ||
{ "vocabulary": ["[PAD]", "[UNK]", "these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"] } | ||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" | ||
ml.put_trained_model_definition_part: | ||
model_id: "text_expansion_model" | ||
part: 0 | ||
body: > | ||
{ | ||
"total_definition_length":2078, | ||
"definition": "UEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VGV4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxlbW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVctJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdFZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26QuZy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/eUuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNLFgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vLqbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLktgNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjghg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0YgbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSRrhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRglXIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAABkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgIAAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAAAACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLAQIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAAAAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAAAAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAAAAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA=", | ||
"total_parts": 1 | ||
} | ||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" | ||
Content-Type: application/json | ||
bulk: | ||
index: index-with-sparse-vector | ||
refresh: true | ||
body: | | ||
{"index": {}} | ||
{"source_text": "my words comforter", "ml.tokens":{"my":1.0, "words":1.0,"comforter":1.0}} | ||
{"index": {}} | ||
{"source_text": "the machine is leaking", "ml.tokens":{"the":1.0,"machine":1.0,"is":1.0,"leaking":1.0}} | ||
{"index": {}} | ||
{"source_text": "these are my words", "ml.tokens":{"these":1.0,"are":1.0,"my":1.0,"words":1.0}} | ||
{"index": {}} | ||
{"source_text": "the octopus comforter smells", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"smells":1.0}} | ||
{"index": {}} | ||
{"source_text": "the octopus comforter is leaking", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"is":1.0,"leaking":1.0}} | ||
{"index": {}} | ||
{"source_text": "washing machine smells", "ml.tokens":{"washing":1.0,"machine":1.0,"smells":1.0}} | ||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" | ||
Content-Type: application/json | ||
ml.start_trained_model_deployment: | ||
model_id: text_expansion_model | ||
wait_for: started | ||
|
||
|
||
--- | ||
teardown: | ||
- skip: | ||
features: headers | ||
|
||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" | ||
indices.delete: | ||
index: index-with-sparse-vector | ||
ignore: 404 | ||
|
||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" | ||
indices.delete: | ||
index: unrelated | ||
ignore: 404 | ||
|
||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" | ||
ml.stop_trained_model_deployment: | ||
model_id: text_expansion_model | ||
ignore: 404 | ||
|
||
- do: | ||
headers: | ||
Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" | ||
ml.delete_trained_model: | ||
model_id: "text_expansion_model" | ||
ignore: 404 | ||
|
||
--- | ||
"Test text expansion search": | ||
- do: | ||
search: | ||
index: index-with-sparse-vector | ||
body: | ||
query: | ||
text_expansion: | ||
ml.tokens: | ||
model_id: text_expansion_model | ||
model_text: "octopus comforter smells" | ||
- match: { hits.total.value: 4 } | ||
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" } | ||
|
||
--- | ||
"Test text expansion search with pruning config": | ||
- skip: | ||
version: " - 8.12.99" | ||
reason: "pruning introduced in 8.13.0" | ||
|
||
- do: | ||
search: | ||
index: index-with-sparse-vector | ||
body: | ||
query: | ||
text_expansion: | ||
ml.tokens: | ||
model_id: text_expansion_model | ||
model_text: "octopus comforter smells" | ||
pruning_config: | ||
tokens_freq_ratio_threshold: 4 | ||
tokens_weight_threshold: 0.4 | ||
- match: { hits.total.value: 4 } | ||
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" } | ||
|
||
--- | ||
"Test named, boosted text expansion search with pruning config": | ||
- skip: | ||
version: " - 8.12.99" | ||
reason: "pruning introduced in 8.13.0" | ||
- do: | ||
search: | ||
index: index-with-sparse-vector | ||
body: | ||
query: | ||
text_expansion: | ||
ml.tokens: | ||
model_id: text_expansion_model | ||
model_text: "octopus comforter smells" | ||
pruning_config: | ||
tokens_freq_ratio_threshold: 4 | ||
tokens_weight_threshold: 0.4 | ||
- match: { hits.total.value: 4 } | ||
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" } | ||
- match: { hits.hits.0._score: 3.0 } | ||
|
||
- do: | ||
search: | ||
index: index-with-sparse-vector | ||
body: | ||
query: | ||
text_expansion: | ||
ml.tokens: | ||
model_id: text_expansion_model | ||
model_text: "octopus comforter smells" | ||
pruning_config: | ||
tokens_freq_ratio_threshold: 4 | ||
tokens_weight_threshold: 0.4 | ||
_name: i-like-naming-my-queries | ||
boost: 100.0 | ||
- match: { hits.total.value: 4 } | ||
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" } | ||
- match: { hits.hits.0.matched_queries: ["i-like-naming-my-queries"] } | ||
- match: { hits.hits.0._score: 300.0 } | ||
|
||
--- | ||
"Test text expansion search with default pruning config": | ||
- skip: | ||
version: " - 8.12.99" | ||
reason: "pruning introduced in 8.13.0" | ||
|
||
- do: | ||
search: | ||
index: index-with-sparse-vector | ||
body: | ||
query: | ||
text_expansion: | ||
ml.tokens: | ||
model_id: text_expansion_model | ||
model_text: "octopus comforter smells" | ||
pruning_config: {} | ||
- match: { hits.total.value: 4 } | ||
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" } | ||
|
||
--- | ||
"Test text expansion search with weighted tokens rescoring only pruned tokens": | ||
- skip: | ||
version: " - 8.12.99" | ||
reason: "pruning introduced in 8.13.0" | ||
|
||
- do: | ||
search: | ||
index: index-with-sparse-vector | ||
body: | ||
query: | ||
text_expansion: | ||
ml.tokens: | ||
model_id: text_expansion_model | ||
model_text: "octopus comforter smells" | ||
pruning_config: | ||
tokens_freq_ratio_threshold: 4 | ||
tokens_weight_threshold: 0.4 | ||
only_score_pruned_tokens: true | ||
- match: { hits.total.value: 0 } | ||
|
||
--- | ||
"Test weighted tokens search": | ||
- skip: | ||
version: " - 8.12.99" | ||
reason: "weighted token search introduced in 8.13.0" | ||
|
||
- do: | ||
search: | ||
index: index-with-sparse-vector | ||
body: | ||
query: | ||
weighted_tokens: | ||
ml.tokens: | ||
tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}] | ||
pruning_config: | ||
tokens_freq_ratio_threshold: 1 | ||
tokens_weight_threshold: 0.4 | ||
only_score_pruned_tokens: false | ||
- match: { hits.total.value: 5 } | ||
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" } | ||
|
||
--- | ||
"Test weighted tokens search with default pruning config": | ||
- skip: | ||
version: " - 8.12.99" | ||
reason: "weighted token search introduced in 8.13.0" | ||
|
||
- do: | ||
search: | ||
index: index-with-sparse-vector | ||
body: | ||
query: | ||
weighted_tokens: | ||
ml.tokens: | ||
tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}] | ||
pruning_config: {} | ||
- match: { hits.total.value: 5 } | ||
- match: { hits.hits.0._source.source_text: "the octopus comforter smells" } | ||
|
||
--- | ||
"Test weighted tokens search only scoring pruned tokens": | ||
- skip: | ||
version: " - 8.12.99" | ||
reason: "weighted token search introduced in 8.13.0" | ||
|
||
- do: | ||
search: | ||
index: index-with-sparse-vector | ||
body: | ||
query: | ||
weighted_tokens: | ||
ml.tokens: | ||
tokens: [{"the": 1.0}, {"comforter":1.0}, {"smells":1.0}, {"bad": 1.0}] | ||
pruning_config: | ||
tokens_freq_ratio_threshold: 4 | ||
tokens_weight_threshold: 0.4 | ||
only_score_pruned_tokens: true | ||
- match: { hits.total.value: 0 } | ||
|
||
--- | ||
"Test weighted tokens search that prunes tokens based on frequency": | ||
- skip: | ||
version: " - 8.12.99" | ||
reason: "weighted token search introduced in 8.13.0" | ||
|
||
- do: | ||
search: | ||
index: index-with-sparse-vector | ||
body: | ||
query: | ||
weighted_tokens: | ||
ml.tokens: | ||
tokens: [{"the": 1.0}, {"octopus":1.0}, {"comforter":1.0}, {"is": 1.0}, {"the": 1.0}, {"best": 1.0}, {"of": 1.0}, {"the": 1.0}, {"bunch": 1.0}] | ||
pruning_config: | ||
tokens_freq_ratio_threshold: 3 | ||
tokens_weight_threshold: 0.4 | ||
only_score_pruned_tokens: true | ||
- match: { hits.total.value: 0 } |
Oops, something went wrong.