You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Describe the bug
Using stop words and synonyms together gives incorrect search results. This appears to be caused by the token positions that remain after stop-word filtering. The workaround seems to be to use slop to compensate for the shifted positions, which isn't ideal.
// Index document 1: its body contains the full phrase "Chief of the Defence Force".
PUT stoptest/_doc/1
{
  "title": "cdf",
  "body": "An award of the Medal or a clasp may be made by the Governor-General on the recommendation of the Chief of the Defence Force or a delegate"
}
// Index document 2: its body contains the abbreviation "cdf" instead of the full phrase.
PUT stoptest/_doc/2
{
  "title": "cdf",
  "body": "An award of the Medal or a clasp may be made by the Governor-General on the recommendation of the cdf or a delegate"
}
// Phrase search on `body` for the full phrase.
// Reported result: the document containing the synonym is returned, not the one containing the actual phrase.
GET stoptest/_search
{
  "query": {
    "match_phrase": {
      "body": { "query": "Chief of the Defence Force" }
    }
  },
  "highlight": {
    "fields": { "body": {} }
  }
}
// Phrase search on `body` for the synonym "cdf".
// Reported result: the document containing the phrase is returned, not the one containing the synonym (again the wrong way around).
GET stoptest/_search
{
  "query": {
    "match_phrase": {
      "body": { "query": "cdf" }
    }
  },
  "highlight": {
    "fields": { "body": {} }
  }
}
// Same phrase search with "slop": 1 added; reported to work.
GET stoptest/_search
{
  "query": {
    "match_phrase": {
      "body": {
        "query": "Chief of the Defence Force",
        "slop": 1
      }
    }
  },
  "highlight": {
    "fields": { "body": {} }
  }
}
// Adding a slop to the synonym ("cdf") search does not work.
GET stoptest/_search
{
  "query": {
    "match_phrase": {
      "body": {
        "query": "cdf",
        "slop": 1
      }
    }
  },
  "highlight": {
    "fields": { "body": {} }
  }
}
//search on a field without the synonym works as expected
GET stoptest/_search
{
"query": {
"match_phrase": {
"body.nosyn": {
"query": "Chief of the Defence Force"
}
Describe the bug
Using stop words and synonyms together gives incorrect search results. This appears to be caused by the token positions that remain after stop-word filtering. The workaround seems to be to use slop to compensate for the shifted positions, which isn't ideal.
Preconditions
None
To Reproduce
Write the exact actions one should perform in order to reproduce the bug.
Steps to reproduce the behavior:
// Create the test index (1 shard, 0 replicas).
// - drn_synonym_analyzer: "underscore" char filter (maps "_" to a space), standard tokenizer,
//   then lowercase -> asciifolding -> drn_synonyms -> stop -> kstem.
// - drn_analyzer: the same chain without the char filter and without drn_synonyms.
// - title and body are indexed with drn_analyzer and searched with drn_synonym_analyzer;
//   the body.nosyn sub-field uses drn_analyzer for both indexing and searching.
PUT stoptest
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "index": {
      "analysis": {
        "analyzer": {
          "drn_synonym_analyzer": {
            "filter": [
              "lowercase",
              "asciifolding",
              "drn_synonyms",
              "stop",
              "kstem"
            ],
            "char_filter": ["underscore"],
            "tokenizer": "standard"
          },
          "drn_analyzer": {
            "filter": [
              "lowercase",
              "asciifolding",
              "stop",
              "kstem"
            ],
            "tokenizer": "standard"
          }
        },
        "filter": {
          "drn_synonyms": {
            "expand": "true",
            "type": "synonym",
            "lenient": "true",
            "synonyms": ["cdf,chief_of_the_defence_force"]
          }
        },
        "char_filter": {
          "underscore": {
            "type": "mapping",
            "mappings": [
              """_ => \u0020 """
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "drn_analyzer",
        "search_analyzer": "drn_synonym_analyzer"
      },
      "body": {
        "type": "text",
        "analyzer": "drn_analyzer",
        "search_analyzer": "drn_synonym_analyzer",
        "fields": {
          "nosyn": {
            "type": "text",
            "analyzer": "drn_analyzer",
            "search_analyzer": "drn_analyzer"
          }
        }
      }
    }
  }
}
// Index document 1: its body contains the full phrase "Chief of the Defence Force".
PUT stoptest/_doc/1
{
  "title": "cdf",
  "body": "An award of the Medal or a clasp may be made by the Governor-General on the recommendation of the Chief of the Defence Force or a delegate"
}
// Index document 2: its body contains the abbreviation "cdf" instead of the full phrase.
PUT stoptest/_doc/2
{
  "title": "cdf",
  "body": "An award of the Medal or a clasp may be made by the Governor-General on the recommendation of the cdf or a delegate"
}
// Phrase search on `body` for the full phrase.
// Reported result: the document containing the synonym is returned, not the one containing the actual phrase.
GET stoptest/_search
{
  "query": {
    "match_phrase": {
      "body": { "query": "Chief of the Defence Force" }
    }
  },
  "highlight": {
    "fields": { "body": {} }
  }
}
// Phrase search on `body` for the synonym "cdf".
// Reported result: the document containing the phrase is returned, not the one containing the synonym (again the wrong way around).
GET stoptest/_search
{
  "query": {
    "match_phrase": {
      "body": {
        "query": "cdf"
      }
    }
  },
  "highlight": {
    "fields": {
      "body": {}
    }
  }
}
// Same phrase search with "slop": 1 added; reported to work.
GET stoptest/_search
{
  "query": {
    "match_phrase": {
      "body": {
        "query": "Chief of the Defence Force",
        "slop": 1
      }
    }
  },
  "highlight": {
    "fields": { "body": {} }
  }
}
// Adding a slop to the synonym ("cdf") search does not work.
GET stoptest/_search
{
  "query": {
    "match_phrase": {
      "body": {
        "query": "cdf",
        "slop": 1
      }
    }
  },
  "highlight": {
    "fields": { "body": {} }
  }
}
// Phrase search on `body.nosyn` (the sub-field whose search analyzer has no synonym filter); works as expected.
GET stoptest/_search
{
  "query": {
    "match_phrase": {
      "body.nosyn": {
        "query": "Chief of the Defence Force"
      }
    }
  },
  "highlight": {
    "fields": {
      "body.nosyn": {}
    }
  }
}
// Search on `body.nosyn` (no synonym filter at search time) for the abbreviation "cdf".
GET stoptest/_search
{
  "query": {
    "match_phrase": {
      "body.nosyn": {
        "query": "cdf"
      }
    }
  },
  "highlight": {
    "fields": {
      "body.nosyn": {}
    }
  }
}
// Hybrid solution for phrases: a required phrase multi_match over `body` and `body.nosyn`,
// plus an optional ("should") phrase multi_match over the same fields with "slop": 1.
GET stoptest/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "query": "Chief of the Defence Force",
            "fields": ["body", "body.nosyn"],
            "type": "phrase"
          }
        }
      ],
      "should": [
        {
          "multi_match": {
            "query": "Chief of the Defence Force",
            "fields": ["body", "body.nosyn"],
            "type": "phrase",
            "slop": 1
          }
        }
      ]
    }
  },
  "highlight": {
    "fields": { "body": {} }
  }
}
// Hybrid solution for single terms: a required most_fields multi_match over `body` and `body.nosyn`,
// plus an optional ("should") phrase multi_match over the same fields with "slop": 1.
GET stoptest/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "query": "cdf",
            "fields": ["body", "body.nosyn"],
            "type": "most_fields"
          }
        }
      ],
      "should": [
        {
          "multi_match": {
            "query": "cdf",
            "fields": ["body", "body.nosyn"],
            "type": "phrase",
            "slop": 1
          }
        }
      ]
    }
  },
  "highlight": {
    "fields": { "body": {} }
  }
}
Expected behavior
The text was updated successfully, but these errors were encountered: