Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PIT and search_after do not return all "pages" of a query when nested type field is in mapping #100260

Closed
astefan opened this issue Oct 4, 2023 · 3 comments
Labels
>bug :Search/Search Search-related issues that do not fall into other categories Team:Search Meta label for search team

Comments

@astefan
Copy link
Contributor

astefan commented Oct 4, 2023

Elasticsearch Version

main

Installed Plugins

No response

Java Version

bundled

OS Version

W

Problem Description

This problem was discovered while investigating an ES SQL CI failure here. I have tested this behavior outside ES SQL and narrowed it down to a simpler scenario.

Steps to Reproduce

Create a test_emp index with the following mapping:

{
    "mappings": {
        "properties": {
            "birth_date": {
                "type": "date"
            },
            "dep": {
                "type": "nested",
                "properties": {
                    "dep_id": {
                        "type": "keyword"
                    }
                }
            },
            "emp_no": {
                "type": "integer"
            }
        }
    }
}

Add the following data set to it:

POST /test_emp/_bulk

{"index":{"_id":1}}
{"birth_date": "1964-06-02T00:00:00Z","emp_no": "10002"}
{"index":{"_id":2}}
{"birth_date": "1959-12-25T00:00:00Z","emp_no": "10078"}
{"index":{"_id":3}}
{"birth_date": "1959-07-23T00:00:00Z","emp_no": "10083"}
{"index":{"_id":4}}
{"birth_date": "1959-07-23T00:00:00Z","emp_no": "10087"}
{"index":{"_id":5}}
{"emp_no": "10040"}
{"index":{"_id":6}}
{"emp_no": "10041"}
{"index":{"_id":7}}
{"emp_no": "10042"}
{"index":{"_id":8}}
{"emp_no": "10043"}
{"index":{"_id":9}}
{"emp_no": "10044"}
{"index":{"_id":10}}
{"emp_no": "10045"}
{"index":{"_id":11}}
{"emp_no": "10046"}
{"index":{"_id":12}}
{"emp_no": "10047"}
{"index":{"_id":13}}
{"emp_no": "10048"}
{"index":{"_id":14}}
{"emp_no": "10049"}
{"index":{"_id":15}}
{"birth_date": "2001-07-23T00:00:00Z","emp_no": "10050"}
{"index":{"_id":16}}
{"birth_date": "2002-07-23T00:00:00Z","emp_no": "10051"}
{"index":{"_id":17}}
{"birth_date": "2003-07-23T00:00:00Z","emp_no": "10052"}

Open a PIT against this index: POST /test_emp/_pit?keep_alive=5m and use the provided pit id in the following query. Notice the size is set to 5.

POST /_search

{
    "pit": {
	    "id":  "sLSHBAEIdGVzdF9lbXAWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAWSmFNOTRoYVhUYks5bTEyNG05SnFDQQAAAAAAAAAAHxY0eWg3LUxLcVNkdUtfXzJEb3VyaGVnAAEWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAA",
	    "keep_alive": "5m"  
    },
    "size": 5,
    "query": {
        "bool": {
            "should": [
                {
                    "range": {
                        "birth_date": {
                            "gte": "1959-07-23T00:00:00.000Z",
                            "lte": "1959-07-23T00:00:00.000Z",
                            "time_zone": "Z",
                            "format": "strict_date_optional_time_nanos",
                            "boost": 1.0
                        }
                    }
                },
                {
                    "range": {
                        "birth_date": {
                            "gte": "1959-12-25T00:00:00.000Z",
                            "lte": "1959-12-25T00:00:00.000Z",
                            "time_zone": "Z",
                            "format": "strict_date_optional_time_nanos",
                            "boost": 1.0
                        }
                    }
                },
                {
                    "terms": {
                        "birth_date": [
                            "1964-06-02T00:00:00.000Z"
                        ],
                        "boost": 1.0
                    }
                },
                {
                    "bool": {
                        "must_not": [
                            {
                                "exists": {
                                    "field": "birth_date",
                                    "boost": 1.0
                                }
                            }
                        ],
                        "boost": 1.0
                    }
                }
            ],
            "boost": 1.0
        }
    },
    "_source": false,
    "fields": [
        {
            "field": "birth_date",
            "format": "strict_date_optional_time_nanos"
        }
    ],
    "sort": [
        {
            "birth_date": {
                "order": "asc",
                "missing": "_last",
                "unmapped_type": "date_nanos"
            }
        }
    ],
    "track_total_hits": -1
}

This is the first "page" of results:

{
    "pit_id": "sLSHBAEIdGVzdF9lbXAWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAWSmFNOTRoYVhUYks5bTEyNG05SnFDQQAAAAAAAAAAHxY0eWg3LUxLcVNkdUtfXzJEb3VyaGVnAAEWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAA",
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "max_score": null,
        "hits": [
            {
                "_index": "test_emp",
                "_id": "3",
                "_score": null,
                "fields": {
                    "birth_date": [
                        "1959-07-23T00:00:00.000Z"
                    ]
                },
                "sort": [
                    -329616000000,
                    2
                ]
            },
            {
                "_index": "test_emp",
                "_id": "4",
                "_score": null,
                "fields": {
                    "birth_date": [
                        "1959-07-23T00:00:00.000Z"
                    ]
                },
                "sort": [
                    -329616000000,
                    3
                ]
            },
            {
                "_index": "test_emp",
                "_id": "2",
                "_score": null,
                "fields": {
                    "birth_date": [
                        "1959-12-25T00:00:00.000Z"
                    ]
                },
                "sort": [
                    -316224000000,
                    1
                ]
            },
            {
                "_index": "test_emp",
                "_id": "1",
                "_score": null,
                "fields": {
                    "birth_date": [
                        "1964-06-02T00:00:00.000Z"
                    ]
                },
                "sort": [
                    -176169600000,
                    0
                ]
            },
            {
                "_index": "test_emp",
                "_id": "5",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    4
                ]
            }
        ]
    }
}

Use the last document sort content in the next query as search_after:

{
    "pit": {
	    "id":  "sLSHBAEIdGVzdF9lbXAWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAWSmFNOTRoYVhUYks5bTEyNG05SnFDQQAAAAAAAAAAHxY0eWg3LUxLcVNkdUtfXzJEb3VyaGVnAAEWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAA",
	    "keep_alive": "5m"  
    },
    "size": 5,
    "query": {
        "bool": {
            "should": [
                {
                    "range": {
                        "birth_date": {
                            "gte": "1959-07-23T00:00:00.000Z",
                            "lte": "1959-07-23T00:00:00.000Z",
                            "time_zone": "Z",
                            "format": "strict_date_optional_time_nanos",
                            "boost": 1.0
                        }
                    }
                },
                {
                    "range": {
                        "birth_date": {
                            "gte": "1959-12-25T00:00:00.000Z",
                            "lte": "1959-12-25T00:00:00.000Z",
                            "time_zone": "Z",
                            "format": "strict_date_optional_time_nanos",
                            "boost": 1.0
                        }
                    }
                },
                {
                    "terms": {
                        "birth_date": [
                            "1964-06-02T00:00:00.000Z"
                        ],
                        "boost": 1.0
                    }
                },
                {
                    "bool": {
                        "must_not": [
                            {
                                "exists": {
                                    "field": "birth_date",
                                    "boost": 1.0
                                }
                            }
                        ],
                        "boost": 1.0
                    }
                }
            ],
            "boost": 1.0
        }
    },
    "_source": false,
    "fields": [
        {
            "field": "birth_date",
            "format": "strict_date_optional_time_nanos"
        }
    ],
    "sort": [
        {
            "birth_date": {
                "order": "asc",
                "missing": "_last",
                "unmapped_type": "date_nanos"
            }
        }
    ],
    "track_total_hits": -1,
    "search_after":[
                    9223372036854775807,
                    4
                ]
}

The results now return one document only:

{
    "pit_id": "sLSHBAEIdGVzdF9lbXAWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAWSmFNOTRoYVhUYks5bTEyNG05SnFDQQAAAAAAAAAAHxY0eWg3LUxLcVNkdUtfXzJEb3VyaGVnAAEWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAA",
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "max_score": null,
        "hits": [
            {
                "_index": "test_emp",
                "_id": "6",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    5
                ]
            }
        ]
    }
}

But, if I open a new PIT and change the original query to use "size": 15:

{
    "pit": {
	    "id":  "sLSHBAEIdGVzdF9lbXAWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAWSmFNOTRoYVhUYks5bTEyNG05SnFDQQAAAAAAAAAAIBY0eWg3LUxLcVNkdUtfXzJEb3VyaGVnAAEWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAA",
	    "keep_alive": "5m"  
    },
    "size": 15,
    "query": {
        "bool": {
            "should": [
                {
                    "range": {
                        "birth_date": {
                            "gte": "1959-07-23T00:00:00.000Z",
                            "lte": "1959-07-23T00:00:00.000Z",
                            "time_zone": "Z",
                            "format": "strict_date_optional_time_nanos",
                            "boost": 1.0
                        }
                    }
                },
                {
                    "range": {
                        "birth_date": {
                            "gte": "1959-12-25T00:00:00.000Z",
                            "lte": "1959-12-25T00:00:00.000Z",
                            "time_zone": "Z",
                            "format": "strict_date_optional_time_nanos",
                            "boost": 1.0
                        }
                    }
                },
                {
                    "terms": {
                        "birth_date": [
                            "1964-06-02T00:00:00.000Z"
                        ],
                        "boost": 1.0
                    }
                },
                {
                    "bool": {
                        "must_not": [
                            {
                                "exists": {
                                    "field": "birth_date",
                                    "boost": 1.0
                                }
                            }
                        ],
                        "boost": 1.0
                    }
                }
            ],
            "boost": 1.0
        }
    },
    "_source": false,
    "fields": [
        {
            "field": "birth_date",
            "format": "strict_date_optional_time_nanos"
        }
    ],
    "sort": [
        {
            "birth_date": {
                "order": "asc",
                "missing": "_last",
                "unmapped_type": "date_nanos"
            }
        }
    ],
    "track_total_hits": -1
}

I get back 14 documents:

{
    "pit_id": "sLSHBAEIdGVzdF9lbXAWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAWSmFNOTRoYVhUYks5bTEyNG05SnFDQQAAAAAAAAAAIBY0eWg3LUxLcVNkdUtfXzJEb3VyaGVnAAEWSWpfSndKUV9TOUtZVldTZGlhQkFpUQAA",
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "max_score": null,
        "hits": [
            {
                "_index": "test_emp",
                "_id": "3",
                "_score": null,
                "fields": {
                    "birth_date": [
                        "1959-07-23T00:00:00.000Z"
                    ]
                },
                "sort": [
                    -329616000000,
                    2
                ]
            },
            {
                "_index": "test_emp",
                "_id": "4",
                "_score": null,
                "fields": {
                    "birth_date": [
                        "1959-07-23T00:00:00.000Z"
                    ]
                },
                "sort": [
                    -329616000000,
                    3
                ]
            },
            {
                "_index": "test_emp",
                "_id": "2",
                "_score": null,
                "fields": {
                    "birth_date": [
                        "1959-12-25T00:00:00.000Z"
                    ]
                },
                "sort": [
                    -316224000000,
                    1
                ]
            },
            {
                "_index": "test_emp",
                "_id": "1",
                "_score": null,
                "fields": {
                    "birth_date": [
                        "1964-06-02T00:00:00.000Z"
                    ]
                },
                "sort": [
                    -176169600000,
                    0
                ]
            },
            {
                "_index": "test_emp",
                "_id": "5",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    4
                ]
            },
            {
                "_index": "test_emp",
                "_id": "6",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    5
                ]
            },
            {
                "_index": "test_emp",
                "_id": "7",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    6
                ]
            },
            {
                "_index": "test_emp",
                "_id": "8",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    7
                ]
            },
            {
                "_index": "test_emp",
                "_id": "9",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    8
                ]
            },
            {
                "_index": "test_emp",
                "_id": "10",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    9
                ]
            },
            {
                "_index": "test_emp",
                "_id": "11",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    10
                ]
            },
            {
                "_index": "test_emp",
                "_id": "12",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    11
                ]
            },
            {
                "_index": "test_emp",
                "_id": "13",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    12
                ]
            },
            {
                "_index": "test_emp",
                "_id": "14",
                "_score": null,
                "sort": [
                    9223372036854775807,
                    13
                ]
            }
        ]
    }
}

This shows that the original query using "size": 5 terminates early.
While testing different scenarios, I came to the conclusion that the mere presence of the dep field as nested in the mapping (there are no documents having values for dep in my tests) makes the query return incorrect results. If the tests are performed without this field in the mapping, all 3 pages of results are returned correctly (5 documents on the 1st page, 5 documents on the 2nd page, and 4 documents on the 3rd and last page).

Logs (if relevant)

No response

@astefan astefan added >bug :Search/Search Search-related issues that do not fall into other categories needs:triage Requires assignment of a team area label and removed needs:triage Requires assignment of a team area label labels Oct 4, 2023
@elasticsearchmachine
Copy link
Collaborator

Pinging @elastic/es-search (Team:Search)

@askneller
Copy link

I believe this is no longer an issue. I ran the steps to reproduce: the second query returned 5 results instead of one, and a third query returned 4 results.

I ran the query specified by this step:

Use the last document sort content in the next query as search_after:

and 5 results were returned instead of one:

{
  "pit_id" : "uMyMBAEHdGVzdGlkeBY0TkVGanZrLVI5Q204SnJpS0lQVUJnABZnNEQ2QUpEYlMxbWhEX1JpTXFZeUV3AAAAAAAAAAACFl9mMHVITXhOUkk2LUZod3d0SkNLOEEAARY0TkVGanZrLVI5Q204SnJpS0lQVUJnAAA=",
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "max_score" : null,
    "hits" : [
      {
        "_index" : "testidx",
        "_id" : "6",
        "_score" : null,
        "sort" : [
          9223372036854775807,
          5
        ]
      },
      {
        "_index" : "testidx",
        "_id" : "7",
        "_score" : null,
        "sort" : [
          9223372036854775807,
          6
        ]
      },
      {
        "_index" : "testidx",
        "_id" : "8",
        "_score" : null,
        "sort" : [
          9223372036854775807,
          7
        ]
      },
      {
        "_index" : "testidx",
        "_id" : "9",
        "_score" : null,
        "sort" : [
          9223372036854775807,
          8
        ]
      },
      {
        "_index" : "testidx",
        "_id" : "10",
        "_score" : null,
        "sort" : [
          9223372036854775807,
          9
        ]
      }
    ]
  }
}

I then ran the following query using the sort from the last result in search_after to get the final page:

{
    "pit": {
	    "id":  "uMyMBAEHdGVzdGlkeBY0TkVGanZrLVI5Q204SnJpS0lQVUJnABZnNEQ2QUpEYlMxbWhEX1JpTXFZeUV3AAAAAAAAAAACFl9mMHVITXhOUkk2LUZod3d0SkNLOEEAARY0TkVGanZrLVI5Q204SnJpS0lQVUJnAAA=",
	    "keep_alive": "5m"  
    },
    "size": 5,
    "query": {
        "bool": {
            "should": [
                {
                    "range": {
                        "birth_date": {
                            "gte": "1959-07-23T00:00:00.000Z",
                            "lte": "1959-07-23T00:00:00.000Z",
                            "time_zone": "Z",
                            "format": "strict_date_optional_time_nanos",
                            "boost": 1.0
                        }
                    }
                },
                {
                    "range": {
                        "birth_date": {
                            "gte": "1959-12-25T00:00:00.000Z",
                            "lte": "1959-12-25T00:00:00.000Z",
                            "time_zone": "Z",
                            "format": "strict_date_optional_time_nanos",
                            "boost": 1.0
                        }
                    }
                },
                {
                    "terms": {
                        "birth_date": [
                            "1964-06-02T00:00:00.000Z"
                        ],
                        "boost": 1.0
                    }
                },
                {
                    "bool": {
                        "must_not": [
                            {
                                "exists": {
                                    "field": "birth_date",
                                    "boost": 1.0
                                }
                            }
                        ],
                        "boost": 1.0
                    }
                }
            ],
            "boost": 1.0
        }
    },
    "_source": false,
    "fields": [
        {
            "field": "birth_date",
            "format": "strict_date_optional_time_nanos"
        }
    ],
    "sort": [
        {
            "birth_date": {
                "order": "asc",
                "missing": "_last",
                "unmapped_type": "date_nanos"
            }
        }
    ],
    "track_total_hits": -1,
    "search_after":[
	9223372036854775807,
	9
    ]
}

This gave me the final expected 4 results:

{
  "pit_id" : "uMyMBAEHdGVzdGlkeBY0TkVGanZrLVI5Q204SnJpS0lQVUJnABZnNEQ2QUpEYlMxbWhEX1JpTXFZeUV3AAAAAAAAAAACFl9mMHVITXhOUkk2LUZod3d0SkNLOEEAARY0TkVGanZrLVI5Q204SnJpS0lQVUJnAAA=",
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "max_score" : null,
    "hits" : [
      {
        "_index" : "testidx",
        "_id" : "11",
        "_score" : null,
        "sort" : [
          9223372036854775807,
          10
        ]
      },
      {
        "_index" : "testidx",
        "_id" : "12",
        "_score" : null,
        "sort" : [
          9223372036854775807,
          11
        ]
      },
      {
        "_index" : "testidx",
        "_id" : "13",
        "_score" : null,
        "sort" : [
          9223372036854775807,
          12
        ]
      },
      {
        "_index" : "testidx",
        "_id" : "14",
        "_score" : null,
        "sort" : [
          9223372036854775807,
          13
        ]
      }
    ]
  }
}

So this issue appears to have been resolved.

@benwtrent
Copy link
Member

This indeed works just fine in 8.14+. I am unsure when it was fixed, but it is.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
>bug :Search/Search Search-related issues that do not fall into other categories Team:Search Meta label for search team
Projects
None yet
Development

No branches or pull requests

4 participants