Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Commit

Permalink
Make page_count self-truncating when we detect the last page of a que…
Browse files Browse the repository at this point in the history
…ry (#1032)

* Make page_count self-truncating when we detect the last page of a query based on page size and result count

* Update examples to have correct page count

* Fix unnecessary page addition bug

* Fix incorrect page counting

* Prevent page count from ever being 0

* Do not use impossible "page" value in unit tests
  • Loading branch information
sarayourfriend authored Jan 9, 2023
1 parent d78abb9 commit aa7ae51
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 31 deletions.
28 changes: 18 additions & 10 deletions api/catalog/api/controllers/search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def search(
)

result_count, page_count = _get_result_and_page_count(
search_response, results, page_size
search_response, results, page_size, page
)
return results or [], page_count, result_count

Expand Down Expand Up @@ -447,7 +447,7 @@ def related_media(uuid, index, request, filter_dead):
s, start, end, page_size, response, request, filter_dead
)

result_count, _ = _get_result_and_page_count(response, results, page_size)
result_count, _ = _get_result_and_page_count(response, results, page_size, page)

return results or [], result_count

Expand Down Expand Up @@ -496,7 +496,7 @@ def get_sources(index):


def _get_result_and_page_count(
response_obj: Response, results: list[Hit] | None, page_size: int
response_obj: Response, results: list[Hit] | None, page_size: int, page: int
) -> tuple[int, int]:
"""
Elasticsearch does not allow deep pagination of ranked queries.
Expand All @@ -506,14 +506,22 @@ def _get_result_and_page_count(
:param results: The list of filtered result Hits.
:return: Result and page count.
"""
if results is None:
return 0, 1
if not results:
return 0, 0

result_count = response_obj.hits.total.value
page_count = int(result_count / page_size)
if page_count % page_size != 0:
page_count += 1
if len(results) < page_size and page_count == 0:
result_count = len(results)
page_count = ceil(result_count / page_size)

if len(results) < page_size:
if page_count == 1:
result_count = len(results)

# If we have fewer results than the requested page size and are
# not on the first page that means that we've reached the end of
# the query and can set the page_count to the currently requested
# page. This means that the `page_count` can change during
# pagination for the same query, but it's the only way to
# communicate under the current v1 API that a query has been exhausted.
page_count = page

return result_count, page_count
4 changes: 2 additions & 2 deletions api/catalog/api/examples/audio_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
audio_search_200_example = {
"application/json": {
"result_count": 1,
"page_count": 0,
"page_count": 1,
"page_size": 20,
"page": 1,
"results": [
Expand Down Expand Up @@ -108,7 +108,7 @@
audio_related_200_example = {
"application/json": {
"result_count": 10000,
"page_count": 0,
"page_count": 1,
"results": [
{
"title": "File:Mozart - Eine kleine Nachtmusik - 1. Allegro.ogg",
Expand Down
4 changes: 2 additions & 2 deletions api/catalog/api/examples/image_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
image_search_200_example = {
"application/json": {
"result_count": 1,
"page_count": 0,
"page_count": 1,
"page_size": 20,
"page": 1,
"results": [base_image | {"fields_matched": ["title"]}],
Expand Down Expand Up @@ -109,7 +109,7 @@
image_related_200_example = {
"application/json": {
"result_count": 10000,
"page_count": 0,
"page_count": 1,
"results": [
{
"title": "exam tactics",
Expand Down
48 changes: 31 additions & 17 deletions api/test/unit/controllers/test_search_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,36 +13,49 @@


@pytest.mark.parametrize(
"total_hits, real_result_count, page_size, expected",
"total_hits, real_result_count, page_size, page, expected",
[
# No results
(0, 0, 10, (0, 0)),
(0, 0, 10, 1, (0, 0)),
# Setting page size to 0 raises an exception
# Not possible in the actual API
pytest.param(
0, 0, 0, (0, 0), marks=pytest.mark.raises(exception=ZeroDivisionError)
5, 5, 0, 1, (0, 0), marks=pytest.mark.raises(exception=ZeroDivisionError)
),
# Fewer results than page size leads to max of result total
(5, 5, 10, (5, 0)),
# Even if no real results exist, total result count and page count are returned
# (seems like an impossible case IRL)
(100, 0, 10, (100, 10)),
(5, 5, 10, 1, (5, 1)),
# If no real results exist, even if there are hits, fall back to 0, 0
# (This case represents where all the links for a result are dead, for example)
(100, 0, 10, 1, (0, 0)),
# If there are real results and ES reports no hits, nothing is expected
# (seems like an impossible case IRL)
(0, 100, 10, (0, 0)),
(0, 100, 10, 1, (0, 0)),
# Evenly divisible number of pages
(25, 5, 5, (25, 5)),
(25, 5, 5, 1, (25, 5)),
# An edge case that previously did not behave as expected with evenly divisble numbers of pages
(20, 5, 5, 1, (20, 4)),
# Unevenly divisible number of pages
(21, 5, 5, (21, 5)),
# My assumption would be that this yields (20, 4), but the code is such that
# when the "natural" page count can't be cleanly divisible by the page size,
# We increment it plus one. Why would that be the case? 20 results, with 5
# results per-page, would seem to result in 4 pages total not 5 🤷‍♀️
(20, 5, 5, (20, 5)),
(21, 5, 5, 1, (21, 5)),
# Fewer hits than page size, but result list somehow differs, use that for count
(48, 20, 50, (20, 0)),
(48, 20, 50, 1, (20, 1)),
# Despite us applying a pagination limit, that is applied further up in the API, not at this low a level
(2000, 20, 20, 2, (2000, 100)),
# Page count is reduced to the current page number even though 2000 / 20 is much larger than 5
# because despite that we have result count < page size which indicates we've exhausted the query
(2000, 5, 20, 5, (2000, 5)),
# First page, we got all the results and there are no further possible pages with the current page count
(10, 10, 20, 1, (10, 1)),
# This is here to test a case that used to erroneously produce (10, 2) by adding 1
# to the page count when it wasn't necessary to do so.
(10, 10, 10, 1, (10, 1)),
# This is technically impossible because we truncate results to the page size before entering this method
# I think the handling of this case is a likely source for the bug in the previous case
(10, 10, 9, 1, (10, 2)),
],
)
def test_get_result_and_page_count(total_hits, real_result_count, page_size, expected):
def test_get_result_and_page_count(
total_hits, real_result_count, page_size, page, expected
):
response_obj = mock.MagicMock()
response_obj.hits.total.value = total_hits
results = [mock.MagicMock() for _ in range(real_result_count)]
Expand All @@ -51,6 +64,7 @@ def test_get_result_and_page_count(total_hits, real_result_count, page_size, exp
response_obj,
results,
page_size,
page,
)
assert actual == expected

Expand Down

0 comments on commit aa7ae51

Please sign in to comment.