From f2c621f2a37a94a7f3734341146aed12b49fce19 Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Fri, 5 Aug 2022 13:05:34 -0700 Subject: [PATCH 1/3] Limit total page count to 20 --- api/catalog/api/controllers/search_controller.py | 4 ++-- api/catalog/api/utils/pagination.py | 5 +++++ api/test/dead_link_filter_test.py | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/api/catalog/api/controllers/search_controller.py b/api/catalog/api/controllers/search_controller.py index 6de146eea..9e34f6ae4 100644 --- a/api/catalog/api/controllers/search_controller.py +++ b/api/catalog/api/controllers/search_controller.py @@ -18,6 +18,7 @@ import catalog.api.models as models from catalog.api.utils.dead_link_mask import get_query_hash, get_query_mask +from catalog.api.utils.pagination import MAX_TOTAL_PAGE_COUNT from catalog.api.utils.validate_images import validate_images @@ -427,8 +428,7 @@ def _get_result_and_page_count( natural_page_count = int(result_count / page_size) if natural_page_count % page_size != 0: natural_page_count += 1 - last_allowed_page = int((5000 + page_size / 2) / page_size) - page_count = min(natural_page_count, last_allowed_page) + page_count = min(natural_page_count, MAX_TOTAL_PAGE_COUNT) if len(results) < page_size and page_count == 0: result_count = len(results) diff --git a/api/catalog/api/utils/pagination.py b/api/catalog/api/utils/pagination.py index 484e455fe..271317a82 100644 --- a/api/catalog/api/utils/pagination.py +++ b/api/catalog/api/utils/pagination.py @@ -4,6 +4,9 @@ from catalog.api.utils.exceptions import get_api_exception +MAX_TOTAL_PAGE_COUNT = 20 + + class StandardPagination(PageNumberPagination): page_size_query_param = "page_size" page_query_param = "page" @@ -42,6 +45,8 @@ def page(self, value): value = int(value) # convert str params to int if value <= 0: raise get_api_exception("Page must be greater than 0.", 400) + elif value > 20: + raise get_api_exception("Searches are limited to 20 pages.", 400) self._page = value def get_paginated_response(self, data): diff --git a/api/test/dead_link_filter_test.py b/api/test/dead_link_filter_test.py index 1bd47c24c..563a8c413 100644 --- a/api/test/dead_link_filter_test.py +++ b/api/test/dead_link_filter_test.py @@ -4,6 +4,8 @@ import pytest import requests +from catalog.api.utils.pagination import MAX_TOTAL_PAGE_COUNT + def _patch_redis(): def redis_mget(keys, *_, **__): @@ -115,7 +117,7 @@ def test_page_consistency_removing_dead_links(search_without_dead_links): Test the results returned in consecutive pages are never repeated when filtering out dead links. """ - total_pages = 30 + total_pages = MAX_TOTAL_PAGE_COUNT page_size = 5 page_results = [] From 47e4fd41a3b8a67118bf9124252ad97c666d3e04 Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Fri, 5 Aug 2022 13:49:12 -0700 Subject: [PATCH 2/3] Add pytest-raises --- api/Pipfile | 1 + api/Pipfile.lock | 110 +++++++++++++++++++++++++++++------------------ 2 files changed, 68 insertions(+), 43 deletions(-) diff --git a/api/Pipfile b/api/Pipfile index e5c1ad4bc..6cd74f93a 100644 --- a/api/Pipfile +++ b/api/Pipfile @@ -7,6 +7,7 @@ verify_ssl = true ipython = "*" pycodestyle = "*" pytest-django = ">=3.5" +pytest-raises = "*" remote-pdb = "*" pre-commit = "*" locust = "*" diff --git a/api/Pipfile.lock b/api/Pipfile.lock index f2839671a..ff20c1811 100644 --- a/api/Pipfile.lock +++ b/api/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "2832f6faa6ba508df60afe298a8c82423b73d1a35cbd529035a90f7032757b34" + "sha256": "7741ea25abb5880b918232adf5bcb3c4b65b23a127d6647c0b25009da994e5c8" }, "pipfile-spec": 6, "requires": { @@ -42,19 +42,19 @@ }, "boto3": { "hashes": [ - "sha256:aec404d06690a0ca806592efc6bbe30a9ac88fa2ad73b6d907f7794a8b3b1459", - "sha256:df3d6ef02304bd7c3711090936c092d5db735dda109decac1e236c3ef7fdb7af" + "sha256:44026e44549148dbc5b261ead5f6b339e785680c350ef621bf85f7e2fca05b49", + "sha256:b2d9d55f123a9a91eea2fd8e379d90abf37634420fbb45c22d67e10b324ec71b" ], "index": "pypi", - "version": "==1.24.42" + "version": "==1.24.46" }, "botocore": { "hashes": [ - "sha256:38a180a6666c5a9b069a75ec3cf374ff2a64c3e90c9f24a916858bcdeb04456d", - "sha256:f8e6c2f69a9d577fb9c69e4e74f49f4315a48decee0e7dc88b6e470772110884" + "sha256:747b7e94aef41498f063fc0be79c5af102d940beea713965179e1ead89c7e9ec", + "sha256:f66d8305d1f59d83334df9b11b6512bb1e14698ec4d5d6d42f833f39f3304ca7" ], "markers": "python_version >= '3.7'", - "version": "==1.27.42" + "version": "==1.27.46" }, "certifi": { "hashes": [ @@ -271,11 +271,11 @@ }, "django-storages": { "hashes": [ - "sha256:204a99f218b747c46edbfeeb1310d357f83f90fa6a6024d8d0a3f422570cee84", - "sha256:a475edb2f0f04c4f7e548919a751ecd50117270833956ed5bd585c0575d2a5e7" + "sha256:3ef9dd5a2ac7221b571aa32f72a94a542b6b03fbd393e1c18e64d2a4006bfddf", + "sha256:d4fed98bbfb5347096ddfac2a6665d5133c788b9ec32c156ec5f7fb02ae57335" ], "index": "pypi", - "version": "==1.12.3" + "version": "==1.13" }, "django-tqdm": { "hashes": [ @@ -832,19 +832,19 @@ }, "sentry-sdk": { "hashes": [ - "sha256:60b13757d6344a94bf0ccb3c0a006c4de77daab09871b30fbbd05d5ec24e54fb", - "sha256:f185c53496d79b280fe5d9d21e6572aee1ab802d3354eb12314d216cfbaa8d30" + "sha256:011155f11ab828a977fe8a60c5b534c30f49d70c9938362536d206bb230bb519", + "sha256:9921e76e29135aa1fa65fe231aaaef47355f85470c5ec59ce32715e22e6340b6" ], "index": "pypi", - "version": "==1.9.0" + "version": "==1.9.2" }, "setuptools": { "hashes": [ - "sha256:273b6847ae61f7829c1affcdd9a32f67aa65233be508f4fbaab866c5faa4e408", - "sha256:d5340d16943a0f67057329db59b564e938bb3736c6e50ae16ea84d5e5d9ba6d0" + "sha256:7c7854ee1429a240090297628dc9f75b35318d193537968e2dc14010ee2f5bca", + "sha256:dc2662692f47d99cb8ae15a784529adeed535bcd7c277fee0beccf961522baf6" ], "markers": "python_version >= '3.7'", - "version": "==63.3.0" + "version": "==63.4.1" }, "six": { "hashes": [ @@ -890,7 +890,7 @@ "sha256:c33ccba33c819596124764c23a97d25f32b28433ba0dedeb77d873a38722c9bc", "sha256:ea6e8fb210b19d950fab93b60c9009226c63a28808bc8386e05301e25883ac0a" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'", + "markers": "python_version >= '3.6'", "version": "==1.26.11" }, "webob": { @@ -1269,11 +1269,11 @@ }, "fakeredis": { "hashes": [ - "sha256:4a0f8fe0d5c18147864db50ae2e86f667420ea06653bec08b3a5fccfd3fbde6f", - "sha256:ca516f86181f85615cd8210854b43acbe7b1f37ed8a082c5557749c73f2f0dd3" + "sha256:60639946e3bb1274c30416f539f01f9d73b4ea68c244c1442f5524e45f51e882", + "sha256:868467ff399520fc77e37ff002c60d1b2a1674742982e27338adaeebcc537648" ], "index": "pypi", - "version": "==1.8.1" + "version": "==1.9.0" }, "filelock": { "hashes": [ @@ -1285,11 +1285,11 @@ }, "flask": { "hashes": [ - "sha256:15972e5017df0575c3d6c090ba168b6db90259e620ac8d7ea813a396bad5b6cb", - "sha256:9013281a7402ad527f8fd56375164f3aa021ecfaff89bfe3825346c24f87e04c" + "sha256:3c604c48c3d5b4c63e72134044c0b4fe90ff01ef65280b9fe2d38c8860d99fe5", + "sha256:9c2b81b9b1edcc835af72d600f1955e713a065e7cb41d7e51ee762b449d9c65d" ], "markers": "python_version >= '3.7'", - "version": "==2.1.3" + "version": "==2.2.1" }, "flask-basicauth": { "hashes": [ @@ -1486,11 +1486,11 @@ }, "identify": { "hashes": [ - "sha256:a3d4c096b384d50d5e6dc5bc8b9bc44f1f61cefebd750a7b3e9f939b53fb214d", - "sha256:feaa9db2dc0ce333b453ce171c0cf1247bbfde2c55fc6bb785022d411a1b78b5" + "sha256:25851c8c1370effb22aaa3c987b30449e9ff0cece408f810ae6ce408fdd20893", + "sha256:887e7b91a1be152b0d46bbf072130235a8117392b9f1828446079a816a05ef44" ], "markers": "python_version >= '3.7'", - "version": "==2.5.2" + "version": "==2.5.3" }, "idna": { "hashes": [ @@ -1508,6 +1508,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.4.1" }, + "importlib-metadata": { + "hashes": [ + "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670", + "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23" + ], + "markers": "python_version < '3.10'", + "version": "==4.12.0" + }, "iniconfig": { "hashes": [ "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", @@ -1555,11 +1563,11 @@ }, "locust": { "hashes": [ - "sha256:3fab41217ca6677345bfe3fb222e646bda408f072c302b77164ccd77daf2a35b", - "sha256:4c6844f50f7b50b5328f310431375ad62dfb1c786a83641357838aa02e1f2382" + "sha256:d556e70898240deaa08097a7c3eee9456897150246ceec8367b9943580c0a7c6", + "sha256:e5571de2510bb6293daee6245b3e01a614dce5935b1d5283f709ce5b49125492" ], "index": "pypi", - "version": "==2.10.1" + "version": "==2.10.2" }, "markdown-it-py": { "hashes": [ @@ -1761,11 +1769,11 @@ }, "pre-commit": { "hashes": [ - "sha256:10c62741aa5704faea2ad69cb550ca78082efe5697d6f04e5710c3c229afdd10", - "sha256:4233a1e38621c87d9dda9808c6606d7e7ba0e087cd56d3fe03202a01d2919615" + "sha256:51a5ba7c480ae8072ecdb6933df22d2f812dc897d5fe848778116129a681aac7", + "sha256:a978dac7bc9ec0bcee55c18a277d553b0f419d259dadb4b9418ff2d00eb43959" ], "index": "pypi", - "version": "==2.19.0" + "version": "==2.20.0" }, "prompt-toolkit": { "hashes": [ @@ -1837,11 +1845,11 @@ }, "pycodestyle": { "hashes": [ - "sha256:289cdc0969d589d90752582bef6dff57c5fbc6949ee8b013ad6d6449a8ae9437", - "sha256:beaba44501f89d785be791c9462553f06958a221d166c64e1f107320f839acc2" + "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785", + "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b" ], "index": "pypi", - "version": "==2.9.0" + "version": "==2.9.1" }, "pygments": { "hashes": [ @@ -1875,6 +1883,14 @@ "index": "pypi", "version": "==4.5.2" }, + "pytest-raises": { + "hashes": [ + "sha256:33a1351f2debb9f74ca6ef70e374899f608a1217bf13ca4a0767f37b49e9cdda", + "sha256:f64a4dbcb5f89c100670fe83d87a5cd9d956586db461c5c628f7eb94b749c90b" + ], + "index": "pypi", + "version": "==0.11" + }, "python-dateutil": { "hashes": [ "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", @@ -2014,11 +2030,11 @@ }, "setuptools": { "hashes": [ - "sha256:273b6847ae61f7829c1affcdd9a32f67aa65233be508f4fbaab866c5faa4e408", - "sha256:d5340d16943a0f67057329db59b564e938bb3736c6e50ae16ea84d5e5d9ba6d0" + "sha256:7c7854ee1429a240090297628dc9f75b35318d193537968e2dc14010ee2f5bca", + "sha256:dc2662692f47d99cb8ae15a784529adeed535bcd7c277fee0beccf961522baf6" ], "markers": "python_version >= '3.7'", - "version": "==63.3.0" + "version": "==63.4.1" }, "six": { "hashes": [ @@ -2159,7 +2175,7 @@ "sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e", "sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b" ], - "markers": "python_version >= '3.7'", + "markers": "python_version > '2.7'", "version": "==6.2" }, "traitlets": { @@ -2183,16 +2199,16 @@ "sha256:c33ccba33c819596124764c23a97d25f32b28433ba0dedeb77d873a38722c9bc", "sha256:ea6e8fb210b19d950fab93b60c9009226c63a28808bc8386e05301e25883ac0a" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'", + "markers": "python_version >= '3.6'", "version": "==1.26.11" }, "virtualenv": { "hashes": [ - "sha256:0ef5be6d07181946891f5abc8047fda8bc2f0b4b9bf222c64e6e8963baee76db", - "sha256:635b272a8e2f77cb051946f46c60a54ace3cb5e25568228bd6b57fc70eca9ff3" + "sha256:4193b7bc8a6cd23e4eb251ac64f29b4398ab2c233531e66e40b19a6b7b0d30c1", + "sha256:d86ea0bb50e06252d79e6c241507cb904fcd66090c3271381372d6221a3970f9" ], "markers": "python_version >= '3.6'", - "version": "==20.16.2" + "version": "==20.16.3" }, "wcwidth": { "hashes": [ @@ -2279,6 +2295,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==1.14.1" }, + "zipp": { + "hashes": [ + "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2", + "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009" + ], + "markers": "python_version >= '3.7'", + "version": "==3.8.1" + }, "zope.event": { "hashes": [ "sha256:2666401939cdaa5f4e0c08cf7f20c9b21423b95e88f4675b1443973bdb080c42", From 31c48176fbd358df19582116ef73ee65be404bbe Mon Sep 17 00:00:00 2001 From: Madison Swain-Bowden Date: Fri, 5 Aug 2022 14:10:52 -0700 Subject: [PATCH 3/3] Max page size tests There are several sections of this code I just don't understand and don't make sense to me. But I'm hesitant to change current functionality more than I have already --- api/test/dead_link_filter_test.py | 8 +++ .../controllers/test_search_controller.py | 51 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 api/test/unit/controllers/test_search_controller.py diff --git a/api/test/dead_link_filter_test.py b/api/test/dead_link_filter_test.py index 563a8c413..89fc87d31 100644 --- a/api/test/dead_link_filter_test.py +++ b/api/test/dead_link_filter_test.py @@ -136,3 +136,11 @@ def no_duplicates(xs): ids = list(map(lambda x: x["id"], page_results)) # No results should be repeated so we should have no duplicate ids assert no_duplicates(ids) + + +@pytest.mark.django_db +def test_max_page_count(): + response = requests.get( + f"{API_URL}/v1/images", params={"page": MAX_TOTAL_PAGE_COUNT + 1}, verify=False + ) + assert response.status_code == 400 diff --git a/api/test/unit/controllers/test_search_controller.py b/api/test/unit/controllers/test_search_controller.py new file mode 100644 index 000000000..0231213e4 --- /dev/null +++ b/api/test/unit/controllers/test_search_controller.py @@ -0,0 +1,51 @@ +from unittest import mock + +import pytest + +from catalog.api.controllers import search_controller +from catalog.api.utils.pagination import MAX_TOTAL_PAGE_COUNT + + +@pytest.mark.parametrize( + "total_hits, real_result_count, page_size, expected", + [ + # No results + (0, 0, 10, (0, 0)), + # Setting page size to 0 raises an exception + pytest.param( + 0, 0, 0, (0, 0), marks=pytest.mark.raises(exception=ZeroDivisionError) + ), + # Fewer results than page size leads to max of result total + (5, 5, 10, (5, 0)), + # Even if no real results exist, total result count and page count are returned + # (seems like an impossible case IRL) + (100, 0, 10, (100, 10)), + # If there are real results and ES reports no hits, nothing is expected + # (seems like an impossible case IRL) + (0, 100, 10, (0, 0)), + # Evenly divisible number of pages + (25, 5, 5, (25, 5)), + # Unevenly divisible number of pages + (21, 5, 5, (21, 5)), + # My assumption would be that this yields (20, 4), but the code is such that + # when the "natural" page count can't be cleanly divisible by the page size, + # We increment it plus one. Why would that be the case? 20 results, with 5 + # results per-page, would seem to result in 4 pages total not 5 🤷‍♀️ + (20, 5, 5, (20, 5)), + # Fewer hits than page size, but result list somehow differs, use that for count + (48, 20, 50, (20, 0)), + # Page count gets truncated always + (5000, 10, 10, (5000, MAX_TOTAL_PAGE_COUNT)), + ], +) +def test_get_result_and_page_count(total_hits, real_result_count, page_size, expected): + response_obj = mock.MagicMock() + response_obj.hits.total.value = total_hits + results = [mock.MagicMock() for _ in range(real_result_count)] + + actual = search_controller._get_result_and_page_count( + response_obj, + results, + page_size, + ) + assert actual == expected