diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 2d5ecee3412..34316a7e43e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,6 +1,7 @@ # Specific assignments for the 'openverse-catalog' group -catalog/ @WordPress/openverse-catalog -dag-sync.sh @WordPress/openverse-catalog +catalog/ @WordPress/openverse-catalog +indexer_worker/ @WordPress/openverse-catalog +dag-sync.sh @WordPress/openverse-catalog api/ @WordPress/openverse-api packages/python/ @WordPress/openverse-api diff --git a/catalog/tests/dags/legacy_data_refresh/__init__.py b/catalog/tests/dags/legacy_data_refresh/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/catalog/tests/dags/legacy_data_refresh/test_reporting.py b/catalog/tests/dags/legacy_data_refresh/test_reporting.py new file mode 100644 index 00000000000..f7b4208a1e8 --- /dev/null +++ b/catalog/tests/dags/legacy_data_refresh/test_reporting.py @@ -0,0 +1,64 @@ +import pytest + +from legacy_data_refresh.reporting import ( + report_record_difference, + report_status, +) + + +@pytest.mark.parametrize( + "before, after, expected_in_message", + [ + [ + {"src1": 1, "src2": 19}, + {"src1": 2, "src2": 38}, + ["20 → 40", "+20 (+100.000000%", "`src1`:+1", "`src2`:+19"], + ], + [ + {"src1": 1, "src2": 19}, + {"src1": 3, "src2": 57, "src3": 20}, + ["20 → 80", "+60 (+300.000000%", "`src1`:+2", "`src2`:+38", "`src3`:+20"], + ], + [ + {"src1": 4, "src2": 21}, + {"src1": 4}, + # Unchanged source count shouldn't show up + ["25 → 4", "-21 (-84.000000%", "`src2`:-21"], + ], + [ + {"src1": 4000, "src2": 20}, + {"src1": 2000, "src2": 10}, + ["4,020 → 2,010", "-2,010 (-50.000000%", "`src1`:-2,000", "`src2`:-10"], + ], + [ + {}, + {"src1": 10, "src2": 10}, + ["0 → 20", "+20 (+inf%", "`src1`:+10", "`src2`:+10"], + ], + [ + {"src1": 10, "src2": 10}, + {}, + ["20 → 0", "-20 (-100.000000%", "`src1`:-10", "`src2`:-10"], + ], + [ + {"src1": 5000000000}, + {"src1": 4938271605}, + ["5,000,000,000 → 4,938,271,605", "-61,728,395 
(-1.234568%"], + ], + [{"src1": 4}, {"src1": 4}, ["Sources not listed had no change in count"]], + [{}, {}, ["Both indices missing? No breakdown to show"]], + ], +) +def test_record_reporting(before, after, expected_in_message): + actual = report_record_difference(before, after, "media", "dag_id") + assert isinstance(expected_in_message, list), ( + "Value for 'expected_in_message' should be a list, " + "a string may give a false positive" + ) + for expected in expected_in_message: + assert expected in actual + + +def test_report_status(): + actual = report_status("image", "This is my message", "sample_dag_id") + assert actual == "`image`: This is my message" diff --git a/docker-compose.yml b/docker-compose.yml index cbc8eb5d7dd..82696d7cd2c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,6 +3,7 @@ include: - "ingestion_server/compose.yml" - "api/compose.yml" - "frontend/compose.yml" + - "indexer_worker/compose.yml" services: # Database used by the API @@ -10,6 +11,7 @@ services: profiles: - ingestion_server - api + - catalog_indexer_worker image: docker.io/postgres:13.10-alpine ports: - "50254:5432" @@ -26,6 +28,7 @@ services: - catalog - catalog_dependencies - ingestion_server + - catalog_indexer_worker - api build: context: ./docker/upstream_db/ @@ -85,6 +88,7 @@ services: es: profiles: - ingestion_server + - catalog_indexer_worker - api image: docker.elastic.co/elasticsearch/elasticsearch:8.8.2 ports: diff --git a/indexer_worker/.dockerignore b/indexer_worker/.dockerignore new file mode 100644 index 00000000000..7775b2e6115 --- /dev/null +++ b/indexer_worker/.dockerignore @@ -0,0 +1,5 @@ +# Exclude everything and only include certain files +* +!indexer_worker +!gunicorn* +!Pipfile* diff --git a/indexer_worker/Dockerfile b/indexer_worker/Dockerfile new file mode 100644 index 00000000000..8d36157eb0f --- /dev/null +++ b/indexer_worker/Dockerfile @@ -0,0 +1,82 @@ +# syntax=docker/dockerfile:1 + +# Automatically build image using Python version 
specified in the `Pipfile`. +ARG CATALOG_PY_VERSION + +################## +# Python builder # +################## + +FROM docker.io/python:${CATALOG_PY_VERSION} as builder + +# Container optimizations +ENV PYTHONUNBUFFERED=1 +ENV PIP_NO_CACHE_DIR=1 +ENV PIP_NO_COLOR=1 + +# Activate the virtualenv +ENV PATH="/venv/bin:$PATH" + +# - Install system packages needed for building Python dependencies +# - Create a virtualenv inside `/venv` +# - Install Pipenv to install Python dependencies +RUN apt-get update \ + && apt-get install -y python3-dev \ + && rm -rf /var/lib/apt/lists/* \ + && python -m venv /venv \ + && pip install --upgrade pipenv + +# Copy the Pipenv files into the container +COPY Pipfile Pipfile.lock ./ + +# Install Python dependencies system-wide (uses the active virtualenv) +RUN pipenv install --system --deploy --dev + +#################### +# Indexer worker # +#################### + +FROM docker.io/python:${CATALOG_PY_VERSION}-slim as ing + +LABEL org.opencontainers.image.source="https://github.com/WordPress/openverse" + +# Container optimizations +ENV PYTHONUNBUFFERED=1 +ENV PIP_NO_CACHE_DIR=1 +ENV PIP_NO_COLOR=1 + +# Activate the virtualenv +ENV PATH="/venv/bin:$PATH" + +ENV PYTHONPATH="$PYTHONPATH:/indexer_worker/" +# TLDEXTRACT fails to cache in /home/supervisord, set its cache to /tmp instead +ENV TLDEXTRACT_CACHE="/tmp/python-tldextract" + +WORKDIR /indexer_worker + +# Copy virtualenv from the builder image +COPY --from=builder /venv /venv + +# - Install system packages needed for running Python dependencies +# - libpq-dev: required by `psycopg2` +# - Create directory for holding worker state +RUN apt-get update \ + && apt-get install -y curl libpq-dev \ + && rm -rf /var/lib/apt/lists/* \ + && mkdir /worker_state + +# Create a non-root user +RUN useradd ingestionu +RUN chown ingestionu /worker_state +USER ingestionu + +# Copy code into the final image +COPY --chown=ingestionu . 
/indexer_worker/ + +# Expose Gunicorn server to indexer worker Falcon app +EXPOSE 8003 + +ARG SEMANTIC_VERSION +ENV SENTRY_RELEASE=$SEMANTIC_VERSION + +CMD ["gunicorn", "--bind", "0.0.0.0:8003", "api:api"] diff --git a/indexer_worker/Pipfile b/indexer_worker/Pipfile new file mode 100644 index 00000000000..a7b08d7159f --- /dev/null +++ b/indexer_worker/Pipfile @@ -0,0 +1,31 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] +ipython = "~=8.16" +pytest = "~=7.4" +pytest-order = "~=1.1" +pytest-sugar = "~=0.9" +remote-pdb = "~=2.1" +pook = "~=1.0" + +[packages] +aws-requests-auth = "~=0.4" +boto3 = "~=1.28" +bottle = "~=0.12" +elasticsearch = "==8.13.0" +elasticsearch-dsl = "~=8.9" +falcon = "~=3.1" +filelock = "~=3.13" +gunicorn = "~=22.0" +jsonschema = "~=4.19" +psycopg2 = "~=2.9" +python-decouple = "~=3.8" +PyYAML = "~=6.0" +tldextract = "~=5.0" +sentry-sdk = {extras = ["falcon"], version = "*"} + +[requires] +python_version = "3.11" diff --git a/indexer_worker/Pipfile.lock b/indexer_worker/Pipfile.lock new file mode 100644 index 00000000000..d533d853f6f --- /dev/null +++ b/indexer_worker/Pipfile.lock @@ -0,0 +1,926 @@ +{ + "_meta": { + "hash": { + "sha256": "35ce9041cca8e854af408ce4b9ffaad4441f3e8b4429405c8aa24853c4b2077b" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.11" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "attrs": { + "hashes": [ + "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30", + "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1" + ], + "markers": "python_version >= '3.7'", + "version": "==23.2.0" + }, + "aws-requests-auth": { + "hashes": [ + "sha256:33593372018b960a31dbbe236f89421678b885c35f0b6a7abfae35bb77e069b2", + "sha256:646bc37d62140ea1c709d20148f5d43197e6bd2d63909eb36fa4bb2345759977" + ], + "index": "pypi", + "version": "==0.4.3" + }, + "boto3": { 
+ "hashes": [ + "sha256:71f551491fb12fe07727d371d5561c5919fdf33dbc1d4251c57940d267a53a9e", + "sha256:b703e22775561a748adc4576c30424b81abd2a00d3c6fb28eec2e5cde92c1eed" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==1.34.74" + }, + "botocore": { + "hashes": [ + "sha256:32bb519bae62483893330c18a0ea4fd09d1ffa32bc573cd8559c2d9a08fb8c5c", + "sha256:5d2015b5d91d6c402c122783729ce995ed7283a746b0380957026dc2b3b75969" + ], + "markers": "python_version >= '3.8'", + "version": "==1.34.74" + }, + "bottle": { + "hashes": [ + "sha256:d6f15f9d422670b7c073d63bd8d287b135388da187a0f3e3c19293626ce034ea", + "sha256:e1a9c94970ae6d710b3fb4526294dfeb86f2cb4a81eff3a4b98dc40fb0e5e021" + ], + "index": "pypi", + "version": "==0.12.25" + }, + "certifi": { + "hashes": [ + "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f", + "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1" + ], + "markers": "python_version >= '3.6'", + "version": "==2024.2.2" + }, + "charset-normalizer": { + "hashes": [ + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + 
"sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", 
+ "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", + "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + 
"sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", 
+ "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.3.2" + }, + "elastic-transport": { + "hashes": [ + "sha256:2410ec1ff51221e8b3a01c0afa9f0d0498e1386a269283801f5c12f98e42dc45", + "sha256:aec890afdddd057762b27ff3553b0be8fa4673ec1a4fd922dfbd00325874bb3d" + ], + "markers": "python_version >= '3.7'", + "version": "==8.13.0" + }, + "elasticsearch": { + "hashes": [ + "sha256:4aaf49253e974eb500f01136a487bdd0f09d3cafd37a0456eff6acfff0c9199b", + "sha256:e4ebebb22d09f0ef839c26b6aa98e19ccd636bcb77f08c12b562b02cacd5e744" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==8.13.0" + }, + "elasticsearch-dsl": { + "hashes": [ + "sha256:2ea9e6ded64d21a8f1ef72477a4d116c6fbeea631ac32a2e2490b9c0d09a99a6", + "sha256:ce32b8529888a97be911531e7590816cf3b1f608263eff6fb75aa7106e233c88" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==8.12.0" + }, + "falcon": { + "hashes": [ + "sha256:04a92f159d392098a11d14b8ca71d17129d8b1ef37b7a3577f1f8bcb7b3aecba", + "sha256:094d295a767e2aa84f07bec6b23e9ebe2e43cde81d9d583bef037168bd775ad6", + "sha256:12432c3f6bce46fe4eec3db6db8d2df1abe43a7531219356f1ba859db207e57b", + "sha256:19b2ce8a613a29a9eaf8243ca285ebf80464e8a6489dff60425f850fb5548936", + "sha256:1c335f1118a6e42f08cf30d56914a0bc0d470aa6db7619fdc4c546b184f38248", + "sha256:23335dbccd44f29e85ec55f2f35d5a0bc12bd7a509f641ab81f5c64b65626263", + "sha256:24aa51ba4145f05649976c33664971ef36f92846208bd9d4d4158ceb51bc753f", + "sha256:3cda76fb21568aa058ce454fa6272ca5b2582ebb0efcb7ae0090d3bf6d0db5af", + "sha256:508fdf30617cf1fa5c9d3058c14124dc8e5f7e316e26dca22d974f916493fd0e", + "sha256:51bbbfa1ecb1d50bed9f8ae940b0f1049d958e945f1a08891769d40cfabe6fb2", + "sha256:56e8a4728fb0193e2ccd5301d864fd9743a989cc228e709e5c49ff1025cc1a4f", + "sha256:57d51f556ece73766f07ede57f17fa65dbbc2cc5e1c7075fb606f727464ad71e", + 
"sha256:656e738e0e220f4503e4f07747b564f4459da159a1f32ec6d2478efb651278dd", + "sha256:7471aab646875d4478377065246a4115aaf3c0801a6eb4b6871f9836c8ef60b1", + "sha256:796a57046b0717bff5ac488235c37ea63834a5cfc2c9291c5eeaa43c53e5e24c", + "sha256:7a1ee54bf19d9c7f998edd8ac21ab8ead1e2f73c24822237eb5485890979a25d", + "sha256:7b210c05b38a8d655e16aa3ae2befaa70ecfb49bef73c0c1995566b22afcfdd1", + "sha256:8b203408040e87e8323e1c1921b106353fa5fe5dc05c9b3f4881acb3af03f556", + "sha256:9c82cb54bbf67861febe80d394c9b7bfa0d2e16cc998b69bfff4e8b003c721a2", + "sha256:9e2fe54081f1cedc71462eff8dca074045d14380a4bca163882c6c4353f65af2", + "sha256:ad37c46322122f34e228be4fe7ae5fcfedb630eef788a198fbdff5971091d5dc", + "sha256:adc23ced91c4690042a11a0515c5cfe93eeeb7d063940900aee85f8eae7460ec", + "sha256:c6319883789ee3abcbde2dc10fed8016cc3d9a05018ae59944838b892101111a", + "sha256:ca3c6cbcba90e272f60581fb3c4561cdcd0ac6d19672f5a11a04309b1d23fa66", + "sha256:cb6b6a79d096b3a1f2f37f66f46a2cf18deb575db6dee9935057e6036d98d01f", + "sha256:cbd40435e99255e40ccfa849e4809cd1638fd8eccc08931fc9d355a6840a7332", + "sha256:d52a05be5c2ef364853cdc6d97056dd880a534016db73b95f5a6ebc652577533", + "sha256:d56d9a9886387585ce4547354c9929bf5743394df04a17df6ed51ad6bb58a4cc", + "sha256:d6b7131e85dff13abaacb4ff479c456256f0d57b262b1fb1771180f7535cc902", + "sha256:d78a6cfe2d135632673def489a19474e2508d83475c7662c4fa63be0ba82dd81", + "sha256:db78171113a3920f0f33d8dd26364527a362db2d1c3376a95778653ff87dea24", + "sha256:e19a0a3827821bcf754a9b24217e3b8b4750f7eb437c4a8c461135a86ca9b1c5", + "sha256:e1f622d73111912021b8311d1e5d1eabef484217d2d30abe3d237533cb225ce9" + ], + "index": "pypi", + "markers": "python_version >= '3.5'", + "version": "==3.1.3" + }, + "filelock": { + "hashes": [ + "sha256:5ffa845303983e7a0b7ae17636509bc97997d58afeafa72fb141a17b152284cb", + "sha256:a79895a25bbefdf55d1a2a0a80968f7dbb28edcd6d4234a0afb3f37ecde4b546" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==3.13.3" + }, + "gunicorn": { + 
"hashes": [ + "sha256:350679f91b24062c86e386e198a15438d53a7a8207235a78ba1b53df4c4378d9", + "sha256:4a0b436239ff76fb33f11c07a16482c521a7e09c1ce3cc293c2330afe01bec63" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==22.0.0" + }, + "idna": { + "hashes": [ + "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc", + "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0" + ], + "index": "pypi", + "markers": "python_version >= '3.5'", + "version": "==3.7" + }, + "jmespath": { + "hashes": [ + "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", + "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe" + ], + "markers": "python_version >= '3.7'", + "version": "==1.0.1" + }, + "jsonschema": { + "hashes": [ + "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f", + "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==4.21.1" + }, + "jsonschema-specifications": { + "hashes": [ + "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc", + "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c" + ], + "markers": "python_version >= '3.8'", + "version": "==2023.12.1" + }, + "packaging": { + "hashes": [ + "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5", + "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9" + ], + "markers": "python_version >= '3.7'", + "version": "==24.0" + }, + "psycopg2": { + "hashes": [ + "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981", + "sha256:38a8dcc6856f569068b47de286b472b7c473ac7977243593a288ebce0dc89516", + "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3", + "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa", + 
"sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a", + "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693", + "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372", + "sha256:bac58c024c9922c23550af2a581998624d6e02350f4ae9c5f0bc642c633a2d5e", + "sha256:c92811b2d4c9b6ea0285942b2e7cac98a59e166d59c588fe5cfe1eda58e72d59", + "sha256:d1454bde93fb1e224166811694d600e746430c006fbb031ea06ecc2ea41bf156", + "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024", + "sha256:de80739447af31525feddeb8effd640782cf5998e1a4e9192ebdf829717e3913", + "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==2.9.9" + }, + "python-dateutil": { + "hashes": [ + "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", + "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.9.0.post0" + }, + "python-decouple": { + "hashes": [ + "sha256:ba6e2657d4f376ecc46f77a3a615e058d93ba5e465c01bbe57289bfb7cce680f", + "sha256:d0d45340815b25f4de59c974b855bb38d03151d81b037d9e3f463b0c9f8cbd66" + ], + "index": "pypi", + "version": "==3.8" + }, + "pyyaml": { + "hashes": [ + "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", + "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", + "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", + "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", + "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", + "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", + "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595", + "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", + 
"sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", + "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", + "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", + "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", + "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", + "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", + "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", + "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", + "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", + "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6", + "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", + "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", + "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", + "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", + "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", + "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", + "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", + "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", + "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", + "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", + "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", + "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef", + "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", + "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", + "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", + "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0", 
+ "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515", + "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c", + "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c", + "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924", + "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34", + "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", + "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", + "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", + "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", + "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", + "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", + "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", + "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", + "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", + "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585", + "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d", + "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" + ], + "index": "pypi", + "markers": "python_version >= '3.6'", + "version": "==6.0.1" + }, + "referencing": { + "hashes": [ + "sha256:5773bd84ef41799a5a8ca72dc34590c041eb01bf9aa02632b4a973fb0181a844", + "sha256:d53ae300ceddd3169f1ffa9caf2cb7b769e92657e4fafb23d34b93679116dfd4" + ], + "markers": "python_version >= '3.8'", + "version": "==0.34.0" + }, + "requests": { + "hashes": [ + "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", + "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" + ], + "markers": "python_version >= '3.7'", + "version": "==2.31.0" + }, + "requests-file": { + "hashes": [ + 
"sha256:20c5931629c558fda566cacc10cfe2cd502433e628f568c34c80d96a0cc95972", + "sha256:3e493d390adb44aa102ebea827a48717336d5268968c370eaf19abaf5cae13bf" + ], + "version": "==2.0.0" + }, + "rpds-py": { + "hashes": [ + "sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f", + "sha256:044a3e61a7c2dafacae99d1e722cc2d4c05280790ec5a05031b3876809d89a5c", + "sha256:08231ac30a842bd04daabc4d71fddd7e6d26189406d5a69535638e4dcb88fe76", + "sha256:08f9ad53c3f31dfb4baa00da22f1e862900f45908383c062c27628754af2e88e", + "sha256:0ab39c1ba9023914297dd88ec3b3b3c3f33671baeb6acf82ad7ce883f6e8e157", + "sha256:0af039631b6de0397ab2ba16eaf2872e9f8fca391b44d3d8cac317860a700a3f", + "sha256:0b8612cd233543a3781bc659c731b9d607de65890085098986dfd573fc2befe5", + "sha256:11a8c85ef4a07a7638180bf04fe189d12757c696eb41f310d2426895356dcf05", + "sha256:1374f4129f9bcca53a1bba0bb86bf78325a0374577cf7e9e4cd046b1e6f20e24", + "sha256:1d4acf42190d449d5e89654d5c1ed3a4f17925eec71f05e2a41414689cda02d1", + "sha256:1d9a5be316c15ffb2b3c405c4ff14448c36b4435be062a7f578ccd8b01f0c4d8", + "sha256:1df3659d26f539ac74fb3b0c481cdf9d725386e3552c6fa2974f4d33d78e544b", + "sha256:22806714311a69fd0af9b35b7be97c18a0fc2826e6827dbb3a8c94eac6cf7eeb", + "sha256:2644e47de560eb7bd55c20fc59f6daa04682655c58d08185a9b95c1970fa1e07", + "sha256:2e6d75ab12b0bbab7215e5d40f1e5b738aa539598db27ef83b2ec46747df90e1", + "sha256:30f43887bbae0d49113cbaab729a112251a940e9b274536613097ab8b4899cf6", + "sha256:34b18ba135c687f4dac449aa5157d36e2cbb7c03cbea4ddbd88604e076aa836e", + "sha256:36b3ee798c58ace201289024b52788161e1ea133e4ac93fba7d49da5fec0ef9e", + "sha256:39514da80f971362f9267c600b6d459bfbbc549cffc2cef8e47474fddc9b45b1", + "sha256:39f5441553f1c2aed4de4377178ad8ff8f9d733723d6c66d983d75341de265ab", + "sha256:3a96e0c6a41dcdba3a0a581bbf6c44bb863f27c541547fb4b9711fd8cf0ffad4", + "sha256:3f26b5bd1079acdb0c7a5645e350fe54d16b17bfc5e71f371c449383d3342e17", + "sha256:41ef53e7c58aa4ef281da975f62c258950f54b76ec8e45941e93a3d1d8580594", + 
"sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d", + "sha256:43fbac5f22e25bee1d482c97474f930a353542855f05c1161fd804c9dc74a09d", + "sha256:4457a94da0d5c53dc4b3e4de1158bdab077db23c53232f37a3cb7afdb053a4e3", + "sha256:465a3eb5659338cf2a9243e50ad9b2296fa15061736d6e26240e713522b6235c", + "sha256:482103aed1dfe2f3b71a58eff35ba105289b8d862551ea576bd15479aba01f66", + "sha256:4832d7d380477521a8c1644bbab6588dfedea5e30a7d967b5fb75977c45fd77f", + "sha256:4901165d170a5fde6f589acb90a6b33629ad1ec976d4529e769c6f3d885e3e80", + "sha256:5307def11a35f5ae4581a0b658b0af8178c65c530e94893345bebf41cc139d33", + "sha256:5417558f6887e9b6b65b4527232553c139b57ec42c64570569b155262ac0754f", + "sha256:56a737287efecafc16f6d067c2ea0117abadcd078d58721f967952db329a3e5c", + "sha256:586f8204935b9ec884500498ccc91aa869fc652c40c093bd9e1471fbcc25c022", + "sha256:5b4e7d8d6c9b2e8ee2d55c90b59c707ca59bc30058269b3db7b1f8df5763557e", + "sha256:5ddcba87675b6d509139d1b521e0c8250e967e63b5909a7e8f8944d0f90ff36f", + "sha256:618a3d6cae6ef8ec88bb76dd80b83cfe415ad4f1d942ca2a903bf6b6ff97a2da", + "sha256:635dc434ff724b178cb192c70016cc0ad25a275228f749ee0daf0eddbc8183b1", + "sha256:661d25cbffaf8cc42e971dd570d87cb29a665f49f4abe1f9e76be9a5182c4688", + "sha256:66e6a3af5a75363d2c9a48b07cb27c4ea542938b1a2e93b15a503cdfa8490795", + "sha256:67071a6171e92b6da534b8ae326505f7c18022c6f19072a81dcf40db2638767c", + "sha256:685537e07897f173abcf67258bee3c05c374fa6fff89d4c7e42fb391b0605e98", + "sha256:69e64831e22a6b377772e7fb337533c365085b31619005802a79242fee620bc1", + "sha256:6b0817e34942b2ca527b0e9298373e7cc75f429e8da2055607f4931fded23e20", + "sha256:6c81e5f372cd0dc5dc4809553d34f832f60a46034a5f187756d9b90586c2c307", + "sha256:6d7faa6f14017c0b1e69f5e2c357b998731ea75a442ab3841c0dbbbfe902d2c4", + "sha256:6ef0befbb5d79cf32d0266f5cff01545602344eda89480e1dd88aca964260b18", + "sha256:6ef687afab047554a2d366e112dd187b62d261d49eb79b77e386f94644363294", + "sha256:7223a2a5fe0d217e60a60cdae28d6949140dde9c3bcc714063c5b463065e3d66", 
+ "sha256:77f195baa60a54ef9d2de16fbbfd3ff8b04edc0c0140a761b56c267ac11aa467", + "sha256:793968759cd0d96cac1e367afd70c235867831983f876a53389ad869b043c948", + "sha256:7bd339195d84439cbe5771546fe8a4e8a7a045417d8f9de9a368c434e42a721e", + "sha256:7cd863afe7336c62ec78d7d1349a2f34c007a3cc6c2369d667c65aeec412a5b1", + "sha256:7f2facbd386dd60cbbf1a794181e6aa0bd429bd78bfdf775436020172e2a23f0", + "sha256:84ffab12db93b5f6bad84c712c92060a2d321b35c3c9960b43d08d0f639d60d7", + "sha256:8c8370641f1a7f0e0669ddccca22f1da893cef7628396431eb445d46d893e5cd", + "sha256:8db715ebe3bb7d86d77ac1826f7d67ec11a70dbd2376b7cc214199360517b641", + "sha256:8e8916ae4c720529e18afa0b879473049e95949bf97042e938530e072fde061d", + "sha256:8f03bccbd8586e9dd37219bce4d4e0d3ab492e6b3b533e973fa08a112cb2ffc9", + "sha256:8f2fc11e8fe034ee3c34d316d0ad8808f45bc3b9ce5857ff29d513f3ff2923a1", + "sha256:923d39efa3cfb7279a0327e337a7958bff00cc447fd07a25cddb0a1cc9a6d2da", + "sha256:93df1de2f7f7239dc9cc5a4a12408ee1598725036bd2dedadc14d94525192fc3", + "sha256:998e33ad22dc7ec7e030b3df701c43630b5bc0d8fbc2267653577e3fec279afa", + "sha256:99f70b740dc04d09e6b2699b675874367885217a2e9f782bdf5395632ac663b7", + "sha256:9a00312dea9310d4cb7dbd7787e722d2e86a95c2db92fbd7d0155f97127bcb40", + "sha256:9d54553c1136b50fd12cc17e5b11ad07374c316df307e4cfd6441bea5fb68496", + "sha256:9dbbeb27f4e70bfd9eec1be5477517365afe05a9b2c441a0b21929ee61048124", + "sha256:a1ce3ba137ed54f83e56fb983a5859a27d43a40188ba798993812fed73c70836", + "sha256:a34d557a42aa28bd5c48a023c570219ba2593bcbbb8dc1b98d8cf5d529ab1434", + "sha256:a5f446dd5055667aabaee78487f2b5ab72e244f9bc0b2ffebfeec79051679984", + "sha256:ad36cfb355e24f1bd37cac88c112cd7730873f20fb0bdaf8ba59eedf8216079f", + "sha256:aec493917dd45e3c69d00a8874e7cbed844efd935595ef78a0f25f14312e33c6", + "sha256:b316144e85316da2723f9d8dc75bada12fa58489a527091fa1d5a612643d1a0e", + "sha256:b34ae4636dfc4e76a438ab826a0d1eed2589ca7d9a1b2d5bb546978ac6485461", + 
"sha256:b34b7aa8b261c1dbf7720b5d6f01f38243e9b9daf7e6b8bc1fd4657000062f2c", + "sha256:bc362ee4e314870a70f4ae88772d72d877246537d9f8cb8f7eacf10884862432", + "sha256:bed88b9a458e354014d662d47e7a5baafd7ff81c780fd91584a10d6ec842cb73", + "sha256:c0013fe6b46aa496a6749c77e00a3eb07952832ad6166bd481c74bda0dcb6d58", + "sha256:c0b5dcf9193625afd8ecc92312d6ed78781c46ecbf39af9ad4681fc9f464af88", + "sha256:c4325ff0442a12113a6379af66978c3fe562f846763287ef66bdc1d57925d337", + "sha256:c463ed05f9dfb9baebef68048aed8dcdc94411e4bf3d33a39ba97e271624f8f7", + "sha256:c8362467a0fdeccd47935f22c256bec5e6abe543bf0d66e3d3d57a8fb5731863", + "sha256:cd5bf1af8efe569654bbef5a3e0a56eca45f87cfcffab31dd8dde70da5982475", + "sha256:cf1ea2e34868f6fbf070e1af291c8180480310173de0b0c43fc38a02929fc0e3", + "sha256:d62dec4976954a23d7f91f2f4530852b0c7608116c257833922a896101336c51", + "sha256:d68c93e381010662ab873fea609bf6c0f428b6d0bb00f2c6939782e0818d37bf", + "sha256:d7c36232a90d4755b720fbd76739d8891732b18cf240a9c645d75f00639a9024", + "sha256:dd18772815d5f008fa03d2b9a681ae38d5ae9f0e599f7dda233c439fcaa00d40", + "sha256:ddc2f4dfd396c7bfa18e6ce371cba60e4cf9d2e5cdb71376aa2da264605b60b9", + "sha256:e003b002ec72c8d5a3e3da2989c7d6065b47d9eaa70cd8808b5384fbb970f4ec", + "sha256:e32a92116d4f2a80b629778280103d2a510a5b3f6314ceccd6e38006b5e92dcb", + "sha256:e4461d0f003a0aa9be2bdd1b798a041f177189c1a0f7619fe8c95ad08d9a45d7", + "sha256:e541ec6f2ec456934fd279a3120f856cd0aedd209fc3852eca563f81738f6861", + "sha256:e546e768d08ad55b20b11dbb78a745151acbd938f8f00d0cfbabe8b0199b9880", + "sha256:ea7d4a99f3b38c37eac212dbd6ec42b7a5ec51e2c74b5d3223e43c811609e65f", + "sha256:ed4eb745efbff0a8e9587d22a84be94a5eb7d2d99c02dacf7bd0911713ed14dd", + "sha256:f8a2f084546cc59ea99fda8e070be2fd140c3092dc11524a71aa8f0f3d5a55ca", + "sha256:fcb25daa9219b4cf3a0ab24b0eb9a5cc8949ed4dc72acb8fa16b7e1681aa3c58", + "sha256:fdea4952db2793c4ad0bdccd27c1d8fdd1423a92f04598bc39425bcc2b8ee46e" + ], + "markers": "python_version >= '3.8'", + "version": "==0.18.0" + }, + 
"s3transfer": { + "hashes": [ + "sha256:5683916b4c724f799e600f41dd9e10a9ff19871bf87623cc8f491cb4f5fa0a19", + "sha256:ceb252b11bcf87080fb7850a224fb6e05c8a776bab8f2b64b7f25b969464839d" + ], + "markers": "python_version >= '3.8'", + "version": "==0.10.1" + }, + "sentry-sdk": { + "extras": [ + "falcon" + ], + "hashes": [ + "sha256:eb65289da013ca92fad2694851ad2f086aa3825e808dc285bd7dcaf63602bb18", + "sha256:f7125a9235795811962d52ff796dc032cd1d0dd98b59beaced8380371cd9c13c" + ], + "version": "==1.44.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "tldextract": { + "hashes": [ + "sha256:4dfc4c277b6b97fa053899fcdb892d2dc27295851ab5fac4e07797b6a21b2e46", + "sha256:c9e17f756f05afb5abac04fe8f766e7e70f9fe387adb1859f0f52408ee060200" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==5.1.2" + }, + "urllib3": { + "hashes": [ + "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d", + "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19" + ], + "markers": "python_version >= '3.6'", + "version": "==2.2.1" + } + }, + "develop": { + "asttokens": { + "hashes": [ + "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24", + "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0" + ], + "version": "==2.4.1" + }, + "attrs": { + "hashes": [ + "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30", + "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1" + ], + "markers": "python_version >= '3.7'", + "version": "==23.2.0" + }, + "decorator": { + "hashes": [ + "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", + 
"sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186" + ], + "markers": "python_version >= '3.5'", + "version": "==5.1.1" + }, + "executing": { + "hashes": [ + "sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147", + "sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc" + ], + "markers": "python_version >= '3.5'", + "version": "==2.0.1" + }, + "furl": { + "hashes": [ + "sha256:5a6188fe2666c484a12159c18be97a1977a71d632ef5bb867ef15f54af39cc4e", + "sha256:9ab425062c4217f9802508e45feb4a83e54324273ac4b202f1850363309666c0" + ], + "version": "==2.1.3" + }, + "iniconfig": { + "hashes": [ + "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", + "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374" + ], + "markers": "python_version >= '3.7'", + "version": "==2.0.0" + }, + "ipython": { + "hashes": [ + "sha256:07232af52a5ba146dc3372c7bf52a0f890a23edf38d77caef8d53f9cdc2584c1", + "sha256:7468edaf4f6de3e1b912e57f66c241e6fd3c7099f2ec2136e239e142e800274d" + ], + "index": "pypi", + "markers": "python_version >= '3.10'", + "version": "==8.23.0" + }, + "jedi": { + "hashes": [ + "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd", + "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0" + ], + "markers": "python_version >= '3.6'", + "version": "==0.19.1" + }, + "jsonschema": { + "hashes": [ + "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f", + "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==4.21.1" + }, + "jsonschema-specifications": { + "hashes": [ + "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc", + "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c" + ], + "markers": "python_version >= '3.8'", + "version": "==2023.12.1" + }, + "matplotlib-inline": { + "hashes": [ + 
"sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311", + "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304" + ], + "markers": "python_version >= '3.5'", + "version": "==0.1.6" + }, + "orderedmultidict": { + "hashes": [ + "sha256:04070bbb5e87291cc9bfa51df413677faf2141c73c61d2a5f7b26bea3cd882ad", + "sha256:43c839a17ee3cdd62234c47deca1a8508a3f2ca1d0678a3bf791c87cf84adbf3" + ], + "version": "==1.0.1" + }, + "packaging": { + "hashes": [ + "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5", + "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9" + ], + "markers": "python_version >= '3.7'", + "version": "==24.0" + }, + "parso": { + "hashes": [ + "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0", + "sha256:c001d4636cd3aecdaf33cbb40aebb59b094be2a74c556778ef5576c175e19e75" + ], + "markers": "python_version >= '3.6'", + "version": "==0.8.3" + }, + "pexpect": { + "hashes": [ + "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", + "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f" + ], + "markers": "sys_platform != 'win32' and sys_platform != 'emscripten'", + "version": "==4.9.0" + }, + "pluggy": { + "hashes": [ + "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981", + "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be" + ], + "markers": "python_version >= '3.8'", + "version": "==1.4.0" + }, + "pook": { + "hashes": [ + "sha256:4683a8a9d11fb56901ae15001a5bfb76a1bb960b1a841de1f0ca11c8c2d9eef8", + "sha256:61dbd9f6f9bf4d0bbab4abdf382bf7e8fbaae8561c5de3cd444e7c4be67df651" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==1.4.3" + }, + "prompt-toolkit": { + "hashes": [ + "sha256:3527b7af26106cbc65a040bcc84839a3566ec1b051bb0bfe953631e704b0ff7d", + "sha256:a11a29cb3bf0a28a387fe5122cdb649816a957cd9261dcedf8c9f1fef33eacf6" + ], + "markers": "python_full_version 
>= '3.7.0'", + "version": "==3.0.43" + }, + "ptyprocess": { + "hashes": [ + "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", + "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" + ], + "version": "==0.7.0" + }, + "pure-eval": { + "hashes": [ + "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350", + "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3" + ], + "version": "==0.2.2" + }, + "pygments": { + "hashes": [ + "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c", + "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367" + ], + "markers": "python_version >= '3.7'", + "version": "==2.17.2" + }, + "pytest": { + "hashes": [ + "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280", + "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==7.4.4" + }, + "pytest-order": { + "hashes": [ + "sha256:944f86b6d441aa7b1da80f801c6ab65b84bbeba472d0a7a12eb43ba26650101a", + "sha256:9d65c3b6dc6d6ee984d6ae2c6c4aa4f1331e5b915116219075c888c8bcbb93b8" + ], + "index": "pypi", + "markers": "python_version >= '3.6'", + "version": "==1.2.0" + }, + "pytest-sugar": { + "hashes": [ + "sha256:8cb5a4e5f8bbcd834622b0235db9e50432f4cbd71fef55b467fe44e43701e062", + "sha256:f1e74c1abfa55f7241cf7088032b6e378566f16b938f3f08905e2cf4494edd46" + ], + "index": "pypi", + "version": "==0.9.7" + }, + "referencing": { + "hashes": [ + "sha256:5773bd84ef41799a5a8ca72dc34590c041eb01bf9aa02632b4a973fb0181a844", + "sha256:d53ae300ceddd3169f1ffa9caf2cb7b769e92657e4fafb23d34b93679116dfd4" + ], + "markers": "python_version >= '3.8'", + "version": "==0.34.0" + }, + "remote-pdb": { + "hashes": [ + "sha256:2d70c6f41e0eabf0165e8f1be58f82aa7a605aaeab8f2aefeb9ce246431091c1", + "sha256:94f73a92ac1248cf16189211011f97096bdada8a7baac8c79372663bbb57b5d0" + ], + "index": "pypi", + 
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==2.1.0" + }, + "rpds-py": { + "hashes": [ + "sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f", + "sha256:044a3e61a7c2dafacae99d1e722cc2d4c05280790ec5a05031b3876809d89a5c", + "sha256:08231ac30a842bd04daabc4d71fddd7e6d26189406d5a69535638e4dcb88fe76", + "sha256:08f9ad53c3f31dfb4baa00da22f1e862900f45908383c062c27628754af2e88e", + "sha256:0ab39c1ba9023914297dd88ec3b3b3c3f33671baeb6acf82ad7ce883f6e8e157", + "sha256:0af039631b6de0397ab2ba16eaf2872e9f8fca391b44d3d8cac317860a700a3f", + "sha256:0b8612cd233543a3781bc659c731b9d607de65890085098986dfd573fc2befe5", + "sha256:11a8c85ef4a07a7638180bf04fe189d12757c696eb41f310d2426895356dcf05", + "sha256:1374f4129f9bcca53a1bba0bb86bf78325a0374577cf7e9e4cd046b1e6f20e24", + "sha256:1d4acf42190d449d5e89654d5c1ed3a4f17925eec71f05e2a41414689cda02d1", + "sha256:1d9a5be316c15ffb2b3c405c4ff14448c36b4435be062a7f578ccd8b01f0c4d8", + "sha256:1df3659d26f539ac74fb3b0c481cdf9d725386e3552c6fa2974f4d33d78e544b", + "sha256:22806714311a69fd0af9b35b7be97c18a0fc2826e6827dbb3a8c94eac6cf7eeb", + "sha256:2644e47de560eb7bd55c20fc59f6daa04682655c58d08185a9b95c1970fa1e07", + "sha256:2e6d75ab12b0bbab7215e5d40f1e5b738aa539598db27ef83b2ec46747df90e1", + "sha256:30f43887bbae0d49113cbaab729a112251a940e9b274536613097ab8b4899cf6", + "sha256:34b18ba135c687f4dac449aa5157d36e2cbb7c03cbea4ddbd88604e076aa836e", + "sha256:36b3ee798c58ace201289024b52788161e1ea133e4ac93fba7d49da5fec0ef9e", + "sha256:39514da80f971362f9267c600b6d459bfbbc549cffc2cef8e47474fddc9b45b1", + "sha256:39f5441553f1c2aed4de4377178ad8ff8f9d733723d6c66d983d75341de265ab", + "sha256:3a96e0c6a41dcdba3a0a581bbf6c44bb863f27c541547fb4b9711fd8cf0ffad4", + "sha256:3f26b5bd1079acdb0c7a5645e350fe54d16b17bfc5e71f371c449383d3342e17", + "sha256:41ef53e7c58aa4ef281da975f62c258950f54b76ec8e45941e93a3d1d8580594", + "sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d", + 
"sha256:43fbac5f22e25bee1d482c97474f930a353542855f05c1161fd804c9dc74a09d", + "sha256:4457a94da0d5c53dc4b3e4de1158bdab077db23c53232f37a3cb7afdb053a4e3", + "sha256:465a3eb5659338cf2a9243e50ad9b2296fa15061736d6e26240e713522b6235c", + "sha256:482103aed1dfe2f3b71a58eff35ba105289b8d862551ea576bd15479aba01f66", + "sha256:4832d7d380477521a8c1644bbab6588dfedea5e30a7d967b5fb75977c45fd77f", + "sha256:4901165d170a5fde6f589acb90a6b33629ad1ec976d4529e769c6f3d885e3e80", + "sha256:5307def11a35f5ae4581a0b658b0af8178c65c530e94893345bebf41cc139d33", + "sha256:5417558f6887e9b6b65b4527232553c139b57ec42c64570569b155262ac0754f", + "sha256:56a737287efecafc16f6d067c2ea0117abadcd078d58721f967952db329a3e5c", + "sha256:586f8204935b9ec884500498ccc91aa869fc652c40c093bd9e1471fbcc25c022", + "sha256:5b4e7d8d6c9b2e8ee2d55c90b59c707ca59bc30058269b3db7b1f8df5763557e", + "sha256:5ddcba87675b6d509139d1b521e0c8250e967e63b5909a7e8f8944d0f90ff36f", + "sha256:618a3d6cae6ef8ec88bb76dd80b83cfe415ad4f1d942ca2a903bf6b6ff97a2da", + "sha256:635dc434ff724b178cb192c70016cc0ad25a275228f749ee0daf0eddbc8183b1", + "sha256:661d25cbffaf8cc42e971dd570d87cb29a665f49f4abe1f9e76be9a5182c4688", + "sha256:66e6a3af5a75363d2c9a48b07cb27c4ea542938b1a2e93b15a503cdfa8490795", + "sha256:67071a6171e92b6da534b8ae326505f7c18022c6f19072a81dcf40db2638767c", + "sha256:685537e07897f173abcf67258bee3c05c374fa6fff89d4c7e42fb391b0605e98", + "sha256:69e64831e22a6b377772e7fb337533c365085b31619005802a79242fee620bc1", + "sha256:6b0817e34942b2ca527b0e9298373e7cc75f429e8da2055607f4931fded23e20", + "sha256:6c81e5f372cd0dc5dc4809553d34f832f60a46034a5f187756d9b90586c2c307", + "sha256:6d7faa6f14017c0b1e69f5e2c357b998731ea75a442ab3841c0dbbbfe902d2c4", + "sha256:6ef0befbb5d79cf32d0266f5cff01545602344eda89480e1dd88aca964260b18", + "sha256:6ef687afab047554a2d366e112dd187b62d261d49eb79b77e386f94644363294", + "sha256:7223a2a5fe0d217e60a60cdae28d6949140dde9c3bcc714063c5b463065e3d66", + "sha256:77f195baa60a54ef9d2de16fbbfd3ff8b04edc0c0140a761b56c267ac11aa467", 
+ "sha256:793968759cd0d96cac1e367afd70c235867831983f876a53389ad869b043c948", + "sha256:7bd339195d84439cbe5771546fe8a4e8a7a045417d8f9de9a368c434e42a721e", + "sha256:7cd863afe7336c62ec78d7d1349a2f34c007a3cc6c2369d667c65aeec412a5b1", + "sha256:7f2facbd386dd60cbbf1a794181e6aa0bd429bd78bfdf775436020172e2a23f0", + "sha256:84ffab12db93b5f6bad84c712c92060a2d321b35c3c9960b43d08d0f639d60d7", + "sha256:8c8370641f1a7f0e0669ddccca22f1da893cef7628396431eb445d46d893e5cd", + "sha256:8db715ebe3bb7d86d77ac1826f7d67ec11a70dbd2376b7cc214199360517b641", + "sha256:8e8916ae4c720529e18afa0b879473049e95949bf97042e938530e072fde061d", + "sha256:8f03bccbd8586e9dd37219bce4d4e0d3ab492e6b3b533e973fa08a112cb2ffc9", + "sha256:8f2fc11e8fe034ee3c34d316d0ad8808f45bc3b9ce5857ff29d513f3ff2923a1", + "sha256:923d39efa3cfb7279a0327e337a7958bff00cc447fd07a25cddb0a1cc9a6d2da", + "sha256:93df1de2f7f7239dc9cc5a4a12408ee1598725036bd2dedadc14d94525192fc3", + "sha256:998e33ad22dc7ec7e030b3df701c43630b5bc0d8fbc2267653577e3fec279afa", + "sha256:99f70b740dc04d09e6b2699b675874367885217a2e9f782bdf5395632ac663b7", + "sha256:9a00312dea9310d4cb7dbd7787e722d2e86a95c2db92fbd7d0155f97127bcb40", + "sha256:9d54553c1136b50fd12cc17e5b11ad07374c316df307e4cfd6441bea5fb68496", + "sha256:9dbbeb27f4e70bfd9eec1be5477517365afe05a9b2c441a0b21929ee61048124", + "sha256:a1ce3ba137ed54f83e56fb983a5859a27d43a40188ba798993812fed73c70836", + "sha256:a34d557a42aa28bd5c48a023c570219ba2593bcbbb8dc1b98d8cf5d529ab1434", + "sha256:a5f446dd5055667aabaee78487f2b5ab72e244f9bc0b2ffebfeec79051679984", + "sha256:ad36cfb355e24f1bd37cac88c112cd7730873f20fb0bdaf8ba59eedf8216079f", + "sha256:aec493917dd45e3c69d00a8874e7cbed844efd935595ef78a0f25f14312e33c6", + "sha256:b316144e85316da2723f9d8dc75bada12fa58489a527091fa1d5a612643d1a0e", + "sha256:b34ae4636dfc4e76a438ab826a0d1eed2589ca7d9a1b2d5bb546978ac6485461", + "sha256:b34b7aa8b261c1dbf7720b5d6f01f38243e9b9daf7e6b8bc1fd4657000062f2c", + 
"sha256:bc362ee4e314870a70f4ae88772d72d877246537d9f8cb8f7eacf10884862432", + "sha256:bed88b9a458e354014d662d47e7a5baafd7ff81c780fd91584a10d6ec842cb73", + "sha256:c0013fe6b46aa496a6749c77e00a3eb07952832ad6166bd481c74bda0dcb6d58", + "sha256:c0b5dcf9193625afd8ecc92312d6ed78781c46ecbf39af9ad4681fc9f464af88", + "sha256:c4325ff0442a12113a6379af66978c3fe562f846763287ef66bdc1d57925d337", + "sha256:c463ed05f9dfb9baebef68048aed8dcdc94411e4bf3d33a39ba97e271624f8f7", + "sha256:c8362467a0fdeccd47935f22c256bec5e6abe543bf0d66e3d3d57a8fb5731863", + "sha256:cd5bf1af8efe569654bbef5a3e0a56eca45f87cfcffab31dd8dde70da5982475", + "sha256:cf1ea2e34868f6fbf070e1af291c8180480310173de0b0c43fc38a02929fc0e3", + "sha256:d62dec4976954a23d7f91f2f4530852b0c7608116c257833922a896101336c51", + "sha256:d68c93e381010662ab873fea609bf6c0f428b6d0bb00f2c6939782e0818d37bf", + "sha256:d7c36232a90d4755b720fbd76739d8891732b18cf240a9c645d75f00639a9024", + "sha256:dd18772815d5f008fa03d2b9a681ae38d5ae9f0e599f7dda233c439fcaa00d40", + "sha256:ddc2f4dfd396c7bfa18e6ce371cba60e4cf9d2e5cdb71376aa2da264605b60b9", + "sha256:e003b002ec72c8d5a3e3da2989c7d6065b47d9eaa70cd8808b5384fbb970f4ec", + "sha256:e32a92116d4f2a80b629778280103d2a510a5b3f6314ceccd6e38006b5e92dcb", + "sha256:e4461d0f003a0aa9be2bdd1b798a041f177189c1a0f7619fe8c95ad08d9a45d7", + "sha256:e541ec6f2ec456934fd279a3120f856cd0aedd209fc3852eca563f81738f6861", + "sha256:e546e768d08ad55b20b11dbb78a745151acbd938f8f00d0cfbabe8b0199b9880", + "sha256:ea7d4a99f3b38c37eac212dbd6ec42b7a5ec51e2c74b5d3223e43c811609e65f", + "sha256:ed4eb745efbff0a8e9587d22a84be94a5eb7d2d99c02dacf7bd0911713ed14dd", + "sha256:f8a2f084546cc59ea99fda8e070be2fd140c3092dc11524a71aa8f0f3d5a55ca", + "sha256:fcb25daa9219b4cf3a0ab24b0eb9a5cc8949ed4dc72acb8fa16b7e1681aa3c58", + "sha256:fdea4952db2793c4ad0bdccd27c1d8fdd1423a92f04598bc39425bcc2b8ee46e" + ], + "markers": "python_version >= '3.8'", + "version": "==0.18.0" + }, + "six": { + "hashes": [ + 
"sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "stack-data": { + "hashes": [ + "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", + "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695" + ], + "version": "==0.6.3" + }, + "termcolor": { + "hashes": [ + "sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63", + "sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a" + ], + "markers": "python_version >= '3.8'", + "version": "==2.4.0" + }, + "traitlets": { + "hashes": [ + "sha256:8cdd83c040dab7d1dee822678e5f5d100b514f7b72b01615b26fc5718916fdf9", + "sha256:fcdf85684a772ddeba87db2f398ce00b40ff550d1528c03c14dbf6a02003cd80" + ], + "markers": "python_version >= '3.8'", + "version": "==5.14.2" + }, + "typing-extensions": { + "hashes": [ + "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475", + "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb" + ], + "markers": "python_version < '3.12'", + "version": "==4.10.0" + }, + "wcwidth": { + "hashes": [ + "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", + "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5" + ], + "version": "==0.2.13" + }, + "xmltodict": { + "hashes": [ + "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56", + "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852" + ], + "markers": "python_version >= '3.4'", + "version": "==0.13.0" + } + } +} diff --git a/indexer_worker/README.md b/indexer_worker/README.md new file mode 100644 index 00000000000..cc0a3cd993b --- /dev/null +++ b/indexer_worker/README.md @@ -0,0 +1,11 @@ +# Indexer Worker + +An indexer worker serves a small Falcon API 
which can be used to perform a chunk +of a "reindexing task", whereby records from a downstream table are converted +into ES documents and uploaded to an Elasticsearch index in bulk. The data +refresh DAG orchestrates multiple indexer workers, each of which performs a +portion of the total reindexing. + +Further reading: + +- [[IP] Removal of the ingestion server](https://docs.openverse.org/projects/proposals/ingestion_server_removal/20240328-implementation_plan_ingestion_server_removal.html) diff --git a/indexer_worker/compose.yml b/indexer_worker/compose.yml new file mode 100644 index 00000000000..273ed354764 --- /dev/null +++ b/indexer_worker/compose.yml @@ -0,0 +1,22 @@ +services: + catalog_indexer_worker: + profiles: + - api + build: + context: ./ + args: # Automatically inferred from env vars, unless specified + - CATALOG_PY_VERSION + image: openverse-catalog_indexer_worker + ports: + - "50282:8003" + depends_on: + - db + - upstream_db + - es + volumes: + - .:/indexer_worker:z + env_file: + - env.docker + - .env + stdin_open: true + tty: true diff --git a/indexer_worker/env.docker b/indexer_worker/env.docker new file mode 100644 index 00000000000..6271a262274 --- /dev/null +++ b/indexer_worker/env.docker @@ -0,0 +1,8 @@ +PYTHONUNBUFFERED="0" + +ELASTICSEARCH_URL="es" + +DATABASE_HOST="db" + +UPSTREAM_DB_HOST="upstream_db" +UPSTREAM_DB_PORT="5432" diff --git a/indexer_worker/env.template b/indexer_worker/env.template new file mode 100644 index 00000000000..45f62d7e06c --- /dev/null +++ b/indexer_worker/env.template @@ -0,0 +1,27 @@ +PYTHONUNBUFFERED="0" + +#AWS_REGION="us-east-1" + +#ENVIRONMENT="local" + +#AWS_REGION="" +#AWS_ACCESS_KEY_ID="" +#AWS_SECRET_ACCESS_KEY="" + +#ELASTICSEARCH_SCHEME="http://" +#ELASTICSEARCH_URL="es" +#ELASTICSEARCH_PORT="9200" + +#DATABASE_HOST="db" +#DATABASE_PORT="5432" +#DATABASE_USER="deploy" +#DATABASE_PASSWORD="deploy" +#DATABASE_NAME="openledger" + +#UPSTREAM_DB_HOST="upstream_db" +#UPSTREAM_DB_PORT="5432" 
+#UPSTREAM_DB_USER="deploy" +#UPSTREAM_DB_PASSWORD="deploy" +#UPSTREAM_DB_NAME="openledger" + +#DB_BUFFER_SIZE="100000" diff --git a/indexer_worker/gunicorn.conf.py b/indexer_worker/gunicorn.conf.py new file mode 100644 index 00000000000..f901eadcaa5 --- /dev/null +++ b/indexer_worker/gunicorn.conf.py @@ -0,0 +1,44 @@ +capture_output = True +accesslog = "-" +errorlog = "-" +chdir = "./indexer_worker/" +timeout = 120 +reload = True +logconfig_dict = { + # NOTE: Most of this is inherited from the default configuration + # https://github.com/benoitc/gunicorn/blob/cc2e3835784542e65886cd27f64d444309fbaad0/gunicorn/glogging.py#L48-L86 + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "generic": { + "format": "[%(asctime)s - %(name)s - %(lineno)3d][%(levelname)s] %(message)s", # noqa: E501 + }, + }, + "root": {"level": "INFO", "handlers": ["console"]}, + "loggers": { + "gunicorn.error": { + "level": "DEBUG", + "handlers": ["console"], + "propagate": False, # Prevents default handler from also logging this + "qualname": "gunicorn.error", + }, + "gunicorn.access": { + "level": "INFO", + "handlers": ["console"], + "propagate": False, # Prevents default handler from also logging this + "qualname": "gunicorn.access", + }, + "": { + "level": "INFO", + "handlers": ["console"], + }, + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "generic", + "stream": "ext://sys.stdout", + }, + }, +} +loglevel = "debug" diff --git a/indexer_worker/indexer_worker/__init__.py b/indexer_worker/indexer_worker/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/indexer_worker/indexer_worker/api.py b/indexer_worker/indexer_worker/api.py new file mode 100644 index 00000000000..94952d7ed27 --- /dev/null +++ b/indexer_worker/indexer_worker/api.py @@ -0,0 +1,123 @@ +""" +A single worker responsible for indexing a subset of the records stored in the database. 
+ +Accept an HTTP request specifying a range of image IDs to reindex. +""" + +import logging as log +import uuid +from multiprocessing import Process, Value +from urllib.parse import urlparse + +import boto3 +import falcon +from decouple import config + +from indexer_worker.indexer import launch_reindex +from indexer_worker.tasks import TaskTracker + + +ec2_client = boto3.client( + "ec2", + region_name=config("AWS_REGION", default="us-east-1"), + aws_access_key_id=config("AWS_ACCESS_KEY_ID", default=None), + aws_secret_access_key=config("AWS_SECRET_ACCESS_KEY", default=None), +) + + +class HealthcheckResource: + def on_get(self, req, resp): + resp.status = falcon.HTTP_200 + + +class BaseTaskResource: + """Base class for all resources that need access to a task tracker.""" + + def __init__(self, tracker: TaskTracker, *args, **kwargs): + super().__init__(*args, **kwargs) + self.tracker = tracker + + +class IndexingJobResource(BaseTaskResource): + @staticmethod + def _get_base_url(req): + parsed = urlparse(req.url) + return parsed.scheme + "://" + parsed.netloc + + def on_get(self, _, resp): + resp.media = self.tracker.get_task_list() + + def on_post(self, req, resp): + body = req.get_media() + + task_id = uuid.uuid4().hex # no hyphens + model_name = body.get("model_name") + table_name = body.get("table_name") + target_index = body.get("target_index") + start_id = body.get("start_id") + end_id = body.get("end_id") + log.info(f"Received indexing request for records {start_id}-{end_id}") + + # Shared memory + progress = Value("d", 0.0) + finish_time = Value("d", 0.0) + + task = Process( + target=launch_reindex, + kwargs={ + "model_name": model_name, + "table_name": table_name, + "target_index": target_index, + "start_id": start_id, + "end_id": end_id, + # Task tracking arguments + "progress": progress, + "finish_time": finish_time, + }, + ) + task.start() + + # Begin tracking the task + self.tracker.add_task( + task_id, + task=task, + model=model_name, +
target_index=target_index, + progress=progress, + finish_time=finish_time, + ) + + resp.status = falcon.HTTP_202 + resp.media = { + "message": "Successfully scheduled task.", + "task_id": task_id, + "status_check": f"{self._get_base_url(req)}/task/{task_id}", + } + + +class TaskStatusResource(BaseTaskResource): + def on_get(self, _, resp, task_id): + """Handle an incoming GET request and provide information about a single task.""" + + try: + result = self.tracker.get_task_status(task_id) + resp.media = result + except KeyError: + resp.status = falcon.HTTP_404 + resp.media = {"message": f"No task found with id {task_id}."} + + +def create_api(): + """Create an instance of the Falcon API server.""" + _api = falcon.App() + + task_tracker = TaskTracker() + + _api.add_route("/healthcheck", HealthcheckResource()) + _api.add_route("/task", IndexingJobResource(task_tracker)) + _api.add_route("/task/{task_id}", TaskStatusResource(task_tracker)) + + return _api + + +api = create_api() diff --git a/indexer_worker/indexer_worker/authority.py b/indexer_worker/indexer_worker/authority.py new file mode 100644 index 00000000000..77967ca55f1 --- /dev/null +++ b/indexer_worker/indexer_worker/authority.py @@ -0,0 +1,71 @@ +""" +Authority is a ranking indicating the pedigree of an image. + +It ranges from 0 (least authoritative) to 100 (most authoritative). Some examples of +things that could impact authority are: +- The reputation of the website that posted an image +- The popularity of the uploader on a social media site in terms of number of +followers +- Whether the uploader has uploaded images that have previously been flagged for +copyright infringement. +- etc + +The authority can be set from the catalog layer through the meta_data field +or through the ingestion layer. 
As of now, we are only factoring in the +reputation of the website as a static hand-picked list based on experience +and search result quality, with the intention to add more sophisticated and +tailored measures of authority later on. + +Also note that this is just one factor in rankings, and the magnitude of the +boost can be adjusted at search-time. +""" + +from enum import Enum, auto + + +class AuthorityTypes(Enum): + CURATED = auto() + CULTURAL_INSTITUTION = auto() + SOCIAL_MEDIA = auto() + DEFAULT = auto() + + +# We want to boost curated collections where each image has been vetted for +# cultural significance. +boost = { + AuthorityTypes.CURATED: 85, + AuthorityTypes.CULTURAL_INSTITUTION: 90, + AuthorityTypes.SOCIAL_MEDIA: 75, + AuthorityTypes.DEFAULT: 80, +} + +authority_types = { + "flickr": AuthorityTypes.SOCIAL_MEDIA, + "behance": AuthorityTypes.SOCIAL_MEDIA, + "thingiverse": AuthorityTypes.SOCIAL_MEDIA, + "sketchfab": AuthorityTypes.SOCIAL_MEDIA, + "deviantart": AuthorityTypes.SOCIAL_MEDIA, + "thorvaldsensmuseum": AuthorityTypes.CULTURAL_INSTITUTION, + "svgsilh": AuthorityTypes.CULTURAL_INSTITUTION, + "smithsonian": AuthorityTypes.CULTURAL_INSTITUTION, + "rijksmuseum": AuthorityTypes.CULTURAL_INSTITUTION, + "museumsvictoria": AuthorityTypes.CULTURAL_INSTITUTION, + "met": AuthorityTypes.CULTURAL_INSTITUTION, + "mccordsmuseum": AuthorityTypes.CULTURAL_INSTITUTION, + "digitaltmuseum": AuthorityTypes.CULTURAL_INSTITUTION, + "clevelandmuseum": AuthorityTypes.CULTURAL_INSTITUTION, + "brooklynmuseum": AuthorityTypes.CULTURAL_INSTITUTION, + "stocksnap": AuthorityTypes.CURATED, + "rawpixel": AuthorityTypes.CURATED, +} + + +def get_authority_boost(source): + authority_boost = None + if source in authority_types: + authority_type = authority_types[source] + if authority_type in boost: + authority_boost = boost[authority_type] + else: + authority_boost = boost[AuthorityTypes.DEFAULT] + return authority_boost diff --git a/indexer_worker/indexer_worker/db_helpers.py 
b/indexer_worker/indexer_worker/db_helpers.py new file mode 100644 index 00000000000..6d8ea76e7f9 --- /dev/null +++ b/indexer_worker/indexer_worker/db_helpers.py @@ -0,0 +1,58 @@ +import logging as log +import time +from typing import NamedTuple + +import psycopg2 +from decouple import config + + +class DbConfig(NamedTuple): + host: str + port: int + user: str + password: str + dbname: str + + +DB_API_CONFIG = DbConfig( + host=config("DATABASE_HOST", default="localhost"), + port=config("DATABASE_PORT", default=5432, cast=int), + user=config("DATABASE_USER", default="deploy"), + password=config("DATABASE_PASSWORD", default="deploy"), + dbname=config("DATABASE_NAME", default="openledger"), +) + +DB_UPSTREAM_CONFIG = DbConfig( + host=config("UPSTREAM_DB_HOST", default="localhost"), + port=config("UPSTREAM_DB_PORT", default=5433, cast=int), + user=config("UPSTREAM_DB_USER", default="deploy"), + password=config("UPSTREAM_DB_PASSWORD", default="deploy"), + dbname=config("UPSTREAM_DB_NAME", default="openledger"), +) + + +def database_connect( + autocommit: bool = False, + dbconfig: DbConfig = DB_API_CONFIG, + timeout: int = 5, + attempt_reconnect: bool = True, +): + """ + Repeatedly try to connect to the downstream (API) database until successful + (unless otherwise specified). + """ + while True: + try: + conn = psycopg2.connect(**dbconfig._asdict(), connect_timeout=timeout) + if autocommit: + conn.set_session(autocommit=True) + except psycopg2.OperationalError as e: + if not attempt_reconnect: + return None + log.exception(e) + log.error("Reconnecting to database in 5 seconds. . .") + time.sleep(5) + continue + break + + return conn diff --git a/indexer_worker/indexer_worker/elasticsearch_models.py b/indexer_worker/indexer_worker/elasticsearch_models.py new file mode 100644 index 00000000000..40dc6366af9 --- /dev/null +++ b/indexer_worker/indexer_worker/elasticsearch_models.py @@ -0,0 +1,343 @@ +""" +Provides an ORM-like experience for accessing data in Elasticsearch. 
+ +Note that any low-level changes to the index here, such as changing the order +of fields, must be reflected in the actual schema defined in the catalog. +""" + +from enum import Enum, auto + +from elasticsearch_dsl import Document, Field, Integer + +from indexer_worker.authority import get_authority_boost + + +class RankFeature(Field): + name = "rank_feature" + + +def _verify_rank_feature(value, low, high): + """ + Rank features must be a positive non-zero float. + + Our features are scaled from 0 to 100 for fair comparison. + """ + if value is None or value == 0: + return None + ceiling = min(value, high) + floor = max(low, ceiling) + return floor + + +class SyncableDocType(Document): + """Represents tables in the source-of-truth that will be replicated to ES.""" + + # Aggregations can't be performed on the _id meta-column, which + # necessitates copying it to this column in the doc. Aggregation is + # used to find the last document inserted into Elasticsearch + id = Integer() + + @staticmethod + def database_row_to_elasticsearch_doc(row, schema): + """ + Children of this class must have a function mapping a PSQL model to an ES doc. + + :param row: A tuple representing a row in Postgres. + :param schema: A map of each field name to its position in the row. + :return: + """ + raise NotImplementedError( + "Model is missing database -> Elasticsearch translation." + ) + + +class Media(SyncableDocType): + """Represents a media object in Elasticsearch.""" + + class Index: + name = "media" + + @staticmethod + def database_row_to_elasticsearch_doc(row: tuple, schema: dict[str, int]): + """ + Map the DB row to a Python dictionary that represents a doc in the ES index. + + :param row: the database row as a tuple obtained by the psycopg2 cursor + :param schema: the mapping of database column names to the tuple index + :return: a dictionary mapping the row tuple to an ES doc + """ + + raise NotImplementedError( + "Missing database row -> Elasticsearch schema translation."
@staticmethod
def get_instance_attrs(row, schema):
    """
    Build the part of the ES document that is shared by all media types.

    :param row: the database row as a tuple obtained by the psycopg2 cursor
    :param schema: the mapping of database column names to the tuple index
    :return: the ES sub-document holding the common columns of the row
    """

    def column(name):
        # Look a column up by name through its position in the row.
        return row[schema[name]]

    metadata = column("meta_data")

    std_popularity = (
        Media.get_popularity(column("standardized_popularity"))
        if "standardized_popularity" in schema
        else None
    )
    # Extracted for compatibility with the old image schema
    media_category = column("category") if "category" in schema else None

    provider_name = column("provider")
    boost = Media.get_authority_boost(metadata, provider_name)
    record_id = column("id")

    # This matches the order of fields defined in the schema.
    return {
        "_id": record_id,
        "id": record_id,
        "created_on": column("created_on"),
        "mature": Media.get_maturity(metadata, column("mature")),
        # Keyword fields
        "identifier": column("identifier"),
        "license": column("license").lower(),
        "provider": provider_name,
        "source": column("source"),
        "category": media_category,
        # Text-based fields
        "title": column("title"),
        "description": Media.parse_description(metadata),
        "creator": column("creator"),
        # Rank feature fields; a missing value counts as 1 for min/max
        "standardized_popularity": std_popularity,
        "authority_boost": boost,
        "max_boost": max(std_popularity or 1, boost or 1),
        "min_boost": min(std_popularity or 1, boost or 1),
        # Nested fields
        "tags": Media.parse_detailed_tags(column("tags")),
        # Extra fields, not indexed
        "url": column("url"),
    }
@staticmethod
def parse_detailed_tags(json_tags):
    """
    Reduce the raw tag objects to the fields that are indexed.

    :param json_tags: the decoded ``tags`` column: an iterable of tag
        dictionaries, or a falsy value when there are no tags
    :return: a list with one ``{"name": ...}`` dictionary per named tag, which
        also carries ``accuracy`` when the tag has it
    """
    if not json_tags:
        return []
    parsed_tags = []
    for tag in json_tags:
        # Skip anything that is not a tag object with a name. Without the
        # type check a stray ``None`` raises ``TypeError`` and a stray string
        # is substring-matched by ``in`` before failing on the lookup.
        if not isinstance(tag, dict) or "name" not in tag:
            continue
        parsed_tag = {"name": tag["name"]}
        if "accuracy" in tag:
            parsed_tag["accuracy"] = tag["accuracy"]
        parsed_tags.append(parsed_tag)
    return parsed_tags
@staticmethod
def get_extension(url):
    """
    Get the lower-cased file extension from the last path segment of the URL.

    Only the path is inspected, so a query string, a fragment or the dots of
    the host name are never mistaken for an extension.

    TODO: Use the `filetype` field once the following issue is completed:
    https://github.com/WordPress/openverse-catalog/issues/536

    :param url: the URL of the image file
    :return: the extension without the dot, or ``None`` when there is none
    """
    # Imported locally so that no file-level import has to be added.
    from urllib.parse import urlsplit

    if not url:
        return None
    try:
        path = urlsplit(url).path
    except ValueError:
        # ``urlsplit`` rejects some malformed URLs; treat them as having no
        # recognisable extension.
        return None
    filename = path.rsplit("/", 1)[-1]
    _, dot, extension = filename.rpartition(".")
    if not dot or not extension:
        return None
    return extension.lower()
def elasticsearch_connect(timeout: int = 300) -> Elasticsearch | None:
    """
    Repeatedly try to connect to Elasticsearch until successful or out of time.

    Every failed attempt uses up 5 seconds of the budget, which is also the
    pause before the next attempt.

    :param timeout: the amount of time in seconds to wait for a successful connection
    :return: an Elasticsearch client, or ``None`` when no attempt succeeded
        before the budget ran out
    """
    retry_interval = 5  # seconds

    while timeout > 0:
        try:
            return _elasticsearch_connect()
        except EsConnectionError as err:
            log.exception(err)
            timeout -= retry_interval
            # Only announce and wait for a retry that will actually happen.
            if timeout > 0:
                log.error("Reconnecting to Elasticsearch in 5 seconds...")
                time.sleep(retry_interval)

    # Make giving up visible instead of silently falling off the end.
    log.error("Could not connect to Elasticsearch before the timeout expired.")
    return None
def reindex(
    model_name: str,
    table_name: str,
    target_index: str,
    start_id: int,
    end_id: int,
    progress: float,
):
    """
    Copy the rows with ids between ``start_id`` and ``end_id`` to Elasticsearch.

    The rows are read through a server-side cursor in chunks of
    ``DB_BUFFER_SIZE``, converted to documents and bulk-uploaded.

    :param model_name: the name of the ES model used to generate the ES docs
    :param table_name: the name of the PostgreSQL table from which to copy data
    :param target_index: the name of the Elasticsearch index to upload to
    :param start_id: the id of the first record to be copied
    :param end_id: the id of the last record to be copied
    :param progress: an object whose ``value`` attribute receives the
        percentage of records processed so far; may be ``None``
    """

    def per_second(count, elapsed):
        # A coarse clock can report a zero interval; avoid dividing by it.
        return count / elapsed if elapsed > 0 else float("inf")

    # Enable writing to Postgres so we can create a server-side cursor.
    pg_conn = database_connect()
    try:
        es_conn = elasticsearch_connect()

        query = get_reindex_query(model_name, table_name, start_id, end_id)

        # Number of documents we expect to index. Never below 1, so that a
        # range with ``start_id == end_id`` cannot cause a division by zero.
        num_to_index = max(end_id - start_id, 1)
        # Keep track of how many documents have been processed so far
        num_converted_documents = 0

        with pg_conn.cursor(name=f"{table_name}_indexing_cursor") as server_cur:
            server_cur.itersize = DB_BUFFER_SIZE
            server_cur.execute(query)

            # Repeatedly fetch chunks and push to Elasticsearch until we run
            # out of data.
            while True:
                # Fetch chunk of records
                dl_start_time = time.time()
                chunk = server_cur.fetchmany(server_cur.itersize)

                if not chunk:
                    log.info("No data left to process.")
                    break

                dl_rate = per_second(len(chunk), time.time() - dl_start_time)
                log.info(
                    f"PSQL indexer down: batch_size={len(chunk)}, "
                    f"downloaded_per_second={dl_rate}"
                )

                # Create a batch of Elasticsearch documents from the fetched
                # records
                es_batch = pg_chunk_to_es(
                    pg_chunk=chunk,
                    columns=server_cur.description,
                    model_name=model_name,
                    target_index=target_index,
                )

                # Bulk upload to Elasticsearch in parallel.
                log.info(f"Pushing {len(es_batch)} docs to Elasticsearch.")
                push_start_time = time.time()
                try:
                    _bulk_upload(es_conn, es_batch)
                except ValueError:
                    # NOTE(review): the failed chunk is still counted below, so
                    # progress can reach 100% despite the failure. Confirm
                    # that this is intended.
                    log.error("Failed to index chunk.")

                upload_rate = per_second(len(es_batch), time.time() - push_start_time)
                log.info(
                    f"Elasticsearch up: batch_size={len(es_batch)},"
                    f" uploaded_per_second={upload_rate}"
                )

                num_converted_documents += len(chunk)
                if progress is not None:
                    progress.value = (num_converted_documents / num_to_index) * 100

            log.info(
                f"Synchronized {num_converted_documents} from "
                f"table '{table_name}' to Elasticsearch"
            )
        pg_conn.commit()
    finally:
        # Release the connection even when a chunk fails unexpectedly.
        pg_conn.close()
def _bulk_upload(es_conn, es_batch):
    """
    Upload a batch of documents to Elasticsearch, retrying with a back-off.

    After the first failure the upload is retried up to ``max_retries`` times.
    The pause before a retry starts at 5 seconds and doubles every time.

    :param es_conn: the Elasticsearch client to upload with
    :param es_batch: the documents to upload
    :raises ValueError: when the upload still fails after the last retry
    """
    max_retries = 4
    # Initial time to wait between indexing attempts
    # Grows exponentially
    cooloff = 5
    for attempt in range(max_retries + 1):
        try:
            deque(helpers.parallel_bulk(es_conn, es_batch, chunk_size=400))
            return
        except elasticsearch.ApiError as err:
            # Something went wrong during indexing.
            if attempt >= max_retries:
                # No retry follows, so fail right away instead of announcing
                # a retry and sleeping through the longest cool-off first.
                raise ValueError("Exceeded maximum bulk index retries") from err
            log.warning(
                f"Elasticsearch rejected bulk query. We will retry in"
                f" {cooloff}s. Attempt {attempt}. Details: ",
                exc_info=True,
            )
            time.sleep(cooloff)
            cooloff *= 2
class TaskTracker:
    """Keep the details of the tasks that were added in an in-memory dictionary."""

    def __init__(self):
        # Maps a task ID to the dictionary of details stored by ``add_task``.
        self.tasks = {}

    def add_task(self, task_id: str, **kwargs):
        """
        Store information about a new task in memory.

        :param task_id: the ID of the task
        :param kwargs: the details to keep for the task; ``get_task_status``
            reads ``task``, ``model``, ``target_index``, ``finish_time`` and
            ``progress``
        """
        # An aware "now" is required here: ``utcnow().timestamp()`` would
        # interpret the naive UTC time as local time and skew the value by
        # the host's UTC offset.
        start_time = datetime.datetime.now(datetime.timezone.utc).timestamp()
        self.tasks[task_id] = {"start_time": start_time} | kwargs

    def get_task_status(self, task_id: str) -> dict:
        """
        Get the status of a single task with the given task ID.

        :param task_id: the ID of the task to get the status for
        :return: response dictionary containing all relevant info about the task
        :raises KeyError: when no task was added under ``task_id``
        """
        task_info = self.tasks[task_id]
        active = task_info["task"].is_alive()
        model = task_info["model"]
        target_index = task_info["target_index"]
        start_time = task_info["start_time"]
        finish_time = task_info["finish_time"].value
        progress = task_info["progress"].value

        return {
            "task_id": task_id,
            "active": active,
            "model": model,
            "target_index": target_index,
            "progress": progress,
            "start_time": _time_fmt(start_time),
            "finish_time": _time_fmt(finish_time),
            # The task is considered to have errored if the task is no longer alive,
            # but progress did not reach 100%. This can happen if an individual chunk
            # of records fails to upload to ES.
            "error": progress < 100 and not active,
        }

    def get_task_list(self):
        """Get the status of every task that was added."""
        return [self.get_task_status(task_id) for task_id in self.tasks]
+ +######## +# cURL # +######## + +# Make a cURL POST request to the service with the given data +curl-post data host="localhost:50281": + STATUS_CODE=$(curl \ + -X POST \ + -H 'Content-Type: application/json' \ + -d '{{ data }}' \ + -o /dev/stderr \ + -w "%{http_code}" \ + 'http://{{ host }}/task'); \ + if [ $STATUS_CODE -lt 200 ] || [ $STATUS_CODE -ge 300 ]; then \ + echo "Status: $STATUS_CODE"; \ + exit 1; \ + fi + +######### +# Tests # +######### + +# Run indexer-worker tests locally +test-local *args: + pipenv run pytest {{ args }} diff --git a/indexer_worker/pytest.ini b/indexer_worker/pytest.ini new file mode 100644 index 00000000000..ce6e0538edf --- /dev/null +++ b/indexer_worker/pytest.ini @@ -0,0 +1,9 @@ +[pytest] +pythonpath = . + +filterwarnings= +# Ignore warnings related to unverified HTTPS requests. +# Reason: This warning is suppressed to avoid raising warnings when making HTTP requests +# to servers with invalid or self-signed SSL certificates. It allows the tests to proceed +# without being interrupted by these warnings. 
class TestAudio:
    """Check the fields that the ``Audio`` model derives from a database row."""

    @staticmethod
    def test_extension():
        # Without alternative files the extension is the file type itself.
        without_alt_files = create_mock_audio({"alt_files": None})
        assert without_alt_files.extension == "mp3"
        # The default mock row has one alternative file of type m4a.
        with_alt_files = create_mock_audio()
        assert with_alt_files.extension == ["m4a"]
def create_mock_audio(override=None):
    """
    Build an ``Audio`` document from a made-up database row.

    Every column has a default value; pass a dict to replace or add columns.
    For example, to make an audio with a custom title and default everything
    else:
    >>> create_mock_audio({'title': 'My title'})

    :param override: column names mapped to the values that replace the defaults
    :return: the result of ``Audio.database_row_to_elasticsearch_doc``
    """

    popularity_metrics = {"downloads": 3}
    columns = {
        "id": 0,
        "title": "Torneira de água",
        "identifier": str(uuid4()),
        "creator": "MartaSarmento",
        "creator_url": "https://freesound.org/people/MartaSarmento",
        "tags": [{"name": "test", "accuracy": 0.9}],
        "created_on": datetime.datetime.now(),
        "foreign_landing_url": "https://freesound.org/people/MartaSarmento/sounds/509257",
        "url": "https://cdn.freesound.org/previews/509/509257_11129467-hq.mp3",
        "genres": ["genre1", "genre2"],
        "category": "test_category",
        "duration": 8544,
        "bit_rate": 128000,
        "sample_rate": None,
        "filesize": 168919,
        "filetype": "mp3",
        "provider": "test",
        "source": "test",
        "license": "cc0",
        "license_version": "1.0",
        "mature": False,
        "meta_data": {
            "popularity_metrics": popularity_metrics,
            "license_url": "http://creativecommons.org/publicdomain/zero/1.0/",
        },
        "alt_files": [
            {
                "url": "https://freesound.org/apiv2/sounds/509257/download/",
                "bit_rate": "70000",
                "filesize": "75591",
                "filetype": "m4a",
                "sample_rate": "44100",
            }
        ],
    }
    if override:
        columns.update(override)
    # The row is positional; the schema records the position of each column.
    schema = {name: position for position, name in enumerate(columns)}
    row = list(columns.values())
    return Audio.database_row_to_elasticsearch_doc(row, schema)
indexer_worker && just cd frontend && just cd automations/python && just cd automations/js && just @@ -144,6 +145,7 @@ lint-codeowners checks="stable": # First-party services ([ ! -f catalog/.env ] && cp catalog/env.template catalog/.env) || true ([ ! -f ingestion_server/.env ] && cp ingestion_server/env.template ingestion_server/.env) || true + ([ ! -f indexer_worker/.env ] && cp indexer_worker/env.template indexer_worker/.env) || true ([ ! -f api/.env ] && cp api/env.template api/.env) || true ########## diff --git a/pyproject.toml b/pyproject.toml index 17da4d086b1..65b336bf2db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ known-first-party = [ "conf", "api", "ingestion_server", + "indexer_worker", "common", "data_refresh", "database",