-
Notifications
You must be signed in to change notification settings - Fork 212
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Set up basic docker container that can be reached from airflow * Get reindexing task working * Add task tracking, clean up * Add readme, cleanup unused variables * Clean up * Remove reference to ingestion server * Move indexer-worker out of catalog for easier testing & management * Copy in relevant test files * Move indexer worker out of catalog module, add tests * Remove integration tests for now, clean up unit tests * Add indexer worker to catalog codeowners * Fix bad merge, remove log statements * Move utility function
- Loading branch information
Showing
30 changed files
with
2,425 additions
and
2 deletions.
There are no files selected for viewing
Validating CODEOWNERS rules …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import pytest | ||
|
||
from legacy_data_refresh.reporting import ( | ||
report_record_difference, | ||
report_status, | ||
) | ||
|
||
|
||
@pytest.mark.parametrize(
    "before, after, expected_in_message",
    [
        # Every source grows
        pytest.param(
            {"src1": 1, "src2": 19},
            {"src1": 2, "src2": 38},
            ["20 → 40", "+20 (+100.000000%", "`src1`:+1", "`src2`:+19"],
        ),
        # Existing sources grow and a new source appears
        pytest.param(
            {"src1": 1, "src2": 19},
            {"src1": 3, "src2": 57, "src3": 20},
            [
                "20 → 80",
                "+60 (+300.000000%",
                "`src1`:+2",
                "`src2`:+38",
                "`src3`:+20",
            ],
        ),
        # A source disappears
        pytest.param(
            {"src1": 4, "src2": 21},
            {"src1": 4},
            # Unchanged source count shouldn't show up
            ["25 → 4", "-21 (-84.000000%", "`src2`:-21"],
        ),
        # Counts above 999 are rendered with thousands separators
        pytest.param(
            {"src1": 4000, "src2": 20},
            {"src1": 2000, "src2": 10},
            [
                "4,020 → 2,010",
                "-2,010 (-50.000000%",
                "`src1`:-2,000",
                "`src2`:-10",
            ],
        ),
        # Nothing before
        pytest.param(
            {},
            {"src1": 10, "src2": 10},
            ["0 → 20", "+20 (+inf%", "`src1`:+10", "`src2`:+10"],
        ),
        # Nothing after
        pytest.param(
            {"src1": 10, "src2": 10},
            {},
            ["20 → 0", "-20 (-100.000000%", "`src1`:-10", "`src2`:-10"],
        ),
        # Very large counts with a fractional percentage change
        pytest.param(
            {"src1": 5000000000},
            {"src1": 4938271605},
            ["5,000,000,000 → 4,938,271,605", "-61,728,395 (-1.234568%"],
        ),
        # Identical counts
        pytest.param(
            {"src1": 4},
            {"src1": 4},
            ["Sources not listed had no change in count"],
        ),
        # Both sides empty
        pytest.param(
            {},
            {},
            ["Both indices missing? No breakdown to show"],
        ),
    ],
)
def test_record_reporting(before, after, expected_in_message):
    """Check that the record-difference report contains every expected fragment.

    ``before`` and ``after`` map a source name to its record count;
    ``expected_in_message`` lists substrings that must each appear in the
    value returned by ``report_record_difference``.
    """
    message = report_record_difference(before, after, "media", "dag_id")
    # Guard against a bare string being passed as the expectation: iterating
    # over it would compare single characters and could pass by accident.
    assert isinstance(expected_in_message, list), (
        "Value for 'expected_in_message' should be a list, "
        "a string may give a false positive"
    )
    for fragment in expected_in_message:
        assert fragment in message
|
||
|
||
def test_report_status():
    """The status report is the message prefixed with the backticked media type."""
    expected = "`image`: This is my message"
    assert report_status("image", "This is my message", "sample_dag_id") == expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Exclude everything and only include certain files | ||
* | ||
!indexer_worker | ||
!gunicorn* | ||
!Pipfile* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# syntax=docker/dockerfile:1 | ||
|
||
# Automatically build image using Python version specified in the `Pipfile`. | ||
ARG CATALOG_PY_VERSION | ||
|
||
################## | ||
# Python builder # | ||
################## | ||
|
||
# Build stage: creates the `/venv` virtualenv and installs the Python
# dependencies listed in the Pipfile into it.
FROM docker.io/python:${CATALOG_PY_VERSION} AS builder

# Container optimizations
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1
ENV PIP_NO_COLOR=1

# Activate the virtualenv
ENV PATH="/venv/bin:$PATH"

# - Install system packages needed for building Python dependencies
#   (recommended-only packages are skipped to keep the layer small)
# - Create a virtualenv inside `/venv`
# - Install Pipenv to install Python dependencies
RUN apt-get update \
  && apt-get install -y --no-install-recommends python3-dev \
  && rm -rf /var/lib/apt/lists/* \
  && python -m venv /venv \
  && pip install --upgrade pipenv

# Copy the Pipenv files into the container
COPY Pipfile Pipfile.lock ./

# Install Python dependencies system-wide (uses the active virtualenv)
RUN pipenv install --system --deploy --dev
|
||
#################### | ||
# Indexer worker # | ||
#################### | ||
|
||
# Runtime stage: slim image that receives the prebuilt virtualenv from the
# builder stage and serves the indexer worker app with Gunicorn.
FROM docker.io/python:${CATALOG_PY_VERSION}-slim AS ing

LABEL org.opencontainers.image.source="https://github.com/WordPress/openverse"

# Container optimizations
ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1
ENV PIP_NO_COLOR=1

# Activate the virtualenv
ENV PATH="/venv/bin:$PATH"

ENV PYTHONPATH="$PYTHONPATH:/indexer_worker/"
# TLDEXTRACT fails to cache in /home/supervisord, set its cache to /tmp instead
ENV TLDEXTRACT_CACHE="/tmp/python-tldextract"

WORKDIR /indexer_worker

# Copy virtualenv from the builder image
COPY --from=builder /venv /venv

# - Install system packages needed for running Python dependencies
#   (recommended-only packages are skipped to keep the image small)
#   - libpq-dev: required by `psycopg2`
# - Create directory for holding worker state
RUN apt-get update \
  && apt-get install -y --no-install-recommends curl libpq-dev \
  && rm -rf /var/lib/apt/lists/* \
  && mkdir /worker_state

# Create a non-root user that owns the worker state directory, then drop
# privileges for every following instruction and at runtime
RUN useradd ingestionu \
  && chown ingestionu /worker_state
USER ingestionu

# Copy code into the final image
COPY --chown=ingestionu . /indexer_worker/

# Expose Gunicorn server to indexer worker Falcon app
EXPOSE 8003

# Build-time version, surfaced to the container as SENTRY_RELEASE
ARG SEMANTIC_VERSION
ENV SENTRY_RELEASE=$SEMANTIC_VERSION

CMD ["gunicorn", "--bind", "0.0.0.0:8003", "api:api"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
[[source]] | ||
name = "pypi" | ||
url = "https://pypi.org/simple" | ||
verify_ssl = true | ||
|
||
[dev-packages] | ||
ipython = "~=8.16" | ||
pytest = "~=7.4" | ||
pytest-order = "~=1.1" | ||
pytest-sugar = "~=0.9" | ||
remote-pdb = "~=2.1" | ||
pook = "~=1.0" | ||
|
||
[packages] | ||
aws-requests-auth = "~=0.4" | ||
boto3 = "~=1.28" | ||
bottle = "~=0.12" | ||
elasticsearch = "==8.13.0" | ||
elasticsearch-dsl = "~=8.9" | ||
falcon = "~=3.1" | ||
filelock = "~=3.13" | ||
gunicorn = "~=22.0" | ||
jsonschema = "~=4.19" | ||
psycopg2 = "~=2.9" | ||
python-decouple = "~=3.8" | ||
PyYAML = "~=6.0" | ||
tldextract = "~=5.0" | ||
sentry-sdk = {extras = ["falcon"], version = "*"} | ||
|
||
[requires] | ||
python_version = "3.11" |
Oops, something went wrong.