Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Commit

Permalink
Merge pull request #137 from WordPress/ingestion_docker
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvkb authored Jul 10, 2021
2 parents be6df89 + df29349 commit 37c15e0
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 16 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,7 @@ ccbot/crawl_plan.yml
.DS_Store
*.iml
.idea

# Ingestion server
ingestion_server/ingestion_server/db
ingestion_server/ingestion_server/lock
8 changes: 6 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ services:
- es
- indexer-worker
volumes:
- ./ingestion_server:/ingestion-server
- ./ingestion_server:/ingestion_server
environment:
PYTHONUNBUFFERED: "0"
ELASTICSEARCH_URL: 'es'
Expand All @@ -114,6 +114,8 @@ services:
DB_BUFFER_SIZE: '100000'
COPY_TABLES: 'image'
SYNCER_POLL_INTERVAL: '60'
LOCK_PATH: /worker_state/lock
SHELF_PATH: /worker_state/db
stdin_open: true
tty: true

Expand All @@ -127,7 +129,7 @@ services:
- db
- es
volumes:
- ./ingestion_server:/ingestion-server
- ./ingestion_server:/ingestion_server
environment:
PYTHONUNBUFFERED: "0"
ELASTICSEARCH_URL: 'es'
Expand All @@ -142,6 +144,8 @@ services:
DB_BUFFER_SIZE: '100000'
COPY_TABLES: 'image'
SYNCER_POLL_INTERVAL: '60'
LOCK_PATH: /worker_state/lock
SHELF_PATH: /worker_state/db
stdin_open: true
tty: true

Expand Down
22 changes: 15 additions & 7 deletions ingestion_server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,35 @@ FROM python:3.9

# Force unbuffered stdout/stderr so Python output shows up in the container
# logs immediately. The interpreter reads PYTHONUNBUFFERED; "PYTHONBUFFERED"
# is not a variable Python recognises, so the previous spelling had no effect.
ENV PYTHONUNBUFFERED=1

RUN groupadd --system supervisord && useradd --system --gid supervisord supervisord
RUN groupadd --system supervisord \
&& useradd --system --gid supervisord supervisord

RUN apt-get update \
&& apt-get install -y supervisor \
&& mkdir -p /var/log/supervisord/ \
&& chown -R supervisord:supervisord /var/log/supervisord
&& chown -R supervisord:supervisord /var/log/supervisord \
&& mkdir /ingestion_server \
&& mkdir /worker_state \
&& chown -R supervisord:supervisord /worker_state

ENV PYTHONPATH=$PYTHONPATH:/ingestion_server/

WORKDIR /ingestion_server

# Install Python dependency management tools.
# pip is upgraded first so the following installs run with the new pip;
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --upgrade setuptools \
    && pip install --no-cache-dir --upgrade pipenv

# Copy all files into the container
COPY . /ingestion_server/
WORKDIR /ingestion_server
RUN chown -R supervisord:supervisord /ingestion_server
ENV PYTHONPATH=$PYTHONPATH:/ingestion_server/
# Copy the Pipenv files into the container
COPY Pipfile /ingestion_server/
COPY Pipfile.lock /ingestion_server/

# Install the dependencies system-wide
# TODO: Use build args to avoid installing dev dependencies in production
RUN pipenv install --deploy --system --dev

USER supervisord
EXPOSE 8001

CMD ["supervisord", "-c", "/ingestion_server/config/supervisord.conf"]
18 changes: 14 additions & 4 deletions ingestion_server/Dockerfile-worker
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,26 @@ FROM python:3.9

# Force unbuffered stdout/stderr so Python output shows up in the container
# logs immediately. The interpreter reads PYTHONUNBUFFERED; "PYTHONBUFFERED"
# is not a variable Python recognises, so the previous spelling had no effect.
ENV PYTHONUNBUFFERED=1

RUN mkdir /ingestion_server \
&& mkdir /worker_state

ENV PYTHONPATH=$PYTHONPATH:/ingestion_server/

WORKDIR /ingestion_server

# Install Python dependency management tools.
# pip is upgraded first so the following installs run with the new pip;
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --upgrade setuptools \
    && pip install --no-cache-dir --upgrade pipenv

# Copy all files into the container
COPY . /ingestion_server/
WORKDIR /ingestion_server
ENV PYTHONPATH=$PYTHONPATH:/ingestion_server/
# Copy the Pipenv files into the container
COPY Pipfile /ingestion_server/
COPY Pipfile.lock /ingestion_server/

# Install the dependencies system-wide
# TODO: Use build args to avoid installing dev dependencies in production
RUN pipenv install --deploy --system --dev

EXPOSE 8002

# Exec (JSON-array) form: gunicorn runs as PID 1 and receives the container's
# stop signal directly instead of being wrapped in `/bin/sh -c`.
# `--chdir` is resolved relative to the WORKDIR (/ingestion_server).
CMD ["gunicorn", "indexer_worker:api", "-b", "0.0.0.0:8002", "--reload", "--access-logfile", "-", "--error-logfile", "-", "--chdir", "./ingestion_server/"]
11 changes: 8 additions & 3 deletions ingestion_server/ingestion_server/state.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import shelve
import datetime
import enum
Expand All @@ -14,6 +15,10 @@
"""


# Path handed to FileLock to serialise access to the shelf; read from the
# LOCK_PATH environment variable, falling back to 'lock' when it is unset.
lock_path = os.getenv('LOCK_PATH', 'lock')
# Path handed to shelve.open for the persisted state; read from the
# SHELF_PATH environment variable, falling back to 'db' when it is unset.
shelf_path = os.getenv('SHELF_PATH', 'db')


class WorkerStatus(enum.Enum):
RUNNING = 0
FINISHED = 1
Expand All @@ -30,7 +35,7 @@ def register_indexing_job(worker_ips, target_index):
promoted to production after indexing is complete
:return: Return True if scheduling succeeds
"""
with FileLock('lock'), shelve.open('db', writeback=True) as db:
with FileLock(lock_path), shelve.open(shelf_path, writeback=True) as db:
# Wipe last job out if it has finished.
indexing_in_progress = False
if 'worker_statuses' in db:
Expand Down Expand Up @@ -60,7 +65,7 @@ def worker_finished(worker_ip):
:param worker_ip: The private IP of the worker.
:return: The target index if all workers are finished, else False.
"""
with FileLock('lock'), shelve.open('db', writeback=True) as db:
with FileLock(lock_path), shelve.open(shelf_path, writeback=True) as db:
try:
_ = db['worker_statuses'][worker_ip]
db['worker_statuses'][worker_ip] = WorkerStatus.FINISHED
Expand All @@ -81,7 +86,7 @@ def clear_state():
"""
Forget about all running index jobs. Use with care.
"""
with FileLock('lock'), shelve.open('db', writeback=True) as db:
with FileLock(lock_path), shelve.open(shelf_path, writeback=True) as db:
for key in db:
log.info('Deleting ' + str(db[key]))
del db[key]
Expand Down

0 comments on commit 37c15e0

Please sign in to comment.