Skip to content

Commit

Permalink
Feature/pdct 1538 bulk import make async (#231)
Browse files Browse the repository at this point in the history
* Add navigator-notify dependency and update python

* Fix upgrade to python 3.10 issues

* Add notification service

* Make integration tests use the aws s3 mock

* Rename

* Fix test and change bcrypt version

* Fix logging

* Save local bulk import results to a dir that is ignored by git

* Bump python in Dockerfile

* Remove print
  • Loading branch information
annaCPR authored Oct 14, 2024
1 parent 8f410be commit b08b68d
Show file tree
Hide file tree
Showing 13 changed files with 226 additions and 55 deletions.
7 changes: 6 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,9 @@ SECRET_KEY=secret_test_key
API_HOST=http://navigator-admin-backend:8888

# AWS
INGEST_JSON_BUCKET=test_bucket
BULK_IMPORT_BUCKET=skip
AWS_ACCESS_KEY_ID=test
_AWS_SECRET_ACCESS_KEY=test

# Slack Notifications
SLACK_WEBHOOK_URL=skip
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
bulk_import_results/

# Translations
*.mo
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim

WORKDIR /usr/src
ENV PYTHONPATH=/usr/src
Expand Down
2 changes: 1 addition & 1 deletion app/api/api_v1/routers/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ async def ingest(
:param UploadFile new_data: file containing json representation of data to ingest.
:return Json: json representation of the data to ingest.
"""
_LOGGER.info(f"Starting bulk import for corpus: {corpus_import_id}")
_LOGGER.info(f"Received bulk import request for corpus: {corpus_import_id}")

try:
content = await new_data.read()
Expand Down
9 changes: 6 additions & 3 deletions app/clients/aws/s3bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,16 @@ def upload_ingest_json_to_s3(
:param str corpus_import_id: The import_id of the corpus the ingest data belongs to.
:param dict[str, Any] json_data: The ingest json data to be uploaded to S3.
"""
ingest_upload_bucket = os.environ["INGEST_JSON_BUCKET"]
_LOGGER.info(os.getenv("BULK_IMPORT_BUCKET", "Not there"))
ingest_upload_bucket = os.environ["BULK_IMPORT_BUCKET"]
current_timestamp = datetime.now().strftime("%m-%d-%YT%H:%M:%S")

filename = f"{ingest_id}-{corpus_import_id}-{current_timestamp}.json"

if ingest_upload_bucket == "skip":
with open(filename, "w") as f:
json.dump(data, f, indent=4)
os.makedirs("bulk_import_results", exist_ok=True)
with open(f"bulk_import_results/{filename}", "w") as file:
json.dump(data, file, indent=4)
return

s3_client = get_s3_client()
Expand Down
15 changes: 11 additions & 4 deletions app/service/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import app.repository.family as family_repository
import app.service.corpus as corpus
import app.service.geography as geography
import app.service.notification as notification_service
import app.service.validation as validation
from app.clients.aws.s3bucket import upload_ingest_json_to_s3
from app.model.ingest import (
Expand Down Expand Up @@ -224,6 +225,10 @@ def import_data(data: dict[str, Any], corpus_import_id: str) -> None:
:raises RepositoryError: raised on a database error.
:raises ValidationError: raised should the data be invalid.
"""
notification_service.send_notification(
f"🚀 Bulk import for corpus: {corpus_import_id} has started."
)

ingest_uuid = uuid4()
upload_ingest_json_to_s3(f"{ingest_uuid}-request", corpus_import_id, data)

Expand Down Expand Up @@ -254,16 +259,18 @@ def import_data(data: dict[str, Any], corpus_import_id: str) -> None:
_LOGGER.info("Saving events")
result["events"] = save_events(event_data, corpus_import_id, db)

_LOGGER.info(
f"Bulk import for corpus: {corpus_import_id} successfully completed"
)

upload_ingest_json_to_s3(f"{ingest_uuid}-result", corpus_import_id, result)

notification_service.send_notification(
f"🎉 Bulk import for corpus: {corpus_import_id} successfully completed."
)
except Exception as e:
_LOGGER.error(
f"Rolling back transaction due to the following error: {e}", exc_info=True
)
db.rollback()
notification_service.send_notification(
f"💥 Bulk import for corpus: {corpus_import_id} has failed."
)
finally:
db.commit()
16 changes: 16 additions & 0 deletions app/service/notification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import logging
import os

from notify.slack import slack_message

_LOGGER = logging.getLogger(__name__)
_LOGGER.setLevel(logging.DEBUG)


def send_notification(notification: str):
try:
_LOGGER.info(notification)
if os.environ["SLACK_WEBHOOK_URL"] != "skip":
slack_message(notification)
except Exception as e:
_LOGGER.error(f"Error sending notification caused by: {e}")
120 changes: 91 additions & 29 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
[tool.poetry]
name = "admin_backend"
version = "2.17.0"
version = "2.17.1"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]

[tool.poetry.dependencies]
python = "^3.9"
python = "^3.10"
fastapi = "^0.103.0"
fastapi-health = "^0.4.0"
fastapi-pagination = "^0.12.9"
Expand All @@ -30,9 +30,11 @@ moto = "^4.2.2"
types-sqlalchemy = "^1.4.53.38"
urllib3 = "^1.26.17"
db-client = { git = "https://github.com/climatepolicyradar/navigator-db-client.git", tag = "v3.8.18" }
navigator-notify = { git = "https://github.com/climatepolicyradar/navigator-notify.git", tag = "v0.0.2-beta" }
bcrypt = "4.0.1"

[tool.poetry.dev-dependencies]
pre-commit = "^2.17.0"
pre-commit = "^3.8.0"
python-dotenv = "^0.19.2"
pytest = "^8.3.2"
pytest-dotenv = "^0.5.2"
Expand Down Expand Up @@ -67,5 +69,5 @@ exclude = "^/alembic/versions/"
[tool.pyright]
include = ["app", "tests"]
exclude = ["**/__pycache__"]
pythonVersion = "3.9"
pythonVersion = "3.10"
venv = "admin-backend"
Loading

0 comments on commit b08b68d

Please sign in to comment.