From bb41dfa1cac8fdb245c247607ec9599fdea8edee Mon Sep 17 00:00:00 2001 From: Eric Nguyen Date: Sat, 3 Feb 2024 22:55:22 +0100 Subject: [PATCH 1/6] feat: add scheduled project cleanup task --- backend/editor/api.py | 41 +++++------ backend/editor/controllers/node_controller.py | 9 +++ .../editor/controllers/project_controller.py | 29 +++++++- backend/editor/entries.py | 18 +---- backend/editor/github_functions.py | 14 ++++ backend/editor/graph_db.py | 16 ++--- backend/editor/scheduler.py | 39 +++++++++++ backend/poetry.lock | 69 ++++++++++++++++++- backend/pyproject.toml | 1 + 9 files changed, 185 insertions(+), 51 deletions(-) create mode 100644 backend/editor/controllers/node_controller.py create mode 100644 backend/editor/scheduler.py diff --git a/backend/editor/api.py b/backend/editor/api.py index f803b27e..54fe2e62 100644 --- a/backend/editor/api.py +++ b/backend/editor/api.py @@ -1,6 +1,7 @@ """ Taxonomy Editor Backend API """ +import contextlib import logging # Required imports @@ -25,11 +26,13 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse, JSONResponse +from .scheduler import scheduler_lifespan + # DB helper imports from . 
import graph_db # Controller imports -from .controllers.project_controller import edit_project +from .controllers import project_controller from .entries import TaxonomyGraph # Custom exceptions @@ -49,7 +52,16 @@ log = logging.getLogger(__name__) -app = FastAPI(title="Open Food Facts Taxonomy Editor API") + +# Setup FastAPI app lifespan +@contextlib.asynccontextmanager +async def app_lifespan(app: FastAPI): + async with graph_db.database_lifespan(): + with scheduler_lifespan(): + yield + + +app = FastAPI(title="Open Food Facts Taxonomy Editor API", lifespan=app_lifespan) # Allow anyone to call the API from their own apps app.add_middleware( @@ -69,22 +81,6 @@ ) -@app.on_event("startup") -async def startup(): - """ - Initialize database - """ - graph_db.initialize_db() - - -@app.on_event("shutdown") -async def shutdown(): - """ - Shutdown database - """ - await graph_db.shutdown_db() - - @app.middleware("http") async def initialize_neo4j_transactions(request: Request, call_next): async with graph_db.TransactionCtx(): @@ -167,7 +163,7 @@ async def set_project_status( Set the status of a Taxonomy Editor project """ taxonomy = TaxonomyGraph(branch, taxonomy_name) - result = await edit_project(taxonomy.project_name, ProjectEdit(status=status)) + result = await project_controller.edit_project(taxonomy.project_name, ProjectEdit(status=status)) return result @@ -505,11 +501,10 @@ async def delete_node(request: Request, branch: str, taxonomy_name: str): await taxonomy.delete_node(taxonomy.get_label(id), id) -@app.delete("/{taxonomy_name}/{branch}/delete") -async def delete_project(response: Response, branch: str, taxonomy_name: str): +@app.delete("/{taxonomy_name}/{branch}", status_code=status.HTTP_204_NO_CONTENT) +async def delete_project(branch: str, taxonomy_name: str): """ Delete a project """ taxonomy = TaxonomyGraph(branch, taxonomy_name) - result_data = await taxonomy.delete_taxonomy_project(branch, taxonomy_name) - return {"message": "Deleted {} 
projects".format(result_data)} + await project_controller.delete_project(taxonomy.project_name) diff --git a/backend/editor/controllers/node_controller.py b/backend/editor/controllers/node_controller.py new file mode 100644 index 00000000..f6e57647 --- /dev/null +++ b/backend/editor/controllers/node_controller.py @@ -0,0 +1,9 @@ +from ..graph_db import get_current_transaction + + +async def delete_project_nodes(project_id: str): + query = f""" + MATCH (n:{project_id}) + DETACH DELETE n + """ + await get_current_transaction().run(query) diff --git a/backend/editor/controllers/project_controller.py b/backend/editor/controllers/project_controller.py index b4427c7d..7020f5c3 100644 --- a/backend/editor/controllers/project_controller.py +++ b/backend/editor/controllers/project_controller.py @@ -1,5 +1,6 @@ +from .node_controller import delete_project_nodes from ..graph_db import get_current_transaction -from ..models.project_models import Project, ProjectCreate, ProjectEdit +from ..models.project_models import Project, ProjectCreate, ProjectEdit, ProjectStatus async def get_project(project_id: str) -> Project: @@ -15,6 +16,19 @@ async def get_project(project_id: str) -> Project: return Project(**(await result.single())["p"]) +async def get_projects_by_status(status: ProjectStatus) -> list[Project]: + """ + Get projects by status + """ + query = """ + MATCH (p:PROJECT {status: $status}) + RETURN p + """ + params = {"status": status} + result = await get_current_transaction().run(query, params) + return [Project(**record["p"]) async for record in result] + + async def create_project(project: ProjectCreate): """ Create project @@ -39,3 +53,16 @@ async def edit_project(project_id: str, project_edit: ProjectEdit): "project_edit": project_edit.model_dump(exclude_unset=True), } await get_current_transaction().run(query, params) + + +async def delete_project(project_id: str): + """ + Delete project, its nodes and relationships + """ + query = """ + MATCH (p:PROJECT {id: 
$project_id}) + DETACH DELETE p + """ + params = {"project_id": project_id} + await get_current_transaction().run(query, params) + await delete_project_nodes(project_id) diff --git a/backend/editor/entries.py b/backend/editor/entries.py index 71212f66..ab3fa5d3 100644 --- a/backend/editor/entries.py +++ b/backend/editor/entries.py @@ -685,20 +685,4 @@ async def full_text_search(self, text): """ _result = await get_current_transaction().run(query, params) result = [record["node"] for record in await _result.data()] - return result - - async def delete_taxonomy_project(self, branch, taxonomy_name): - """ - Delete taxonomy projects - """ - - delete_query = """ - MATCH (n:PROJECT {taxonomy_name: $taxonomy_name, branch_name: $branch_name}) - DELETE n - """ - result = await get_current_transaction().run( - delete_query, taxonomy_name=taxonomy_name, branch_name=branch - ) - summary = await result.consume() - count = summary.counters.nodes_deleted - return count + return result \ No newline at end of file diff --git a/backend/editor/github_functions.py b/backend/editor/github_functions.py index dfbd3f1b..7f0e85db 100644 --- a/backend/editor/github_functions.py +++ b/backend/editor/github_functions.py @@ -135,3 +135,17 @@ async def create_pr(self, description) -> PullRequest: *self.repo_info, title=title, body=body, head=self.branch_name, base="main" ) ).parsed_data + + async def is_pr_merged(self, pr_number: int) -> bool: + """ + Check if a pull request is merged + """ + try: + await self.connection.rest.pulls.async_check_if_merged( + *self.repo_info, pull_number=pr_number + ) + return True + except RequestFailed as e: + if e.response.status_code == 404: + return False + raise e diff --git a/backend/editor/graph_db.py b/backend/editor/graph_db.py index 4b06457c..bc128e38 100644 --- a/backend/editor/graph_db.py +++ b/backend/editor/graph_db.py @@ -41,20 +41,18 @@ async def TransactionCtx(): session.set(None) -def initialize_db(): +@contextlib.asynccontextmanager +async def 
database_lifespan(): """ - Initialize Neo4J database + Context manager for Neo4J database """ global driver uri = settings.uri driver = neo4j.AsyncGraphDatabase.driver(uri) - - -async def shutdown_db(): - """ - Close session and driver of Neo4J database - """ - await driver.close() + try: + yield + finally: + await driver.close() def get_current_transaction(): diff --git a/backend/editor/scheduler.py b/backend/editor/scheduler.py new file mode 100644 index 00000000..9af82906 --- /dev/null +++ b/backend/editor/scheduler.py @@ -0,0 +1,39 @@ +import contextlib +import logging + +from apscheduler.schedulers.asyncio import AsyncIOScheduler + +from .graph_db import TransactionCtx + + +from .github_functions import GithubOperations + +from .models.project_models import ProjectStatus + +from .controllers.project_controller import get_projects_by_status, delete_project + +log = logging.getLogger(__name__) + + +async def delete_merged_projects(): + async with TransactionCtx(): + exported_projects = await get_projects_by_status(ProjectStatus.EXPORTED) + for project in exported_projects: + pr_number = project.github_pr_url and project.github_pr_url.split("/")[-1] + if not pr_number: + log.warning(f"PR number not found for project {project.id}") + continue + github_object = GithubOperations(project.taxonomy_name, project.branch_name) + if await github_object.is_pr_merged(int(pr_number)): + await delete_project(project.id) + + +@contextlib.contextmanager +def scheduler_lifespan(): + scheduler = AsyncIOScheduler() + try: + scheduler.add_job(delete_merged_projects, "interval", hours=24) + scheduler.start() + yield + finally: + scheduler.shutdown() diff --git a/backend/poetry.lock b/backend/poetry.lock index 15c49a07..09e5b1ce 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -31,6 +31,34 @@ doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd- test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil 
(>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] trio = ["trio (<0.22)"] +[[package]] +name = "apscheduler" +version = "3.10.4" +description = "In-process task scheduler with Cron-like capabilities" +optional = false +python-versions = ">=3.6" +files = [ + {file = "APScheduler-3.10.4-py3-none-any.whl", hash = "sha256:fb91e8a768632a4756a585f79ec834e0e27aad5860bac7eaa523d9ccefd87661"}, + {file = "APScheduler-3.10.4.tar.gz", hash = "sha256:e6df071b27d9be898e486bc7940a7be50b4af2e9da7c08f0744a96d4bd4cef4a"}, +] + +[package.dependencies] +pytz = "*" +six = ">=1.4.0" +tzlocal = ">=2.0,<3.dev0 || >=4.dev0" + +[package.extras] +doc = ["sphinx", "sphinx-rtd-theme"] +gevent = ["gevent"] +mongodb = ["pymongo (>=3.0)"] +redis = ["redis (>=3.0)"] +rethinkdb = ["rethinkdb (>=2.4.0)"] +sqlalchemy = ["sqlalchemy (>=1.4)"] +testing = ["pytest", "pytest-asyncio", "pytest-cov", "pytest-tornado5"] +tornado = ["tornado (>=4.3)"] +twisted = ["twisted"] +zookeeper = ["kazoo"] + [[package]] name = "black" version = "23.10.1" @@ -975,6 +1003,17 @@ files = [ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + [[package]] name = "sniffio" version = "1.3.0" @@ -1014,6 +1053,34 @@ files = [ {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, ] +[[package]] +name = "tzdata" +version = "2023.4" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file 
= "tzdata-2023.4-py2.py3-none-any.whl", hash = "sha256:aa3ace4329eeacda5b7beb7ea08ece826c28d761cda36e747cfbf97996d39bf3"}, + {file = "tzdata-2023.4.tar.gz", hash = "sha256:dd54c94f294765522c77399649b4fefd95522479a664a0cec87f41bebc6148c9"}, +] + +[[package]] +name = "tzlocal" +version = "5.2" +description = "tzinfo object for the local timezone" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"}, + {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"}, +] + +[package.dependencies] +tzdata = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] + [[package]] name = "ujson" version = "5.8.0" @@ -1335,4 +1402,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "850abff4d8ea83d6784621b77479fb273b13bc85fe3973a0eeb6a3547e469875" +content-hash = "2bb8da04768393533f0834bc5638b56f881a6cffd6610b41661b1c5476e13323" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 74bf2c53..8a446962 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -49,6 +49,7 @@ uvicorn = { extras = ["standard"], version = "^0.23.2" } neo4j = "^5.14.0" openfoodfacts_taxonomy_parser = { path = "../parser", develop = true } python-multipart = "^0.0.6" +apscheduler = "^3.10.4" [tool.poetry.group.dev.dependencies] black = "^23.10.1" From ddc6642d89b08c689d7f36ccfd5f8f4e2da8825e Mon Sep 17 00:00:00 2001 From: Eric Nguyen Date: Sat, 3 Feb 2024 22:57:24 +0100 Subject: [PATCH 2/6] style: run lint --- backend/editor/api.py | 7 ++++--- backend/editor/controllers/project_controller.py | 2 +- backend/editor/entries.py | 2 +- backend/editor/scheduler.py | 8 ++------ 4 files changed, 8 insertions(+), 11 deletions(-) diff --git 
a/backend/editor/api.py b/backend/editor/api.py index 54fe2e62..8aacea17 100644 --- a/backend/editor/api.py +++ b/backend/editor/api.py @@ -26,8 +26,6 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse, JSONResponse -from .scheduler import scheduler_lifespan - # DB helper imports from . import graph_db @@ -41,6 +39,7 @@ # Data model imports from .models.node_models import Footer, Header from .models.project_models import ProjectEdit, ProjectStatus +from .scheduler import scheduler_lifespan # -----------------------------------------------------------------------------------# @@ -163,7 +162,9 @@ async def set_project_status( Set the status of a Taxonomy Editor project """ taxonomy = TaxonomyGraph(branch, taxonomy_name) - result = await project_controller.edit_project(taxonomy.project_name, ProjectEdit(status=status)) + result = await project_controller.edit_project( + taxonomy.project_name, ProjectEdit(status=status) + ) return result diff --git a/backend/editor/controllers/project_controller.py b/backend/editor/controllers/project_controller.py index 7020f5c3..7756bc48 100644 --- a/backend/editor/controllers/project_controller.py +++ b/backend/editor/controllers/project_controller.py @@ -1,6 +1,6 @@ -from .node_controller import delete_project_nodes from ..graph_db import get_current_transaction from ..models.project_models import Project, ProjectCreate, ProjectEdit, ProjectStatus +from .node_controller import delete_project_nodes async def get_project(project_id: str) -> Project: diff --git a/backend/editor/entries.py b/backend/editor/entries.py index ab3fa5d3..9b057b31 100644 --- a/backend/editor/entries.py +++ b/backend/editor/entries.py @@ -685,4 +685,4 @@ async def full_text_search(self, text): """ _result = await get_current_transaction().run(query, params) result = [record["node"] for record in await _result.data()] - return result \ No newline at end of file + return result diff --git 
a/backend/editor/scheduler.py b/backend/editor/scheduler.py index 9af82906..72dc365a 100644 --- a/backend/editor/scheduler.py +++ b/backend/editor/scheduler.py @@ -3,15 +3,11 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler -from .graph_db import TransactionCtx - - +from .controllers.project_controller import delete_project, get_projects_by_status from .github_functions import GithubOperations - +from .graph_db import TransactionCtx from .models.project_models import ProjectStatus -from .controllers.project_controller import get_projects_by_status, delete_project - log = logging.getLogger(__name__) From 4a788d0371545bd0bf1e5f075620ea4f6ceba50d Mon Sep 17 00:00:00 2001 From: Eric Nguyen Date: Sun, 4 Feb 2024 12:53:39 +0100 Subject: [PATCH 3/6] test: update tests --- backend/tests/test_api.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/backend/tests/test_api.py b/backend/tests/test_api.py index b8280eea..1eaf58ed 100644 --- a/backend/tests/test_api.py +++ b/backend/tests/test_api.py @@ -74,10 +74,9 @@ def test_add_taxonomy_duplicate_project_name(client): def test_delete_project(client): test_upload_taxonomy(client) - response = client.delete("/test_taxonomy/test_branch/delete") + response = client.delete("/test_taxonomy/test_branch") - assert response.status_code == 200 - assert response.json() == {"message": "Deleted 1 projects"} + assert response.status_code == 204 def test_load_and_dump(): From af4ad45bda783e7359e377cb029cea64c91250d9 Mon Sep 17 00:00:00 2001 From: Eric Nguyen Date: Tue, 6 Feb 2024 19:57:09 +0100 Subject: [PATCH 4/6] chore: bump githubkit --- backend/poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/poetry.lock b/backend/poetry.lock index 09e5b1ce..d1d555d6 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -218,13 +218,13 @@ pyflakes = ">=3.1.0,<3.2.0" [[package]] name = "githubkit" -version = "0.11.0a4" +version = "0.11.1" description = "GitHub SDK for 
Python" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "githubkit-0.11.0a4-py3-none-any.whl", hash = "sha256:50891e52acb9ea9f436fea97d53d5ca605c0524aeab9034a4a0ce300056b7d87"}, - {file = "githubkit-0.11.0a4.tar.gz", hash = "sha256:8263ed628bb89b23458fe0eb9688708b04142d58d037d93b6d411878d8da62ca"}, + {file = "githubkit-0.11.1-py3-none-any.whl", hash = "sha256:1f67c1973fbb2c6ec21efe93b904888f3e0fdfb10bcdf046884d7726fccf3880"}, + {file = "githubkit-0.11.1.tar.gz", hash = "sha256:1604b86ca7135c0b971a2b048b86c13eedad16271c8134c81c4da660a3de7a29"}, ] [package.dependencies] From bca0fe3081505292198e1e1e84b9b71e42de1e30 Mon Sep 17 00:00:00 2001 From: Eric Nguyen Date: Tue, 6 Feb 2024 20:00:26 +0100 Subject: [PATCH 5/6] refactor: run delete_merged_project concurrently --- backend/editor/controllers/node_controller.py | 5 ++++ backend/editor/scheduler.py | 27 ++++++++++++------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/backend/editor/controllers/node_controller.py b/backend/editor/controllers/node_controller.py index f6e57647..b82cc3e9 100644 --- a/backend/editor/controllers/node_controller.py +++ b/backend/editor/controllers/node_controller.py @@ -2,6 +2,11 @@ async def delete_project_nodes(project_id: str): + """ + Remove all nodes for project. 
+ This includes entries, stopwords, synonyms and errors + """ + query = f""" MATCH (n:{project_id}) DETACH DELETE n diff --git a/backend/editor/scheduler.py b/backend/editor/scheduler.py index 72dc365a..861439e8 100644 --- a/backend/editor/scheduler.py +++ b/backend/editor/scheduler.py @@ -1,3 +1,4 @@ +import asyncio import contextlib import logging @@ -6,7 +7,7 @@ from .controllers.project_controller import delete_project, get_projects_by_status from .github_functions import GithubOperations from .graph_db import TransactionCtx -from .models.project_models import ProjectStatus +from .models.project_models import Project, ProjectStatus log = logging.getLogger(__name__) @@ -14,14 +15,22 @@ async def delete_merged_projects(): async with TransactionCtx(): exported_projects = await get_projects_by_status(ProjectStatus.EXPORTED) - for project in exported_projects: - pr_number = project.github_pr_url and project.github_pr_url.split("/")[-1] - if not pr_number: - log.warning(f"PR number not found for project {project.id}") - continue - github_object = GithubOperations(project.taxonomy_name, project.branch_name) - if await github_object.is_pr_merged(int(pr_number)): - await delete_project(project.id) + results = await asyncio.gather( + *map(delete_merged_project, exported_projects), return_exceptions=True + ) + for exception_result in filter(lambda x: x is not None, results): + log.warn(exception_result) + + +async def delete_merged_project(exported_project: Project): + pr_number = exported_project.github_pr_url and exported_project.github_pr_url.rsplit("/", 1)[-1] + if not pr_number: + log.warning(f"PR number not found for project {exported_project.id}") + return + + github_object = GithubOperations(exported_project.taxonomy_name, exported_project.branch_name) + if await github_object.is_pr_merged(int(pr_number)): + await delete_project(exported_project.id) @contextlib.contextmanager From c1dadab5b2182cbccd17e6446b905cc9cf5c3833 Mon Sep 17 00:00:00 2001 From: Eric Nguyen 
Date: Wed, 7 Feb 2024 11:05:11 +0100 Subject: [PATCH 6/6] chore: add comments --- backend/editor/github_functions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/editor/github_functions.py b/backend/editor/github_functions.py index 7f0e85db..2d8f2e1f 100644 --- a/backend/editor/github_functions.py +++ b/backend/editor/github_functions.py @@ -1,6 +1,7 @@ """ Github helper functions for the Taxonomy Editor API """ + import base64 from functools import cached_property from textwrap import dedent @@ -146,6 +147,8 @@ async def is_pr_merged(self, pr_number: int) -> bool: ) return True except RequestFailed as e: + # The API returns 404 if pull request has not been merged if e.response.status_code == 404: return False + # re-raise in case of unexpected status code raise e