Skip to content

Commit

Permalink
Convert orphan files deletion task to async using celery task. Update…
Browse files Browse the repository at this point in the history
… celery config to make it possible (#1609)

* convert orphan files deletion task to async using celery task. Update celery config to make it possible

* Update Dockerfile to meet pyproject.toml python version

* flake8

* poetry regeneration

---------

Co-authored-by: OhMaley <[email protected]>
Co-authored-by: bbearce <[email protected]>
  • Loading branch information
3 people authored Oct 14, 2024
1 parent e5d7c8f commit 231fe22
Show file tree
Hide file tree
Showing 9 changed files with 459 additions and 264 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ FROM python:3.9.20

RUN apt-get update && apt-get install -y gcc build-essential && rm -rf /var/lib/apt/lists/*

ENV PYTHONUNBUFFERED 1
ENV PYTHONUNBUFFERED=1

RUN curl -sSL https://install.python-poetry.org | python3 - --version 1.8.3
# Poetry location so future commands (below) work

ENV PATH $PATH:/root/.local/bin
ENV PATH=$PATH:/root/.local/bin
# Want poetry to use system python of docker container
RUN poetry config virtualenvs.create false
RUN poetry config virtualenvs.in-project false
Expand Down
475 changes: 253 additions & 222 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ channels = "2.4"
channels-redis = "3.2.0"
django-extra-fields = "0.9"
pillow = "10.3.0"
celery = "4.2.1"
celery = "4.4.7"
gunicorn = "22.0.0"
urllib3 = ">=1.21.1,<1.25"
uvicorn = {version = "0.13.3", extras = ["standard"]}
Expand Down
74 changes: 74 additions & 0 deletions src/apps/analytics/tasks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import time
import logging
from celery_config import app
Expand Down Expand Up @@ -570,3 +571,76 @@ def reset_computed_storage_analytics():
elapsed_time
)
)


@app.task(queue="site-worker")
def delete_orphan_files():
logger.info("Task delete_orphan_files started")

# Find most recent file
most_recent_log_file = get_most_recent_storage_inconsistency_log_file(logger)
if not most_recent_log_file:
logger.warning("No storage inconsistency log file found. Nothing will be removed")
raise Exception("No storage inconsistency log file found")

# Get the list of orphan files from the content of the most recent log file
log_folder = "/app/logs/"
orphan_files_path = get_files_path_from_orphan_log_file(os.path.join(log_folder, most_recent_log_file), logger)

# Delete those files in batch (max 1000 element at once)
batch_size = 1000
for i in range(0, len(orphan_files_path), batch_size):
batch = orphan_files_path[i:i + batch_size]
objects_formatted = [{'Key': path} for path in batch]
BundleStorage.bucket.delete_objects(Delete={'Objects': objects_formatted})

logger.info("Delete oprhan files finished")


def get_most_recent_storage_inconsistency_log_file(logger):
log_folder = "/app/logs/"
try:
log_files = [f for f in os.listdir(log_folder) if os.path.isfile(os.path.join(log_folder, f))]
except FileNotFoundError:
logger.info(f"Folder '{log_folder}' does not exist.")
return None

most_recent_log_file = None
most_recent_datetime = None
datetime_format = "%Y%m%d-%H%M%S"
for file in log_files:
try:
basename = os.path.basename(file)
datetime_str = basename[len("db_storage_inconsistency_"):-len(".log")]
file_datetime = datetime.strptime(datetime_str, datetime_format)
if most_recent_datetime is None or file_datetime > most_recent_datetime:
most_recent_datetime = file_datetime
most_recent_log_file = file
except ValueError:
logger.warning(f"Filename '{file}' does not match the expected format and will be ignored.")

return most_recent_log_file


def get_files_path_from_orphan_log_file(log_file_path, logger):
files_path = []

try:
with open(log_file_path) as log_file:
lines = log_file.readlines()
orphan_files_lines = []
for i, line in enumerate(lines):
if "Orphaned files" in line:
orphan_files_lines = lines[i + 1:]
break

for orphan_files_line in orphan_files_lines:
files_path.append(orphan_files_line.split(maxsplit=1)[0])
except FileNotFoundError:
logger.error(f"File '{log_file_path}' does not exist.")
except PermissionError:
logger.error(f"Permission denied for reading the file '{log_file_path}'.")
except IOError as e:
logger.error(f"An I/O error occurred while accessing the file at {log_file_path}: {e}")

return files_path
1 change: 1 addition & 0 deletions src/apps/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
path('analytics/users_usage/', analytics.users_usage, name='users_usage'),
path('analytics/delete_orphan_files/', analytics.delete_orphan_files, name="delete_orphan_files"),
path('analytics/get_orphan_files/', analytics.get_orphan_files, name="get_orphan_files"),
path('analytics/check_orphans_deletion_status/', analytics.check_orphans_deletion_status, name="check_orphans_deletion_status"),

# API Docs
re_path(r'docs(?P<format>\.json|\.yaml)$', schema_view.without_ui(cache_timeout=0), name='schema-json'),
Expand Down
49 changes: 24 additions & 25 deletions src/apps/api/views/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from competitions.models import Competition, Submission
from analytics.models import StorageUsageHistory, CompetitionStorageDataPoint, UserStorageDataPoint
from api.serializers.analytics import AnalyticsSerializer
from utils.storage import BundleStorage
from apps.analytics.tasks import delete_orphan_files as delete_orphan_files_async_task

import os
import datetime
Expand All @@ -22,6 +22,7 @@


User = get_user_model()
delete_orphan_files_task = None


class SimpleFilterBackend(BaseFilterBackend):
Expand Down Expand Up @@ -322,36 +323,16 @@ def get_orphan_files(request):
@api_view(["DELETE"])
def delete_orphan_files(request):
"""
Delete all orphan files from the storage based on the last storage analytics
Start the deletion of orphan files task
"""

if not request.user.is_superuser:
raise PermissionDenied(detail="Admin only")

logger = logging.getLogger(__name__)
logger.info("Delete orphan files started")

# The analytics task generates a db_storage_inconsistency_<date>-<time>.log file that lists, among other things, the orphan files. Let's use it

# Find most recent file
most_recent_log_file = get_most_recent_storage_inconsistency_log_file()
if not most_recent_log_file:
logger.warning("No storage inconsistency log file found. Nothing will be removed")
return Response({"message": "No storage inconsistency log file found. Nothing will be removed"}, status=status.HTTP_404_NOT_FOUND)

# Get the list of orphan files from the content of the most recent log file
log_folder = "/app/logs/"
orphan_files_path = get_files_path_from_orphan_log_file(os.path.join(log_folder, most_recent_log_file))

# Delete those files in batch (max 1000 element at once)
batch_size = 1000
for i in range(0, len(orphan_files_path), batch_size):
batch = orphan_files_path[i:i + batch_size]
objects_formatted = [{'Key': path} for path in batch]
BundleStorage.bucket.delete_objects(Delete={'Objects': objects_formatted})
global delete_orphan_files_task
delete_orphan_files_task = delete_orphan_files_async_task.delay()

logger.info("Delete oprhan files finished")
return Response({"message": "done"}, status=status.HTTP_200_OK)
return Response({"success": True, "message": "orphan files deletion started"}, status=status.HTTP_200_OK)


def get_most_recent_storage_inconsistency_log_file():
Expand Down Expand Up @@ -405,3 +386,21 @@ def get_files_path_from_orphan_log_file(log_file_path):
logger.error(f"An I/O error occurred while accessing the file at {log_file_path}: {e}")

return files_path


@api_view(["GET"])
def check_orphans_deletion_status(request):
"""
Get the orphan files deletion task status.
Return one of ["PENDING", "STARTED", "SUCCESS", "FAILURE", "RETRY", "REVOKED"]
"""

if not request.user.is_superuser:
raise PermissionDenied(detail="Admin only")

global delete_orphan_files_task
state = None
if delete_orphan_files_task:
state = delete_orphan_files_task.state

return Response({"status": state}, status=status.HTTP_200_OK)
2 changes: 2 additions & 0 deletions src/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@
CELERY_BROKER_URL = os.environ.get('BROKER_URL')
if not CELERY_BROKER_URL:
CELERY_BROKER_URL = f'pyamqp://{RABBITMQ_DEFAULT_USER}:{RABBITMQ_DEFAULT_PASS}@{RABBITMQ_HOST}:{RABBITMQ_PORT}//'
CELERY_RESULT_BACKEND = os.environ.get("REDIS_URL", "redis://redis:6379")
CELERY_IGNORE_RESULT = False # Ensure that Celery tracks the state
CELERY_TASK_SERIALIZER = 'json'
CELERY_ACCEPT_CONTENT = ('json',)
CELERY_BEAT_SCHEDULE = {
Expand Down
3 changes: 3 additions & 0 deletions src/static/js/ours/client.js
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,9 @@ CODALAB.api = {
get_orphan_files: () => {
return CODALAB.api.request('GET', `${URLS.API}analytics/get_orphan_files/`)
},
check_orphans_deletion_status: () => {
return CODALAB.api.request('GET', `${URLS.API}analytics/check_orphans_deletion_status/`)
},
/*---------------------------------------------------------------------
User Quota and Cleanup
---------------------------------------------------------------------*/
Expand Down
113 changes: 99 additions & 14 deletions src/static/riot/analytics/analytics.tag
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,9 @@
<a class="item" data-tab="competitions-usage">Competitions usage</a>
<a class="item" data-tab="users-usage">Users usage</a>
<div class="delete-oprhans-container">
<button class="ui red button" onclick="{showConfirmationModal}">
<i class="icon warning"></i>Delete orphan files
<button class="ui red button { disabled: delete_orphans_button_modal_disabled }" onclick="{showConfirmationModal}">
<i class="icon { warning: !delete_orphans_button_modal_loading}"></i>
{delete_orphans_button_modal_text}
</button>
</div>
</div>
Expand Down Expand Up @@ -163,7 +164,7 @@
<h3>Do you want to proceed ?</h3>
</div>
<div class="actions">
<button class="ui icon button {delete_button_color} { loading: delete_button_loading } { disabled: delete_button_disabled }" onclick="{deleteOrphanFiles}">
<button class="ui icon button {delete_button_color} { disabled: delete_button_disabled }" onclick="{deleteOrphanFiles}">
<i if={delete_button_color=="green"} class="check icon"></i>
{delete_button_text}
</button>
Expand Down Expand Up @@ -217,6 +218,11 @@
self.delete_button_disabled = false
self.delete_button_text = "Yes, delete all orphan files"

self.delete_orphans_button_modal_text = "Delete orphan files"
self.delete_orphans_button_modal_loading = false
self.delete_orphans_button_modal_disabled = false
self.pollingInterval;

self.one("mount", function () {
// Semantic UI
$('.tabular.menu .item', self.root).tab();
Expand Down Expand Up @@ -327,6 +333,7 @@
self.time_range_shortcut("month");
self.update_chart_resolution("day");
self.getOrphanFiles();
self.startCheckOrphansDeletionStatus();
})

/*---------------------------------------------------------------------
Expand Down Expand Up @@ -525,23 +532,103 @@
self.update();
}

self.checkOrphansDeletionStatus = function() {
CODALAB.api.check_orphans_deletion_status()
.done(function(data) {
if (data.status) {
if (data.status == "SUCCESS") {
toastr.success("Orphan files deletion successful")
self.delete_button_color = "green";
self.delete_button_text = "Deletion Successful";
}
if (data.status == "FAILURE") {
toastr.error("Orphan files deletion failed")
self.delete_button_color = "red";
self.delete_button_text = "Deletion Failed";
}
if (data.status == "REVOKED") {
toastr.error("Orphan files deletion has been canceled")
self.delete_button_color = "red";
self.delete_button_text = "Deletion canceled";
}
if (data.status == "SUCCESS" || data.status == "FAILURE" || data.status == "REVOKED") {
// Task is over
self.stopCheckOrphansDeletionStatus();
self.delete_orphans_button_modal_text = "Delete orphan files";
self.delete_orphans_button_modal_loading = false;
self.delete_orphans_button_modal_disabled = false;
self.delete_button_loading = false;
self.delete_button_disabled = true;
} else {
// Task is running
self.delete_orphans_button_modal_text = "Orphan files deletion in progress...";
self.delete_orphans_button_modal_disabled = true;
self.delete_orphans_button_modal_loading = true;

self.delete_button_text = "Orphan files deletion in progress...";
self.delete_button_disabled = true;
self.delete_button_loading = true;
}
} else {
// No task running
self.stopCheckOrphansDeletionStatus();

self.delete_orphans_button_modal_text = "Delete orphan files";
self.delete_orphans_button_modal_disabled = false;
self.delete_orphans_button_modal_loading = false;

self.delete_button_color = "red";
self.delete_button_loading = false;
self.delete_button_disabled = false;
self.delete_button_text = "Yes, delete all orphan files";
}

})
.fail(function(response) {
toastr.error("Orphan files deletion's task status check failed")
self.delete_orphans_button_modal_text = "Delete orphan files";
self.delete_orphans_button_modal_loading = false;
self.delete_orphans_button_modal_disabled = false;

self.delete_button_text = "Yes, delete all orphan files";
self.delete_button_color = "red";
self.delete_button_loading = false;
self.delete_button_disabled = false;

self.stopCheckOrphansDeletionStatus();
})
.always(function() {
self.update();
})
}

self.startCheckOrphansDeletionStatus = function () {
self.pollingInterval = setInterval(self.checkOrphansDeletionStatus, 2000);
}

self.stopCheckOrphansDeletionStatus = function() {
if (self.pollingInterval) {
clearInterval(self.pollingInterval);
self.pollingInterval = null;
}
}

self.deleteOrphanFiles = function() {
self.delete_button_loading = true
self.delete_button_disabled = true
self.delete_orphans_button_modal_loading = true;
self.delete_orphans_button_modal_disabled = true;
self.delete_button_text = "Orphan files deletion in progress...";
self.delete_orphans_button_modal_text = "Orphan files deletion in progress...";
self.update()
CODALAB.api.delete_orphan_files()
.done(function (data) {
console.log("done", data);
self.delete_button_color = "green";
self.delete_button_disabled = true;
self.delete_button_text = "Deletion Successful";
if (data && data.success && !self.pollingInterval) {
self.startCheckOrphansDeletionStatus();
}
})
.fail(function (response) {
console.log("fail response=", response);
toastr.error("Deletion failed, error occurred")
self.delete_button_color = "red";
self.delete_button_disabled = false;
self.delete_button_text = "Deletion Failed";
toastr.error("Orphan files deletion failed to start")
})
.always(function () {
self.delete_button_loading = false
Expand All @@ -552,12 +639,10 @@
self.getOrphanFiles = function() {
CODALAB.api.get_orphan_files()
.done(function (data) {
console.log("get_orphan_files success. Response", data);
self.nb_orphan_files = data.data.length
self.update({nb_orphan_files: self.nb_orphan_files});
})
.fail(function (response) {
console.log("get_orphan_files failed. Response=", response);
toastr.error("Get oprhan files failed, error occurred")
});
}
Expand Down

0 comments on commit 231fe22

Please sign in to comment.