Skip to content

Commit

Permalink
[forest-fixes] download forest task log no longer uses django rest fr…
Browse files Browse the repository at this point in the history
…amework and has tests
  • Loading branch information
biblicabeebli committed Dec 15, 2023
1 parent 14a4445 commit d91bae7
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 123 deletions.
4 changes: 2 additions & 2 deletions frontend/templates/navbar.html
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@
<li><a href="{{ easy_url('forest_pages.task_log', study_id=study.id) }}">View Forest Task Log</a></li>
<li><a href="{{ easy_url('forest_pages.forest_tasks_progress', study_id=study.id) }}">View Forest Analysis Progress</a></li>
{% if site_admin %}
<li><a class="" href="{{ url('forest_pages.download_task_log') }}">
<i>Download Forest Task Log (all studies)</i>
<li><a class="" href="{{ easy_url('forest_pages.download_task_log', study_id=study.id) }}">
<i>Download Forest Task Log As CSV</i>
</a></li>
{% endif %}
</ul>
Expand Down
106 changes: 76 additions & 30 deletions pages/forest_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
import pickle
from collections import defaultdict
from datetime import date, datetime
from typing import Dict, Optional

import orjson
from django.contrib import messages
from django.core.exceptions import ValidationError
from django.db.models import QuerySet
from django.http.response import FileResponse, HttpResponse
from django.shortcuts import redirect, render
from django.utils import timezone
Expand All @@ -31,7 +33,6 @@
from libs.s3 import NoSuchKeyException
from libs.streaming_zip import ZipGenerator
from libs.utils.date_utils import daterange
from serializers.forest_serializers import display_true, ForestTaskCsvSerializer


TASK_SERIALIZER_FIELDS = [
Expand Down Expand Up @@ -223,19 +224,14 @@ def task_log(request: ResearcherRequest, study_id=None):
task_dict["has_output_data"] = task_dict["forest_output_exists"]
task_dict["forest_tree_display"] = task_dict.pop("forest_tree").title()
task_dict["created_on_display"] = task_dict.pop("created_on").strftime(DEV_TIME_FORMAT)
task_dict["forest_output_exists_display"] = display_true(task_dict["forest_output_exists"])
task_dict["forest_output_exists_display"] = yes_no_unknown(task_dict["forest_output_exists"])

# dates/times that require safety (yes it could be less obnoxious)
task_dict["process_end_time"] = task_dict["process_end_time"].strftime(DEV_TIME_FORMAT) \
if task_dict["process_end_time"] else None
task_dict["process_start_time"] = task_dict["process_start_time"].strftime(DEV_TIME_FORMAT) \
if task_dict["process_start_time"] else None
task_dict["process_download_end_time"] = task_dict["process_download_end_time"].strftime(DEV_TIME_FORMAT) \
if task_dict["process_download_end_time"] else None
task_dict["data_date_end"] = task_dict["data_date_end"].isoformat() \
if task_dict["data_date_end"] else None
task_dict["data_date_start"] = task_dict["data_date_start"].isoformat() \
if task_dict["data_date_start"] else None
dict_datetime_to_display(task_dict, "process_end_time", None)
dict_datetime_to_display(task_dict, "process_start_time", None)
dict_datetime_to_display(task_dict, "process_download_end_time", None)
task_dict["data_date_end"] = task_dict["data_date_end"].isoformat()if task_dict["data_date_end"] else None
task_dict["data_date_start"] = task_dict["data_date_start"].isoformat()if task_dict["data_date_start"] else None

# urls
task_dict["cancel_url"] = easy_url(
Expand Down Expand Up @@ -277,18 +273,6 @@ def task_log(request: ResearcherRequest, study_id=None):
)


@require_GET
@authenticate_admin
def download_task_log(request: ResearcherRequest):
    # Streams a CSV of every ForestTask across all studies, oldest first.
    # (This is the pre-change, all-studies version; superseded by the
    # per-study endpoint taking a study_id.)
    forest_tasks = ForestTask.objects.order_by("created_on")
    return FileResponse(
        stream_forest_task_log_csv(forest_tasks),
        content_type="text/csv",
        # timestamped filename so repeated downloads don't collide
        filename=f"forest_task_log_{timezone.now().isoformat()}.csv",
        as_attachment=True,
    )


@require_POST
@authenticate_admin
@forest_enabled
Expand Down Expand Up @@ -428,17 +412,79 @@ def render_create_tasks(request: ResearcherRequest, study: Study):
)


def stream_forest_task_log_csv(forest_tasks):
@require_GET
@authenticate_admin
def download_task_log(request: ResearcherRequest, study_id: str):
    """ Streams a CSV of all ForestTasks on a study as a file attachment.
    Restricted to site admins; all other researchers receive an empty 403. """
    # authenticate_admin allows study admins through too, so enforce site-admin here.
    if not request.session_researcher.site_admin:
        return HttpResponse(content="", status=403)

    # NOTE(review): assumes study_id validity is enforced upstream (url pattern /
    # authenticate_admin) — confirm.  Ordered by created_on for deterministic output,
    # matching the behavior of the previous all-studies endpoint.
    forest_tasks = ForestTask.objects.filter(participant__study_id=study_id) \
        .order_by("created_on")

    return FileResponse(
        stream_forest_task_log_csv(forest_tasks),
        content_type="text/csv",
        # timestamped filename so repeated downloads don't collide
        filename=f"forest_task_log_{timezone.now().isoformat()}.csv",
        as_attachment=True,
    )


def stream_forest_task_log_csv(forest_tasks: QuerySet[ForestTask]):
    """ Generator yielding csv-encoded chunks: one header chunk, then one chunk
    per ForestTask row in the queryset. """
    # queryset lookup names as keys, human-readable csv column titles as values
    field_map = {
        "created_on": "Created On",
        "data_date_end": "Data Date End",
        "data_date_start": "Data Date Start",
        "external_id": "Id",
        "forest_tree": "Forest Tree",
        "forest_output_exists": "Forest Output Exists",
        "participant__patient_id": "Patient Id",
        "process_start_time": "Process Start Time",
        "process_download_end_time": "Process Download End Time",
        "process_end_time": "Process End Time",
        "status": "Status",
        "total_file_size": "Total File Size",
    }

    # csv.writer writes into any object with a write() method; CSVBuffer lets us
    # read back (and clear) whatever the writer just emitted, so we can stream.
    output_buffer = CSVBuffer()
    csv_writer = csv.writer(output_buffer, dialect="excel")

    # emit the header row first
    csv_writer.writerow(field_map.values())
    yield output_buffer.read()

    # these four columns are datetimes needing display formatting (blank when unset)
    datetime_columns = (
        "created_on", "process_start_time", "process_download_end_time", "process_end_time"
    )

    # one yielded chunk per task row
    for row in forest_tasks.values(*field_map.keys()):
        for column in datetime_columns:
            dict_datetime_to_display(row, column, "")
        row["forest_tree"] = row["forest_tree"].title()
        row["forest_output_exists"] = yes_no_unknown(row["forest_output_exists"])
        csv_writer.writerow(row.values())
        yield output_buffer.read()


def dict_datetime_to_display(some_dict: dict, key: str, default: Optional[str] = None):
    """ In-place: replaces some_dict[key] (a datetime or None) with its
    DEV_TIME_FORMAT string, substituting `default` when the value is None.

    (Fixes the annotation `default: str = None` -> Optional[str]; a None default
    is a legitimate, intended value here, not a typing accident.) """
    # this null-safe strftime pattern was previously repeated inline many times
    dt = some_dict[key]
    some_dict[key] = default if dt is None else dt.strftime(DEV_TIME_FORMAT)


def yes_no_unknown(a_bool: bool):
    """ Renders a nullable boolean for display: "Yes", "No", or "Unknown". """
    # identity checks on purpose: only the real bool singletons qualify, so
    # truthy/falsy non-bools (1, 0, "", None) all render as "Unknown".
    if a_bool is True:
        return "Yes"
    if a_bool is False:
        return "No"
    return "Unknown"


# we need a class that has a read and write method for the csv writer machinery to use.
class CSVBuffer:
line = ""

Expand Down
61 changes: 0 additions & 61 deletions serializers/forest_serializers.py

This file was deleted.

23 changes: 1 addition & 22 deletions serializers/tableau_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from database.security_models import ApiKey


# FIXME: this is the only remaining DRF serializer; once we remove it we can finally get rid of them all
class ApiKeySerializer(serializers.ModelSerializer):
class Meta:
model = ApiKey
Expand All @@ -14,24 +14,3 @@ class Meta:
"readable_name",
]


# class SummaryStatisticDailySerializer(serializers.ModelSerializer):
# class Meta:
# model = SummaryStatisticDaily
# fields = SERIALIZABLE_FIELD_NAMES

# participant_id = serializers.SlugRelatedField(
# slug_field="patient_id", source="participant", read_only=True
# )
# study_id = serializers.SerializerMethodField() # Study object id

# def __init__(self, *args, fields=None, **kwargs):
# """ dynamically modify the subset of fields on instantiation """
# super().__init__(*args, **kwargs)
# if fields is not None:
# for field_name in set(self.fields) - set(fields):
# # is this pop valid? the value is a cached property... this needs to be tested.
# self.fields.pop(field_name)

# def get_study_id(self, obj):
# return obj.participant.study.object_id
71 changes: 64 additions & 7 deletions tests/test_forest_pages.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import uuid
from datetime import datetime
from datetime import date, datetime
from unittest.mock import MagicMock, patch

import dateutil
Expand Down Expand Up @@ -48,12 +48,69 @@ def test(self):


# TODO: make a test for whether a study admin can hit this endpoint? I think there's a bug in the authenticate_admin decorator that allows that.....
# class TestForestDownloadTaskLog(ResearcherSessionTest):
# ENDPOINT_NAME = "forest_pages.download_task_log"
#
# def test(self):
# # this streams a csv of tasks on a tsudy
# self.smart_get()
class TestForestDownloadTaskLog(ResearcherSessionTest):
    """ Tests for the per-study forest task log CSV download endpoint.
    Only site admins may hit it; everyone else gets an empty 403. """
    ENDPOINT_NAME = "forest_pages.download_task_log"
    REDIRECT_ENDPOINT_NAME = ResearcherSessionTest.IGNORE_THIS_ENDPOINT

    # expected csv header; must stay in sync with field_map in
    # stream_forest_task_log_csv (csv module emits \r\n line endings)
    header_row = "Created On,Data Date End,Data Date Start,Id,Forest Tree,"\
        "Forest Output Exists,Patient Id,Process Start Time,Process Download End Time,"\
        "Process End Time,Status,Total File Size\r\n"

    def test_no_relation_no_site_admin_no_worky(self):
        # researcher with no study relation at all is rejected with an empty body
        resp = self.smart_get_status_code(403, self.session_study.id)
        self.assertEqual(resp.content, b"")

    def test_researcher_no_worky(self):
        # plain researcher relation is not enough
        self.set_session_study_relation(ResearcherRole.researcher)
        resp = self.smart_get_status_code(403, self.session_study.id)
        self.assertEqual(resp.content, b"")

    def test_study_admin_no_worky(self):
        # even study admin is rejected; the endpoint is site-admin only
        self.set_session_study_relation(ResearcherRole.study_admin)
        resp = self.smart_get_status_code(403, self.session_study.id)
        self.assertEqual(resp.content, b"")

    def test_site_admin_can(self):
        # site admin with no tasks present gets just the header row
        self.set_session_study_relation(ResearcherRole.site_admin)
        resp = self.smart_get_status_code(200, self.session_study.id)
        self.assertEqual(b"".join(resp.streaming_content).decode(), self.header_row)

    def test_single_forest_task(self):
        # populate one fully-specified task and check every rendered csv column
        self.set_session_study_relation(ResearcherRole.site_admin)
        self.default_forest_task.update(
            created_on=datetime(2020, 1, 1, tzinfo=dateutil.tz.UTC),
            data_date_start=date(2020, 1, 1),
            data_date_end=date(2020, 1, 5),
            forest_tree=ForestTree.jasmine,
            forest_output_exists=True,
            process_start_time=datetime(2020, 1, 1, tzinfo=dateutil.tz.UTC),  # midnight
            process_download_end_time=datetime(2020, 1, 1, 1, tzinfo=dateutil.tz.UTC),  # 1am
            process_end_time=datetime(2020, 1, 1, 2, tzinfo=dateutil.tz.UTC),  # 2am
            status=ForestTaskStatus.success,
            total_file_size=123456789,
        )

        resp = self.smart_get_status_code(200, self.session_study.id)
        content = b"".join(resp.streaming_content).decode()
        # header line plus one data line, each \r\n-terminated
        self.assertEqual(content.count("\r\n"), 2)
        line = content.splitlines()[1]

        # 12 columns, 11 commas
        self.assertEqual(line.count(","), 11)
        items = line.split(",")
        self.assertEqual(items[0], "2020-01-01 00:00 (UTC)")  # Created On
        self.assertEqual(items[1], "2020-01-05")  # Data Date End
        self.assertEqual(items[2], "2020-01-01")  # Data Date Start
        self.assertEqual(items[3], str(self.default_forest_task.external_id))  # duh
        self.assertEqual(items[4], "Jasmine")  # Forest Tree
        self.assertEqual(items[5], "Yes")  # Forest Output Exists
        self.assertEqual(items[6], "patient1")  # Patient Id
        self.assertEqual(items[7], "2020-01-01 00:00 (UTC)")  # Process Start Time
        self.assertEqual(items[8], "2020-01-01 01:00 (UTC)")  # Process Download End Time
        self.assertEqual(items[9], "2020-01-01 02:00 (UTC)")  # Process End Time
        self.assertEqual(items[10], "success")  # Status
        self.assertEqual(items[11], "123456789")  # Total File Size


class TestForestCancelTask(ResearcherSessionTest):
Expand Down
2 changes: 1 addition & 1 deletion urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def path(
path('studies/<str:study_id>/forest/progress', forest_pages.forest_tasks_progress, login_redirect=SAFE)
path("studies/<str:study_id>/forest/tasks/<str:forest_task_external_id>/cancel", forest_pages.cancel_task)
path('studies/<str:study_id>/forest/tasks', forest_pages.task_log, login_redirect=SAFE)
path('forest/tasks/download', forest_pages.download_task_log)
path('studies/<str:study_id>/forest/tasks/download', forest_pages.download_task_log)
path(
"studies/<str:study_id>/forest/tasks/<str:forest_task_external_id>/download_output",
forest_pages.download_output_data
Expand Down

0 comments on commit d91bae7

Please sign in to comment.