Skip to content

Commit

Permalink
[forest-fixes] download forest task log no longer uses django rest fr…
Browse files Browse the repository at this point in the history
…amework and has tests
  • Loading branch information
biblicabeebli committed Dec 15, 2023
1 parent 14a4445 commit d91bae7
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 123 deletions.
4 changes: 2 additions & 2 deletions frontend/templates/navbar.html
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@
<li><a href="{{ easy_url('forest_pages.task_log', study_id=study.id) }}">View Forest Task Log</a></li>
<li><a href="{{ easy_url('forest_pages.forest_tasks_progress', study_id=study.id) }}">View Forest Analysis Progress</a></li>
{% if site_admin %}
<li><a class="" href="{{ url('forest_pages.download_task_log') }}">
<i>Download Forest Task Log (all studies)</i>
<li><a class="" href="{{ easy_url('forest_pages.download_task_log', study_id=study.id) }}">
<i>Download Forest Task Log As CSV</i>
</a></li>
{% endif %}
</ul>
Expand Down
106 changes: 76 additions & 30 deletions pages/forest_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
import pickle
from collections import defaultdict
from datetime import date, datetime
from typing import Dict, Optional

import orjson
from django.contrib import messages
from django.core.exceptions import ValidationError
from django.db.models import QuerySet
from django.http.response import FileResponse, HttpResponse
from django.shortcuts import redirect, render
from django.utils import timezone
Expand All @@ -31,7 +33,6 @@
from libs.s3 import NoSuchKeyException
from libs.streaming_zip import ZipGenerator
from libs.utils.date_utils import daterange
from serializers.forest_serializers import display_true, ForestTaskCsvSerializer


TASK_SERIALIZER_FIELDS = [
Expand Down Expand Up @@ -223,19 +224,14 @@ def task_log(request: ResearcherRequest, study_id=None):
task_dict["has_output_data"] = task_dict["forest_output_exists"]
task_dict["forest_tree_display"] = task_dict.pop("forest_tree").title()
task_dict["created_on_display"] = task_dict.pop("created_on").strftime(DEV_TIME_FORMAT)
task_dict["forest_output_exists_display"] = display_true(task_dict["forest_output_exists"])
task_dict["forest_output_exists_display"] = yes_no_unknown(task_dict["forest_output_exists"])

# dates/times that require safety (yes it could be less obnoxious)
task_dict["process_end_time"] = task_dict["process_end_time"].strftime(DEV_TIME_FORMAT) \
if task_dict["process_end_time"] else None
task_dict["process_start_time"] = task_dict["process_start_time"].strftime(DEV_TIME_FORMAT) \
if task_dict["process_start_time"] else None
task_dict["process_download_end_time"] = task_dict["process_download_end_time"].strftime(DEV_TIME_FORMAT) \
if task_dict["process_download_end_time"] else None
task_dict["data_date_end"] = task_dict["data_date_end"].isoformat() \
if task_dict["data_date_end"] else None
task_dict["data_date_start"] = task_dict["data_date_start"].isoformat() \
if task_dict["data_date_start"] else None
dict_datetime_to_display(task_dict, "process_end_time", None)
dict_datetime_to_display(task_dict, "process_start_time", None)
dict_datetime_to_display(task_dict, "process_download_end_time", None)
task_dict["data_date_end"] = task_dict["data_date_end"].isoformat()if task_dict["data_date_end"] else None
task_dict["data_date_start"] = task_dict["data_date_start"].isoformat()if task_dict["data_date_start"] else None

# urls
task_dict["cancel_url"] = easy_url(
Expand Down Expand Up @@ -277,18 +273,6 @@ def task_log(request: ResearcherRequest, study_id=None):
)


@require_GET
@authenticate_admin
def download_task_log(request: ResearcherRequest):
    # Streams a CSV of every ForestTask across all studies, oldest first.
    # (This is the pre-change, all-studies version; superseded by the
    # per-study endpoint taking a study_id.)
    forest_tasks = ForestTask.objects.order_by("created_on")
    return FileResponse(
        stream_forest_task_log_csv(forest_tasks),
        content_type="text/csv",
        # timestamped filename so repeated downloads don't collide
        filename=f"forest_task_log_{timezone.now().isoformat()}.csv",
        as_attachment=True,
    )


@require_POST
@authenticate_admin
@forest_enabled
Expand Down Expand Up @@ -428,17 +412,79 @@ def render_create_tasks(request: ResearcherRequest, study: Study):
)


def stream_forest_task_log_csv(forest_tasks):
@require_GET
@authenticate_admin
def download_task_log(request: ResearcherRequest, study_id: str):
    """ Streams a CSV of all ForestTasks on a study as a file attachment.
    Restricted to site admins; all other researchers receive an empty 403. """
    # authenticate_admin allows study admins through too, so enforce site-admin here.
    if not request.session_researcher.site_admin:
        return HttpResponse(content="", status=403)

    # NOTE(review): assumes study_id validity is enforced upstream (url pattern /
    # authenticate_admin) — confirm.  Ordered by created_on for deterministic output,
    # matching the behavior of the previous all-studies endpoint.
    forest_tasks = ForestTask.objects.filter(participant__study_id=study_id) \
        .order_by("created_on")

    return FileResponse(
        stream_forest_task_log_csv(forest_tasks),
        content_type="text/csv",
        # timestamped filename so repeated downloads don't collide
        filename=f"forest_task_log_{timezone.now().isoformat()}.csv",
        as_attachment=True,
    )


def stream_forest_task_log_csv(forest_tasks: QuerySet[ForestTask]):
    """ Generator yielding csv-encoded chunks: one header chunk, then one chunk
    per ForestTask row in the queryset. """
    # queryset lookup names as keys, human-readable csv column titles as values
    field_map = {
        "created_on": "Created On",
        "data_date_end": "Data Date End",
        "data_date_start": "Data Date Start",
        "external_id": "Id",
        "forest_tree": "Forest Tree",
        "forest_output_exists": "Forest Output Exists",
        "participant__patient_id": "Patient Id",
        "process_start_time": "Process Start Time",
        "process_download_end_time": "Process Download End Time",
        "process_end_time": "Process End Time",
        "status": "Status",
        "total_file_size": "Total File Size",
    }

    # csv.writer writes into any object with a write() method; CSVBuffer lets us
    # read back (and clear) whatever the writer just emitted, so we can stream.
    output_buffer = CSVBuffer()
    csv_writer = csv.writer(output_buffer, dialect="excel")

    # emit the header row first
    csv_writer.writerow(field_map.values())
    yield output_buffer.read()

    # these four columns are datetimes needing display formatting (blank when unset)
    datetime_columns = (
        "created_on", "process_start_time", "process_download_end_time", "process_end_time"
    )

    # one yielded chunk per task row
    for row in forest_tasks.values(*field_map.keys()):
        for column in datetime_columns:
            dict_datetime_to_display(row, column, "")
        row["forest_tree"] = row["forest_tree"].title()
        row["forest_output_exists"] = yes_no_unknown(row["forest_output_exists"])
        csv_writer.writerow(row.values())
        yield output_buffer.read()


def dict_datetime_to_display(some_dict: dict, key: str, default: Optional[str] = None):
    """ In-place: replaces some_dict[key] (a datetime or None) with its
    DEV_TIME_FORMAT string, substituting `default` when the value is None.

    (Fixes the annotation `default: str = None` -> Optional[str]; a None default
    is a legitimate, intended value here, not a typing accident.) """
    # this null-safe strftime pattern was previously repeated inline many times
    dt = some_dict[key]
    some_dict[key] = default if dt is None else dt.strftime(DEV_TIME_FORMAT)


def yes_no_unknown(a_bool: bool):
    """ Renders a nullable boolean for display: "Yes", "No", or "Unknown". """
    # identity checks on purpose: only the real bool singletons qualify, so
    # truthy/falsy non-bools (1, 0, "", None) all render as "Unknown".
    if a_bool is True:
        return "Yes"
    if a_bool is False:
        return "No"
    return "Unknown"


# we need a class that has a read and write method for the csv writer machinery to use.
class CSVBuffer:
line = ""

Expand Down
61 changes: 0 additions & 61 deletions serializers/forest_serializers.py

This file was deleted.

23 changes: 1 addition & 22 deletions serializers/tableau_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from database.security_models import ApiKey


# FIXME: this is the only remaining DRF serializer; once we remove it we can finally get rid of them all
class ApiKeySerializer(serializers.ModelSerializer):
class Meta:
model = ApiKey
Expand All @@ -14,24 +14,3 @@ class Meta:
"readable_name",
]


# class SummaryStatisticDailySerializer(serializers.ModelSerializer):
# class Meta:
# model = SummaryStatisticDaily
# fields = SERIALIZABLE_FIELD_NAMES

# participant_id = serializers.SlugRelatedField(
# slug_field="patient_id", source="participant", read_only=True
# )
# study_id = serializers.SerializerMethodField() # Study object id

# def __init__(self, *args, fields=None, **kwargs):
# """ dynamically modify the subset of fields on instantiation """
# super().__init__(*args, **kwargs)
# if fields is not None:
# for field_name in set(self.fields) - set(fields):
# # is this pop valid? the value is a cached property... this needs to be tested.
# self.fields.pop(field_name)

# def get_study_id(self, obj):
# return obj.participant.study.object_id
71 changes: 64 additions & 7 deletions tests/test_forest_pages.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import uuid
from datetime import datetime
from datetime import date, datetime
from unittest.mock import MagicMock, patch

import dateutil
Expand Down Expand Up @@ -48,12 +48,69 @@ def test(self):


# TODO: make a test for whether a study admin can hit this endpoint? I think there's a bug in the authenticate_admin decorator that allows that.....
# class TestForestDownloadTaskLog(ResearcherSessionTest):
# ENDPOINT_NAME = "forest_pages.download_task_log"
#
# def test(self):
# # this streams a csv of tasks on a tsudy
# self.smart_get()
class TestForestDownloadTaskLog(ResearcherSessionTest):
    """ Tests for the per-study forest task log CSV download endpoint.
    Only site admins may hit it; everyone else gets an empty 403. """
    ENDPOINT_NAME = "forest_pages.download_task_log"
    REDIRECT_ENDPOINT_NAME = ResearcherSessionTest.IGNORE_THIS_ENDPOINT

    # expected csv header; must stay in sync with field_map in
    # stream_forest_task_log_csv (csv module emits \r\n line endings)
    header_row = "Created On,Data Date End,Data Date Start,Id,Forest Tree,"\
        "Forest Output Exists,Patient Id,Process Start Time,Process Download End Time,"\
        "Process End Time,Status,Total File Size\r\n"

    def test_no_relation_no_site_admin_no_worky(self):
        # researcher with no study relation at all is rejected with an empty body
        resp = self.smart_get_status_code(403, self.session_study.id)
        self.assertEqual(resp.content, b"")

    def test_researcher_no_worky(self):
        # plain researcher relation is not enough
        self.set_session_study_relation(ResearcherRole.researcher)
        resp = self.smart_get_status_code(403, self.session_study.id)
        self.assertEqual(resp.content, b"")

    def test_study_admin_no_worky(self):
        # even study admin is rejected; the endpoint is site-admin only
        self.set_session_study_relation(ResearcherRole.study_admin)
        resp = self.smart_get_status_code(403, self.session_study.id)
        self.assertEqual(resp.content, b"")

    def test_site_admin_can(self):
        # site admin with no tasks present gets just the header row
        self.set_session_study_relation(ResearcherRole.site_admin)
        resp = self.smart_get_status_code(200, self.session_study.id)
        self.assertEqual(b"".join(resp.streaming_content).decode(), self.header_row)

    def test_single_forest_task(self):
        # populate one fully-specified task and check every rendered csv column
        self.set_session_study_relation(ResearcherRole.site_admin)
        self.default_forest_task.update(
            created_on=datetime(2020, 1, 1, tzinfo=dateutil.tz.UTC),
            data_date_start=date(2020, 1, 1),
            data_date_end=date(2020, 1, 5),
            forest_tree=ForestTree.jasmine,
            forest_output_exists=True,
            process_start_time=datetime(2020, 1, 1, tzinfo=dateutil.tz.UTC),  # midnight
            process_download_end_time=datetime(2020, 1, 1, 1, tzinfo=dateutil.tz.UTC),  # 1am
            process_end_time=datetime(2020, 1, 1, 2, tzinfo=dateutil.tz.UTC),  # 2am
            status=ForestTaskStatus.success,
            total_file_size=123456789,
        )

        resp = self.smart_get_status_code(200, self.session_study.id)
        content = b"".join(resp.streaming_content).decode()
        # header line plus one data line, each \r\n-terminated
        self.assertEqual(content.count("\r\n"), 2)
        line = content.splitlines()[1]

        # 12 columns, 11 commas
        self.assertEqual(line.count(","), 11)
        items = line.split(",")
        self.assertEqual(items[0], "2020-01-01 00:00 (UTC)")  # Created On
        self.assertEqual(items[1], "2020-01-05")  # Data Date End
        self.assertEqual(items[2], "2020-01-01")  # Data Date Start
        self.assertEqual(items[3], str(self.default_forest_task.external_id))  # duh
        self.assertEqual(items[4], "Jasmine")  # Forest Tree
        self.assertEqual(items[5], "Yes")  # Forest Output Exists
        self.assertEqual(items[6], "patient1")  # Patient Id
        self.assertEqual(items[7], "2020-01-01 00:00 (UTC)")  # Process Start Time
        self.assertEqual(items[8], "2020-01-01 01:00 (UTC)")  # Process Download End Time
        self.assertEqual(items[9], "2020-01-01 02:00 (UTC)")  # Process End Time
        self.assertEqual(items[10], "success")  # Status
        self.assertEqual(items[11], "123456789")  # Total File Size


class TestForestCancelTask(ResearcherSessionTest):
Expand Down
2 changes: 1 addition & 1 deletion urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def path(
path('studies/<str:study_id>/forest/progress', forest_pages.forest_tasks_progress, login_redirect=SAFE)
path("studies/<str:study_id>/forest/tasks/<str:forest_task_external_id>/cancel", forest_pages.cancel_task)
path('studies/<str:study_id>/forest/tasks', forest_pages.task_log, login_redirect=SAFE)
path('forest/tasks/download', forest_pages.download_task_log)
path('studies/<str:study_id>/forest/tasks/download', forest_pages.download_task_log)
path(
"studies/<str:study_id>/forest/tasks/<str:forest_task_external_id>/download_output",
forest_pages.download_output_data
Expand Down

0 comments on commit d91bae7

Please sign in to comment.