Skip to content

Commit

Permalink
Fixed figures.tasks.backfill_enrollment_data_for_course
Browse files Browse the repository at this point in the history
Now it looks for stale course enrollments and for each stale course
enrollment found, updates that enrollment's metrics. Prior to this
commit, it was just calling the normal daily enrollment data update
function.

* Updated unit tests
  • Loading branch information
johnbaldwin committed Mar 15, 2022
1 parent ded1349 commit 3511f2b
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 11 deletions.
42 changes: 35 additions & 7 deletions figures/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
from celery.utils.log import get_task_logger

from figures.compat import CourseEnrollment, CourseOverview
from figures.course import Course
from figures.helpers import as_course_key, as_date, is_past_date
from figures.log import log_exec_time
from figures.models import EnrollmentData
from figures.sites import get_sites, get_sites_by_id, site_course_ids

from figures.pipeline.backfill import backfill_enrollment_data_for_site
Expand All @@ -29,7 +31,10 @@
from figures.pipeline.mau_pipeline import collect_course_mau
from figures.pipeline.helpers import DateForCannotBeFutureError
from figures.pipeline.site_monthly_metrics import fill_last_month as fill_last_smm_month
from figures.pipeline.enrollment_metrics_next import update_enrollment_data_for_course
from figures.pipeline.enrollment_metrics_next import (
update_enrollment_data_for_course,
stale_course_enrollments,
)


logger = get_task_logger(__name__)
Expand Down Expand Up @@ -324,15 +329,38 @@ def populate_daily_metrics_next(site_id=None, force_update=False):

@shared_task
def backfill_enrollment_data_for_course(course_id):
"""Create or update EnrollmentData records for the course
"""Update EnrollmentData records for activity before "yesterday"
This is a simple wrapper to run the enrollment update as a Celery task
This task function is to get `EnrollmentData` records up to date. This is
needed under at least the following conditions
We usually run this task through the Figures Django management command,
`backfill_figures_enrollment_data`
A. Figures is being installed/enabled on an existing Open edX deployment
B. The daily pipeline was stopped or failed to run for longer than a day
This function is needed because it costs too much time to query
an enrollment's `StudentModule` record to find the latest date it was
modified. For the regular day to day pipeline, Figures can perform a **much**
faster query to find if `StudentModule` records exist for the specific day
we gather our daily metrics. This is always the previous calendar day given
UTC time. The following function is what updates `EnrollmentData` on the
daily job:
```
figures.pipeline.enrollment_metrics_next.update_enrollment_data_for_course
```
There is a Figures Django management command to run this task:
```
backfill_figures_enrollment_data
```
"""
# results are a list of (object, created_flag) tuples
updated = update_enrollment_data_for_course(course_id)
course = Course(course_id)
updated = []
for enrollment in stale_course_enrollments(course_id):
# `update_metrics` results are a (object, created_flag) tuple
updated.append(EnrollmentData.objects.update_metrics(course.site, enrollment))

msg = ('figures.tasks.backfill_enrollment_data_for_course "{course_id}".'
' Updated {edrec_count} enrollment data records.')
logger.info(msg.format(course_id=course_id, edrec_count=len(updated)))
Expand Down
11 changes: 7 additions & 4 deletions tests/tasks/test_backfill_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
from __future__ import absolute_import
import logging
import mock
import pytest

from figures.tasks import backfill_enrollment_data_for_course
Expand All @@ -28,7 +29,7 @@ def test_backfill_enrollment_data_for_course_no_update(self, transactional_db,
# The function returns a list of tuples with (object, created)
# ed_recs = [(EnrollmentDataFactory(), False) for _ in range(2)]
caplog.set_level(logging.INFO)
func_path = 'figures.tasks.update_enrollment_data_for_course'
func_path = 'figures.tasks.stale_course_enrollments'
monkeypatch.setattr(func_path, lambda course_id: [])
backfill_enrollment_data_for_course(course_id)
assert len(caplog.records) == 1
Expand All @@ -46,9 +47,11 @@ def test_backfill_enrollment_data_for_course_with_updates(self, transactional_db
# The function returns a list of tuples with (object, created)
ed_recs = [(EnrollmentDataFactory(), False) for _ in range(2)]
caplog.set_level(logging.INFO)
func_path = 'figures.tasks.update_enrollment_data_for_course'
monkeypatch.setattr(func_path, lambda course_id: ed_recs)
backfill_enrollment_data_for_course(course_id)
monkeypatch.setattr('figures.tasks.stale_course_enrollments', lambda course_id: ed_recs)
with mock.patch('figures.tasks.EnrollmentData') as mock_ed_class:
mock_ed_class.return_value.objects.update_metrics.return_value = ed_recs
backfill_enrollment_data_for_course(course_id)

assert len(caplog.records) == 1
assert caplog.records[0].message == self.expected_message_template.format(
course_id=course_id,
Expand Down

0 comments on commit 3511f2b

Please sign in to comment.