Skip to content

Commit

Permalink
Add stale course enrollment handling to pipeline
Browse files Browse the repository at this point in the history
What's included here is a generator to yield CourseEnrollment records
that need to have their EnrollmentData records updated

If we install Figures on a site that's already been running or we  miss a day
or more in processing, then the daily processing will not pick up on all
"stale enrollments", enrollments that Figures EnrollmentData record updated.

This is because Figures daily metrics jobs only look at "yesterday" in
order to work around the limitations of StudentModule.

The real logic is in `figures.enrollment.is_enrollment_data_out_of_date`
(Which is still pretty simple)
  • Loading branch information
johnbaldwin committed Mar 15, 2022
1 parent 2566818 commit ded1349
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 0 deletions.
36 changes: 36 additions & 0 deletions figures/pipeline/enrollment_metrics_next.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"""
from django.db.models import Avg
from figures.course import Course
from figures.enrollment import is_enrollment_data_out_of_date
from figures.helpers import utc_yesterday
from figures.models import EnrollmentData
from figures.sites import UnlinkedCourseError
Expand All @@ -42,6 +43,8 @@ def update_enrollment_data_for_course(course_id):
`EnrollmentData` records
Return results are a list of the results returned by `update_enrollment_data`
TODO: Add check if the course data actually exists in Mongo
"""
date_for = utc_yesterday()
the_course = Course(course_id)
Expand All @@ -55,6 +58,39 @@ def update_enrollment_data_for_course(course_id):
for ce in active_enrollments]


def stale_course_enrollments(course_id):
"""Find missing/out of date EnrollmentData records for the course
The `EnrollmentData` model holds the most recent data about the enrollment.
This model is not created until after two events happen in sequence
1. The enrollment has activity that creates one or more`StudentModule` records
2. Figures daily metrics ran after the `StudentModule` records were created
OR
Figures `EnrollmentData` backfill was run
Maybe... We might want to add query methods to EnrollmentDataManager to
get out of date `EnrollmentData` records for a course.
NOTE: Naming this function was a bit of a challenge. What I used before:
"update_enrollment_data_for_course"
"enrollments_with_out_of_date_enrollment_data"
"ce_needs_enrollment_data_update"
Since we are in the context of Figures and figures doesn't modifity the platform,
we should be save with saying "stale_course_enrollments" for brevity
"""
course = Course(course_id)

# If we have a course with no activity, nothing can be out of date.
# As such if the course has no student modules, the loop won't execute

# We are only interested in enrollments with activity
for enrollment in course.enrollments_with_student_modules():
if is_enrollment_data_out_of_date(enrollment):
yield enrollment


def calculate_course_progress(course_id):
"""Return average progress percentage for all enrollments in the course
"""
Expand Down
54 changes: 54 additions & 0 deletions tests/pipeline/test_enrollment_metrics_next.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from figures.compat import CourseEnrollment
from figures.pipeline.enrollment_metrics_next import (
update_enrollment_data_for_course,
stale_course_enrollments,
calculate_course_progress,
)
from figures.sites import UnlinkedCourseError
Expand All @@ -20,6 +21,7 @@
CourseEnrollmentFactory,
CourseOverviewFactory,
EnrollmentDataFactory,
StudentModuleFactory,
)


Expand Down Expand Up @@ -91,6 +93,58 @@ def test_course_is_unlinked(self, monkeypatch):
assert str(excinfo.value) == expected_msg


@pytest.mark.django_db
class TestStaleCourseEnrollments(object):
"""Tests `stale_course_enrollments`
"""

@pytest.fixture(autouse=True)
def setup(self, db, settings):
self.course_overview = CourseOverviewFactory()
self.site = Site.objects.first()

def test_empty_course_list_comprehension(self):
found = [rec for rec in stale_course_enrollments(self.course_overview.id)]
assert found == []

def test_emtpy_course_next(self):
# import pdb; pdb.set_trace()

with pytest.raises(StopIteration):
rec = next(stale_course_enrollments(self.course_overview.id))


def test_no_update_needed(self, monkeypatch):
"""Call to function yields no results
This test and the next one, 'test_needs_update' do "all or nothing" for
returning boolean values from `is_enrollment_out_of_date`. That's ok,
because there's test coverage specifically for this function in `test_enrollments.py`
So as long as this function has unit tests for its possible states, we
don't need to do it here.
"""
# Need to mock/monkeypatch. Either code within 'is_enrollment_data_out_of_date'
# or the function itself.
# See test_enrollment.py::TestIsEnrollmentDataOutOfDate

ce_recs = [CourseEnrollmentFactory(course_id=self.course_overview.id) for _ in range(2)]
[StudentModuleFactory.from_course_enrollment(ce) for ce in ce_recs]
monkeypatch.setattr('figures.pipeline.enrollment_metrics_next.is_enrollment_data_out_of_date', lambda val: False)
assert not [rec for rec in stale_course_enrollments(self.course_overview.id)]

def test_needs_update(self, monkeypatch):
"""Call to function yields two results
We create three enrollments and leave one enrollment without any StudentModule
"""
ce = [CourseEnrollmentFactory(course_id=self.course_overview.id) for _ in range(3)]
StudentModuleFactory.from_course_enrollment(ce[0])
StudentModuleFactory.from_course_enrollment(ce[1])
monkeypatch.setattr('figures.pipeline.enrollment_metrics_next.is_enrollment_data_out_of_date', lambda val: True)
found = [rec for rec in stale_course_enrollments(self.course_overview.id)]
assert set(found) == set(ce[:2])


@pytest.mark.django_db
class TestCalculateProgress(object):
"""Tests `calculate_course_progress`
Expand Down

0 comments on commit ded1349

Please sign in to comment.