From ded1349ac2ba85a131690b7c37e96c3b1cd796a1 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 15 Mar 2022 11:53:43 -0400 Subject: [PATCH] Add stale course enrollment handling to pipeline What's included here is a generator to yield CourseEnrollment records that need to have their EnrollmentData records updated If we install Figures on a site that's already been running or we miss a day or more in processing, then the daily processing will not pick up on all "stale enrollments", enrollments that Figures EnrollmentData record updated. This is because Figures daily metrics jobs only look at "yesterday" in order to work around the limitations of StudentModule. The real logic is in `figures.enrollment.is_enrollment_data_out_of_date` (Which is still pretty simple) --- figures/pipeline/enrollment_metrics_next.py | 36 +++++++++++++ .../pipeline/test_enrollment_metrics_next.py | 54 +++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/figures/pipeline/enrollment_metrics_next.py b/figures/pipeline/enrollment_metrics_next.py index 7b00fa07..b000ae03 100644 --- a/figures/pipeline/enrollment_metrics_next.py +++ b/figures/pipeline/enrollment_metrics_next.py @@ -31,6 +31,7 @@ """ from django.db.models import Avg from figures.course import Course +from figures.enrollment import is_enrollment_data_out_of_date from figures.helpers import utc_yesterday from figures.models import EnrollmentData from figures.sites import UnlinkedCourseError @@ -42,6 +43,8 @@ def update_enrollment_data_for_course(course_id): `EnrollmentData` records Return results are a list of the results returned by `update_enrollment_data` + + TODO: Add check if the course data actually exists in Mongo """ date_for = utc_yesterday() the_course = Course(course_id) @@ -55,6 +58,39 @@ def update_enrollment_data_for_course(course_id): for ce in active_enrollments] +def stale_course_enrollments(course_id): + """Find missing/out of date EnrollmentData records for the course + + The `EnrollmentData` model holds the most recent data about the enrollment. + This model is not created until after two events happen in sequence + 1. The enrollment has activity that creates one or more`StudentModule` records + 2. Figures daily metrics ran after the `StudentModule` records were created + OR + Figures `EnrollmentData` backfill was run + + Maybe... We might want to add query methods to EnrollmentDataManager to + get out of date `EnrollmentData` records for a course. + + NOTE: Naming this function was a bit of a challenge. What I used before: + + "update_enrollment_data_for_course" + "enrollments_with_out_of_date_enrollment_data" + "ce_needs_enrollment_data_update" + + Since we are in the context of Figures and figures doesn't modifity the platform, + we should be save with saying "stale_course_enrollments" for brevity + """ + course = Course(course_id) + + # If we have a course with no activity, nothing can be out of date. + # As such if the course has no student modules, the loop won't execute + + # We are only interested in enrollments with activity + for enrollment in course.enrollments_with_student_modules(): + if is_enrollment_data_out_of_date(enrollment): + yield enrollment + + def calculate_course_progress(course_id): """Return average progress percentage for all enrollments in the course """ diff --git a/tests/pipeline/test_enrollment_metrics_next.py b/tests/pipeline/test_enrollment_metrics_next.py index b48fe44a..d606cd78 100644 --- a/tests/pipeline/test_enrollment_metrics_next.py +++ b/tests/pipeline/test_enrollment_metrics_next.py @@ -12,6 +12,7 @@ from figures.compat import CourseEnrollment from figures.pipeline.enrollment_metrics_next import ( update_enrollment_data_for_course, + stale_course_enrollments, calculate_course_progress, ) from figures.sites import UnlinkedCourseError @@ -20,6 +21,7 @@ CourseEnrollmentFactory, CourseOverviewFactory, EnrollmentDataFactory, + StudentModuleFactory, ) @@ -91,6 +93,58 @@ def test_course_is_unlinked(self, monkeypatch): assert str(excinfo.value) == expected_msg +@pytest.mark.django_db +class TestStaleCourseEnrollments(object): + """Tests `stale_course_enrollments` + """ + + @pytest.fixture(autouse=True) + def setup(self, db, settings): + self.course_overview = CourseOverviewFactory() + self.site = Site.objects.first() + + def test_empty_course_list_comprehension(self): + found = [rec for rec in stale_course_enrollments(self.course_overview.id)] + assert found == [] + + def test_emtpy_course_next(self): + # import pdb; pdb.set_trace() + + with pytest.raises(StopIteration): + rec = next(stale_course_enrollments(self.course_overview.id)) + + + def test_no_update_needed(self, monkeypatch): + """Call to function yields no results + + This test and the next one, 'test_needs_update' do "all or nothing" for + returning boolean values from `is_enrollment_out_of_date`. That's ok, + because there's test coverage specifically for this function in `test_enrollments.py` + So as long as this function has unit tests for its possible states, we + don't need to do it here. + """ + # Need to mock/monkeypatch. Either code within 'is_enrollment_data_out_of_date' + # or the function itself. + # See test_enrollment.py::TestIsEnrollmentDataOutOfDate + + ce_recs = [CourseEnrollmentFactory(course_id=self.course_overview.id) for _ in range(2)] + [StudentModuleFactory.from_course_enrollment(ce) for ce in ce_recs] + monkeypatch.setattr('figures.pipeline.enrollment_metrics_next.is_enrollment_data_out_of_date', lambda val: False) + assert not [rec for rec in stale_course_enrollments(self.course_overview.id)] + + def test_needs_update(self, monkeypatch): + """Call to function yields two results + + We create three enrollments and leave one enrollment without any StudentModule + """ + ce = [CourseEnrollmentFactory(course_id=self.course_overview.id) for _ in range(3)] + StudentModuleFactory.from_course_enrollment(ce[0]) + StudentModuleFactory.from_course_enrollment(ce[1]) + monkeypatch.setattr('figures.pipeline.enrollment_metrics_next.is_enrollment_data_out_of_date', lambda val: True) + found = [rec for rec in stale_course_enrollments(self.course_overview.id)] + assert set(found) == set(ce[:2]) + + @pytest.mark.django_db class TestCalculateProgress(object): """Tests `calculate_course_progress`