From 4dcbd75301545b2689f9079e5f6f8a30accfad31 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Fri, 25 Feb 2022 14:17:48 -0500 Subject: [PATCH] Add next iteration to collect enrollment data This PR provides alternate execution to create the same data as before * See the module docstring in `figures.pipeline.enrollment_metrics_next` for details * This PR contains a new module and tests to support the new module --- figures/pipeline/enrollment_metrics_next.py | 69 +++++++++++ .../pipeline/test_enrollment_metrics_next.py | 109 ++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100644 figures/pipeline/enrollment_metrics_next.py create mode 100644 tests/pipeline/test_enrollment_metrics_next.py diff --git a/figures/pipeline/enrollment_metrics_next.py b/figures/pipeline/enrollment_metrics_next.py new file mode 100644 index 000000000..88c7397cd --- /dev/null +++ b/figures/pipeline/enrollment_metrics_next.py @@ -0,0 +1,69 @@ +"""This module updates Figures enrollment data and calculates aggregate progress + +* It updates `EnrollmentData` and `LearnerCourseGradeMetrics` records +* It calculates course progress from EnrollmentData records + +This generates the same metrics as the original enrollment_metrics module, +but does it differently. 
+ +## How it differs from the previous version + +This module improves on the existing enrollment metrics collection module, +`figures.pipeline.enrollment_metrics` + +* It separates the activities to create and update Figures per-enrollment data + collected +* This separation lets Figures run metrics in distinct stages + * First, collect per-enrollment data + * Second, aggregate metrics based on collected data +* This provides a workflow that is easier to resume if interrupted +* This provides a workflow that is simpler to debug +* This simplifies and speeds up aggregate progress metrics, collapsing complex + code into a single Django queryset aggregation +* This update lays groundwork for further metrics improvements and enhancements + such as metrics on subsets of learners in a course or progress of subsets of + learners across courses +* It does not make your whites whiter or your colors more bold + +# Developer Notes + +This module provides `update_enrollment_data_for_course` and `calculate_course_progress_percent` + +""" +from django.db.models import Avg +from figures.course import Course +from figures.helpers import utc_yesterday +from figures.models import EnrollmentData +from figures.sites import UnlinkedCourseError + + +def update_enrollment_data_for_course(course_id): + """Updates Figures per-enrollment data for enrollments in the course + Checks for and creates new `LearnerCourseGradeMetrics` records and updates + `EnrollmentData` records + + Returns a list of the results returned by `EnrollmentData.objects.update_metrics` + """ + date_for = utc_yesterday() + the_course = Course(course_id) + if not the_course.site: + raise UnlinkedCourseError('No site found for course "{}"'.format(course_id)) + + # Any updated student module records? 
if so, then get the unique enrollments + # for each enrollment, check if LGCM is out of date or up to date + active_enrollments = the_course.enrollments_active_on_date(date_for) + return [EnrollmentData.objects.update_metrics(the_course.site, ce) + for ce in active_enrollments] + + +def calculate_course_progress_percent(course_id): + """Return average progress percentage for all enrollments in the course + """ + results = EnrollmentData.objects.filter(course_id=str(course_id)).aggregate( + average_progress=Avg('progress_percent')) + + # This is a bit of a hack. When we overhaul progress data, we should really + # have None for progress if there's no data. But check how SQL AVG performs + if results['average_progress'] is None: + results['average_progress'] = 0.0 + return results diff --git a/tests/pipeline/test_enrollment_metrics_next.py b/tests/pipeline/test_enrollment_metrics_next.py new file mode 100644 index 000000000..8e5bee9b5 --- /dev/null +++ b/tests/pipeline/test_enrollment_metrics_next.py @@ -0,0 +1,109 @@ +"""Test next iteration to collect enrollment metrics + +These tests exercise the module, `figures.pipeline.enrollment_metrics_next`. + +See the module docstring for details. +""" +import pytest +from mock import patch + +from figures.compat import CourseEnrollment +from figures.pipeline.enrollment_metrics_next import ( + update_enrollment_data_for_course, + calculate_course_progress_percent, +) +from figures.sites import UnlinkedCourseError + +from tests.factories import ( + CourseEnrollmentFactory, + CourseOverviewFactory, + EnrollmentDataFactory, +) + + +@pytest.mark.django_db +class TestUpdateMetrics(object): + """Tests `update_enrollment_data_for_course` + + Since `figures.models.EnrollmentDataManager.update_metrics` is tested in + `tests/models/test_enrollment_data_update_metrics.py`, we can mock this + method in our tests here. 
+ """ + + @pytest.fixture(autouse=True) + def setup(self, db, settings): + self.course_overview = CourseOverviewFactory() + + def test_course_has_no_enrollments(self): + """We have a new course with no enrollments + """ + result = update_enrollment_data_for_course(self.course_overview.id) + assert result == [] + + def test_course_has_enrollments_but_no_active_for_yesterday(self): + """We have enrollments, but none were active yesterday + """ + [CourseEnrollmentFactory(course_id=self.course_overview.id) + for _ in range(2)] + result = update_enrollment_data_for_course(self.course_overview.id) + assert result == [] + + def test_course_has_active_enrollments_for_yesterday(self): + """We have enrollments who were active yesterday + """ + expected_ce = [CourseEnrollmentFactory(course_id=self.course_overview.id) + for _ in range(2)] + ce = CourseEnrollment.objects.filter(course_id=self.course_overview.id) + + def mock_update_metrics(site, ce): + return ce + + with patch('figures.pipeline.enrollment_metrics_next.Course') as course_class: + with patch('figures.pipeline.enrollment_metrics_next.EnrollmentData.objects') as edm: + course_class.return_value.enrollments_active_on_date.return_value = ce + edm.update_metrics = mock_update_metrics + result = update_enrollment_data_for_course(self.course_overview.id) + assert set(result) == set(expected_ce) + + def test_course_is_unlinked(self): + """Function should raise `UnlinkedCourseError` if there's not a site match + + For Tahoe's multisite implementation, this can happen if the course's + organization is not linked to a site + For standalone sites, this should never happen + + To learn more, see the `figures.sites.get_site_for_course` function. 
+ """ + with patch('figures.pipeline.enrollment_metrics_next.Course') as course_class: + course_class.return_value.site = None + with pytest.raises(UnlinkedCourseError) as excinfo: + update_enrollment_data_for_course(self.course_overview.id) + expected_msg = 'No site found for course "{}"'.format(str(self.course_overview.id)) + assert str(excinfo.value) == expected_msg + + +@pytest.mark.django_db +class TestCalculateProgress(object): + """Tests `calculate_course_progress_percent` + """ + @pytest.fixture(autouse=True) + def setup(self, db, settings): + self.course_overview = CourseOverviewFactory() + + def test_calc_course_progress_percent_empty(self): + """The course doesn't have any EnrollmentData records + """ + results = calculate_course_progress_percent(self.course_overview.id) + assert results['average_progress'] == pytest.approx(0.0) + + def test_calc_course_progress_percent(self): + """The course has EnrollmentData records + """ + some_percentages = [0.0, 25.0, 50.0] + expected_average = sum(some_percentages)/len(some_percentages) + [ + EnrollmentDataFactory(course_id=str(self.course_overview.id), progress_percent=pp) + for pp in some_percentages + ] + results = calculate_course_progress_percent(self.course_overview.id) + assert results['average_progress'] == pytest.approx(expected_average)