From 2ad0f40e66ada4f145236456ca98ee1394c79445 Mon Sep 17 00:00:00 2001
From: John Baldwin
Date: Tue, 12 May 2020 20:13:29 -0400
Subject: [PATCH 1/4] New code: figures.pipeline.enrollment_metrics

This module processes data to provide course enrollment metrics. A course
enrollment is a unique learner/course pair. See the source code in this
commit for documentation.
---
 figures/pipeline/enrollment_metrics.py    | 272 +++++++++++++++++++
 tests/pipeline/test_enrollment_metrics.py | 312 ++++++++++++++++++++++
 2 files changed, 584 insertions(+)
 create mode 100644 figures/pipeline/enrollment_metrics.py
 create mode 100644 tests/pipeline/test_enrollment_metrics.py

diff --git a/figures/pipeline/enrollment_metrics.py b/figures/pipeline/enrollment_metrics.py
new file mode 100644
index 00000000..1e2479cf
--- /dev/null
+++ b/figures/pipeline/enrollment_metrics.py
@@ -0,0 +1,272 @@
+"""This module collects metrics for a course enrollment (learner/course pair)
+
+# Overview
+
+This is new code for Figures 0.3.10. Its purpose is to address pipeline
+performance by reducing calls to `CourseGradeFactory().read`.
+
+Calls are reduced by checking if the learner has a `StudentModule` record
+modified after the most recent `LearnerCourseGradeMetrics` record `date_for`
+field.
+
+# Logic
+
+```
+for each learner+course
+    get newest lcgm record
+    get newest sm record
+
+    if not lcgm and not sm
+        # Learner has not started course
+        skip collection for this learner+course
+    else if lcgm and sm
+        # we have saved history so we can check to update
+        if lcgm.modified is older than sm.modified:
+            collect new course data
+            return new metrics object created
+    else if not lcgm and sm
+        # Learner started course after last collection action
+        collect new course data
+        return new metrics object created
+    else  # lcgm and not sm
+        # We had collected data previously, but the upstream went away
+        # This could happen with data cleansing, like GDPR requests
+        # Likely other causes, too
+        log condition to figures PipelineError log
+        return None
+```
+
+# NOTE: Change 'lcgm' in this file to either 'enrollment' or 'lp' when an
+  abbreviation is acceptable. This is to get this module forward ready for
+  reworking `LearnerCourseGradeMetrics`
+
+"""
+
+from datetime import datetime
+from decimal import Decimal
+
+from django.utils.timezone import utc
+
+from figures.metrics import LearnerCourseGrades
+from figures.models import LearnerCourseGradeMetrics, PipelineError
+from figures.pipeline.logger import log_error
+from figures.sites import (
+    get_site_for_course,
+    course_enrollments_for_course,
+    student_modules_for_course_enrollment,
+    UnlinkedCourseError,
+    )
+
+
+def bulk_calculate_course_progress_data(course_id, date_for=None):
+    """Calculates the average progress for a set of course enrollments
+
+    How it works
+    1. collects progress percent for each course enrollment
+       1.1. If an up-to-date enrollment metrics record already exists, use that
+    2. calculates and returns the average of these enrollments
+
+    TODO: Update to filter on active users
+
+    Questions:
+    - What makes a learner an active learner?
+    """
+    progress_percentages = []
+
+    if not date_for:
+        date_for = datetime.utcnow().replace(tzinfo=utc).date()
+
+    for ce in course_enrollments_for_course(course_id):
+        metrics = collect_metrics_for_enrollment(course_enrollment=ce,
+                                                 date_for=date_for)
+        if metrics:
+            progress_percentages.append(metrics.progress_percent)
+        else:
+            # Log this for troubleshooting
+            error_data = dict(
+                msg=('Unable to create or retrieve enrollment metrics ' +
+                     'for user {} and course {}'.format(ce.user.username,
+                                                        str(ce.course_id)))
+            )
+            log_error(
+                error_data=error_data,
+                error_type=PipelineError.COURSE_DATA,
+                user=ce.user,
+                course_id=str(course_id)
+            )

+    return dict(
+        average_progress=calculate_average_progress(progress_percentages),
+    )
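+
+
+# Illustrative use of the returned dict (the course id value below is
+# hypothetical):
+#
+#   data = bulk_calculate_course_progress_data('course-v1:Org+Course+Run')
+#   data['average_progress']  # a float from 0.0 to 1.0, e.g. 0.5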
+
+
+def calculate_average_progress(progress_percentages):
+    """Calculates average progress from a list of values
+
+    TODO: How do we want to handle malformed data?
+    """
+    if progress_percentages:
+        average_progress = float(sum(progress_percentages)) / float(len(progress_percentages))
+        average_progress = float(Decimal(average_progress).quantize(Decimal('.00')))
+    else:
+        average_progress = 0.0
+    return average_progress
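+
+# A sketch of the rounding behavior (the quantize call uses the default
+# Decimal rounding, ROUND_HALF_EVEN):
+#
+#   calculate_average_progress([0.25, 0.5, 0.5])  # -> 0.42
+#   calculate_average_progress([])                # -> 0.0
+#   calculate_average_progress(None)              # -> 0.0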
+
+
+def collect_metrics_for_enrollment(course_enrollment, date_for=None, **_kwargs):
+    """Collect metrics for an enrollment (learner+course)
+
+    This function performs course enrollment metrics collection for the given
+    unique enrollment identified by the learner and course. It is initial code
+    in refactoring the pipeline for learner progress.
+
+    Important models here are:
+    * StudentModule (SM)
+    * LearnerCourseGradeMetrics (LCGM)
+    * CourseEnrollment (CE)
+
+    # Workflow
+
+    * Get date of most recent StudentModule record, SM, for the enrollment
+    * Get date of most recent LearnerCourseGradeMetrics record, LCGM, for the
+      enrollment
+    * if the LCGM does not exist or SM.modified is more recent than
+      LCGM.modified then
+        * collect course_data via `CourseGradeFactory().read(...)`
+        * create a new LearnerCourseGradeMetrics record with the updated data
+        * retain a reference to the new record
+    * else return the existing LearnerCourseGradeMetrics record if it exists
+    * else return None if there is no existing LearnerCourseGradeMetrics record
+    """
+    if not date_for:
+        date_for = datetime.utcnow().replace(tzinfo=utc).date()
+    site = get_site_for_course(course_enrollment.course_id)
+    if not site:
+        raise UnlinkedCourseError('No site found for course "{}"'.format(
+            course_enrollment.course_id))
+
+    most_recent_sm = student_modules_for_course_enrollment(
+        course_enrollment).order_by('modified').last()
+
+    most_recent_lcgm = LearnerCourseGradeMetrics.objects.filter(
+        user=course_enrollment.user,
+        course_id=str(course_enrollment.course_id)).order_by('date_for').last()
+
+    if _enrollment_metrics_needs_update(most_recent_lcgm, most_recent_sm):
+        progress_data = _collect_progress_data(most_recent_sm)
+        metrics = _add_enrollment_metrics_record(site=site,
+                                                 course_enrollment=course_enrollment,
+                                                 progress_data=progress_data,
+                                                 date_for=date_for)
+    else:
+        metrics = most_recent_lcgm
+    return metrics
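+
+# Illustrative caller-side handling (a sketch, not prescribed usage): in
+# multisite mode a course without a site mapping raises UnlinkedCourseError,
+# so a caller iterating many courses may want to catch it rather than fail
+# the whole run:
+#
+#   try:
+#       metrics = collect_metrics_for_enrollment(course_enrollment=ce)
+#   except UnlinkedCourseError:
+#       pass  # skip or log the unlinked course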
+
+
+def _enrollment_metrics_needs_update(most_recent_lcgm, most_recent_sm):
+    """Returns True if we need to update our learner progress, False otherwise
+
+    See the # Logic section in this module's docstring
+
+    If we need to check that the records match the same user and course, we
+    can do something like:
+
+    ```
+    class RecordMismatchError(Exception):
+        pass
+
+
+    def rec_matches_user_and_course(lcgm, sm):
+        return lcgm.user == sm.student and lcgm.course_id == sm.course_id
+    ```
+
+    And in this function add the check when we have both records:
+
+    ```
+    if not rec_matches_user_and_course(most_recent_lcgm, most_recent_sm):
+        rec_msg = '{}(user_id={}, course_id="{}"'
+        msg1 = rec_msg.format('lcgm',
+                              most_recent_lcgm.user.id,
+                              most_recent_lcgm.course_id)
+        msg2 = rec_msg.format('sm',
+                              most_recent_sm.student.id,
+                              most_recent_sm.course_id)
+        raise RecordMismatchError(msg1 + ':' + msg2)
+    ```
+    """
+    # First assume we need to update the enrollment metrics record
+    needs_update = True
+    if not most_recent_lcgm and not most_recent_sm:
+        # Learner has not started coursework
+        needs_update = False
+    elif most_recent_lcgm and most_recent_sm:
+        # Learner has past course activity
+        needs_update = most_recent_lcgm.date_for < most_recent_sm.modified.date()
+    elif not most_recent_lcgm and most_recent_sm:
+        # No LCGM recs, so the learner started the course after the last
+        # collection. This could also happen:
+        # * if this is the first time collection is run for the learner+course
+        # * if an unhandled error prevented the LCGM from saving
+        # * if the learner's LCGM recs were deleted
+        needs_update = True
+    elif most_recent_lcgm and not most_recent_sm:
+        # This shouldn't happen. We log this state as an error,
+
+        # using 'COURSE_DATA' for the pipeline error type. Although we should
+        # revisit logging and error tracking in Figures to define a clear
+        # approach that has clear and intuitive contexts for the logging event
+        #
+        # We need to decide:
+        #
+        # 1. Is this always an error state? Could this condition happen
+        #    naturally?
+        # 2. Which error type should be created and what is the most
+        #    applicable context?
+        #    Candidates:
+        #    - Enrollment (learner+course) context
+        #    - Data state context - Why would we have an LCGM without an SM?
+        #
+        # So we hold off updating PipelineError error choices initially until
+        # we can think carefully about how we tag pipeline errors
+        #
+        error_data = dict(
+            msg='LearnerCourseGradeMetrics record exists without StudentModule',
+        )
+        log_error(
+            error_data=error_data,
+            error_type=PipelineError.COURSE_DATA,
+            user=most_recent_lcgm.user,
+            course_id=str(most_recent_lcgm.course_id)
+        )
+        needs_update = False
+    return needs_update
+
+
+def _add_enrollment_metrics_record(site, course_enrollment, progress_data, date_for):
+    """Convenience function to save progress metrics to Figures
+    """
+    return LearnerCourseGradeMetrics.objects.create(
+        site=site,
+        user=course_enrollment.user,
+        course_id=str(course_enrollment.course_id),
+        date_for=date_for,
+        points_possible=progress_data['points_possible'],
+        points_earned=progress_data['points_earned'],
+        sections_worked=progress_data['sections_worked'],
+        sections_possible=progress_data['count']
+    )
+
+
+def _collect_progress_data(student_module):
+    """Get new progress data for the learner/course
+
+    Uses `figures.metrics.LearnerCourseGrades` to retrieve progress data via
+    `CourseGradeFactory().read(...)` and calculate the progress percentage
+    """
+    lcg = LearnerCourseGrades(user_id=student_module.student_id,
+                              course_id=student_module.course_id)
+    course_progress_details = lcg.progress()
+    return course_progress_details
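+
+
+# For reference, a sketch of the dict shape `_collect_progress_data` returns
+# and `_add_enrollment_metrics_record` consumes (values here are made up):
+#
+#   {'points_possible': 100, 'points_earned': 25,
+#    'sections_worked': 4, 'count': 5}  # 'count' is sections possible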
diff --git a/tests/pipeline/test_enrollment_metrics.py b/tests/pipeline/test_enrollment_metrics.py
new file mode 100644
index 00000000..199ab11c
--- /dev/null
+++ b/tests/pipeline/test_enrollment_metrics.py
@@ -0,0 +1,312 @@
+
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+import mock
+import pytest
+
+from django.utils.timezone import utc
+
+from courseware.models import StudentModule
+
+from figures.models import LearnerCourseGradeMetrics
+
+from figures.pipeline.enrollment_metrics import (
+    calculate_average_progress,
+    bulk_calculate_course_progress_data,
+    collect_metrics_for_enrollment,
+    _enrollment_metrics_needs_update,
+    _add_enrollment_metrics_record,
+    _collect_progress_data,
+)
+
+from tests.factories import (
+    CourseEnrollmentFactory,
+    CourseOverviewFactory,
+    LearnerCourseGradeMetricsFactory,
+    SiteFactory,
+    StudentModuleFactory,
+)
+
+
+@pytest.mark.django_db
+def test_bulk_calculate_course_progress_data_happy_path(db, monkeypatch):
+    """Tests the 'bulk_calculate_course_progress_data' function
+
+    The function under test iterates over a set of course enrollment records,
+    so we create a couple of records to iterate over and mock the collect
+    function
+    """
+    course_overview = CourseOverviewFactory()
+    course_enrollments = [CourseEnrollmentFactory(
+        course_id=course_overview.id) for i in range(2)]
+    mapping = {ce.course_id: LearnerCourseGradeMetricsFactory(
+        course_id=str(ce.course_id),
+        user=ce.user,
+        sections_worked=1,
+        sections_possible=2) for ce in course_enrollments
+    }
+
+    def mock_metrics(course_enrollment, **_kwargs):
+        return mapping[course_enrollment.course_id]
+
+    monkeypatch.setattr('figures.pipeline.enrollment_metrics.collect_metrics_for_enrollment',
+                        mock_metrics)
+    data = bulk_calculate_course_progress_data(course_overview.id)
+    assert data['average_progress'] == 0.5
+
+
+@pytest.mark.parametrize('progress_percentages, expected_result', [
+    (None, 0.0),
+    ([], 0.0),
+    ([0, 0.25, 0.5, 0.75, 1.0], 0.5),
+])
+def test_calculate_average_progress_happy_path(progress_percentages, expected_result):
+    """Tests the 'calculate_average_progress' function
+
+    The function under test is a simple average calculation with formatting
+    """
+    average_progress = calculate_average_progress(progress_percentages)
+    assert average_progress == expected_result
+
+
+@pytest.mark.django_db
+class TestCollectMetricsForEnrollment(object):
+    """Tests the 'collect_metrics_for_enrollment' function
+
+    The function under test does the following:
+
+    Given a course enrollment,
+    1. Get the most recent StudentModule (SM) record, if it exists
+    2. Get the most recent LearnerCourseGradeMetrics (LCGM) record, if it
+       exists
+    3. Decide if we need to add a new LCGM record for the enrollment or use
+       the most recent one, if it exists
+    4. Collect new data from the platform if we need to update the
+       enrollment's metrics
+    5. Add a new LCGM record if we are collecting new metrics data
+    6. Return the new LCGM record if created, else return the existing record
+       found
+    """
+    @pytest.fixture(autouse=True)
+    def setup(self, db):
+        self.date_1 = datetime(2020, 2, 2, tzinfo=utc)
+        self.date_2 = self.date_1 + relativedelta(months=1)  # future of date_1
+        self.course_enrollment = CourseEnrollmentFactory()
+        self.student_modules = [
+            StudentModuleFactory(student=self.course_enrollment.user,
+                                 course_id=self.course_enrollment.course_id,
+                                 modified=self.date_1),
+            StudentModuleFactory(student=self.course_enrollment.user,
+                                 course_id=self.course_enrollment.course_id,
+                                 modified=self.date_2)]
+        self.progress_data = dict(points_possible=100,
+                                  points_earned=25,
+                                  sections_worked=4,
+                                  count=5)
+
+    def check_response_metrics(self, obj, data):
+        """Helper method checks the LCGM object's data
+        """
+        assert obj.points_possible == data['points_possible']
+        assert obj.points_earned == data['points_earned']
+        assert obj.sections_worked == data['sections_worked']
+        assert obj.sections_possible == data['count']
+
+    def create_lcgm(self, date_for):
+        """Helper to create an LCGM record with the given `date_for`
+        """
+        return LearnerCourseGradeMetricsFactory(
+            course_id=str(self.course_enrollment.course_id),
+            user=self.course_enrollment.user,
+            date_for=date_for,
+            points_possible=self.progress_data['points_possible'],
+            points_earned=self.progress_data['points_earned'],
+            sections_worked=self.progress_data['sections_worked'],
+            sections_possible=self.progress_data['count']
+        )
+
+    def test_needs_update_no_lcgm(self, monkeypatch):
+        """We have an SM record, but no LCGM record
+
+        The function under test should return a new LCGM record
+        """
+        assert not LearnerCourseGradeMetrics.objects.count()
+        monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
+                            lambda val: SiteFactory())
+        monkeypatch.setattr('figures.pipeline.enrollment_metrics._collect_progress_data',
+                            lambda val: self.progress_data)
+        metrics = collect_metrics_for_enrollment(
+            course_enrollment=self.course_enrollment,
+        )
+
+        self.check_response_metrics(metrics, self.progress_data)
+        assert LearnerCourseGradeMetrics.objects.count() == 1
+
+    def test_needs_update_has_lcgm(self, monkeypatch):
+        """We have an LCGM record, but it is not up to date with the SM
+
+        The function under test should return a new LCGM
+
+        TODO: Add a test where we pass `date_for` to the function under test
+        """
+        lcgm = self.create_lcgm(date_for=self.date_1)
+        monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
+                            lambda val: SiteFactory())
+        monkeypatch.setattr('figures.pipeline.enrollment_metrics._collect_progress_data',
+                            lambda val: self.progress_data)
+        metrics = collect_metrics_for_enrollment(
+            course_enrollment=self.course_enrollment,
+        )
+
+        self.check_response_metrics(metrics, self.progress_data)
+        assert metrics != lcgm
+
+        # This works because we pick dates in the past. Better is to fix this
+        # by either using freezegun or monkeypatching
+        # `_add_enrollment_metrics_record`
+        assert metrics.date_for >= self.date_2.date()
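+
+    # A sketch of the freezegun approach mentioned above (assumes the
+    # `freezegun` package is available to the tests):
+    #
+    #   from freezegun import freeze_time
+    #
+    #   with freeze_time(self.date_2 + relativedelta(days=1)):
+    #       metrics = collect_metrics_for_enrollment(
+    #           course_enrollment=self.course_enrollment)
+    #   assert metrics.date_for == (self.date_2 + relativedelta(days=1)).date()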
+
+    def test_does_not_need_update(self, monkeypatch):
+        """We have an LCGM record that is up to date with the SM
+
+        The function under test should return the LCGM
+
+        TODO: Add a test where we pass `date_for` to the function under test
+        """
+        lcgm = self.create_lcgm(date_for=self.date_2.date())
+        monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
+                            lambda val: SiteFactory())
+        monkeypatch.setattr('figures.pipeline.enrollment_metrics._collect_progress_data',
+                            lambda val: self.progress_data)
+        metrics = collect_metrics_for_enrollment(
+            course_enrollment=self.course_enrollment,
+        )
+
+        self.check_response_metrics(metrics, self.progress_data)
+        assert metrics == lcgm
+        assert metrics.date_for == self.date_2.date()
+
+    def test_no_update_no_lcgm_no_sm(self, monkeypatch):
+        """We have neither an LCGM nor an SM record
+
+        The function under test should return None
+        """
+        monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
+                            lambda val: SiteFactory())
+        monkeypatch.setattr('figures.pipeline.enrollment_metrics._collect_progress_data',
+                            lambda val: self.progress_data)
+        # Create a course enrollment for which we won't have student module records
+        metrics = collect_metrics_for_enrollment(
+            course_enrollment=CourseEnrollmentFactory(),
+        )
+        assert not metrics
+
+    def test_no_update_has_lcgm_no_sm(self, monkeypatch):
+        """We have an LCGM but not an SM record
+
+        The function under test should return the existing LCGM
+        """
+        monkeypatch.setattr('figures.pipeline.enrollment_metrics.get_site_for_course',
+                            lambda val: SiteFactory())
+        monkeypatch.setattr('figures.pipeline.enrollment_metrics._collect_progress_data',
+                            lambda val: self.progress_data)
+        # Create a course enrollment for which we won't have student module records
+        ce = CourseEnrollmentFactory()
+        lcgm = LearnerCourseGradeMetricsFactory(course_id=ce.course_id, user=ce.user)
+        metrics = collect_metrics_for_enrollment(course_enrollment=ce)
+        assert metrics == lcgm
+        assert not StudentModule.objects.filter(course_id=ce.course_id)
+
+
+@pytest.mark.django_db
+class TestEnrollmentMetricsUpdateCheck(object):
+    """Tests that the `_enrollment_metrics_needs_update` function performs as
+    expected
+
+    Quick test of the different states and expected results. Should be
+    reworked
+
+    We should be able to collapse these tests into one using parametrize.
+    However, we don't want conditionals, and some tests call for testing dates
+    while others test existence. So for now, a little less DRY with individual
+    tests for each conditional in the function
+    """
+    @pytest.fixture(autouse=True)
+    def setup(self, db):
+        self.student_module = StudentModuleFactory()
+
+    def test_existence_no_lcgm_no_sm_is_false(self):
+        """Test that if there is neither a LearnerCourseGradeMetrics record
+        nor a StudentModule record, the function returns False
+        """
+        assert not _enrollment_metrics_needs_update(None, None)
+
+    def test_existence_no_lcgm_yes_sm_is_true(self):
+        assert _enrollment_metrics_needs_update(None, self.student_module)
+
+    def test_existence_yes_lcgm_no_sm_is_false(self):
+        path = 'figures.pipeline.enrollment_metrics.log_error'
+        with mock.patch(path) as mock_log_error:
+            assert not _enrollment_metrics_needs_update(LearnerCourseGradeMetricsFactory(), None)
+            mock_log_error.assert_called()
+
+    def test_dates_lcgm_is_current_is_false(self):
+        lcgm = LearnerCourseGradeMetricsFactory(
+            date_for=self.student_module.modified.date())
+        assert not _enrollment_metrics_needs_update(lcgm, self.student_module)
+
+    def test_dates_lcgm_is_future_is_false(self):
+        """
+        Note: This should probably be an error state
+        """
+        lcgm = LearnerCourseGradeMetricsFactory(
+            date_for=self.student_module.modified.date() + relativedelta(days=1))
+        assert not _enrollment_metrics_needs_update(lcgm, self.student_module)
+
+    def test_dates_lcgm_is_past_is_true(self):
+        lcgm = LearnerCourseGradeMetricsFactory(
+            date_for=self.student_module.modified.date() - relativedelta(days=1))
+        assert _enrollment_metrics_needs_update(lcgm, self.student_module)
+
+
+@pytest.mark.django_db
+class TestAddEnrollmentMetricsRecord(object):
+    """Tests the `_add_enrollment_metrics_record` function
+    """
+    @pytest.fixture(autouse=True)
+    def setup(self, db):
+        self.site = SiteFactory()
+        self.course_enrollment = CourseEnrollmentFactory()
+        self.date_for = datetime.utcnow().replace(tzinfo=utc).date()
+
+    def test_happy_path(self):
+        """Basic creation check
+        """
+        progress_data = dict(
+            points_possible=10,
+            points_earned=5,
+            count=22,  # sections possible
+            sections_worked=11
+        )
+        obj = _add_enrollment_metrics_record(site=self.site,
+                                             course_enrollment=self.course_enrollment,
+                                             progress_data=progress_data,
+                                             date_for=self.date_for)
+        assert obj
+
+
+@pytest.mark.django_db
+def test_collect_progress_data(db, monkeypatch):
+    """Tests the `_collect_progress_data` function
+
+    The function under test instantiates a LearnerCourseGrades object and
+    calls its `progress` method
+
+    We can use the default values created from the mock `LearnerCourseGrades`
+    class
+    """
+    student_module = StudentModuleFactory()
+    progress_data = _collect_progress_data(student_module)
+
+    # Simply checking the keys
+    assert set(progress_data.keys()) == set(['count',
+                                             'sections_worked',
+                                             'points_possible',
+                                             'points_earned'])

From 57f93c5db2f0df0ce8628524f0b2f9f88579c4c8 Mon Sep 17 00:00:00 2001
From: John Baldwin
Date: Tue, 12 May 2020 20:46:36 -0400
Subject: [PATCH 2/4] Added new functions to `figures.sites` to support query
 abstractions

- Added `course_enrollments_for_course`
- Added `student_modules_for_course_enrollment`

This supports the code restructuring for the course progress pipeline
performance improvement.

The purpose of adding these functions is to move forward with using
`figures.sites` as the source of platform data. To note, there are a number
of places in the code that do not follow this yet. As we rework Figures, we
will refactor code to use `figures.sites` instead of directly querying the
platform models.
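For example (an illustrative sketch, not code in this commit), pipeline code
can now fetch the enrollments for a course and the activity records for each
enrollment without querying the platform models directly:

```
from figures.sites import (
    course_enrollments_for_course,
    student_modules_for_course_enrollment,
)

for ce in course_enrollments_for_course(course_id):
    student_modules = student_modules_for_course_enrollment(ce=ce)
```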
Note the function name change from the existing functions. We are moving away
from using the `get_` prefix for the site functions.
---
 figures/sites.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/figures/sites.py b/figures/sites.py
index 75ff78a1..dc88a473 100644
--- a/figures/sites.py
+++ b/figures/sites.py
@@ -39,6 +39,22 @@ class CourseNotInSiteError(CrossSiteResourceError):
     pass
 
 
+class UnlinkedCourseError(Exception):
+    """Raise when we need to fail if we can't get the site for a course
+
+    This will happen in multisite mode if the course is not linked to the site.
+    In Tahoe Hawthorn, we use the Appsembler fork of `edx-organizations` to map
+    courses to sites. For community and standalone deployments, we don't expect
+    courses to map to sites, so we just return the app instance's default site.
+
+    * This is new to support the enrollment metrics rework (May 2020)
+    * We need to evaluate if we want to make `sites.get_site_for_course` raise
+      this automatically. But first we need to make sure we have test coverage
+      so that we don't introduce new bugs
+    """
+    pass
+
+
 def site_to_id(site):
     """
     Helper to cast site or site id to id
@@ -181,3 +197,19 @@ def get_student_modules_for_course_in_site(site, course_id):
 def get_student_modules_for_site(site):
     course_ids = get_course_keys_for_site(site)
     return StudentModule.objects.filter(course_id__in=course_ids)
+
+
+def course_enrollments_for_course(course_id):
+    """Return a queryset of all `CourseEnrollment` records for a course
+
+    Relies on the fact that course_ids are globally unique
+    """
+    return CourseEnrollment.objects.filter(course_id=as_course_key(course_id))
+
+
+def student_modules_for_course_enrollment(ce):
+    """Return a queryset of all `StudentModule` records for a `CourseEnrollment`
+
+    Relies on the fact that course_ids are globally unique
+    """
+    return StudentModule.objects.filter(student=ce.user, course_id=ce.course_id)

From 3d8218482e6df712817d390f1bfd09aaf903b09a Mon Sep 17 00:00:00 2001
From: John Baldwin
Date: Wed, 13 May 2020 00:36:54 -0400
Subject: [PATCH 3/4] Course daily metrics pipeline update to support
 enrollment metrics rework

- Updated the figures.pipeline.course_daily_metrics loader to use the new
  enrollment pipeline code
- Renamed `get_average_progress` to `get_average_progress_deprecated` and
  retained it in case it is needed to help debug the new enrollment metrics
- Updated tests to support these changes
---
 figures/pipeline/course_daily_metrics.py    | 17 ++++++++++----
 tests/pipeline/test_course_daily_metrics.py | 25 +++++++++++----------
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/figures/pipeline/course_daily_metrics.py b/figures/pipeline/course_daily_metrics.py
index 9bc4e2a0..d301b691 100644
--- a/figures/pipeline/course_daily_metrics.py
+++ b/figures/pipeline/course_daily_metrics.py
@@ -28,6 +28,7 @@
 from figures.models import CourseDailyMetrics, PipelineError
 from figures.pipeline.logger import log_error
 import figures.pipeline.loaders
+from figures.pipeline.enrollment_metrics import bulk_calculate_course_progress_data
 from figures.serializers import CourseIndexSerializer
 from figures.compat import GeneratedCertificate
 import figures.sites
@@ -81,7 +82,7 @@ def get_active_learner_ids_today(course_id, date_for):
     ).values_list('student__id', flat=True).distinct()
 
 
-def get_average_progress(course_id, date_for, course_enrollments):
+def get_average_progress_deprecated(course_id, date_for, course_enrollments):
     """Collects and aggregates raw course grades data
     """
     progress = []
@@ -225,6 +226,9 @@ def extract(self, course_id, date_for=None, **_kwargs):
             average_days_to_complete=data.get('average_days_to_complete, None'),
             num_learners_completed=data['num_learners_completed'],
         )
+        TODO: refactor this class
+        Add a lazy loading method to load course enrollments
+        - Create a method for each metric field
         """
 
         # Update args if not assigned
@@ -247,18 +251,23 @@ def extract(self, course_id, date_for=None, **_kwargs):
         # we can do a lambda for course_enrollments to get the count
         data['enrollment_count'] = course_enrollments.count()
+
         active_learner_ids_today = get_active_learner_ids_today(
             course_id, date_for,)
         if active_learner_ids_today:
             active_learners_today = active_learner_ids_today.count()
         else:
             active_learners_today = 0
+
         data['active_learners_today'] = active_learners_today
-        data['average_progress'] = get_average_progress(
-            course_id, date_for, course_enrollments,)
+
+        # Average progress
+        progress_data = bulk_calculate_course_progress_data(course_id=course_id,
+                                                            date_for=date_for)
+        data['average_progress'] = progress_data['average_progress']
+
         data['average_days_to_complete'] = get_average_days_to_complete(
             course_id, date_for,)
+
         data['num_learners_completed'] = get_num_learners_completed(
             course_id, date_for,)

diff --git a/tests/pipeline/test_course_daily_metrics.py b/tests/pipeline/test_course_daily_metrics.py
index d898c8f7..daa687d6 100644
--- a/tests/pipeline/test_course_daily_metrics.py
+++ b/tests/pipeline/test_course_daily_metrics.py
@@ -158,7 +158,7 @@ def test_get_active_learner_ids_today(self):
             course_id=self.course_overview.id, date_for=self.today)
         assert recs.count() == len(self.course_enrollments)
 
-    def test_get_average_progress(self):
+    def test_get_average_progress_deprecated(self):
         """
         [John] This test needs work. The function it is testing needs work
         too, for testability. We don't want to reproduce the function's behavior, we
@@ -167,7 +167,7 @@ def test_get_average_progress(self):
         """
         course_enrollments = CourseEnrollment.objects.filter(
             course_id=self.course_overview.id)
-        actual = pipeline_cdm.get_average_progress(
+        actual = pipeline_cdm.get_average_progress_deprecated(
             course_id=self.course_overview.id,
             date_for=self.today,
             course_enrollments=course_enrollments
         )
@@ -183,13 +183,13 @@ def test_get_average_progress(self):
         'figures.metrics.LearnerCourseGrades.course_progress',
         side_effect=PermissionDenied('mock-failure')
     )
-    def test_get_average_progress_error(self, mock_lcg):
+    def test_get_average_progress_deprecated_has_error(self, mock_lcg):
         assert PipelineError.objects.count() == 0
 
         course_enrollments = CourseEnrollment.objects.filter(
             course_id=self.course_overview.id)
-        results = pipeline_cdm.get_average_progress(
+        results = pipeline_cdm.get_average_progress_deprecated(
             course_id=self.course_overview.id,
             date_for=self.today,
             course_enrollments=course_enrollments
         )
@@ -236,8 +236,12 @@ def setup(self, db):
         self.course_enrollments = [CourseEnrollmentFactory() for i in range(1, 5)]
         self.student_module = StudentModuleFactory()
 
-    def test_extract(self):
+    def test_extract(self, monkeypatch):
         course_id = self.course_enrollments[0].course_id
+        monkeypatch.setattr(figures.pipeline.course_daily_metrics,
+                            'bulk_calculate_course_progress_data',
+                            lambda **_kwargs: dict(average_progress=0.5))
+
         results = pipeline_cdm.CourseDailyMetricsExtractor().extract(course_id)
         assert results
@@ -298,14 +302,11 @@ def test_load_existing(self):
 
     @pytest.mark.parametrize('average_progress', [-1.0, -0.01, 1.01])
     def test_load_invalid_data(self, monkeypatch, average_progress):
-        def mock_get_average_progress(course_id, date_for, course_enrollments):
-            return average_progress
-
         course_id = self.course_enrollments[0].course_id
-        monkeypatch.setattr(
-            figures.pipeline.course_daily_metrics,
-            'get_average_progress',
-            mock_get_average_progress)
+        monkeypatch.setattr(figures.pipeline.course_daily_metrics,
+                            'bulk_calculate_course_progress_data',
+                            lambda **_kwargs: dict(average_progress=average_progress))
+
         with pytest.raises(ValidationError) as execinfo:
             assert CourseDailyMetrics.objects.count() == 0
             cdm, created = pipeline_cdm.CourseDailyMetricsLoader(course_id).load()

From d40dc7807f88be6888dced2e25c54796ca6fb0e3 Mon Sep 17 00:00:00 2001
From: John Baldwin
Date: Sat, 16 May 2020 17:47:25 -0400
Subject: [PATCH 4/4] Addresses Pylint failure "Instance of 'list' has no
 'order_by' member (no-member)"

See: https://github.com/PyCQA/pylint-django/issues/165

Also updated documentation for enrollment metrics
---
 figures/pipeline/enrollment_metrics.py | 32 ++++++++++++++++----------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/figures/pipeline/enrollment_metrics.py b/figures/pipeline/enrollment_metrics.py
index 1e2479cf..2cb72a3b 100644
--- a/figures/pipeline/enrollment_metrics.py
+++ b/figures/pipeline/enrollment_metrics.py
@@ -13,22 +13,24 @@
 # Logic
 
 ```
-for each learner+course
+
+for each learner+course  # See bulk_calculate_course_progress_data
     get newest lcgm record
     get newest sm record
 
+    # Check if update needed, see _enrollment_metrics_needs_update
     if not lcgm and not sm
         # Learner has not started course
         skip collection for this learner+course
     else if lcgm and sm
         # we have saved history so we can check to update
-        if lcgm.modified is older than sm.modified:
-            collect new course data
+        if lcgm.modified is older than sm.modified
+            collect new course data  # See _collect_progress_data
             return new metrics object created
     else if not lcgm and sm
         # Learner started course after last collection action
-        collect new course data
+        collect new course data  # See _collect_progress_data
         return new metrics object created
     else  # lcgm and not sm
@@ -39,10 +41,9 @@
         return None
 ```
 
-# NOTE: Change 'lcgm' in this file to either 'enrollment' or 'lp' when an
-  abbreviation is acceptable. This is to get this module forward ready for
-  reworking `LearnerCourseGradeMetrics`
-
+# NOTE: We plan to change 'lcgm' in this file to either 'enrollment' or 'LP'
+  (for Learner Progress) when an abbreviation is acceptable. This is to get
+  this module forward ready for reworking `LearnerCourseGradeMetrics`
 """
 
 from datetime import datetime
@@ -148,12 +149,19 @@ def collect_metrics_for_enrollment(course_enrollment, date_for=None, **_kwargs):
         raise UnlinkedCourseError('No site found for course "{}"'.format(
             course_enrollment.course_id))
 
-    most_recent_sm = student_modules_for_course_enrollment(
-        course_enrollment).order_by('modified').last()
+    # The following are two different ways to avoid the dreaded error
+    # "Instance of 'list' has no 'order_by' member (no-member)"
+    # See: https://github.com/PyCQA/pylint-django/issues/165
+    student_modules = student_modules_for_course_enrollment(ce=course_enrollment)
+    if student_modules:
+        most_recent_sm = student_modules.latest('modified')
+    else:
+        most_recent_sm = None
 
-    most_recent_lcgm = LearnerCourseGradeMetrics.objects.filter(
+    lcgm = LearnerCourseGradeMetrics.objects.filter(
         user=course_enrollment.user,
-        course_id=str(course_enrollment.course_id)).order_by('date_for').last()
+        course_id=str(course_enrollment.course_id))
+    most_recent_lcgm = lcgm.order_by('date_for').last()  # pylint: disable=E1101
 
     if _enrollment_metrics_needs_update(most_recent_lcgm, most_recent_sm):
         progress_data = _collect_progress_data(most_recent_sm)