From c6693fe12d7873dd94017c438c5ac32cd427f953 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 24 Feb 2022 13:38:02 -0500 Subject: [PATCH] Enrollment model updates + tests + support code This commit contains changes to prepare for adding new daily metrics workflow to speed up daily job execution * Added new EnrollmentData model manager method to handle and encapsulate EnrollmentData/LCGM update logic given a `CourseEnrollment` record as parameter. Added unit tests for this new method. * Adds `collect_elapsed` field to EnrollmentData and LCGM to track how long it takes to collect the progress data for that enrollment. The big cost here is currently to `CourseGradeFactory()` * Added convenience method to StudentModuleFactory to create one form a `CourseEnrollment` record instead of having to pass in two variables (user and course_id) to construct a test StudentModule record to match an already constructed CourseEnrollment record --- ...0016_add_collect_elapsed_to_ed_and_lcgm.py | 24 +++ figures/models.py | 82 ++++++- tests/factories.py | 14 ++ .../test_enrollment_data_update_metrics.py | 201 ++++++++++++++++++ 4 files changed, 316 insertions(+), 5 deletions(-) create mode 100644 figures/migrations/0016_add_collect_elapsed_to_ed_and_lcgm.py create mode 100644 tests/models/test_enrollment_data_update_metrics.py diff --git a/figures/migrations/0016_add_collect_elapsed_to_ed_and_lcgm.py b/figures/migrations/0016_add_collect_elapsed_to_ed_and_lcgm.py new file mode 100644 index 00000000..26de21a2 --- /dev/null +++ b/figures/migrations/0016_add_collect_elapsed_to_ed_and_lcgm.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('figures', '0015_add_enrollment_data_model'), + ] + + operations = [ + migrations.AddField( + model_name='enrollmentdata', + name='collect_elapsed', + field=models.FloatField(null=True), + ), + migrations.AddField( + model_name='learnercoursegrademetrics', + name='collect_elapsed', + field=models.FloatField(null=True), + ), + ] diff --git a/figures/models.py b/figures/models.py index 8ae1482e..81fe1175 100644 --- a/figures/models.py +++ b/figures/models.py @@ -5,6 +5,7 @@ from __future__ import absolute_import from datetime import date +import timeit from django.conf import settings from django.contrib.sites.models import Site from django.core.validators import MaxValueValidator, MinValueValidator @@ -17,7 +18,7 @@ from model_utils.models import TimeStampedModel from figures.compat import CourseEnrollment -from figures.helpers import as_course_key +from figures.helpers import as_course_key, utc_yesterday from figures.progress import EnrollmentProgress @@ -203,7 +204,7 @@ class EnrollmentDataManager(models.Manager): EnrollmentData instances. """ - def set_enrollment_data(self, site, user, course_id, course_enrollment=False): + def set_enrollment_data(self, site, user, course_id, course_enrollment=None): """ This is an expensive call as it needs to call CourseGradeFactory if there is not already a LearnerCourseGradeMetrics record for the learner @@ -259,6 +260,71 @@ def set_enrollment_data(self, site, user, course_id, course_enrollment=False): defaults=defaults) return obj, created + def update_metrics(self, site, course_enrollment, force_update=False): + """ + This is an expensive call as it needs to call CourseGradeFactory if + there is not already a LearnerCourseGradeMetrics record for the learner + + """ + date_for = utc_yesterday() + + # check if we already have a record for the date for EnrollmentData + + # Alternately, we could use a try/except on a 'get' call, however, this + # would be much slower for a bunch of new enrollments + + # Should only be one even if we don't include the site in the query + # because course_id should be globally unique + # If course id is ever NOT globally unique, then we need to add site + # to the query + ed_recs = EnrollmentData.objects.filter( + user_id=course_enrollment.user_id, + course_id=str(course_enrollment.course_id)) + + if not ed_recs or ed_recs[0].date_for < date_for or force_update: + # We do the update + start_time = timeit.default_timer() + # get the progress data + ep = EnrollmentProgress(user=course_enrollment.user, + course_id=str(course_enrollment.course_id)) + defaults = dict( + date_for=date_for, + is_completed=ep.is_completed(), + progress_percent=ep.progress_percent(), + points_possible=ep.progress.get('points_possible', 0), + points_earned=ep.progress.get('points_earned', 0), + sections_possible=ep.progress.get('sections_possible', 0), + sections_worked=ep.progress.get('sections_worked', 0), + is_enrolled=course_enrollment.is_active, + date_enrolled=course_enrollment.created, + ) + elapsed = timeit.default_timer() - start_time + defaults['collect_elapsed'] = elapsed + + ed_rec, created = self.update_or_create( + site=site, + user=course_enrollment.user, + course_id=str(course_enrollment.course_id), + defaults=defaults) + # create a new LCGM record + # if it already exists for the day + LearnerCourseGradeMetrics.objects.update_or_create( + site=ed_rec.site, + user=ed_rec.user, + course_id=ed_rec.course_id, + date_for=date_for, + defaults=dict( + points_possible=ed_rec.points_possible, + points_earned=ed_rec.points_earned, + sections_worked=ed_rec.sections_worked, + sections_possible=ed_rec.sections_possible, + collect_elapsed=elapsed + ) + ) + return ed_rec, created + else: + return ed_recs[0], False + @python_2_unicode_compatible class EnrollmentData(TimeStampedModel): @@ -301,6 +367,9 @@ class EnrollmentData(TimeStampedModel): sections_worked = models.IntegerField() sections_possible = models.IntegerField() + # seconds it took to collect progress data + collect_elapsed = models.FloatField(null=True) + objects = EnrollmentDataManager() class Meta: @@ -324,7 +393,7 @@ def progress_details(self): class LearnerCourseGradeMetricsManager(models.Manager): - """Custom model manager for LearnerCourseGrades model + """Custom model manager for LearnerCourseGradeMetrics model """ def latest_lcgm(self, user, course_id): """Gets the most recent record for the given user and course @@ -414,9 +483,9 @@ class LearnerCourseGradeMetrics(TimeStampedModel): Purpose is primarliy to improve performance for the front end. In addition, data collected can be used for course progress over time - We're capturing data from figures.metrics.LearnerCourseGrades + We're capturing data from figures.progress.EnrollmentProgress - Note: We're probably going to move ``LearnerCourseGrades`` to figures.pipeline + Note: We're probably going to move ``EnrollmentProgress`` to figures.pipeline since that class will only be needed by the pipeline Even though this is for a course enrollment, we're mapping to the user @@ -452,6 +521,9 @@ class LearnerCourseGradeMetrics(TimeStampedModel): sections_worked = models.IntegerField() sections_possible = models.IntegerField() + # seconds it took to collect progress data + collect_elapsed = models.FloatField(null=True) + objects = LearnerCourseGradeMetricsManager() class Meta: diff --git a/tests/factories.py b/tests/factories.py index 66a646ac..90fddda5 100644 --- a/tests/factories.py +++ b/tests/factories.py @@ -221,6 +221,20 @@ class Meta: 2018,2,2, tzinfo=factory.compat.UTC)) + @classmethod + def from_course_enrollment(cls, course_enrollment, **kwargs): + """Contruct a StudentModule for the given CourseEnrollment + + kwargs provides for additional optional parameters if you need to + override the default factory assignment + """ + kwargs.update({ + 'student': course_enrollment.user, + 'course_id': course_enrollment.course_id, + }) + return cls(**kwargs) + + if OPENEDX_RELEASE == GINKGO: class CourseEnrollmentFactory(DjangoModelFactory): class Meta: diff --git a/tests/models/test_enrollment_data_update_metrics.py b/tests/models/test_enrollment_data_update_metrics.py new file mode 100644 index 00000000..e4ff46d9 --- /dev/null +++ b/tests/models/test_enrollment_data_update_metrics.py @@ -0,0 +1,201 @@ +""" +Basic tests for figures.models.EnrollmentData model + +Test scenarios we need to help verify the data model + +1. Learner doesn't have a CourseEnrollment (CE) +2. Learner just signed up. Has a CE, no LearnerCourseGradeMetric (LCGM) +3. Learner has CE and one LCGM +4. Learner has CE and multiple LCGM +5. Learner completed the course +6. Learner is no longer active in the course (Note: this is only handled in + our data by the fact that the EnrollmentData.is_active field would be False) +7+ The test scenrios we haven't identified yet + + +Tests fail in Ginkgo due to + "TypeError: 'course' is an invalid keyword argument for this function" + +Which is interesting because other tests use CourseEnrollmentFactory(course=X) +and they do not fail in Ginkgo. For now, skipping test in Ginkgo +""" + +import pytest + +from mock import patch + +from django.contrib.sites.models import Site + +from figures.helpers import ( + as_datetime, days_from, utc_yesterday +) +from figures.models import EnrollmentData, LearnerCourseGradeMetrics + +from tests.factories import ( + CourseEnrollmentFactory, + CourseOverviewFactory, + EnrollmentDataFactory, + LearnerCourseGradeMetricsFactory, + OrganizationFactory, + OrganizationCourseFactory, + StudentModuleFactory) +from tests.helpers import ( + organizations_support_sites +) + +if organizations_support_sites(): + from tests.factories import UserOrganizationMappingFactory + + def map_users_to_org(org, users): + """Convenience method to simplify test code + """ + [UserOrganizationMappingFactory(user=user, + organization=org) for user in users] + + +# @pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, reason='Breaks on CourseEnrollmentFactory') +@pytest.mark.django_db +class TestUpdateMetrics(object): + """Test EnrollmentDataManager.update_metrics method + + """ + @pytest.fixture(autouse=True) + def setup(self, db, settings): + + # Set up data that's the same for standalone or multisite + self.date_for = utc_yesterday() + self.site = Site.objects.first() + self.courses = [CourseOverviewFactory(), CourseOverviewFactory()] + + # Two for "our" course, one for another course in the same site + self.enrollments = [ + CourseEnrollmentFactory(course_id=self.courses[0].id), + CourseEnrollmentFactory(course_id=self.courses[0].id), + CourseEnrollmentFactory(course_id=self.courses[1].id), + ] + + self.ce0_sm = StudentModuleFactory.from_course_enrollment( + self.enrollments[0], + created=as_datetime(self.date_for), + modified=as_datetime(self.date_for)) + + # Handle site mode specifices + if organizations_support_sites(): + settings.FEATURES['FIGURES_IS_MULTISITE'] = True + self.org = OrganizationFactory(sites=[self.site]) + for course in self.courses: + OrganizationCourseFactory(organization=self.org, + course_id=str(course.id)) + map_users_to_org(self.org, [ce.user for ce in self.enrollments]) + + # For our tests, we focus on a single enrollment. We should not + # need to stand up other site data, but if we find we do need to, + # then here's the place to do it + else: + self.org = OrganizationFactory() + + def setup_data_for_force_checks(self): + pass + + def test_new_records_yesterday(self): + """Test new enrollment with first activity in the course yesterday + """ + ce = self.enrollments[0] + the_enrollment = { + 'site': self.site, + 'user': self.enrollments[0].user, + 'course_id': str(self.enrollments[0].course_id) + } + assert not EnrollmentData.objects.filter(**the_enrollment) + + ed, created = EnrollmentData.objects.update_metrics(self.site, ce) + + check_ed = EnrollmentData.objects.get(**the_enrollment) + lcgm = LearnerCourseGradeMetrics.objects.latest_lcgm(ce.user, ce.course_id) + assert check_ed == ed + assert created + assert check_ed.date_for == self.date_for + assert lcgm.date_for == self.date_for + + def test_edrec_exists_older_lcgm(self): + ce = self.enrollments[0] + older_date = days_from(self.date_for, -2) + + # Create existing Figures records + EnrollmentDataFactory(site=self.site, + user=ce.user, + course_id=str(ce.course_id), + date_for=older_date) + older_lcgm = LearnerCourseGradeMetricsFactory(site=self.site, + user=ce.user, + course_id=str(ce.course_id), + date_for=older_date) + + # Make sure that the LCGM we created is the most recent one + assert LearnerCourseGradeMetrics.objects.latest_lcgm(ce.user, + ce.course_id) == older_lcgm + # assert lcgm1 == older_lcgm + # run our code under test + ed, created = EnrollmentData.objects.update_metrics(self.site, ce) + # verify our Figures records are updated + after_lcgm = LearnerCourseGradeMetrics.objects.latest_lcgm(ce.user, ce.course_id) + after_ed = EnrollmentData.objects.get(site=self.site, + user=ce.user, + course_id=str(ce.course_id)) + assert after_lcgm.date_for == self.date_for + assert after_ed.date_for == self.date_for + + def test_exists_no_force(self): + ce = self.enrollments[0] + construct_kwargs = dict( + site=self.site, + user=ce.user, + course_id=str(ce.course_id), + date_for=self.date_for) + before_ed = EnrollmentDataFactory(**construct_kwargs) + LearnerCourseGradeMetricsFactory(**construct_kwargs) + with patch('figures.models.EnrollmentProgress._get_progress') as get_prog: + ed, created = EnrollmentData.objects.update_metrics(self.site, ce) + assert not get_prog.called + assert ed == before_ed + + def test_force_update(self): + ce = self.enrollments[0] + + # Create existing Figures records + # We only need to assign one progress value but we assign the possible + # and earned for one to make sure that the earned is not more than the + # possible. We arbitrarily choose points. We could have also chosen + # sections or assigned both + construct_kwargs = dict( + site=self.site, + user=ce.user, + course_id=str(ce.course_id), + date_for=self.date_for, + points_earned=5, + points_possible=10) + EnrollmentDataFactory(**construct_kwargs) + before_lcgm = LearnerCourseGradeMetricsFactory(**construct_kwargs) + + fake_progress = dict(points_possible=50, + points_earned=25, + sections_possible=10, + sections_worked=5) + + with patch('figures.models.EnrollmentProgress._get_progress', return_value=fake_progress): + ed, created = EnrollmentData.objects.update_metrics(self.site, ce, force_update=True) + + # verify our Figures records are updated + lcgm = LearnerCourseGradeMetrics.objects.latest_lcgm(ce.user, ce.course_id) + check_ed = EnrollmentData.objects.get(site=self.site, + user=ce.user, + course_id=str(ce.course_id)) + assert check_ed == ed + assert not created + assert check_ed.date_for == self.date_for + assert check_ed.points_earned == fake_progress['points_earned'] + assert lcgm.date_for == self.date_for + assert lcgm.id == before_lcgm.id + # We only need to check one of the progress fields to know it was updated + assert lcgm.points_earned == fake_progress['points_earned'] + # import pdb; pdb.set_trace()