Skip to content

Commit

Permalink
Enrollment model updates + tests + support code
Browse files Browse the repository at this point in the history
This commit contains changes to prepare for adding new daily metrics
workflow to speed up daily job execution

* Added new EnrollmentData model manager method to handle and
encapsulate EnrollmentData/LCGM update logic given a `CourseEnrollment`
record as parameter. Added unit tests for this new method.
* Adds `collect_elapsed` field to EnrollmentData and LCGM to track how long it takes to collect the progress data for that enrollment. The big cost here is currently to `CourseGradeFactory()`
* Added convenience method to StudentModuleFactory to create one form a
`CourseEnrollment` record instead of having to pass in two variables
(user and course_id) to construct a test StudentModule record to match
an already constructed CourseEnrollment record
  • Loading branch information
johnbaldwin committed Feb 24, 2022
1 parent 5a5ff31 commit c6693fe
Show file tree
Hide file tree
Showing 4 changed files with 316 additions and 5 deletions.
24 changes: 24 additions & 0 deletions figures/migrations/0016_add_collect_elapsed_to_ed_and_lcgm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('figures', '0015_add_enrollment_data_model'),
]

operations = [
migrations.AddField(
model_name='enrollmentdata',
name='collect_elapsed',
field=models.FloatField(null=True),
),
migrations.AddField(
model_name='learnercoursegrademetrics',
name='collect_elapsed',
field=models.FloatField(null=True),
),
]
82 changes: 77 additions & 5 deletions figures/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from __future__ import absolute_import
from datetime import date
import timeit
from django.conf import settings
from django.contrib.sites.models import Site
from django.core.validators import MaxValueValidator, MinValueValidator
Expand All @@ -17,7 +18,7 @@
from model_utils.models import TimeStampedModel

from figures.compat import CourseEnrollment
from figures.helpers import as_course_key
from figures.helpers import as_course_key, utc_yesterday
from figures.progress import EnrollmentProgress


Expand Down Expand Up @@ -203,7 +204,7 @@ class EnrollmentDataManager(models.Manager):
EnrollmentData instances.
"""
def set_enrollment_data(self, site, user, course_id, course_enrollment=False):
def set_enrollment_data(self, site, user, course_id, course_enrollment=None):
"""
This is an expensive call as it needs to call CourseGradeFactory if
there is not already a LearnerCourseGradeMetrics record for the learner
Expand Down Expand Up @@ -259,6 +260,71 @@ def set_enrollment_data(self, site, user, course_id, course_enrollment=False):
defaults=defaults)
return obj, created

def update_metrics(self, site, course_enrollment, force_update=False):
"""
This is an expensive call as it needs to call CourseGradeFactory if
there is not already a LearnerCourseGradeMetrics record for the learner
"""
date_for = utc_yesterday()

# check if we already have a record for the date for EnrollmentData

# Alternately, we could use a try/except on a 'get' call, however, this
# would be much slower for a bunch of new enrollments

# Should only be one even if we don't include the site in the query
# because course_id should be globally unique
# If course id is ever NOT globally unique, then we need to add site
# to the query
ed_recs = EnrollmentData.objects.filter(
user_id=course_enrollment.user_id,
course_id=str(course_enrollment.course_id))

if not ed_recs or ed_recs[0].date_for < date_for or force_update:
# We do the update
start_time = timeit.default_timer()
# get the progress data
ep = EnrollmentProgress(user=course_enrollment.user,
course_id=str(course_enrollment.course_id))
defaults = dict(
date_for=date_for,
is_completed=ep.is_completed(),
progress_percent=ep.progress_percent(),
points_possible=ep.progress.get('points_possible', 0),
points_earned=ep.progress.get('points_earned', 0),
sections_possible=ep.progress.get('sections_possible', 0),
sections_worked=ep.progress.get('sections_worked', 0),
is_enrolled=course_enrollment.is_active,
date_enrolled=course_enrollment.created,
)
elapsed = timeit.default_timer() - start_time
defaults['collect_elapsed'] = elapsed

ed_rec, created = self.update_or_create(
site=site,
user=course_enrollment.user,
course_id=str(course_enrollment.course_id),
defaults=defaults)
# create a new LCGM record
# if it already exists for the day
LearnerCourseGradeMetrics.objects.update_or_create(
site=ed_rec.site,
user=ed_rec.user,
course_id=ed_rec.course_id,
date_for=date_for,
defaults=dict(
points_possible=ed_rec.points_possible,
points_earned=ed_rec.points_earned,
sections_worked=ed_rec.sections_worked,
sections_possible=ed_rec.sections_possible,
collect_elapsed=elapsed
)
)
return ed_rec, created
else:
return ed_recs[0], False


@python_2_unicode_compatible
class EnrollmentData(TimeStampedModel):
Expand Down Expand Up @@ -301,6 +367,9 @@ class EnrollmentData(TimeStampedModel):
sections_worked = models.IntegerField()
sections_possible = models.IntegerField()

# seconds it took to collect progress data
collect_elapsed = models.FloatField(null=True)

objects = EnrollmentDataManager()

class Meta:
Expand All @@ -324,7 +393,7 @@ def progress_details(self):


class LearnerCourseGradeMetricsManager(models.Manager):
"""Custom model manager for LearnerCourseGrades model
"""Custom model manager for LearnerCourseGradeMetrics model
"""
def latest_lcgm(self, user, course_id):
"""Gets the most recent record for the given user and course
Expand Down Expand Up @@ -414,9 +483,9 @@ class LearnerCourseGradeMetrics(TimeStampedModel):
Purpose is primarliy to improve performance for the front end. In addition,
data collected can be used for course progress over time
We're capturing data from figures.metrics.LearnerCourseGrades
We're capturing data from figures.progress.EnrollmentProgress
Note: We're probably going to move ``LearnerCourseGrades`` to figures.pipeline
Note: We're probably going to move ``EnrollmentProgress`` to figures.pipeline
since that class will only be needed by the pipeline
Even though this is for a course enrollment, we're mapping to the user
Expand Down Expand Up @@ -452,6 +521,9 @@ class LearnerCourseGradeMetrics(TimeStampedModel):
sections_worked = models.IntegerField()
sections_possible = models.IntegerField()

# seconds it took to collect progress data
collect_elapsed = models.FloatField(null=True)

objects = LearnerCourseGradeMetricsManager()

class Meta:
Expand Down
14 changes: 14 additions & 0 deletions tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,20 @@ class Meta:
2018,2,2, tzinfo=factory.compat.UTC))


@classmethod
def from_course_enrollment(cls, course_enrollment, **kwargs):
"""Contruct a StudentModule for the given CourseEnrollment
kwargs provides for additional optional parameters if you need to
override the default factory assignment
"""
kwargs.update({
'student': course_enrollment.user,
'course_id': course_enrollment.course_id,
})
return cls(**kwargs)


if OPENEDX_RELEASE == GINKGO:
class CourseEnrollmentFactory(DjangoModelFactory):
class Meta:
Expand Down
201 changes: 201 additions & 0 deletions tests/models/test_enrollment_data_update_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
"""
Basic tests for figures.models.EnrollmentData model
Test scenarios we need to help verify the data model
1. Learner doesn't have a CourseEnrollment (CE)
2. Learner just signed up. Has a CE, no LearnerCourseGradeMetric (LCGM)
3. Learner has CE and one LCGM
4. Learner has CE and multiple LCGM
5. Learner completed the course
6. Learner is no longer active in the course (Note: this is only handled in
our data by the fact that the EnrollmentData.is_active field would be False)
7+ The test scenrios we haven't identified yet
Tests fail in Ginkgo due to
"TypeError: 'course' is an invalid keyword argument for this function"
Which is interesting because other tests use CourseEnrollmentFactory(course=X)
and they do not fail in Ginkgo. For now, skipping test in Ginkgo
"""

import pytest

from mock import patch

from django.contrib.sites.models import Site

from figures.helpers import (
as_datetime, days_from, utc_yesterday
)
from figures.models import EnrollmentData, LearnerCourseGradeMetrics

from tests.factories import (
CourseEnrollmentFactory,
CourseOverviewFactory,
EnrollmentDataFactory,
LearnerCourseGradeMetricsFactory,
OrganizationFactory,
OrganizationCourseFactory,
StudentModuleFactory)
from tests.helpers import (
organizations_support_sites
)

if organizations_support_sites():
from tests.factories import UserOrganizationMappingFactory

def map_users_to_org(org, users):
"""Convenience method to simplify test code
"""
[UserOrganizationMappingFactory(user=user,
organization=org) for user in users]


# @pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, reason='Breaks on CourseEnrollmentFactory')
@pytest.mark.django_db
class TestUpdateMetrics(object):
"""Test EnrollmentDataManager.update_metrics method
"""
@pytest.fixture(autouse=True)
def setup(self, db, settings):

# Set up data that's the same for standalone or multisite
self.date_for = utc_yesterday()
self.site = Site.objects.first()
self.courses = [CourseOverviewFactory(), CourseOverviewFactory()]

# Two for "our" course, one for another course in the same site
self.enrollments = [
CourseEnrollmentFactory(course_id=self.courses[0].id),
CourseEnrollmentFactory(course_id=self.courses[0].id),
CourseEnrollmentFactory(course_id=self.courses[1].id),
]

self.ce0_sm = StudentModuleFactory.from_course_enrollment(
self.enrollments[0],
created=as_datetime(self.date_for),
modified=as_datetime(self.date_for))

# Handle site mode specifices
if organizations_support_sites():
settings.FEATURES['FIGURES_IS_MULTISITE'] = True
self.org = OrganizationFactory(sites=[self.site])
for course in self.courses:
OrganizationCourseFactory(organization=self.org,
course_id=str(course.id))
map_users_to_org(self.org, [ce.user for ce in self.enrollments])

# For our tests, we focus on a single enrollment. We should not
# need to stand up other site data, but if we find we do need to,
# then here's the place to do it
else:
self.org = OrganizationFactory()

def setup_data_for_force_checks(self):
pass

def test_new_records_yesterday(self):
"""Test new enrollment with first activity in the course yesterday
"""
ce = self.enrollments[0]
the_enrollment = {
'site': self.site,
'user': self.enrollments[0].user,
'course_id': str(self.enrollments[0].course_id)
}
assert not EnrollmentData.objects.filter(**the_enrollment)

ed, created = EnrollmentData.objects.update_metrics(self.site, ce)

check_ed = EnrollmentData.objects.get(**the_enrollment)
lcgm = LearnerCourseGradeMetrics.objects.latest_lcgm(ce.user, ce.course_id)
assert check_ed == ed
assert created
assert check_ed.date_for == self.date_for
assert lcgm.date_for == self.date_for

def test_edrec_exists_older_lcgm(self):
ce = self.enrollments[0]
older_date = days_from(self.date_for, -2)

# Create existing Figures records
EnrollmentDataFactory(site=self.site,
user=ce.user,
course_id=str(ce.course_id),
date_for=older_date)
older_lcgm = LearnerCourseGradeMetricsFactory(site=self.site,
user=ce.user,
course_id=str(ce.course_id),
date_for=older_date)

# Make sure that the LCGM we created is the most recent one
assert LearnerCourseGradeMetrics.objects.latest_lcgm(ce.user,
ce.course_id) == older_lcgm
# assert lcgm1 == older_lcgm
# run our code under test
ed, created = EnrollmentData.objects.update_metrics(self.site, ce)
# verify our Figures records are updated
after_lcgm = LearnerCourseGradeMetrics.objects.latest_lcgm(ce.user, ce.course_id)
after_ed = EnrollmentData.objects.get(site=self.site,
user=ce.user,
course_id=str(ce.course_id))
assert after_lcgm.date_for == self.date_for
assert after_ed.date_for == self.date_for

def test_exists_no_force(self):
ce = self.enrollments[0]
construct_kwargs = dict(
site=self.site,
user=ce.user,
course_id=str(ce.course_id),
date_for=self.date_for)
before_ed = EnrollmentDataFactory(**construct_kwargs)
LearnerCourseGradeMetricsFactory(**construct_kwargs)
with patch('figures.models.EnrollmentProgress._get_progress') as get_prog:
ed, created = EnrollmentData.objects.update_metrics(self.site, ce)
assert not get_prog.called
assert ed == before_ed

def test_force_update(self):
ce = self.enrollments[0]

# Create existing Figures records
# We only need to assign one progress value but we assign the possible
# and earned for one to make sure that the earned is not more than the
# possible. We arbitrarily choose points. We could have also chosen
# sections or assigned both
construct_kwargs = dict(
site=self.site,
user=ce.user,
course_id=str(ce.course_id),
date_for=self.date_for,
points_earned=5,
points_possible=10)
EnrollmentDataFactory(**construct_kwargs)
before_lcgm = LearnerCourseGradeMetricsFactory(**construct_kwargs)

fake_progress = dict(points_possible=50,
points_earned=25,
sections_possible=10,
sections_worked=5)

with patch('figures.models.EnrollmentProgress._get_progress', return_value=fake_progress):
ed, created = EnrollmentData.objects.update_metrics(self.site, ce, force_update=True)

# verify our Figures records are updated
lcgm = LearnerCourseGradeMetrics.objects.latest_lcgm(ce.user, ce.course_id)
check_ed = EnrollmentData.objects.get(site=self.site,
user=ce.user,
course_id=str(ce.course_id))
assert check_ed == ed
assert not created
assert check_ed.date_for == self.date_for
assert check_ed.points_earned == fake_progress['points_earned']
assert lcgm.date_for == self.date_for
assert lcgm.id == before_lcgm.id
# We only need to check one of the progress fields to know it was updated
assert lcgm.points_earned == fake_progress['points_earned']
# import pdb; pdb.set_trace()

0 comments on commit c6693fe

Please sign in to comment.