-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #429 from appsembler/john/pipeline-workflow-next
Figures pipeline performance improvement
- Loading branch information
Showing
11 changed files
with
635 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
"""Course specific module for Figures | ||
## This module defines a `Course` class for data retrieval | ||
Initially created to do the following: | ||
1. Reduce duplication in Figures "pipeline" | ||
2. Build stronger course context to make Figures programming easier | ||
## Background summary | ||
A course id is globally unique as it has the identity of the organization and | ||
organizations are globally unique. | ||
## Design to think about - Enrollment class | ||
Build on Django's lazy eval for querysets to create also an `Enrollment` class | ||
that provides interfaces for `enrollment.date_for` and abstracts this bit of | ||
mess that enrollments and student modules do NOT associate and instead we need | ||
to query back and forth with `user_id` and `course_id`. | ||
""" | ||
from __future__ import absolute_import | ||
from django.db.models import Q | ||
from figures.compat import CourseEnrollment, StudentModule | ||
from figures.helpers import ( | ||
as_course_key, | ||
as_date, | ||
) | ||
from figures.sites import ( | ||
get_site_for_course, | ||
) | ||
|
||
|
||
class Course(object):
    """Course-scoped access to enrollment and student module records.

    This class exists so that code needing `CourseEnrollment` or
    `StudentModule` records for one specific course, optionally narrowed to
    a given date, has a single place to ask for them.

    ## Architecture goal

    **Start simple and don't build the kitchen sink into here right away just
    because this class exists**

    ## Data under consideration to have this class handle

    * enrollments created on a date, before, after, between. However this
      would just be a convenience as the `.enrollments` property returns a
      queryset that can be filtered on `.created`
    """

    def __init__(self, course_id):
        """Store the course key and the site the course belongs to.

        The given course id is converted with `as_course_key` and kept as the
        `course_key` instance attribute. A later improvement could add a
        `CourseLike` abstraction for course identity so callers can stop
        worrying about "Is it a string representation of a course or is it a
        CourseKey?"
        """
        self.course_key = as_course_key(course_id)

        # Improvement: Consider doing lazy evaluation
        self.site = get_site_for_course(self.course_id)

    def __repr__(self):
        """Return '<module>.<class name> <course key as string>'."""
        class_name = self.__class__.__name__
        key_text = str(self.course_key)
        return '{}.{} <{}>'.format(self.__module__, class_name, key_text)

    def __str__(self):
        """Return the same text as `__repr__`."""
        return self.__repr__()

    @property
    def course_id(self):
        """The course key rendered as a string."""
        return str(self.course_key)

    @property
    def enrollments(self):
        """CourseEnrollment queryset filtered to this course."""
        return CourseEnrollment.objects.filter(course_id=self.course_key)

    @property
    def student_modules(self):
        """StudentModule queryset filtered to this course."""
        return StudentModule.objects.filter(course_id=self.course_key)

    def student_modules_active_on_date(self, date_for):
        """Return the StudentModule queryset that was active on the date.

        A record counts as active when its `created` or its `modified` field
        falls on the given date.

        NOTE: The year/month/day lookups are used instead of simply
        `modified__date=date_for` because Django 1.8/Ginkgo still has to be
        supported.
        """
        day = as_date(date_for)
        created_that_day = Q(created__year=day.year,
                             created__month=day.month,
                             created__day=day.day)
        modified_that_day = Q(modified__year=day.year,
                              modified__month=day.month,
                              modified__day=day.day)
        return self.student_modules.filter(created_that_day | modified_that_day)

    def enrollments_active_on_date(self, date_for):
        """Return the CourseEnrollment queryset active on the date.

        Finds the student modules created or modified on the specified date,
        takes their distinct `student_id` values, and returns this course's
        CourseEnrollment records whose `user_id` is among them.
        """
        active_user_ids = (
            self.student_modules_active_on_date(date_for)
            .values('student_id')
            .distinct()
        )
        return CourseEnrollment.objects.filter(course_id=self.course_key,
                                               user_id__in=active_user_ids)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
"""This module updates Figures enrollment data and calculates aggregate progress | ||
* It updates `EnrollmentData` and `LearnerCourseGradeMetrics` records | ||
* It calculates course progress from EnrollmentData records | ||
This generates the same metrics as the original enrollment_metrics modules, | ||
but does it differently. | ||
## How it differs from the previous version | ||
This module improves on the existing enrollment metrics collection module, | ||
`figures.pipeline.enrollment_metrics` | ||
* It separates the activities to create and update Figures per-enrollment data | ||
collected | ||
* This separation lets Figures run metrics in distinct stages | ||
* First, collect per-enrollment data | ||
* Second, aggregate metrics based on collected data | ||
* This provides a workflow that is easier to resume if interrupted | ||
* This provides a workflow that is simpler to debug | ||
* This simplifies and speeds up aggregate progress metrics, collapsing complex | ||
code into a single Django queryset aggregation | ||
* This update lays groundwork for further metrics improvements and enhancements | ||
such as metrics on subsets of learners in a course or progress of subsets of | ||
learners across courses | ||
# Developer Notes | ||
This module provides `update_enrollment_data_for_course` and `calculate_course_progress` | ||
""" | ||
from django.db.models import Avg | ||
from figures.course import Course | ||
from figures.helpers import utc_yesterday | ||
from figures.models import EnrollmentData | ||
from figures.sites import UnlinkedCourseError | ||
|
||
|
||
def update_enrollment_data_for_course(course_id):
    """Refresh Figures per-enrollment data for a course's active enrollments.

    Looks up the enrollments that were active yesterday (UTC) in the course
    and calls `EnrollmentData.objects.update_metrics` for each of them.
    Per the module notes, this checks for and creates new
    `LearnerCourseGradeMetrics` records and updates `EnrollmentData` records.

    Returns a list holding the result of each `update_metrics` call.

    Raises `UnlinkedCourseError` when no site is found for the course.
    """
    yesterday = utc_yesterday()
    course = Course(course_id)
    site = course.site
    if not site:
        raise UnlinkedCourseError('No site found for course "{}"'.format(course_id))

    # Enrollments are "active" when the learner has student module records
    # created or modified on the date; only those need their metrics checked.
    results = []
    for enrollment in course.enrollments_active_on_date(yesterday):
        results.append(EnrollmentData.objects.update_metrics(site, enrollment))
    return results
|
||
|
||
def calculate_course_progress(course_id):
    """Average the recorded progress of all enrollments in the course.

    Aggregates `progress_percent` with `Avg` over the `EnrollmentData`
    records whose `course_id` equals `str(course_id)`.

    Returns a dict with the single key `average_progress`. When the
    aggregate comes back as `None`, the value is replaced with `0.0`.
    """
    course_records = EnrollmentData.objects.filter(course_id=str(course_id))
    summary = course_records.aggregate(average_progress=Avg('progress_percent'))

    # This is a bit of a hack. When we overhaul progress data, we should really
    # have None for progress if there's no data. But check how SQL AVG performs
    if summary['average_progress'] is None:
        summary['average_progress'] = 0.0
    return summary
Oops, something went wrong.