Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New endpoint for enrollment metrics - provide data captured in LCGM #233

Merged
merged 7 commits into from
Jul 15, 2020
56 changes: 55 additions & 1 deletion devsite/devsite/seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@
from student.models import CourseAccessRole, CourseEnrollment, UserProfile

from figures.compat import RELEASE_LINE, GeneratedCertificate
from figures.models import CourseDailyMetrics, SiteDailyMetrics
from figures.models import (
CourseDailyMetrics,
LearnerCourseGradeMetrics,
SiteDailyMetrics,
)
from figures.helpers import as_course_key, as_datetime, days_from, prev_day
from figures.pipeline import course_daily_metrics as pipeline_cdm
from figures.pipeline import site_daily_metrics as pipeline_sdm
Expand All @@ -44,6 +48,10 @@ def get_site():
return Site.objects.first()


def today():
    """Return the current UTC calendar date as a ``datetime.date``."""
    utc_now = datetime.datetime.utcnow()
    return utc_now.date()


def days_back_list(days_back):
end_date = prev_day(datetime.datetime.now())
start_date = days_from(end_date, abs(days_back) * -1)
Expand Down Expand Up @@ -85,6 +93,8 @@ def seed_course_overviews(data=None):
display_org_with_default=rec['org'],
number=rec['number'],
created=as_datetime(rec['created']).replace(tzinfo=utc),
start=as_datetime(rec['enrollment_start']).replace(tzinfo=utc),
end=as_datetime(rec['enrollment_end']).replace(tzinfo=utc),
enrollment_start=as_datetime(rec['enrollment_start']).replace(tzinfo=utc),
enrollment_end=as_datetime(rec['enrollment_end']).replace(tzinfo=utc),
)
Expand Down Expand Up @@ -265,13 +275,57 @@ def seed_site_daily_metrics(data=None):
date_for=dt, force_update=True)


def seed_lcgm_for_course(**_kwargs):
    """Create or update one LCGM record per enrollment in a course.

    Quick hack to create a number of LearnerCourseGradeMetrics records.
    Improvement is to add a devsite model for "synthetic course policy". This
    model specifies course info: points possible, sections possible, number of
    learners or learner range, learner completion/progress curve

    Every enrollment found for the course gets a record with the same values.

    Keyword arguments:
        date_for: date the records are for. Defaults to today's UTC date
        site: site the records belong to. Defaults to ``get_site()``
        course_id: id of the course whose enrollments are seeded
        points_possible: defaults to 20
        points_earned: defaults to 10
        sections_possible: defaults to 10
        sections_worked: defaults to 5
    """
    date_for = _kwargs.get('date_for', today())
    # Only look up the default site when the caller did not supply one, so
    # we do not hit the database for a value we would throw away
    site = _kwargs['site'] if 'site' in _kwargs else get_site()
    course_id = _kwargs.get('course_id')
    metrics = dict(
        points_possible=_kwargs.get('points_possible', 20),
        points_earned=_kwargs.get('points_earned', 10),
        sections_possible=_kwargs.get('sections_possible', 10),
        sections_worked=_kwargs.get('sections_worked', 5),
    )

    # Convert once instead of once per enrollment
    course_key = as_course_key(course_id)
    course_id_str = str(course_id)

    for enrollment in CourseEnrollment.objects.filter(course_id=course_key):
        LearnerCourseGradeMetrics.objects.update_or_create(
            site=site,
            user=enrollment.user,
            course_id=course_id_str,
            date_for=date_for,
            defaults=metrics,
        )


def seed_lcgm_all():
    """Seed LCGM records for every course overview over the last 10 days.

    For each date returned by ``days_back_list(10)`` the earned points and
    worked sections grow with the position of the date in that list, out of
    100 possible points and 20 possible sections.
    """
    for overview in CourseOverview.objects.all():
        course_id = str(overview.id)
        print('Seeding LCGM for course {}'.format(course_id))
        for step, date_for in enumerate(days_back_list(10)):
            seed_lcgm_for_course(
                date_for=date_for,
                course_id=course_id,
                points_possible=100,
                points_earned=step * 5,
                sections_possible=20,
                sections_worked=step * 2,
            )


def wipe():
    """Delete the seeded devsite data.

    Removes the non-admin users, then every enrollment, student module,
    course overview and Figures metrics record.
    """
    clear_non_admin_users()
    CourseEnrollment.objects.all().delete()
    StudentModule.objects.all().delete()
    CourseOverview.objects.all().delete()
    CourseDailyMetrics.objects.all().delete()
    SiteDailyMetrics.objects.all().delete()
    # Querysets come from the model's manager; the model class itself has no
    # `all()` attribute, so calling it directly raises AttributeError
    LearnerCourseGradeMetrics.objects.all().delete()


def seed_all():
Expand Down
93 changes: 89 additions & 4 deletions figures/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,15 @@
See the following for breaking changes when upgrading to Django Filter 1.0:

https://django-filter.readthedocs.io/en/master/guide/migration.html#migrating-to-1-0

TODO: Rename classes so they either all end with "Filter" or "FilterSet" then
update the test class names in "tests/test_filters.py" to match.
"""

from django.contrib.auth import get_user_model
from django.contrib.sites.models import Site
from django.db.models import F

import django_filters

from opaque_keys.edx.keys import CourseKey
Expand All @@ -26,23 +31,33 @@
CourseDailyMetrics,
SiteDailyMetrics,
CourseMauMetrics,
LearnerCourseGradeMetrics,
SiteMauMetrics,
)


def char_method_filter(method):
    """Return a filter that delegates to the named FilterSet method.

    "method" is the method name string.

    Pre version 1 Django Filter exposes ``MethodFilter``, so that old style
    is checked for first. Otherwise a ``CharFilter`` bound to the method is
    returned.
    """
    if not hasattr(django_filters, 'MethodFilter'):
        return django_filters.CharFilter(method=method)
    return django_filters.MethodFilter(action=method)  # pylint: disable=no-member


def boolean_method_filter(method):
    """Return a boolean filter that delegates to the named FilterSet method.

    "method" is the method name string.

    Pre version 1 Django Filter exposes ``MethodFilter``, so that old style
    is checked for first. Otherwise a ``BooleanFilter`` bound to the method
    is returned.
    """
    if not hasattr(django_filters, 'MethodFilter'):
        return django_filters.BooleanFilter(method=method)
    return django_filters.MethodFilter(action=method)  # pylint: disable=no-member


class CourseOverviewFilter(django_filters.FilterSet):
'''Provides filtering for CourseOverview model objects

Expand Down Expand Up @@ -101,6 +116,76 @@ class Meta:
fields = ['course_id', 'user_id', 'is_active', ]


class EnrollmentMetricsFilter(CourseEnrollmentFilter):
    """Filter query params for enrollment metrics

    Consider making 'user_ids' and 'course_ids' be mixins for `user` foreign key
    and 'course_id' respectively. Perhaps a class decorator if there's some
    unforeseen issue with doing a mixin for each

    Filters

    "course_ids" filters on a set of comma delimited course id strings
    "user_ids" filters on a set of comma delimited integer user ids
    "only_completed" shows only completed records. Django Filter 1.0.4 appears
    to only support capitalized "True" as the value in the query string
    "exclude_completed" hides completed records. It is the complement of
    "only_completed"

    A record is "completed" when it has at least one possible section and
    the number of sections worked equals the number of sections possible.

    The "only_completed" filter is subject to change. We want to be able to
    filter on: "hide completed", "show only completed", "show everything"
    So we may go with a "choice field"

    Use ``date_for`` for retrieving a specific date
    Use ``date_0`` and ``date_1`` for retrieving values in a date range, inclusive
    each of these can be used singly to get:
    * ``date_0`` to get records greater than or equal
    * ``date_1`` to get records less than or equal

    TODO: Add 'is_active' filter - need to find matches in CourseEnrollment
    """
    course_ids = char_method_filter(method='filter_course_ids')
    user_ids = char_method_filter(method='filter_user_ids')
    date = django_filters.DateFromToRangeFilter(name='date_for')
    only_completed = boolean_method_filter(method='filter_only_completed')
    exclude_completed = boolean_method_filter(method='filter_exclude_completed')

    class Meta:
        """
        Allow all field and related filtering except for "site"
        """
        model = LearnerCourseGradeMetrics
        exclude = ['site']

    def filter_course_ids(self, queryset, name, value):  # pylint: disable=unused-argument
        """Keep records whose course id is in the comma delimited "value"

        Spaces in each id are turned back into "+" characters, presumably
        because a "+" in a query string is decoded as a space before the
        value reaches this method.
        """
        course_ids = [cid.replace(' ', '+') for cid in value.split(',')]
        return queryset.filter(course_id__in=course_ids)

    def filter_user_ids(self, queryset, name, value):  # pylint: disable=unused-argument
        """Keep records whose user id is in the comma delimited "value"

        Entries that are not made up of digits only are silently ignored.
        """
        user_ids = [user_id for user_id in value.split(',') if user_id.isdigit()]
        return queryset.filter(user_id__in=user_ids)

    def filter_only_completed(self, queryset, name, value):  # pylint: disable=unused-argument
        """
        The "value" parameter is either `True` or `False`

        When `True`, only completed records are returned. Any other value
        returns the queryset unchanged.
        """
        if value is True:
            return queryset.filter(sections_possible__gt=0,
                                   sections_worked=F('sections_possible'))
        return queryset

    def filter_exclude_completed(self, queryset, name, value):  # pylint: disable=unused-argument
        """
        The "value" parameter is either `True` or `False`

        When `True`, completed records are removed. Any other value returns
        the queryset unchanged.
        """
        if value is True:
            # This is a hack until we add `completed` field to LCGM
            # Exclude exactly what `filter_only_completed` selects, so that
            # records with no possible sections are not dropped by both
            # filters
            return queryset.exclude(sections_possible__gt=0,
                                    sections_worked=F('sections_possible'))
        return queryset


class UserFilterSet(django_filters.FilterSet):
'''Provides filtering for User model objects

Expand Down
81 changes: 80 additions & 1 deletion figures/models.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Defines Figures models

TODO: Create a base "SiteModel" or a "SiteModelMixin"
"""

from datetime import date
from django.conf import settings
from django.contrib.sites.models import Site
from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models
from django.db.models import F
from django.utils.encoding import python_2_unicode_compatible

from jsonfield import JSONField
Expand Down Expand Up @@ -186,7 +188,70 @@ class LearnerCourseGradeMetricsManager(models.Manager):
"""
def most_recent_for_learner_course(self, user, course_id):
    """Return the newest record for the learner in the course, or `None`

    "Newest" is the record with the greatest `date_for`. `course_id` is cast
    to a string, so a CourseKey instance can be passed in.
    """
    queryset = self.filter(user=user, course_id=str(course_id))
    # `first()` already returns `None` for an empty queryset, so no separate
    # emptiness check (and the extra query it costs) is needed
    return queryset.order_by('-date_for').first()  # pylint: disable=no-member

def most_recent_for_course(self, course_id):
    """Return a raw queryset with one row per learner in the course

    Rows are grouped by user and course, selecting `MAX(date_for)` for each
    group. `course_id` is cast to a string, so a CourseKey instance can be
    passed in.

    NOTE(review): `id` is neither grouped nor aggregated, so which record id
    comes back for each group depends on the database. Confirm before
    relying on it.
    """
    # The course id is passed as a query parameter instead of being
    # formatted into the statement: the database driver does the quoting,
    # which keeps the SQL valid and closes the injection hole
    statement = """ \
    SELECT id, user_id, course_id, MAX(date_for)
    FROM figures_learnercoursegrademetrics lcgm
    WHERE course_id = %s
    GROUP BY user_id, course_id
    """
    return self.raw(statement, [str(course_id)])

def completed_for_site(self, site, **_kwargs):
    """Return the completed records for the site

    A record counts as completed when it has at least one possible section
    and its sections worked equal its sections possible.

    The results can be narrowed with these optional keyword arguments:

    `user_ids=` list of user ids
    `course_ids=` list of course ids (strings or CourseKey instances)

    An empty or missing list means "do not filter on this".

    We will consider adding a "completed" field to the model for faster
    filtering, since we can index on the field. However, we need to evaluate
    the additional storage need
    """
    completed = self.filter(site=site,
                            sections_possible__gt=0,
                            sections_worked=F('sections_possible'))

    # We don't check that the values are iterable. Invalid values passed in
    # are left to fail on their own
    user_ids = _kwargs.get('user_ids', None)
    course_ids = _kwargs.get('course_ids', None)

    narrowing = {}
    if user_ids:
        narrowing['user_id__in'] = user_ids
    if course_ids:
        # Cast to string in case the course ids are CourseKey instances
        narrowing['course_id__in'] = [str(key) for key in course_ids]

    if not narrowing:
        return completed
    return completed.filter(**narrowing)  # pylint: disable=E1101

def completed_ids_for_site(self, site, **_kwargs):
    """Return distinct 'course_id'/'user_id' values of completed records

    Takes the same arguments as `completed_for_site`, which does the
    selecting.
    """
    completed = self.completed_for_site(site, **_kwargs)
    id_pairs = completed.values('course_id', 'user_id')
    return id_pairs.distinct()

def completed_raw_for_site(self, site, **_kwargs):
    """Experimental

    Return a raw queryset of the completed records for the site, grouped
    and ordered by user and course.

    `site` may be a Site instance or a site primary key. `_kwargs` is
    accepted for parity with `completed_for_site` but is not used.

    NOTE(review): `id` is neither grouped nor aggregated, so which record id
    comes back for each group depends on the database. Confirm before
    relying on it.
    """
    # Formatting the site object into the statement would interpolate its
    # string form, not its primary key. Resolve the key and let the database
    # driver bind it as a query parameter
    site_id = getattr(site, 'id', site)
    statement = """ \
    SELECT id, user_id, course_id, MAX(date_for)
    FROM figures_learnercoursegrademetrics lcgm
    WHERE site_id = %s AND
    lcgm.sections_possible > 0 AND
    lcgm.sections_worked = lcgm.sections_possible
    GROUP BY user_id, course_id
    ORDER BY user_id, course_id
    """
    return self.raw(statement, [site_id])


@python_2_unicode_compatible
Expand All @@ -213,6 +278,12 @@ class LearnerCourseGradeMetrics(TimeStampedModel):
But for now, using float, as I'm not entirely sure how many decimal places are
actually needed and edx-platform uses FloatField in its grades models


TODO: Add fields
`is_active` - get the 'is_active' value from the enrollment at the time
this record is created
`completed` - This lets us filter on a table column instead of calculating it
TODO: Add index on 'course_id', 'date_for', 'completed'
"""
site = models.ForeignKey(Site)
date_for = models.DateField()
Expand All @@ -227,6 +298,10 @@ class LearnerCourseGradeMetrics(TimeStampedModel):
objects = LearnerCourseGradeMetricsManager()

class Meta:
    """
    Open question: should 'site' be added to the `unique_together` set?
    Open edX Course IDs are globally unique, so it is not required
    """
    ordering = (
        'date_for',
        'user__username',
        'course_id',
    )
    unique_together = (
        'user',
        'course_id',
        'date_for',
    )

Expand Down Expand Up @@ -260,6 +335,10 @@ def progress_details(self):
sections_possible=self.sections_possible,
)

@property
def completed(self):
    """Whether every possible section has been worked

    A record with no sections worked is never completed.
    """
    worked = self.sections_worked
    if not worked > 0:
        return False
    return worked == self.sections_possible


@python_2_unicode_compatible
class PipelineError(TimeStampedModel):
Expand Down
23 changes: 23 additions & 0 deletions figures/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,3 +787,26 @@ class CourseMauLiveMetricsSerializer(serializers.Serializer):
count = serializers.IntegerField()
course_id = serializers.CharField()
domain = serializers.CharField()


class EnrollmentMetricsSerializer(serializers.ModelSerializer):
    """Serializer for LearnerCourseGradeMetrics

    The learner is nested as a read only `UserIndexSerializer` and
    `progress_percent` is a decimal between 0.00 and 1.00.
    """
    user = UserIndexSerializer(read_only=True)
    progress_percent = serializers.DecimalField(
        max_digits=3,
        decimal_places=2,
        min_value=0.00,
        max_value=1.00,
    )

    class Meta:
        model = LearnerCourseGradeMetrics
        editable = False
        fields = (
            'id',
            'user',
            'course_id',
            'date_for',
            'completed',
            'points_earned',
            'points_possible',
            'sections_worked',
            'sections_possible',
            'progress_percent',
        )


class CourseCompletedSerializer(serializers.Serializer):
    """Serializer for a 'course_id'/'user_id' pair"""

    course_id = serializers.CharField()
    user_id = serializers.IntegerField()
6 changes: 6 additions & 0 deletions figures/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@
views.UserIndexViewSet,
base_name='user-index')

# Experimental

router.register(
r'enrollment-metrics',
views.EnrollmentMetricsViewSet,
base_name='enrollment-metrics')

urlpatterns = [

Expand Down
Loading