Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance and test improvement for LearnerMetricsViewSet #260

Merged
merged 12 commits into from
Sep 28, 2020
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ coverage.xml
# Django stuff:
*.log
local_settings.py
devsite/devsite/.env

# Flask stuff:
instance/
Expand Down
8 changes: 8 additions & 0 deletions devsite/devsite/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Figures devsite environment settings example file

# Set to true to enable Figures multisite environment in devsite
FIGURES_IS_MULTISITE=true

# Set synthetic data seed options
SEED_DAYS_BACK=60
SEED_NUM_LEARNERS_PER_COURSE=25
67 changes: 61 additions & 6 deletions devsite/devsite/seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import faker
import random

from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.sites.models import Site
from django.db.utils import IntegrityError
Expand All @@ -19,28 +20,45 @@
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from student.models import CourseAccessRole, CourseEnrollment, UserProfile

from organizations.models import Organization, OrganizationCourse

from figures.compat import RELEASE_LINE, GeneratedCertificate
from figures.models import (
CourseDailyMetrics,
LearnerCourseGradeMetrics,
SiteDailyMetrics,
)
from figures.helpers import as_course_key, as_datetime, days_from, prev_day
from figures.helpers import (
as_course_key,
as_datetime,
days_from,
prev_day,
is_multisite,
)
from figures.pipeline import course_daily_metrics as pipeline_cdm
from figures.pipeline import site_daily_metrics as pipeline_sdm

from devsite import cans

if is_multisite():
# First trying this without capturing 'ImportError'
from organizations.models import UserOrganizationMapping


FAKE = faker.Faker()
LAST_DAY = days_from(datetime.datetime.now(), -2).replace(tzinfo=utc)

DAYS_BACK = 180
NO_LEARNERS_PER_COURSE = 50

DAYS_BACK = settings.DEVSITE_SEED['DAYS_BACK']
NUM_LEARNERS_PER_COURSE = settings.DEVSITE_SEED['NUM_LEARNERS_PER_COURSE']

# Quick and dirty debuging
VERBOSE = False


FOO_ORG = 'FOO'


def get_site():
"""
In demo mode, we have just one site (for now)
Expand Down Expand Up @@ -82,7 +100,7 @@ def seed_course_overviews(data=None):
if not data:
data = cans.COURSE_OVERVIEW_DATA
# append with randomly generated course overviews to test pagination
new_courses = [generate_course_overview(i, org='FOO') for i in xrange(20)]
new_courses = [generate_course_overview(i, org=FOO_ORG) for i in xrange(20)]
data += new_courses

for rec in data:
Expand Down Expand Up @@ -166,7 +184,7 @@ def seed_course_enrollments():
TODO: make the number of users variable
"""
for co in CourseOverview.objects.all():
users = seed_users(cans.users.UserGenerator(NO_LEARNERS_PER_COURSE))
users = seed_users(cans.users.UserGenerator(NUM_LEARNERS_PER_COURSE))
seed_course_enrollments_for_course(co.id, users, DAYS_BACK)


Expand Down Expand Up @@ -318,14 +336,46 @@ def seed_lcgm_all():
seed_lcgm_for_course(**seed_args)


def hotwire_multisite():
"""
This is a quick and dirty implementation of a single site in multisite mode
"""
params = dict(
name='Foo Organization',
short_name='FOO',
description='Foo org description',
logo=None,
active=True,
)
org = Organization.objects.create(**params)
if is_multisite():
org.sites = [get_site()]
org.save()

for course in CourseOverview.objects.all():
OrganizationCourse.objects.create(course_id=str(course.id),
organization=org,
active=True)
for user in get_user_model().objects.all():
# For now, not setting is_amc_admin roles
UserOrganizationMapping.objects.create(user=user,
organization=org,
is_active=True)


def wipe():
print('Wiping synthetic data...')
clear_non_admin_users()
CourseEnrollment.objects.all().delete()
StudentModule.objects.all().delete()
CourseOverview.objects.all().delete()
CourseDailyMetrics.objects.all().delete()
SiteDailyMetrics.objects.all().delete()
LearnerCourseGradeMetrics.all().delete()
LearnerCourseGradeMetrics.objects.all().delete()
Organization.objects.all().delete()
OrganizationCourse.objects.all().delete()
if is_multisite():
UserOrganizationMapping.objects.all().delete()


def seed_all():
Expand All @@ -335,9 +385,14 @@ def seed_all():
seed_course_overviews()
print("seeding users...")
seed_users()

print("seeding course enrollments...")
seed_course_enrollments()

if is_multisite():
print("Hotwiring multisite...")
hotwire_multisite()

print("- skipping seeding seed_course_access_roles, broken")
# print("seeding course enrollments...")
# seed_course_access_roles()
Expand Down
22 changes: 20 additions & 2 deletions devsite/devsite/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,25 @@
import os
import sys

import environ

from figures.settings.lms_production import (
update_webpack_loader,
update_celerybeat_schedule,
)

OPENEDX_RELEASE = os.environ.get('OPENEDX_RELEASE', 'HAWTHORN').upper()


env = environ.Env(
FIGURES_IS_MULTISITE=(bool, False),
SEED_DAYS_BACK=(int, 60),
SEED_NUM_LEARNERS_PER_COURSE=(int, 25)
)

environ.Env.read_env()


if OPENEDX_RELEASE == 'GINKGO':
MOCKS_DIR = 'mocks/ginkgo'
else:
Expand Down Expand Up @@ -199,9 +211,10 @@
# Included here for completeness in having this settings file match behavior in
# the LMS settings
CELERYBEAT_SCHEDULE = {}
FEATURES = {}
FEATURES = {
'FIGURES_IS_MULTISITE': env('FIGURES_IS_MULTISITE')
}

FEATURES = {}

# The LMS defines ``ENV_TOKENS`` to load settings declared in `lms.env.json`
# We have an empty dict here to replicate behavior in the LMS
Expand All @@ -214,3 +227,8 @@
INTERNAL_IPS = [
'127.0.0.1'
]

DEVSITE_SEED = {
'DAYS_BACK': env('SEED_DAYS_BACK'),
'NUM_LEARNERS_PER_COURSE': env('SEED_NUM_LEARNERS_PER_COURSE')
}
1 change: 1 addition & 0 deletions devsite/requirements/ginkgo.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ django-webpack-loader==0.4.1
# appsembler/gingko/master users 0.4.1

django-model-utils==2.3.1
django-environ==0.4.5

jsonfield==1.0.3 # Version used in Ginkgo. Hawthorn uses version 2.0.2

Expand Down
1 change: 1 addition & 0 deletions devsite/requirements/hawthorn_community.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ django-countries==4.6.1
django-filter==1.0.4
django-webpack-loader==0.6.0
django-model-utils==3.0.0
django-environ==0.4.5

jsonfield==2.0.2

Expand Down
1 change: 1 addition & 0 deletions devsite/requirements/hawthorn_multisite.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ django-countries==4.6.1
django-filter==1.0.4
django-webpack-loader==0.6.0
django-model-utils==3.0.0
django-environ==0.4.5

jsonfield==2.0.2

Expand Down
18 changes: 13 additions & 5 deletions figures/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,19 @@ class LearnerCourseGradeMetricsManager(models.Manager):
"""Custom model manager for LearnerCourseGrades model
"""
def most_recent_for_learner_course(self, user, course_id):
queryset = self.filter(user=user, course_id=str(course_id))
if queryset:
return queryset.order_by('-date_for')[0]
else:
return None
"""Gets the most recent record for the given user and course

We have this because we implement sparse data, meaning we only create
new records when data has changed. this means that for a given course,
learners may not have the same "most recent date"

This means we have to be careful of where we use this method in our
API as it costs a query per call. We will likely require restructuring
or augmenting our data if we need to bulk retrieve
"""
queryset = self.filter(user=user,
course_id=str(course_id)).order_by('-date_for')
return queryset[0] if queryset else None

def most_recent_for_course(self, course_id):
statement = """ \
Expand Down
39 changes: 33 additions & 6 deletions figures/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,25 @@ def get_progress_details(self, obj): # pylint: disable=unused-argument
return self._lcgm.progress_details if self._lcgm else None


class LearnerMetricsListSerializer(serializers.ListSerializer):
"""
See if we need to add to class: # pylint: disable=abstract-method
"""
def __init__(self, instance=None, data=empty, **kwargs):
"""instance is a queryset of users

TODO: Ensure that we only have our own site's course keys
"""
self.site = kwargs['context'].get('site')
self.course_keys = kwargs['context'].get('course_keys')

if not self.course_keys:
self.course_keys = figures.sites.get_course_keys_for_site(self.site)

super(LearnerMetricsListSerializer, self).__init__(
instance=instance, data=data, **kwargs)


class LearnerMetricsSerializer(serializers.ModelSerializer):
fullname = serializers.CharField(source='profile.name', default=None)
# enrollments = EnrollmentMetricsSerializerV2(source='courseenrollment_set',
Expand All @@ -847,16 +866,24 @@ class LearnerMetricsSerializer(serializers.ModelSerializer):

class Meta:
model = get_user_model()
# list_serializer_class = LearnerMetricsListSerializer
fields = ('id', 'username', 'email', 'fullname', 'is_active',
'date_joined', 'enrollments')
read_only_fields = fields

def get_enrollments(self, user):
site_enrollments = figures.sites.get_course_enrollments_for_site(
self.context.get('site'))
user_enrollments = site_enrollments.filter(user=user)
course_keys = self.context.get('course_keys')
if course_keys:
user_enrollments = user_enrollments.filter(course_id__in=course_keys)
"""
Rely on the caller (the view) to filter users and prefetch related
"""

user_enrollments = user.courseenrollment_set.all()

# Still in testing, and remarked out to get the first pass PR through:
# This is where the ListSerializer helps, by doing the database hit in
# one set of queries at the top, then using the results for each. But
# it still needs work

# user_enrollments = user.courseenrollment_set.filter(
# course_id__in=self.parent.course_keys)

return EnrollmentMetricsSerializerV2(user_enrollments, many=True).data
7 changes: 7 additions & 0 deletions figures/sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,13 @@ def get_organizations_for_site(site):


def get_course_keys_for_site(site):
"""

Developer note: We could improve this function with caching
Question is which is the most efficient way to know cache expiry

We may also be able to reduce the queries here to also improve performance
"""
if figures.helpers.is_multisite():
orgs = organizations.models.Organization.objects.filter(sites__in=[site])
org_courses = organizations.models.OrganizationCourse.objects.filter(
Expand Down
41 changes: 34 additions & 7 deletions figures/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,17 +427,44 @@ def query_param_course_keys(self):
cid_list = self.request.GET.getlist('course')
return [CourseKey.from_string(elem.replace(' ', '+')) for elem in cid_list]

def get_enrolled_users(self, site, course_keys):
"""Get users enrolled in the specific courses for the specified site

Args:
site: The site for which is being called
course_keys: list of Open edX course keys

Returns:
Django QuerySet of users enrolled in the specified courses

Note:
We should move this to `figures.sites`
"""
qs = figures.sites.get_users_for_site(site).filter(
courseenrollment__course_id__in=course_keys
).select_related('profile').prefetch_related('courseenrollment_set')
return qs

def get_queryset(self):
"""
This function has a hack to filter users until we can get the `filter_class`
working
If one or more course keys are given as query parameters, then
* Course key filtering mode is ued. Any invalid keys are filtered out
from the list
* If no valid course keys are found, then an empty list is returned from
this view
"""
site = django.contrib.sites.shortcuts.get_current_site(self.request)
queryset = figures.sites.get_users_for_site(site)
course_keys = self.query_param_course_keys()
if course_keys:
queryset = figures.sites.users_enrolled_in_courses(course_keys)
return queryset
course_keys = figures.sites.get_course_keys_for_site(site)
try:
param_course_keys = self.query_param_course_keys()
except InvalidKeyError:
raise NotFound()
if param_course_keys:
if not set(param_course_keys).issubset(set(course_keys)):
raise NotFound()
else:
course_keys = param_course_keys
return self.get_enrolled_users(site=site, course_keys=course_keys)

def get_serializer_context(self):
context = super(LearnerMetricsViewSet, self).get_serializer_context()
Expand Down
Loading