Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Figures 0.4 learner progress overview performance boost #301

Merged
merged 27 commits into from
Dec 11, 2020
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
509e7db
0.4 LPO performance update - figures.compat
johnbaldwin Dec 9, 2020
7748be1
0.4 LPO performance update - new EnrollmentProgress class
johnbaldwin Dec 9, 2020
7d27ab9
Light refactoring of pipeline 'enrollment_metrics' module
johnbaldwin Dec 9, 2020
76716eb
0.4 LPO performance update - New EnrollmentData model
johnbaldwin Dec 9, 2020
96ada6d
0.4 LPO performance update - Updated views and serializers
johnbaldwin Dec 9, 2020
2d089ad
0.4 LPO performance update - figures.models - comment string
johnbaldwin Dec 9, 2020
f29c556
0.4 LPO performance update - backfill EnrollmentData first pass
johnbaldwin Dec 9, 2020
35bbc6a
0.4 LPO performance update - Frontend API call change
johnbaldwin Dec 9, 2020
ff5e375
0.4 LPO performance update - devsite flake8 fix
johnbaldwin Dec 10, 2020
1edae0b
0.4 LPO performance update - Add exception handling to compat
johnbaldwin Dec 10, 2020
1b6b09e
0.4 LPO performance update - Add backfill exception handling
johnbaldwin Dec 10, 2020
37c3990
0.4 LPO performance update - EnrollmentData update task and command
johnbaldwin Dec 10, 2020
a018f98
0.4 - performance update - Fix 0015 migration Ginkgo compat issue
johnbaldwin Dec 10, 2020
9f29ea6
0.4 LPO performance update - figures.compat suppress pylint error
johnbaldwin Dec 10, 2020
fb90299
0.4 LPO performance improvement figures.serializers fix pylint error
johnbaldwin Dec 10, 2020
5ec48d5
0.4 LPO performance update - Fixed enrollment data management command
johnbaldwin Dec 10, 2020
c2e6f69
0.4 LPO performance update - Skip Ginkgo test - CourseEnrollmentFactory
johnbaldwin Dec 10, 2020
8bf3bf9
Merge branch 'master' into john/0.4-lpo-performance-boost
johnbaldwin Dec 10, 2020
071e13a
Merge branch 'master' into john/0.4-lpo-performance-boost
johnbaldwin Dec 10, 2020
ac7fb39
Merge branch 'master' into john/0.4-lpo-performance-boost
johnbaldwin Dec 10, 2020
5ed449f
0.4 LPO performance update - fix command update_figures_enrollment_data
johnbaldwin Dec 10, 2020
4f63c45
Added missing 'sqlite' schema to devsite.settings
johnbaldwin Dec 10, 2020
f7bfa4a
0.4 LP performance update - pylint fix on figures.compat
johnbaldwin Dec 10, 2020
cc7ab5e
0.4 performance update - Fixed missing 'id' keyword arg figures.tasks
johnbaldwin Dec 10, 2020
a73fcbd
.gitignore update
johnbaldwin Dec 10, 2020
82a9f42
0.4 LPO progress update - fixed task typo
johnbaldwin Dec 11, 2020
65174d3
0.4 LPO performance update - Fixed dupe learners
johnbaldwin Dec 11, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions devsite/devsite/seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@

from organizations.models import Organization, OrganizationCourse

from figures.backfill import backfill_enrollment_data_for_site
from figures.compat import RELEASE_LINE, GeneratedCertificate
from figures.models import (
CourseDailyMetrics,
EnrollmentData,
LearnerCourseGradeMetrics,
SiteDailyMetrics,
SiteMonthlyMetrics,
)
from figures.helpers import (
as_course_key,
Expand Down Expand Up @@ -407,20 +410,37 @@ def hotwire_multisite():
is_active=True)


def backfill_figures_ed():
results = dict()
for site in Site.objects.all():
print('Backfilling enrollment data for site "{}"'.format(site.domain))
site_ed = backfill_enrollment_data_for_site(site)
results[site.id] = site_ed
return results


def wipe():
print('Wiping synthetic data...')
clear_non_admin_users()

# edx-platform models
CourseEnrollment.objects.all().delete()
StudentModule.objects.all().delete()
CourseOverview.objects.all().delete()
CourseDailyMetrics.objects.all().delete()
SiteDailyMetrics.objects.all().delete()
LearnerCourseGradeMetrics.objects.all().delete()

# edx-organizations models
Organization.objects.all().delete()
OrganizationCourse.objects.all().delete()
if is_multisite():
UserOrganizationMapping.objects.all().delete()

# Figures models
CourseDailyMetrics.objects.all().delete()
EnrollmentData.objects.all().delete()
LearnerCourseGradeMetrics.objects.all().delete()
SiteDailyMetrics.objects.all().delete()
SiteMonthlyMetrics.objects.all().delete()


def seed_all():
print("\nseeding mock platform models")
Expand Down Expand Up @@ -452,3 +472,5 @@ def seed_all():
seed_course_daily_metrics()
print("seeding site daily metrics...")
seed_site_daily_metrics()
print('Backfilling Figures EnrollmentData models...')
backfill_figures_ed()
21 changes: 21 additions & 0 deletions figures/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,27 @@ class SiteMonthlyMetricsAdmin(admin.ModelAdmin):
'month_for')


@admin.register(figures.models.EnrollmentData)
class EnrollmentDataAdmin(admin.ModelAdmin):
"""Defines the admin interface for the EnrollmentData model
"""
list_display = (
'id', 'site', 'user_link', 'course_id', 'date_for', 'date_enrolled',
'is_enrolled', 'is_completed', 'progress_percent', 'points_possible',
'points_earned', 'sections_worked', 'sections_possible'
)
read_only_fields = ('site', 'user', 'user_link', 'course_id')

def user_link(self, obj):
if obj.user:
return format_html(
'<a href="{}">{}</a>',
reverse("admin:auth_user_change", args=(obj.user.pk,)),
obj.user.email)
else:
return 'Missing user'


@admin.register(figures.models.LearnerCourseGradeMetrics)
class LearnerCourseGradeMetricsAdmin(admin.ModelAdmin):
"""Defines the admin interface for the LearnerCourseGradeMetrics model
Expand Down
36 changes: 35 additions & 1 deletion figures/backfill.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@

from django.utils.timezone import utc

from figures.sites import get_student_modules_for_site
from figures.compat import CourseNotFound
from figures.sites import (
get_course_enrollments_for_site,
get_student_modules_for_site
)
from figures.pipeline.site_monthly_metrics import fill_month
from figures.models import EnrollmentData


def backfill_monthly_metrics_for_site(site, overwrite=False):
Expand All @@ -37,3 +42,32 @@ def backfill_monthly_metrics_for_site(site, overwrite=False):
backfilled.append(dict(obj=obj, created=created, dt=dt))

return backfilled


def backfill_enrollment_data_for_site(site):
"""Convenience function to fill EnrollmentData records

This backfills EnrollmentData records for existing CourseEnrollment
and LearnerCourseGradeMetrics records

```

Potential improvements: iterate by course id within site, have a function
specific to a course. more queries, but breaks up the work
"""
enrollment_data = []
errors = []
site_course_enrollments = get_course_enrollments_for_site(site)
for rec in site_course_enrollments:
try:
obj, created = EnrollmentData.objects.set_enrollment_data(
site=site,
user=rec.user,
course_id=rec.course_id)
enrollment_data.append((obj, created))
except CourseNotFound:
errors.append('CourseNotFound for course "{}". '
' CourseEnrollment ID='.format(str(rec.course_id,
rec.id)))

return dict(results=enrollment_data, errors=errors)
54 changes: 51 additions & 3 deletions figures/compat.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
'''Figures compatability module
"""Figures compatability module

This module serves to provide a common access point to functionality that
differs from different named Open edX releases
Expand All @@ -7,16 +7,34 @@
value from openedx.core.release.RELEASE_LINE. This will be the release name as
a lowercase string, such as 'ginkgo' or 'hawthorn'

'''
# pylint: disable=ungrouped-imports,useless-suppression
TODO: Consider wrapping edx-platform's `get_course_by_id` in a function as it
raises a django.http.Http404 if the course if not found, which is weird.
We can then raise our own `CourseNotFound` which makes exception handling
in Figures clearer to handle with a specific exception. Our callers could
do the following:
```
try:
return get_course_by_id(id)
except CourseNotFound:
handle exception
```
"""
# pylint: disable=ungrouped-imports,useless-suppression,wrong-import-position

from __future__ import absolute_import
from django.http import Http404
from figures.helpers import as_course_key


class UnsuportedOpenedXRelease(Exception):
pass


class CourseNotFound(Exception):
"""Raised when edx-platform 'course' structure is not found
"""
pass

# Pre-Ginkgo does not define `RELEASE_LINE`
try:
from openedx.core.release import RELEASE_LINE
Expand Down Expand Up @@ -52,6 +70,12 @@ class UnsuportedOpenedXRelease(Exception):
from opaque_keys.edx.django.models import CourseKeyField # noqa pylint: disable=unused-import,import-error


# preemptive addition. Added it here to avoid adding to figures.models
# In fact, we should probably do a refactoring that makes all Figures import it
# from here
from student.models import CourseEnrollment # noqa pylint: disable=unused-import,import-error


def course_grade(learner, course):
"""
Compatibility function to retrieve course grades
Expand All @@ -64,6 +88,30 @@ def course_grade(learner, course):
return CourseGradeFactory().read(learner, course)


def course_grade_from_course_id(learner, course_id):
"""Get the edx-platform's course grade for this enrollment

IMPORTANT: Do not use in API calls as this is an expensive operation.
Only use in async or pipeline.

We handle the exception so that we return a specific `CourseNotFound`
instead of the non-specific `Http404`
edx-platform `get_course_by_id` function raises a generic `Http404` if it
cannot find a course in modulestore. We trap this and raise our own
`CourseNotFound` exception as it is more specific.

TODO: Consider optional kwarg param or Figures setting to log performance.
Bonus points: Make id a decorator
"""
try:
course = get_course_by_id(course_key=as_course_key(course_id))
except Http404:
raise CourseNotFound('{}'.format(str(course_id)))
course._field_data_cache = {} # pylint: disable=protected-access
course.set_grading_policy(course.grading_policy)
return course_grade(learner, course)


def chapter_grade_values(chapter_grades):
'''

Expand Down
59 changes: 59 additions & 0 deletions figures/management/commands/update_figures_enrollment_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""This Django management command updates Figures EnrollmentData records

Running this will trigger figures.tasks.update_enrollment_data for every site
unless the '--site' option is used. Then it will update just that site
"""
from __future__ import print_function
from __future__ import absolute_import
from textwrap import dedent
from django.contrib.sites.models import Site
from django.core.management.base import BaseCommand
from figures.tasks import update_enrollment_data


def get_site(identifier):
"""Quick-n-dirty function to let the caller choose the site id or domain
Let the 'get' fail if record can't be found from the identifier
"""
try:
filter_arg = dict(pk=int(identifier))
except ValueError:
filter_arg = dict(domain=identifier)
return Site.objects.get(**filter_arg)


class Command(BaseCommand):
"""Populate Figures metrics models

Improvements
"""
help = dedent(__doc__).strip()

def add_arguments(self, parser):
parser.add_argument('--no-delay',
action='store_true',
default=False,
help='Disable the celery "delay" directive')
parser.add_argument('--site',
help='backfill a specific site. provide id or domain name')

def handle(self, *args, **options):
print('BEGIN: Update Figures EnrollmentData')

if options['site']:
sites = [get_site(options['site'])]
else:
# Would be great to be able to filter out dead sites
# Would be really great to be able to filter out dead sites
# Would be really Really great to be able to filter out dead sites
# Would be really Really REALLY great to be able to filter out dead sites
Comment on lines +46 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


sites = Site.objects.all()
for site in sites:
print('Updating EnrollmentData for site "{}"'.format(site.domain))
if options['no_delay']:
update_enrollment_data(site)
else:
update_enrollment_data.delay(site=site) # pragma: no cover

print('DONE: Update Figures EnrollmentData')
1 change: 0 additions & 1 deletion figures/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ def period_str(month_tuple, fmt='%Y/%m'):
"""
return datetime.date(*month_tuple).strftime(fmt)


#
# Learner specific data/metrics
#
Expand Down
53 changes: 53 additions & 0 deletions figures/migrations/0015_add_enrollment_data_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.29 on 2020-12-09 14:17
from __future__ import unicode_literals

from django import VERSION as DJANGO_VERSION
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
import model_utils.fields


class Migration(migrations.Migration):

if DJANGO_VERSION[0:2] == (1,8):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('sites', '0001_initial'),
('figures', '0014_add_indexes_to_daily_metrics'),
]
else: # Assuming 1.11+
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('sites', '0002_alter_domain_unique'),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should work, no need to specify the latest migration from sites, so if you'd like it's possible to remove this if statement

Suggested change
('sites', '0002_alter_domain_unique'),
('sites', '0001_initial'),

('figures', '0014_add_indexes_to_daily_metrics'),
]

operations = [
migrations.CreateModel(
name='EnrollmentData',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')),
('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')),
('course_id', models.CharField(db_index=True, max_length=255)),
('date_for', models.DateField(db_index=True)),
('date_enrolled', models.DateField(db_index=True)),
('is_enrolled', models.BooleanField()),
('is_completed', models.BooleanField()),
('progress_percent', models.FloatField(default=0.0)),
('points_possible', models.FloatField()),
('points_earned', models.FloatField()),
('sections_worked', models.IntegerField()),
('sections_possible', models.IntegerField()),
('site', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='sites.Site')),
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
),
migrations.AlterUniqueTogether(
name='enrollmentdata',
unique_together=set([('site', 'user', 'course_id')]),
),
]
Loading