diff --git a/.gitignore b/.gitignore index 3aaa9302..a372036f 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ nosetests.xml coverage.xml *.cover .hypothesis/ +.pytest_cache # Translations *.mo @@ -53,7 +54,7 @@ coverage.xml # Django stuff: *.log local_settings.py -devsite/devsite/.env +devsite/.env # Flask stuff: instance/ @@ -126,4 +127,4 @@ sandbox /scratch.py /scratch.txt /notes -*.bak \ No newline at end of file +*.bak diff --git a/devsite/devsite/seed.py b/devsite/devsite/seed.py index cac6e5a8..0e5543ac 100644 --- a/devsite/devsite/seed.py +++ b/devsite/devsite/seed.py @@ -25,11 +25,14 @@ from organizations.models import Organization, OrganizationCourse +from figures.backfill import backfill_enrollment_data_for_site from figures.compat import RELEASE_LINE, GeneratedCertificate from figures.models import ( CourseDailyMetrics, + EnrollmentData, LearnerCourseGradeMetrics, SiteDailyMetrics, + SiteMonthlyMetrics, ) from figures.helpers import ( as_course_key, @@ -407,20 +410,37 @@ def hotwire_multisite(): is_active=True) +def backfill_figures_ed(): + results = dict() + for site in Site.objects.all(): + print('Backfilling enrollment data for site "{}"'.format(site.domain)) + site_ed = backfill_enrollment_data_for_site(site) + results[site.id] = site_ed + return results + + def wipe(): print('Wiping synthetic data...') clear_non_admin_users() + + # edx-platform models CourseEnrollment.objects.all().delete() StudentModule.objects.all().delete() CourseOverview.objects.all().delete() - CourseDailyMetrics.objects.all().delete() - SiteDailyMetrics.objects.all().delete() - LearnerCourseGradeMetrics.objects.all().delete() + + # edx-organizations models Organization.objects.all().delete() OrganizationCourse.objects.all().delete() if is_multisite(): UserOrganizationMapping.objects.all().delete() + # Figures models + CourseDailyMetrics.objects.all().delete() + EnrollmentData.objects.all().delete() + LearnerCourseGradeMetrics.objects.all().delete() + SiteDailyMetrics.objects.all().delete() + SiteMonthlyMetrics.objects.all().delete() + def seed_all(): print("\nseeding mock platform models") @@ -452,3 +472,5 @@ def seed_all(): seed_course_daily_metrics() print("seeding site daily metrics...") seed_site_daily_metrics() + print('Backfilling Figures EnrollmentData models...') + backfill_figures_ed() diff --git a/devsite/devsite/settings.py b/devsite/devsite/settings.py index 1f0e7a43..7c086e0e 100644 --- a/devsite/devsite/settings.py +++ b/devsite/devsite/settings.py @@ -169,9 +169,9 @@ # https://docs.djangoproject.com/en/1.8/ref/settings/#databases # https://github.com/joke2k/django-environ/tree/v0.4.5 -DEFAULT_SQLITE_DB_URL = os.path.join(DEVSITE_BASE_DIR, - 'figures-{release}-db.sqlite3'.format( - release=OPENEDX_RELEASE.lower())) +DEFAULT_SQLITE_DB_URL = 'sqlite:///{}'.format(os.path.join( + DEVSITE_BASE_DIR, + 'figures-{release}-db.sqlite3'.format(release=OPENEDX_RELEASE.lower()))) DATABASES = { 'default': env.db(default=DEFAULT_SQLITE_DB_URL) diff --git a/figures/admin.py b/figures/admin.py index d8f6c7e6..5bc21586 100644 --- a/figures/admin.py +++ b/figures/admin.py @@ -67,6 +67,27 @@ class SiteMonthlyMetricsAdmin(admin.ModelAdmin): 'month_for') +@admin.register(figures.models.EnrollmentData) +class EnrollmentDataAdmin(admin.ModelAdmin): + """Defines the admin interface for the EnrollmentData model + """ + list_display = ( + 'id', 'site', 'user_link', 'course_id', 'date_for', 'date_enrolled', + 'is_enrolled', 'is_completed', 'progress_percent', 'points_possible', + 'points_earned', 'sections_worked', 'sections_possible' + ) + read_only_fields = ('site', 'user', 'user_link', 'course_id') + + def user_link(self, obj): + if obj.user: + return format_html( + '<a href="{}">{}</a>', + reverse("admin:auth_user_change", args=(obj.user.pk,)), + obj.user.email) + else: + return 'Missing user' + + @admin.register(figures.models.LearnerCourseGradeMetrics) class LearnerCourseGradeMetricsAdmin(admin.ModelAdmin): """Defines the admin interface for the LearnerCourseGradeMetrics model diff --git a/figures/backfill.py b/figures/backfill.py index d918390e..438ad0b5 100644 --- a/figures/backfill.py +++ b/figures/backfill.py @@ -10,8 +10,13 @@ from django.utils.timezone import utc -from figures.sites import get_student_modules_for_site +from figures.compat import CourseNotFound +from figures.sites import ( + get_course_enrollments_for_site, + get_student_modules_for_site +) from figures.pipeline.site_monthly_metrics import fill_month +from figures.models import EnrollmentData def backfill_monthly_metrics_for_site(site, overwrite=False): @@ -37,3 +42,32 @@ def backfill_monthly_metrics_for_site(site, overwrite=False): backfilled.append(dict(obj=obj, created=created, dt=dt)) return backfilled + + +def backfill_enrollment_data_for_site(site): + """Convenience function to fill EnrollmentData records + + This backfills EnrollmentData records for existing CourseEnrollment + and LearnerCourseGradeMetrics records + + ``` + + Potential improvements: iterate by course id within site, have a function + specific to a course. more queries, but breaks up the work + """ + enrollment_data = [] + errors = [] + site_course_enrollments = get_course_enrollments_for_site(site) + for rec in site_course_enrollments: + try: + obj, created = EnrollmentData.objects.set_enrollment_data( + site=site, + user=rec.user, + course_id=rec.course_id) + enrollment_data.append((obj, created)) + except CourseNotFound: + errors.append('CourseNotFound for course "{}". ' + ' CourseEnrollment ID='.format(str(rec.course_id, + rec.id))) + + return dict(results=enrollment_data, errors=errors) diff --git a/figures/compat.py b/figures/compat.py index 1bcb29fe..50edafcf 100644 --- a/figures/compat.py +++ b/figures/compat.py @@ -1,4 +1,4 @@ -'''Figures compatability module +"""Figures compatability module This module serves to provide a common access point to functionality that differs from different named Open edX releases @@ -7,16 +7,35 @@ value from openedx.core.release.RELEASE_LINE. This will be the release name as a lowercase string, such as 'ginkgo' or 'hawthorn' -''' -# pylint: disable=ungrouped-imports,useless-suppression +TODO: Consider wrapping edx-platform's `get_course_by_id` in a function as it + raises a django.http.Http404 if the course if not found, which is weird. + We can then raise our own `CourseNotFound` which makes exception handling + in Figures clearer to handle with a specific exception. Our callers could + do the following: + ``` + try: + return get_course_by_id(id) + except CourseNotFound: + handle exception + ``` +""" +# pylint: disable=ungrouped-imports,useless-suppression,wrong-import-position from __future__ import absolute_import +from django.http import Http404 +from figures.helpers import as_course_key class UnsuportedOpenedXRelease(Exception): pass +class CourseNotFound(Exception): + """Raised when edx-platform 'course' structure is not found + """ + pass + + # Pre-Ginkgo does not define `RELEASE_LINE` try: from openedx.core.release import RELEASE_LINE @@ -52,6 +71,12 @@ class UnsuportedOpenedXRelease(Exception): from opaque_keys.edx.django.models import CourseKeyField # noqa pylint: disable=unused-import,import-error +# preemptive addition. Added it here to avoid adding to figures.models +# In fact, we should probably do a refactoring that makes all Figures import it +# from here +from student.models import CourseEnrollment # noqa pylint: disable=unused-import,import-error + + def course_grade(learner, course): """ Compatibility function to retrieve course grades @@ -64,6 +89,30 @@ def course_grade(learner, course): return CourseGradeFactory().read(learner, course) +def course_grade_from_course_id(learner, course_id): + """Get the edx-platform's course grade for this enrollment + + IMPORTANT: Do not use in API calls as this is an expensive operation. + Only use in async or pipeline. + + We handle the exception so that we return a specific `CourseNotFound` + instead of the non-specific `Http404` + edx-platform `get_course_by_id` function raises a generic `Http404` if it + cannot find a course in modulestore. We trap this and raise our own + `CourseNotFound` exception as it is more specific. + + TODO: Consider optional kwarg param or Figures setting to log performance. + Bonus points: Make id a decorator + """ + try: + course = get_course_by_id(course_key=as_course_key(course_id)) + except Http404: + raise CourseNotFound('{}'.format(str(course_id))) + course._field_data_cache = {} # pylint: disable=protected-access + course.set_grading_policy(course.grading_policy) + return course_grade(learner, course) + + def chapter_grade_values(chapter_grades): ''' diff --git a/figures/management/commands/update_figures_enrollment_data.py b/figures/management/commands/update_figures_enrollment_data.py new file mode 100644 index 00000000..78de102f --- /dev/null +++ b/figures/management/commands/update_figures_enrollment_data.py @@ -0,0 +1,59 @@ +"""This Django management command updates Figures EnrollmentData records + +Running this will trigger figures.tasks.update_enrollment_data for every site +unless the '--site' option is used. Then it will update just that site +""" +from __future__ import print_function +from __future__ import absolute_import +from textwrap import dedent +from django.contrib.sites.models import Site +from django.core.management.base import BaseCommand +from figures.tasks import update_enrollment_data + + +def get_site(identifier): + """Quick-n-dirty function to let the caller choose the site id or domain + Let the 'get' fail if record can't be found from the identifier + """ + try: + filter_arg = dict(pk=int(identifier)) + except ValueError: + filter_arg = dict(domain=identifier) + return Site.objects.get(**filter_arg) + + +class Command(BaseCommand): + """Populate Figures metrics models + + Improvements + """ + help = dedent(__doc__).strip() + + def add_arguments(self, parser): + parser.add_argument('--no-delay', + action='store_true', + default=False, + help='Disable the celery "delay" directive') + parser.add_argument('--site', + help='backfill a specific site. provide id or domain name') + + def handle(self, *args, **options): + print('BEGIN: Update Figures EnrollmentData') + + if options['site']: + sites = [get_site(options['site'])] + else: + # Would be great to be able to filter out dead sites + # Would be really great to be able to filter out dead sites + # Would be really Really great to be able to filter out dead sites + # Would be really Really REALLY great to be able to filter out dead sites + + sites = Site.objects.all() + for site in sites: + print('Updating EnrollmentData for site "{}"'.format(site.domain)) + if options['no_delay']: + update_enrollment_data(site_id=site.id) + else: + update_enrollment_data.delay(site_id=site.id) # pragma: no cover + + print('DONE: Update Figures EnrollmentData') diff --git a/figures/metrics.py b/figures/metrics.py index a94e444a..bd1a7778 100644 --- a/figures/metrics.py +++ b/figures/metrics.py @@ -64,7 +64,6 @@ def period_str(month_tuple, fmt='%Y/%m'): """ return datetime.date(*month_tuple).strftime(fmt) - # # Learner specific data/metrics # diff --git a/figures/migrations/0015_add_enrollment_data_model.py b/figures/migrations/0015_add_enrollment_data_model.py new file mode 100644 index 00000000..75f7e68c --- /dev/null +++ b/figures/migrations/0015_add_enrollment_data_model.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.29 on 2020-12-09 14:17 +from __future__ import unicode_literals + +from django import VERSION as DJANGO_VERSION +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone +import model_utils.fields + + +class Migration(migrations.Migration): + + if DJANGO_VERSION[0:2] == (1,8): + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('sites', '0001_initial'), + ('figures', '0014_add_indexes_to_daily_metrics'), + ] + else: # Assuming 1.11+ + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('sites', '0002_alter_domain_unique'), + ('figures', '0014_add_indexes_to_daily_metrics'), + ] + + operations = [ + migrations.CreateModel( + name='EnrollmentData', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created', model_utils.fields.AutoCreatedField(default=django.utils.timezone.now, editable=False, verbose_name='created')), + ('modified', model_utils.fields.AutoLastModifiedField(default=django.utils.timezone.now, editable=False, verbose_name='modified')), + ('course_id', models.CharField(db_index=True, max_length=255)), + ('date_for', models.DateField(db_index=True)), + ('date_enrolled', models.DateField(db_index=True)), + ('is_enrolled', models.BooleanField()), + ('is_completed', models.BooleanField()), + ('progress_percent', models.FloatField(default=0.0)), + ('points_possible', models.FloatField()), + ('points_earned', models.FloatField()), + ('sections_worked', models.IntegerField()), + ('sections_possible', models.IntegerField()), + ('site', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='sites.Site')), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + ), + migrations.AlterUniqueTogether( + name='enrollmentdata', + unique_together=set([('site', 'user', 'course_id')]), + ), + ] diff --git a/figures/models.py b/figures/models.py index 1d874f4e..6012aeb6 100644 --- a/figures/models.py +++ b/figures/models.py @@ -16,6 +16,10 @@ from model_utils.models import TimeStampedModel +from figures.compat import CourseEnrollment +from figures.helpers import as_course_key +from figures.progress import EnrollmentProgress + def default_site(): """ @@ -190,10 +194,137 @@ def add_month(cls, site, year, month, active_user_count, overwrite=False): defaults=defaults) +class EnrollmentDataManager(models.Manager): + """Custom model manager for EnrollmentData + + Initial purpose is to handle the logic of creating and updating + EnrollmentData instances. + + """ + def set_enrollment_data(self, site, user, course_id, course_enrollment=False): + """ + This is an expensive call as it needs to call CourseGradeFactory if + there is not already a LearnerCourseGradeMetrics record for the learner + """ + if not course_enrollment: + # For now, let it raise a `CourseEnrollment.DoesNotExist + # Later on we can add a try block and raise out own custom + # exception + course_enrollment = CourseEnrollment.objects.get( + user=user, + course_id=as_course_key(course_id)) + + defaults = dict( + is_enrolled=course_enrollment.is_active, + date_enrolled=course_enrollment.created, + ) + + # Note: doesn't use site for filtering + lcgm = LearnerCourseGradeMetrics.objects.latest_lcgm( + user=user, + course_id=str(course_id)) + if lcgm: + # do we already have an enrollment data record + # We may change this to use + progress_data = dict( + date_for=lcgm.date_for, + is_completed=lcgm.completed, + progress_percent=lcgm.progress_percent, + points_possible=lcgm.points_possible, + points_earned=lcgm.points_earned, + sections_possible=lcgm.sections_possible, + sections_worked=lcgm.sections_worked + ) + else: + ep = EnrollmentProgress(user=user, course_id=course_id) + # TODO: If we get progress worked and there is no LCGM, then we have + # a bug OR there was progress after the last daily metrics collection + progress_data = dict( + date_for=date.today(), + is_completed=ep.is_completed(), + progress_percent=ep.progress_percent(), + points_possible=ep.progress.get('points_possible', 0), + points_earned=ep.progress.get('points_earned', 0), + sections_possible=ep.progress.get('sections_possible', 0), + sections_worked=ep.progress.get('sections_worked', 0) + ) + defaults.update(progress_data) + + obj, created = self.update_or_create( + site=site, + user=user, + course_id=str(course_id), + defaults=defaults) + return obj, created + + +@python_2_unicode_compatible +class EnrollmentData(TimeStampedModel): + """Tracks most recent enrollment data for an enrollment + + An enrollment is a unique site + user + course + + This model stores basic enrollment information and latest progress + The purpose of this class is for query performance for the 'learner-metrics' + API endpoint which is needed for the learner progress overview page. + + This is an intial take on caching current enrollment data with the dual + purposes of speeding up the learner-metrics endpoint needed for the LPO page + as well as doing so in clear maintainable code. + + At some point in the future, we'll probably have to construct a key-value + high performance storage, but for now, we'd like to see how far we can get + with the basic Django architecture. Plus this simplifies running Figures on + small Open edX LMS deployments + """ + site = models.ForeignKey(Site, on_delete=models.CASCADE) + user = models.ForeignKey(settings.AUTH_USER_MODEL, + on_delete=models.CASCADE) + course_id = models.CharField(max_length=255, db_index=True) + date_for = models.DateField(db_index=True) + + # Date enrolled is from CourseEnrollment.created + date_enrolled = models.DateField(db_index=True) + + # From CourseEnrollment.is_active + is_enrolled = models.BooleanField() + + # From LCGM.completed property methods + is_completed = models.BooleanField() + progress_percent = models.FloatField(default=0.00) + + # from LCGM fields + points_possible = models.FloatField() + points_earned = models.FloatField() + sections_worked = models.IntegerField() + sections_possible = models.IntegerField() + + objects = EnrollmentDataManager() + + class Meta: + unique_together = ('site', 'user', 'course_id') + + def __str__(self): + return '{} {} {} {}'.format( + self.id, self.site.domain, self.user.email, self.course_id) + + @property + def progress_details(self): + """This method gets the progress details + This method is a temporary fix until the serializers are updated. + """ + return dict( + points_possible=self.points_possible, + points_earned=self.points_earned, + sections_worked=self.sections_worked, + sections_possible=self.sections_possible, + ) + + class LearnerCourseGradeMetricsManager(models.Manager): """Custom model manager for LearnerCourseGrades model """ - def most_recent_for_learner_course(self, user, course_id): + def latest_lcgm(self, user, course_id): """Gets the most recent record for the given user and course We have this because we implement sparse data, meaning we only create @@ -203,6 +334,9 @@ def most_recent_for_learner_course(self, user, course_id): This means we have to be careful of where we use this method in our API as it costs a query per call. We will likely require restructuring or augmenting our data if we need to bulk retrieve + + TODO: Consider if we want to add 'site' as a parameter and update the + uniqueness constraint to be: site, course_id, user, date_for """ queryset = self.filter(user=user, course_id=str(course_id)).order_by('-date_for') @@ -285,9 +419,9 @@ class LearnerCourseGradeMetrics(TimeStampedModel): Even though this is for a course enrollment, we're mapping to the user and providing course id instead of an FK relationship to the courseenrollment - Reason is we're likely more interested in the learner and the course than the specific - course enrollment. Also means that the Figures models do not have a hard - dependency on models in edx-platform + Reason is we're likely more interested in the learner and the course than + the specific course enrollment. Also means that the Figures models do not + have a hard dependency on models in edx-platform Considered using DecimalField for points as we can control the decimal places But for now, using float, as I'm not entirely sure how many decimal places are @@ -297,7 +431,8 @@ class LearnerCourseGradeMetrics(TimeStampedModel): TODO: Add fields `is_active` - get the 'is_active' value from the enrollment at the time this record is created - `completed` - This lets us filter on a table column instead of calculating it + `completed` - This lets us filter on a table column instead of + calculating it TODO: Add index on 'course_id', 'date_for', 'completed' """ # TODO: Review the most appropriate on_delete behaviour @@ -357,7 +492,8 @@ def progress_details(self): @property def completed(self): - return self.sections_worked > 0 and self.sections_worked == self.sections_possible + return (self.sections_worked > 0 and + self.sections_worked == self.sections_possible) @python_2_unicode_compatible @@ -388,7 +524,9 @@ class PipelineError(TimeStampedModel): blank=True, null=True, on_delete=models.CASCADE) - site = models.ForeignKey(Site, blank=True, null=True, on_delete=models.CASCADE) + site = models.ForeignKey(Site, blank=True, + null=True, + on_delete=models.CASCADE) class Meta: ordering = ['-created'] @@ -422,7 +560,9 @@ def latest_for_site_month(self, site, year, month): """Return the latest record for the given site, month, and year If no record found, returns 'None' """ - queryset = self.filter(site=site, date_for__year=year, date_for__month=month) + queryset = self.filter(site=site, + date_for__year=year, + date_for__month=month) return queryset.order_by('-modified').first() diff --git a/figures/pipeline/enrollment_metrics.py b/figures/pipeline/enrollment_metrics.py index b1b972a7..bec00a0e 100644 --- a/figures/pipeline/enrollment_metrics.py +++ b/figures/pipeline/enrollment_metrics.py @@ -120,8 +120,10 @@ def calculate_average_progress(progress_percentages): TODO: How do we want to handle malformed data? """ if progress_percentages: - average_progress = float(sum(progress_percentages)) / float(len(progress_percentages)) - average_progress = float(Decimal(average_progress).quantize(Decimal('.00'))) + average_progress = float( + sum(progress_percentages)) / float(len(progress_percentages)) + average_progress = float( + Decimal(average_progress).quantize(Decimal('.00'))) else: average_progress = 0.0 return average_progress @@ -171,7 +173,7 @@ def collect_metrics_for_enrollment(site, course_enrollment, course_sm, date_for= if _enrollment_metrics_needs_update(most_recent_lcgm, most_recent_sm): progress_data = _collect_progress_data(most_recent_sm) - metrics = _add_enrollment_metrics_record(site=site, + metrics = _new_enrollment_metrics_record(site=site, course_enrollment=course_enrollment, progress_data=progress_data, date_for=date_for) @@ -259,7 +261,7 @@ def rec_matches_user_and_course(lcgm, sm): return needs_update -def _add_enrollment_metrics_record(site, course_enrollment, progress_data, date_for): +def _new_enrollment_metrics_record(site, course_enrollment, progress_data, date_for): """Convenience function to save progress metrics to Figures """ return LearnerCourseGradeMetrics.objects.create( diff --git a/figures/progress.py b/figures/progress.py new file mode 100644 index 00000000..515106ce --- /dev/null +++ b/figures/progress.py @@ -0,0 +1,100 @@ +""" +This module exists as a quick fix to prevent cyclical dependencies +""" +from figures.compat import ( + chapter_grade_values, + course_grade_from_course_id +) + + +class EnrollmentProgress(object): + """ + Quick hack class adapted from LearnerCourseGrades. Purpose is encapsulating + logic we need to get the total sections and points of a course. + + The proper fix is to have an API to query the course structure and cache + those data into a Figures "CourseData" type model to get course level data + like "sections_possible" and "points_possible" Non-learner specific course + data. + + But until we do that, doing a stripped down version of LearnerCourseGrades + that does fewer operations to get to a learner's progress (grades) data + + TODO: After our performance improvement rollout, rework this functionality. + Perhaps rework this class as a convenience wrapper around the platform's + 'course_grade' structure, then get rid of metrics.LearnerCourseGrades + """ + def __init__(self, user, course_id, **_kwargs): + """ + If figures.compat.course_grade is unable to retrieve the course blocks, + it raises: + + django.core.exceptions.PermissionDenied( + "User does not have access to this course") + """ + self.course_grade = course_grade_from_course_id(learner=user, + course_id=course_id) + self.progress = self._get_progress() + + # Can be a class method instead of instance + def is_section_graded(self, section): + # just being defensive, might not need to check if + # all_total exists and if all_total.possible exists + return bool( + hasattr(section, 'all_total') + and hasattr(section.all_total, 'possible') + and section.all_total.possible > 0 + ) + + def sections(self, only_graded=False, **_kwargs): + """ + yields objects of type: + lms.djangoapps.grades.new.subsection_grade.SubsectionGrade + + Compatibility: + + In Ficus, at least in the default devstack data, chapter_grades is a list + of dicts + """ + + for chapter_grade in chapter_grade_values(self.course_grade.chapter_grades): + for section in chapter_grade['sections']: + if not only_graded or (only_graded and self.is_section_graded(section)): + yield section + + def is_completed(self): + return self.progress['sections_worked'] > 0 and \ + self.progress['sections_worked'] == self.progress['sections_possible'] + + def progress_percent(self): + if self.progress['sections_possible'] > 0: + return float(self.progress['sections_worked']) / float( + self.progress['sections_possible']) + else: + return 0.0 + + def _get_progress(self): + """ + TODO: FIGURE THIS OUT + There are two ways we can go about measurig progress: + + The percentage grade points toward the total grade points + OR + the number of sections completed toward the total number of sections + """ + sections_possible = points_possible = points_earned = sections_worked = 0 + + for section in self.sections(only_graded=True): + if section.all_total.earned > 0: + sections_worked += 1 + points_earned += section.all_total.earned + sections_possible += 1 + points_possible += section.all_total.possible + + # How about a named tuple instead? + return dict( + points_possible=points_possible, + points_earned=points_earned, + sections_worked=sections_worked, + sections_possible=sections_possible, + ) diff --git a/figures/serializers.py b/figures/serializers.py index 3de839dc..aceca6eb 100644 --- a/figures/serializers.py +++ b/figures/serializers.py @@ -44,6 +44,7 @@ from figures.models import ( CourseDailyMetrics, CourseMauMetrics, + EnrollmentData, SiteDailyMetrics, SiteMauMetrics, LearnerCourseGradeMetrics, @@ -594,7 +595,7 @@ def get_progress_data(self, course_enrollment): course_progress_details = None try: - obj = LearnerCourseGradeMetrics.objects.most_recent_for_learner_course( + obj = LearnerCourseGradeMetrics.objects.latest_lcgm( user=course_enrollment.user, course_id=str(course_enrollment.course_id)) if obj: @@ -818,7 +819,7 @@ def to_representation(self, instance): """ Get the most recent LCGM record for the enrollment, if it exists """ - self._lcgm = LearnerCourseGradeMetrics.objects.most_recent_for_learner_course( + self._lcgm = LearnerCourseGradeMetrics.objects.latest_lcgm( user=instance.user, course_id=str(instance.course_id)) return super(EnrollmentMetricsSerializerV2, self).to_representation(instance) @@ -873,3 +874,42 @@ def get_enrollments(self, user): course_id__in=self.parent.course_keys) return EnrollmentMetricsSerializerV2(user_enrollments, many=True).data + +# For LPO performance improvement + + +class EnrollmentDataSerializer(serializers.ModelSerializer): + """Provides serialization for an enrollment + + This serializer note not identify the learner. It is used in + LearnerMetricsSerializer + """ + # course_id = serializers.CharField() + date_enrolled = serializers.DateTimeField(format="%Y-%m-%d") + progress_details = serializers.SerializerMethodField() + + class Meta: + model = EnrollmentData + fields = [ + 'id', 'course_id', 'date_enrolled', 'date_enrolled', + 'is_enrolled', 'is_completed', + 'progress_percent', 'progress_details', + ] + read_only_fields = fields + + def get_progress_details(self, obj): + """Get progress data for a single enrollment + """ + return obj.progress_details + + +class LearnerMetricsSerializerV2(serializers.ModelSerializer): + fullname = serializers.CharField(source='profile.name', default=None) + enrollmentdata_set = EnrollmentDataSerializer(many=True, read_only=True) + + class Meta: + model = get_user_model() + list_serializer_class = LearnerMetricsListSerializer + fields = ('id', 'username', 'email', 'fullname', 'is_active', + 'date_joined', 'enrollmentdata_set') + read_only_fields = fields diff --git a/figures/tasks.py b/figures/tasks.py index bfa16ba5..335029ff 100644 --- a/figures/tasks.py +++ b/figures/tasks.py @@ -16,6 +16,7 @@ from openedx.core.djangoapps.content.course_overviews.models import CourseOverview # noqa pylint: disable=import-error from student.models import CourseEnrollment # pylint: disable=import-error +from figures.backfill import backfill_enrollment_data_for_site from figures.helpers import as_course_key, as_date from figures.log import log_exec_time from figures.models import PipelineError @@ -74,7 +75,24 @@ def populate_site_daily_metrics(site_id, **kwargs): force_update=kwargs.get('force_update', False), ) logger.debug( - 'done running populate_site_daily_metrics for site_id={}"'.format(site_id)) + 'done running populate_site_daily_metrics for site_id={}'.format(site_id)) + + +@shared_task +def update_enrollment_data(site_id, **_kwargs): + """ + This can be an expensive task as it iterates over all th + """ + try: + site = Site.objects.get(id=site_id) + results = backfill_enrollment_data_for_site(site) + if results.get('errors'): + for rec in results['errors']: + logger.error('figures.tasks.update_enrollment_data. Error:{}'.format(rec)) + except Site.DoesNotExist: + logger.error( + 'figurs.tasks.update_enrollment_data: site_id={} does not exist'.format( + site_id)) @shared_task @@ -135,6 +153,10 @@ def populate_daily_metrics(date_for=None, force_update=False): site_id=site.id, date_for=date_for, force_update=force_update) + + # Until we implement signal triggers + update_enrollment_data(site_id=site.id) + logger.info("figures.populate_daily_metrics: finished Site {:04d} of {:04d}".format( i, sites_count)) diff --git a/figures/urls.py b/figures/urls.py index 5a35e3a8..062abe65 100644 --- a/figures/urls.py +++ b/figures/urls.py @@ -113,12 +113,16 @@ views.EnrollmentMetricsViewSet, base_name='enrollment-metrics') +router.register( + r'learner-metrics-v1', + views.LearnerMetricsViewSetV1, + base_name='learner-metrics-v1') + router.register( r'learner-metrics', - views.LearnerMetricsViewSet, + views.LearnerMetricsViewSetV2, base_name='learner-metrics') - urlpatterns = [ # UI Templates diff --git a/figures/views.py b/figures/views.py index aac684e3..0e3fe8eb 100644 --- a/figures/views.py +++ b/figures/views.py @@ -40,8 +40,8 @@ # Directly including edx-platform objects for early development # Follow-on, we'll likely consolidate edx-platform model imports to an adapter from openedx.core.djangoapps.content.course_overviews.models import CourseOverview # noqa pylint: disable=import-error -from student.models import CourseEnrollment # pylint: disable=import-error +from figures.compat import CourseEnrollment from figures.filters import ( CourseDailyMetricsFilter, CourseEnrollmentFilter, @@ -72,6 +72,7 @@ GeneralCourseDataSerializer, LearnerDetailsSerializer, LearnerMetricsSerializer, + LearnerMetricsSerializerV2, SiteDailyMetricsSerializer, SiteMauMetricsSerializer, SiteMauLiveMetricsSerializer, @@ -406,10 +407,11 @@ def get_serializer_context(self): return context -class LearnerMetricsViewSet(CommonAuthMixin, viewsets.ReadOnlyModelViewSet): +class LearnerMetricsViewSetV1(CommonAuthMixin, viewsets.ReadOnlyModelViewSet): """Provides user identity and nested enrollment data - This view is unders active development and subject to change + Renamed class by appending 'V1' as we are retaining it only to compare for + performance testing and verify identical results from the new viewset TODO: After we get this class tests running, restructure this module: * Group all user model based viewsets together @@ -450,7 +452,82 @@ def get_enrolled_users(self, site, course_keys): qs = figures.sites.get_users_for_site(site).filter( courseenrollment__course_id__in=course_keys ).select_related('profile').prefetch_related('courseenrollment_set') - return qs + return qs.distinct() + + def get_queryset(self): + """ + If one or more course keys are given as query parameters, then + * Course key filtering mode is ued. Any invalid keys are filtered out + from the list + * If no valid course keys are found, then an empty list is returned from + this view + """ + site = django.contrib.sites.shortcuts.get_current_site(self.request) + course_keys = figures.sites.get_course_keys_for_site(site) + try: + param_course_keys = self.query_param_course_keys() + except InvalidKeyError: + raise NotFound() + if param_course_keys: + if not set(param_course_keys).issubset(set(course_keys)): + raise NotFound() + else: + course_keys = param_course_keys + return self.get_enrolled_users(site=site, course_keys=course_keys) + + def get_serializer_context(self): + context = super(LearnerMetricsViewSetV1, self).get_serializer_context() + context['site'] = django.contrib.sites.shortcuts.get_current_site(self.request) + context['course_keys'] = self.query_param_course_keys() + return context + + +class LearnerMetricsViewSetV2(CommonAuthMixin, viewsets.ReadOnlyModelViewSet): + """Provides user identity and nested enrollment data + + Version 2 of this viewset. We'll remove the old view + This view is unders active development and subject to change + + TODO: After we get this class tests running, restructure this module: + * Group all user model based viewsets together + * Make a base user viewset with the `get_queryset` and `get_serializer_context` + methods + """ + model = get_user_model() + pagination_class = FiguresLimitOffsetPagination + serializer_class = LearnerMetricsSerializerV2 + filter_backends = (SearchFilter, DjangoFilterBackend, OrderingFilter) + + # TODO: Improve this filter + filter_class = UserFilterSet + + search_fields = ['username', 'email', 'profile__name'] + ordering_fields = ['username', 'email', 'profile__name', 'is_active', 'date_joined'] + + def query_param_course_keys(self): + """ + TODO: Mixin this + """ + cid_list = self.request.GET.getlist('course') + return [CourseKey.from_string(elem.replace(' ', '+')) for elem in cid_list] + + def get_enrolled_users(self, site, course_keys): + """Get users enrolled in the specific courses for the specified site + + Args: + site: The site for which is being called + course_keys: list of Open edX course keys + + Returns: + Django QuerySet of users enrolled in the specified courses + + Note: + We should move this to `figures.sites` + """ + qs = figures.sites.get_users_for_site(site).filter( + enrollmentdata__course_id__in=course_keys + ).select_related('profile').prefetch_related('enrollmentdata_set') + return qs.distinct() def get_queryset(self): """ @@ -474,7 +551,7 @@ def get_queryset(self): return self.get_enrolled_users(site=site, course_keys=course_keys) def get_serializer_context(self): - context = super(LearnerMetricsViewSet, self).get_serializer_context() + context = super(LearnerMetricsViewSetV2, self).get_serializer_context() context['site'] = django.contrib.sites.shortcuts.get_current_site(self.request) context['course_keys'] = self.query_param_course_keys() return context diff --git a/frontend/src/views/ProgressOverview.js b/frontend/src/views/ProgressOverview.js index 911ddae6..271908c9 100644 --- a/frontend/src/views/ProgressOverview.js +++ b/frontend/src/views/ProgressOverview.js @@ -194,7 +194,7 @@ class ProgressOverview extends Component { singleRecord['date_joined'] = user['date_joined']; const coursesFilter = this.state.selectedCourses.length ? this.state.selectedCourses : this.state.coursesFilterOptions; - const userCoursesImmutable = Immutable.fromJS(user['enrollments']); + const userCoursesImmutable = Immutable.fromJS(user['enrollmentdata_set']); coursesFilter.forEach((course, i) => { const userProgress = userCoursesImmutable.find(singleCourse => singleCourse.get('course_id') === course.id); if (userProgress) { @@ -261,7 +261,7 @@ class ProgressOverview extends Component { const scrollingListItems = this.state.learnersList.map((user, index) => { - const userCoursesImmutable = Immutable.fromJS(user['enrollments']); + const userCoursesImmutable = Immutable.fromJS(user['enrollmentdata_set']); const userCoursesRender = coursesFilter.map((course, i) => { const userProgress = userCoursesImmutable.find(singleCourse => singleCourse.get('course_id') === course.id); return ( diff --git a/tests/factories.py b/tests/factories.py index 53e11d74..66a646ac 100644 --- a/tests/factories.py +++ b/tests/factories.py @@ -38,6 +38,7 @@ from figures.models import ( CourseDailyMetrics, CourseMauMetrics, + EnrollmentData, LearnerCourseGradeMetrics, SiteDailyMetrics, SiteMonthlyMetrics, @@ -303,6 +304,29 @@ class Meta: num_learners_completed = 5 +class EnrollmentDataFactory(DjangoModelFactory): + class Meta: + model = EnrollmentData + + site = factory.SubFactory(SiteFactory) + user = factory.SubFactory(UserFactory) + course_id = factory.Sequence(lambda n: + 'course-v1:StarFleetAcademy+SFA{}+2161'.format(n)) + date_for = factory.Sequence(lambda n: + (datetime.datetime(2018, 1, 1) + datetime.timedelta( + days=n)).replace(tzinfo=utc).date()) + date_enrolled = factory.Sequence(lambda n: + (datetime.datetime(2018, 1, 1) + datetime.timedelta( + days=n)).replace(tzinfo=utc).date()) + is_enrolled = True + is_completed = False + progress_percent = 0.50 + points_possible = 30.0 + points_earned = 15.0 + sections_worked = 5 + sections_possible = 10 + + class LearnerCourseGradeMetricsFactory(DjangoModelFactory): class Meta: model = LearnerCourseGradeMetrics diff --git a/tests/models/test_enrollment_data_model.py b/tests/models/test_enrollment_data_model.py new file mode 100644 index 00000000..501cc3c9 --- /dev/null +++ b/tests/models/test_enrollment_data_model.py @@ -0,0 +1,152 @@ +""" +Basic tests for figures.models.EnrollmentData model + +Test scenarios we need to help verify the data model + +1. Learner doesn't have a CourseEnrollment (CE) +2. Learner just signed up. Has a CE, no LearnerCourseGradeMetric (LCGM) +3. Learner has CE and one LCGM +4. Learner has CE and multiple LCGM +5. Learner completed the course +6. Learner is no longer active in the course (Note: this is only handled in + our data by the fact that the EnrollmentData.is_active field would be False) +7+ The test scenrios we haven't identified yet + + +Tests fail in Ginkgo due to + "TypeError: 'course' is an invalid keyword argument for this function" + +Which is interesting because other tests use CourseEnrollmentFactory(course=X) +and they do not fail in Ginkgo. For now, skipping test in Ginkgo +""" + +import pytest + +from figures.models import EnrollmentData + +from tests.factories import ( + CourseEnrollmentFactory, + CourseOverviewFactory, + EnrollmentDataFactory, + LearnerCourseGradeMetricsFactory, + OrganizationFactory, + OrganizationCourseFactory, + SiteFactory, + UserFactory) +from tests.helpers import ( + OPENEDX_RELEASE, + GINKGO, + organizations_support_sites +) + +if organizations_support_sites(): + from tests.factories import UserOrganizationMappingFactory + + def map_users_to_org(org, users): + [UserOrganizationMappingFactory(user=user, + organization=org) for user in users] + + +@pytest.mark.django_db +def make_site_data(num_users=3, num_courses=2): + """ + This is a copy-n-paste hack from figures/tests/conftest.py + """ + site = SiteFactory() + if organizations_support_sites(): + org = OrganizationFactory(sites=[site]) + else: + org = OrganizationFactory() + courses = [CourseOverviewFactory() for i in range(num_courses)] + users = [UserFactory() for i in range(num_users)] + enrollments = [] + + users = [UserFactory() for i in range(num_users)] + + enrollments = [] + for i, user in enumerate(users): + # Create increasing number of enrollments for each user, maximum to one less + # than the number of courses + for j in range(i): + enrollments.append( + CourseEnrollmentFactory(course=courses[j-1], user=user) + ) + + if organizations_support_sites(): + for course in courses: + OrganizationCourseFactory(organization=org, + course_id=str(course.id)) + + # Set up user mappings + map_users_to_org(org, users) + + return dict( + site=site, + org=org, + courses=courses, + users=users, + enrollments=enrollments, + ) + + +@pytest.fixture +@pytest.mark.django_db +def site_data(db, settings): + """Simple fake site data + """ + if organizations_support_sites(): + settings.FEATURES['FIGURES_IS_MULTISITE'] = True + site_data = make_site_data() + + ce = site_data['enrollments'][0] + + lcgm = [ + LearnerCourseGradeMetricsFactory(site=site_data['site'], + user=ce.user, + course_id=str(ce.course_id), + date_for='2020-10-01'), + ] + + site_data['lcgm'] = lcgm + return site_data + + +@pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, reason='Breaks on CourseEnrollmentFactory') +def test_set_enrollment_data_new_record(site_data): + """Test we create a new EnrollmentData record + + Barebone test on creating a new EnrollmentData record for an existing + LCGM record. + """ + site = site_data['site'] + ce = site_data['enrollments'][0] + lcgm = site_data['lcgm'][0] + + assert lcgm.course_id == str(ce.course_id) + assert lcgm.user_id == ce.user_id + assert not EnrollmentData.objects.count() + obj, created = EnrollmentData.objects.set_enrollment_data( + site=site, + user=ce.user, + course_id=ce.course_id) + + assert obj and created + assert obj.user == lcgm.user + + +@pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, reason='Breaks on CourseEnrollmentFactory') +def test_set_enrollment_data_update_existing(site_data): + """Test we update an existing EnrollmentData record + """ + site = site_data['site'] + ce = site_data['enrollments'][0] + lcgm = site_data['lcgm'][0] + ed = EnrollmentDataFactory(site=site, + course_id=str(ce.course_id), + user=ce.user) + assert EnrollmentData.objects.count() == 1 + obj, created = EnrollmentData.objects.set_enrollment_data( + site=site, + user=ce.user, + course_id=ce.course_id) + assert EnrollmentData.objects.count() == 1 diff --git a/tests/models/test_learner_course_grade_metrics_model.py b/tests/models/test_learner_course_grade_metrics_model.py index 8aa9ba3f..30d405f2 100644 --- a/tests/models/test_learner_course_grade_metrics_model.py +++ b/tests/models/test_learner_course_grade_metrics_model.py @@ -73,7 +73,7 @@ def create_sample_completed_lcgm(site, user_count, course_count): @pytest.mark.django_db -def test_most_recent_with_data(db): +def test_latest_lcgm_with_data(db): """Make sure the query works with a couple of existing models We create two LearnerCourseGradeMetrics models and test that the function @@ -90,13 +90,13 @@ def test_most_recent_with_data(db): course_id=str(course_overview.id), date_for=second_date) assert older_lcgm.date_for != newer_lcgm.date_for - obj = LearnerCourseGradeMetrics.objects.most_recent_for_learner_course( + obj = LearnerCourseGradeMetrics.objects.latest_lcgm( user=user, course_id=course_overview.id) assert obj == newer_lcgm @pytest.mark.django_db -def test_most_recent_with_empty_table(db): +def test_latest_lcgm_with_empty_table(db): """Make sure the query works when there are no models to find Tests that the function returns None and does not fail when it cannot find @@ -105,7 +105,7 @@ def test_most_recent_with_empty_table(db): assert not LearnerCourseGradeMetrics.objects.count() user = UserFactory() course_overview = CourseOverviewFactory() - obj = LearnerCourseGradeMetrics.objects.most_recent_for_learner_course( + obj = LearnerCourseGradeMetrics.objects.latest_lcgm( user=user, course_id=course_overview.id) assert not obj @@ -228,15 +228,15 @@ def test_create(self): self.create_rec['user'].username, self.create_rec['course_id']) - def test_most_recent_for_learner_course_one_rec(self): + def test_latest_lcgm_one_rec(self): rec = LearnerCourseGradeMetrics.objects.create(**self.create_rec) assert LearnerCourseGradeMetrics.objects.count() == 1 - obj = LearnerCourseGradeMetrics.objects.most_recent_for_learner_course( + obj = LearnerCourseGradeMetrics.objects.latest_lcgm( user=self.course_enrollment.user, course_id=str(self.course_enrollment.course_id)) assert rec == obj - def test_most_recent_for_learner_course_multiple_dates(self): + def test_latest_lcgm_multiple_dates(self): assert LearnerCourseGradeMetrics.objects.count() == 0 for date in [datetime.date(2018, 2, day) for day in range(1, 4)]: rec = self.create_rec.copy() @@ -244,7 +244,7 @@ def test_most_recent_for_learner_course_multiple_dates(self): rec['date_for'] = date LearnerCourseGradeMetrics.objects.create(**rec) - obj = LearnerCourseGradeMetrics.objects.most_recent_for_learner_course( + obj = LearnerCourseGradeMetrics.objects.latest_lcgm( user=self.course_enrollment.user, course_id=str(self.course_enrollment.course_id)) assert last_day diff --git a/tests/pipeline/test_enrollment_metrics.py b/tests/pipeline/test_enrollment_metrics.py index f80e86c3..0e5d309e 100644 --- a/tests/pipeline/test_enrollment_metrics.py +++ b/tests/pipeline/test_enrollment_metrics.py @@ -15,7 +15,7 @@ bulk_calculate_course_progress_data, collect_metrics_for_enrollment, _enrollment_metrics_needs_update, - _add_enrollment_metrics_record, + _new_enrollment_metrics_record, _collect_progress_data, ) from figures.sites import UnlinkedCourseError @@ -211,7 +211,7 @@ def test_needs_update_has_lcgm(self, monkeypatch): assert metrics != lcgm # This works because we pick dates in the past. Better is to fix this - # by either using freezegun or monkeypatching `_add_enrollment_metrics_record + # by either using freezegun or monkeypatching `_new_enrollment_metrics_record assert metrics.date_for >= self.date_2.date() def test_does_not_need_update(self, monkeypatch): @@ -323,7 +323,7 @@ def test_dates_lcgm_is_past_is_true(self): @pytest.mark.django_db class TestAddEnrollmentMetricsRecord(object): - """Tests the `_add_enrollment_metrics_record` function + """Tests the `_new_enrollment_metrics_record` function """ @pytest.fixture(autouse=True) @@ -341,7 +341,7 @@ def test_happy_path(self): count=22, # sections possible sections_worked=11 ) - obj = _add_enrollment_metrics_record(site=self.site, + obj = _new_enrollment_metrics_record(site=self.site, course_enrollment=self.course_enrollment, progress_data=progress_data, date_for=self.date_for) diff --git a/tests/views/test_learner_metrics_viewset.py b/tests/views/test_learner_metrics_viewset_v1.py similarity index 98% rename from tests/views/test_learner_metrics_viewset.py rename to tests/views/test_learner_metrics_viewset_v1.py index 877ab200..5d336de6 100644 --- a/tests/views/test_learner_metrics_viewset.py +++ b/tests/views/test_learner_metrics_viewset_v1.py @@ -12,7 +12,7 @@ get_course_keys_for_site, users_enrolled_in_courses, ) -from figures.views import LearnerMetricsViewSet +from figures.views import LearnerMetricsViewSetV1 from tests.helpers import organizations_support_sites from tests.views.base import BaseViewTest @@ -25,7 +25,7 @@ def filter_enrollments(enrollments, courses): @pytest.mark.django_db -class TestLearnerMetricsViewSet(BaseViewTest): +class TestLearnerMetricsViewSetV1(BaseViewTest): """Tests the learner metrics viewset The tests are incomplete @@ -58,14 +58,14 @@ class TestLearnerMetricsViewSet(BaseViewTest): } ``` """ - base_request_path = 'api/learner-metrics/' - view_class = LearnerMetricsViewSet + base_request_path = 'api/learner-metrics-v1/' + view_class = LearnerMetricsViewSetV1 @pytest.fixture(autouse=True) def setup(self, db, settings): if organizations_support_sites(): settings.FEATURES['FIGURES_IS_MULTISITE'] = True - super(TestLearnerMetricsViewSet, self).setup(db) + super(TestLearnerMetricsViewSetV1, self).setup(db) def make_request(self, monkeypatch, request_path, site, caller, action): """Convenience method to make the API request diff --git a/tests/views/test_learner_metrics_viewset_v2.py b/tests/views/test_learner_metrics_viewset_v2.py new file mode 100644 index 00000000..f6f6f2f4 --- /dev/null +++ b/tests/views/test_learner_metrics_viewset_v2.py @@ -0,0 +1,303 @@ +"""Tests Figures learner-metrics viewset +""" + +import pytest + +import django.contrib.sites.shortcuts +from rest_framework import status +from rest_framework.test import APIRequestFactory, force_authenticate + +from figures.helpers import as_course_key +from figures.sites import ( + get_course_keys_for_site, + users_enrolled_in_courses, +) +from figures.views import LearnerMetricsViewSetV2 + +from tests.helpers import organizations_support_sites +from tests.views.base import BaseViewTest +from tests.views.helpers import is_response_paginated, make_caller + + +def filter_enrollments(enrollments, courses): + course_ids = [elem.id for elem in courses] + return [elem for elem in enrollments if elem.course_id in course_ids] + +@pytest.mark.skip(reason='We need to add EnrollmentData mock records') +@pytest.mark.django_db +class TestLearnerMetricsViewSetV2(BaseViewTest): + """Tests the learner metrics viewset + + The tests are incomplete + + The list action will return a list of the following records: + + ``` + { + "id": 109, + "username": "chasecynthia", + "email": "msnyder@gmail.com", + "fullname": "Brandon Meyers", + "is_active": true, + "date_joined": "2020-06-03T00:00:00Z", + "enrollments": [ + { + "id": 9, + "course_id": "course-v1:StarFleetAcademy+SFA01+2161", + "date_enrolled": "2020-02-24", + "is_enrolled": true, + "progress_percent": 1.0, + "progress_details": { + "sections_worked": 20, + "points_possible": 100.0, + "sections_possible": 20, + "points_earned": 50.0 + } + } + ] + } + ``` + """ + base_request_path = 'api/learner-metrics/' + view_class = LearnerMetricsViewSetV2 + + @pytest.fixture(autouse=True) + def setup(self, db, settings): + if organizations_support_sites(): + settings.FEATURES['FIGURES_IS_MULTISITE'] = True + super(TestLearnerMetricsViewSetV2, self).setup(db) + + def make_request(self, monkeypatch, request_path, site, caller, action): + """Convenience method to make the API request + + Returns the response object + """ + request = APIRequestFactory().get(request_path) + request.META['HTTP_HOST'] = site.domain + monkeypatch.setattr(django.contrib.sites.shortcuts, + 'get_current_site', + lambda req: site) + force_authenticate(request, user=caller) + view = self.view_class.as_view({'get': action}) + return view(request) + + def matching_enrollment_set_to_course_ids(self, enrollments, course_ids): + """ + enrollment course ids need to be a subset of course_ids + It is ok if there are none or fewer enrollments than course_ids because + a learner might not be enrolled in all the courses on which we are + filtering + """ + enroll_course_ids = set([rec['course_id'] for rec in enrollments]) + return enroll_course_ids.issubset(set([str(rec) for rec in course_ids])) + + def test_list_method_all(self, monkeypatch, lm_test_data): + """Partial test coverage to check we get all site users + + Checks returned user ids against all user ids for the site + Checks top level keys + + Does NOT check values in the `enrollments` key. This should be done as + follow up work + """ + us = lm_test_data['us'] + them = lm_test_data['them'] + our_courses = us['courses'] + caller = make_caller(us['org']) + assert us['site'].domain != them['site'].domain + assert len(our_courses) > 1 + + response = self.make_request(request_path=self.base_request_path, + monkeypatch=monkeypatch, + site=us['site'], + caller=caller, + action='list') + + assert response.status_code == status.HTTP_200_OK + assert is_response_paginated(response.data) + results = response.data['results'] + # Check users + result_ids = [obj['id'] for obj in results] + # Get all enrolled users + course_keys = get_course_keys_for_site(site=us['site']) + users = users_enrolled_in_courses(course_keys) + user_ids = [user.id for user in users] + assert set(result_ids) == set(user_ids) + # Spot check the first record + top_keys = ['id', 'username', 'email', 'fullname', 'is_active', + 'date_joined', 'enrollments'] + assert set(results[0].keys()) == set(top_keys) + + def test_course_param_single(self, monkeypatch, lm_test_data): + """Test that the 'course' query parameter works + + """ + us = lm_test_data['us'] + them = lm_test_data['them'] + our_enrollments = us['enrollments'] + our_courses = us['courses'] + + caller = make_caller(us['org']) + assert us['site'].domain != them['site'].domain + assert len(our_courses) > 1 + query_params = '?course={}'.format(str(our_courses[0].id)) + + request_path = self.base_request_path + query_params + response = self.make_request(request_path=request_path, + monkeypatch=monkeypatch, + site=us['site'], + caller=caller, + action='list') + + assert response.status_code == status.HTTP_200_OK + assert is_response_paginated(response.data) + results = response.data['results'] + # Check user ids + result_ids = [obj['id'] for obj in results] + + course_enrollments = [elem for elem in our_enrollments + if elem.course_id == our_courses[0].id] + expected_user_ids = [obj.user.id for obj in course_enrollments] + assert set(result_ids) == set(expected_user_ids) + + for rec in results: + assert self.matching_enrollment_set_to_course_ids( + rec['enrollments'], [our_courses[0].id]) + + def test_course_param_multiple(self, monkeypatch, lm_test_data): + """Test that the 'course' query parameter works + + """ + us = lm_test_data['us'] + them = lm_test_data['them'] + our_enrollments = us['enrollments'] + our_courses = us['courses'] + caller = make_caller(us['org']) + assert us['site'].domain != them['site'].domain + assert len(our_courses) > 1 + + filtered_courses = our_courses[:2] + + # TODO: build params from 'filtered_courses' + query_params = '?course={}&course={}'.format(str(our_courses[0].id), + str(our_courses[1].id)) + + request_path = self.base_request_path + query_params + response = self.make_request(request_path=request_path, + monkeypatch=monkeypatch, + site=us['site'], + caller=caller, + action='list') + + # Continue updating here + assert response.status_code == status.HTTP_200_OK + assert is_response_paginated(response.data) + results = response.data['results'] + # Check user ids + result_ids = [obj['id'] for obj in results] + expected_enrollments = filter_enrollments(enrollments=our_enrollments, + courses=filtered_courses) + expected_user_ids = [obj.user.id for obj in expected_enrollments] + assert set(result_ids) == set(expected_user_ids) + for rec in results: + assert self.matching_enrollment_set_to_course_ids( + rec['enrollments'], [rec.id for rec in filtered_courses]) + + @pytest.mark.parametrize('query_param, field_name', [ + ('username', 'username'), + ('email', 'email'), + ]) + def test_distinct_user(self, monkeypatch, lm_test_data, query_param, field_name): + """ + + Test data setup: + We need to have a user enrolled in multiple courses + + We expect to have just one user record returned with each of the + courses for which the user is enrolled + """ + # Set up data + us = lm_test_data['us'] + our_enrollments = us['enrollments'] + caller = make_caller(us['org']) + our_site_users = lm_test_data['us']['users'] + our_user = our_site_users[-1] + user_ce = [rec for rec in our_enrollments if rec.user_id == our_user.id] + query_val = getattr(our_user, field_name) + query_str = '?{}={}'.format(query_param, query_val) + + # Run test + request_path = self.base_request_path + query_str + response = self.make_request(request_path=request_path, + monkeypatch=monkeypatch, + site=us['site'], + caller=caller, + action='list') + # Check results + # Continue updating here + assert response.status_code == status.HTTP_200_OK + assert is_response_paginated(response.data) + results = response.data['results'] + found_ce_ids = set([rec['id'] for rec in results[0]['enrollments']]) + assert len(results) == 1 + assert results[0]['id'] == our_user.id + assert found_ce_ids == set([rec.id for rec in user_ce]) + + def invalid_course_ids_raise_404(self, monkeypatch, lm_test_data, query_params): + """ + Helper method to test expected 404 calls + """ + us = lm_test_data['us'] + them = lm_test_data['them'] + + caller = make_caller(us['org']) + assert us['site'].domain != them['site'].domain + request_path = self.base_request_path + query_params + response = self.make_request(request_path=request_path, + monkeypatch=monkeypatch, + site=us['site'], + caller=caller, + action='list') + return response.status_code == status.HTTP_404_NOT_FOUND + + @pytest.mark.skipif(not organizations_support_sites(), + reason='Organizations support sites') + def test_valid_and_course_param_from_other_site_invalid(self, + monkeypatch, + lm_test_data): + """Test that the 'course' query parameter works + + """ + our_courses = lm_test_data['us']['courses'] + their_courses = lm_test_data['them']['courses'] + query_params = '?course={}&course={}'.format(str(our_courses[0].id), + str(their_courses[0].id)) + assert self.invalid_course_ids_raise_404(monkeypatch, + lm_test_data, + query_params) + + def test_valid_and_mangled_course_param_invalid(self, + monkeypatch, + lm_test_data): + """Test that the 'course' query parameter works + + """ + our_courses = lm_test_data['us']['courses'] + mangled_course_id = 'she-sell-seashells-by-the-seashore' + query_params = '?course={}&course={}'.format(str(our_courses[0].id), + mangled_course_id) + assert self.invalid_course_ids_raise_404(monkeypatch, + lm_test_data, + query_params) + + def test_unlinked_course_id_param_invalid(self, monkeypatch, lm_test_data): + """Test that the 'course' query parameter works + + """ + our_courses = lm_test_data['us']['courses'] + unlinked_course_id = as_course_key('course-v1:UnlinkedCourse+UMK+1999') + query_params = '?course={}&course={}'.format(str(our_courses[0].id), + unlinked_course_id) + assert self.invalid_course_ids_raise_404(monkeypatch, + lm_test_data, + query_params)