diff --git a/figures/course.py b/figures/course.py new file mode 100644 index 00000000..207921fd --- /dev/null +++ b/figures/course.py @@ -0,0 +1,115 @@ +"""Course specific module for Figures + +## This module defines a `Course` class for data retrieval + +Initially created to do the following: + +1. Reduce duplication in Figures "pipeline" +2. Build stronger course context to make Figures programming easier + +## Background summary + +A course id is globally unique as it has the identity of the organization and +organizations are globally unique. + +## Design to think about - Enrollment class + +Build on Django's lazy eval for querysets to create also an `Enrollment` class +that provides interfaces for `enrollment.date_for` and abstracts this bit of +mess that enrollments and student modules do NOT associate and instead we need +to query back and forth with `user_id` and `course_id`. +""" +from __future__ import absolute_import +from django.db.models import Q +from figures.compat import CourseEnrollment, StudentModule +from figures.helpers import ( + as_course_key, + as_date, +) +from figures.sites import ( + get_site_for_course, +) + + +class Course(object): + """Representation of a Course. + + The impetus for this class was dealing with querying for course enrollment + and student module records for a specific course and for dates and date + ranges for the course + + ## Architecture goal + + **Start simple and don't build the kitchen sink into here right away just + because this class exists** + + ## Data under consideration to have this class handle + + * enrollments created on a date, before, after, between. However this would + just be a convenience as the `.enrollments` property returns a queryset that + can be filtered on `.created` + """ + def __init__(self, course_id): + """ + Initial version, we pass in a course ID and cast to a course key as an + instance attribute. 
Later on, add `CourseLike` to abstract course identity + so we can stop worrying about "Is it a string representation of a course or + is it a CourseKey?" + """ + self.course_key = as_course_key(course_id) + + # Improvement: Consider doing lazy evaluation + self.site = get_site_for_course(self.course_id) + + def __repr__(self): + return '{}.{} <{}>'.format(self.__module__, + self.__class__.__name__, + str(self.course_key)) + + def __str__(self): + return self.__repr__() + + @property + def course_id(self): + """Returns string representation of the course id + """ + return str(self.course_key) + + @property + def enrollments(self): + """Returns CourseEnrollment queryset for the course + """ + return CourseEnrollment.objects.filter(course_id=self.course_key) + + @property + def student_modules(self): + """Returns StudentModule queryset for enrollments in the course + """ + return StudentModule.objects.filter(course_id=self.course_key) + + def student_modules_active_on_date(self, date_for): + """Returns StudentModule queryset active on the date + Active is if there was a `created` or `modified` field for the given date + + NOTE: We need to do this instead of simply `modified__date=date_for` + because we still have to support Django 1.8/Ginkgo + """ + date_for = as_date(date_for) + q_created = Q(created__year=date_for.year, + created__month=date_for.month, + created__day=date_for.day) + q_modified = Q(modified__year=date_for.year, + modified__month=date_for.month, + modified__day=date_for.day) + return self.student_modules.filter(q_created | q_modified) + + def enrollments_active_on_date(self, date_for): + """Return CourseEnrollment queryset for enrollments active on the date + + Looks for student modules modified on the specified date and returns + matching CourseEnrollment records + """ + sm = self.student_modules_active_on_date(date_for) + user_ids = sm.values('student_id').distinct() + return CourseEnrollment.objects.filter(course_id=self.course_key, + 
user_id__in=user_ids) diff --git a/figures/management/commands/backfill_figures_daily_metrics.py b/figures/management/commands/backfill_figures_daily_metrics.py index b30f9d91..cdfd9766 100644 --- a/figures/management/commands/backfill_figures_daily_metrics.py +++ b/figures/management/commands/backfill_figures_daily_metrics.py @@ -70,16 +70,11 @@ def handle(self, *args, **options): del kwargs['site_id'] # not implemented for experimental else: metrics_func = populate_daily_metrics - # try: + if options['no_delay']: metrics_func(**kwargs) else: metrics_func.delay(**kwargs) # pragma: no cover - # except Exception as e: # pylint: disable=bare-except - # if options['ignore_exceptions']: - # self.print_exc("daily", dt, e.message) - # else: - # raise print('END: Backfill Figures daily metrics metrics for: {}'.format(dt)) diff --git a/figures/management/commands/backfill_figures_enrollment_data.py b/figures/management/commands/backfill_figures_enrollment_data.py index e22ad166..7363219e 100644 --- a/figures/management/commands/backfill_figures_enrollment_data.py +++ b/figures/management/commands/backfill_figures_enrollment_data.py @@ -9,7 +9,7 @@ from textwrap import dedent from figures.management.base import BaseBackfillCommand -from figures.tasks import update_enrollment_data +from figures.tasks import update_enrollment_data_for_site class Command(BaseBackfillCommand): @@ -23,8 +23,8 @@ def handle(self, *args, **options): for site_id in self.get_site_ids(options['site']): print('Updating EnrollmentData for site {}'.format(site_id)) if options['no_delay']: - update_enrollment_data(site_id=site_id) + update_enrollment_data_for_site(site_id=site_id) else: - update_enrollment_data.delay(site_id=site_id) # pragma: no cover + update_enrollment_data_for_site.delay(site_id=site_id) # pragma: no cover print('DONE: Backfill Figures EnrollmentData') diff --git a/figures/pipeline/course_daily_metrics.py b/figures/pipeline/course_daily_metrics.py index 08ae36d0..bd2e5a8b 100644 --- 
a/figures/pipeline/course_daily_metrics.py +++ b/figures/pipeline/course_daily_metrics.py @@ -30,6 +30,10 @@ from figures.pipeline.logger import log_error import figures.pipeline.loaders from figures.pipeline.enrollment_metrics import bulk_calculate_course_progress_data +from figures.pipeline.enrollment_metrics_next import ( + calculate_course_progress as calculate_course_progress_next +) + from figures.serializers import CourseIndexSerializer import figures.sites from figures.pipeline.helpers import pipeline_date_for_rule @@ -235,16 +239,32 @@ class CourseDailyMetricsExtractor(object): BUT, we will then need to find a transform """ - def extract(self, course_id, date_for, **_kwargs): - """ - defaults = dict( + def extract(self, course_id, date_for, ed_next=False, **_kwargs): + """Extracts (collects) aggregated course level data + + Args: + course_id (:obj:`str` or :obj:`CourseKey`): The course for which we collect data + date_for (str or date): Deprecated. Was to backfill data. + Specialized TBD backfill data will be called instead. + ed_next (bool, optional): "Enrollment Data Next" flag. If set to `True` + then we collect metrics with our updated workflow. See here: + https://github.com/appsembler/figures/issues/428 + + Returns: + dict with aggregate course level metrics + + ``` + dict( enrollment_count=data['enrollment_count'], active_learners_today=data['active_learners_today'], average_progress=data.get('average_progress', None), average_days_to_complete=data.get('average_days_to_complete, None'), num_learners_completed=data['num_learners_completed'], ) - TODO: refactor this class + ``` + + TODO: refactor this class. It doesn't need to be a class. 
Can be a + standalone function Add lazy loading method to load course enrollments - Create a method for each metric field """ @@ -290,17 +310,31 @@ def extract(self, course_id, date_for, **_kwargs): logger.debug(msg.format(date_for=date_for, course_id=course_id)) else: try: - progress_data = bulk_calculate_course_progress_data(course_id=course_id, - date_for=date_for) + # This conditional check is an interim solution until we make + # the progress function configurable and able to run Figures + # plugins + if ed_next: + progress_data = calculate_course_progress_next(course_id=course_id) + else: + progress_data = bulk_calculate_course_progress_data(course_id=course_id, + date_for=date_for) data['average_progress'] = progress_data['average_progress'] except Exception: # pylint: disable=broad-except # Broad exception for starters. Refine as we see what gets caught # Make sure we set the average_progres to None so that upstream # does not think things are normal data['average_progress'] = None - msg = ('FIGURES:FAIL bulk_calculate_course_progress_data' + + if ed_next: + prog_func = 'calculate_course_progress_next' + else: + prog_func = 'bulk_calculate_course_progress_data' + + msg = ('FIGURES:FAIL {prog_func}' ' date_for={date_for}, course_id="{course_id}"') - logger.exception(msg.format(date_for=date_for, course_id=course_id)) + logger.exception(msg.format(prog_func=prog_func, + date_for=date_for, + course_id=course_id)) data['average_days_to_complete'] = get_average_days_to_complete( course_id, date_for,) @@ -319,10 +353,11 @@ def __init__(self, course_id): self.extractor = CourseDailyMetricsExtractor() self.site = figures.sites.get_site_for_course(self.course_id) - def get_data(self, date_for): + def get_data(self, date_for, ed_next=False): return self.extractor.extract( course_id=self.course_id, - date_for=date_for) + date_for=date_for, + ed_next=ed_next) @transaction.atomic def save_metrics(self, date_for, data): @@ -350,7 +385,7 @@ def save_metrics(self, 
date_for, data): cdm.clean_fields() return (cdm, created,) - def load(self, date_for=None, force_update=False, **_kwargs): + def load(self, date_for=None, ed_next=False, force_update=False, **_kwargs): """ TODO: clean up how we do this. We want to be able to call the loader with an existing data set (not having to call the extractor) but we @@ -376,5 +411,5 @@ def load(self, date_for=None, force_update=False, **_kwargs): # record not found, move on to creating pass - data = self.get_data(date_for=date_for) + data = self.get_data(date_for=date_for, ed_next=ed_next) return self.save_metrics(date_for=date_for, data=data) diff --git a/figures/pipeline/enrollment_metrics_next.py b/figures/pipeline/enrollment_metrics_next.py new file mode 100644 index 00000000..7b00fa07 --- /dev/null +++ b/figures/pipeline/enrollment_metrics_next.py @@ -0,0 +1,68 @@ +"""This module updates Figures enrollment data and calculates aggregate progress + +* It updates `EnrollmentData` and `LearnerCourseGradeMetrics` records +* It calculates course progress from EnrollmentData records + +This generates the same metrics as the original enrollment_metrics module, +but does it differently. 
+ +## How it differs from the previous version + +This module improves on the existing enrollment metrics collection module, +`figures.pipeline.enrollment_metrics` + +* It separates the activities to create and update Figures per-enrollment data + collected +* This separation lets Figures run metrics in distinct stages + * First, collect per-enrollment data + * Second, aggregate metrics based on collected data +* This provides a workflow that is easier to resume if interrupted +* This provides workflow that is simpler to debug +* This simplifies and speeds up aggregate progress metrics, collapsing complex + code into a single Django queryset aggregation +* This update lays groundwork for further metrics improvements and enhancements + such as metrics on subsets of learners in a course or progress of subsets of + learners across courses + +# Developer Notes + +This module provides + +""" +from django.db.models import Avg +from figures.course import Course +from figures.helpers import utc_yesterday +from figures.models import EnrollmentData +from figures.sites import UnlinkedCourseError + + +def update_enrollment_data_for_course(course_id): + """Updates Figures per-enrollment data for enrollments in the course + Checks for and creates new `LearnerCourseGradeMetrics` records and updates + `EnrollmentData` records + + Return results are a list of the results returned by `update_enrollment_data` + """ + date_for = utc_yesterday() + the_course = Course(course_id) + if not the_course.site: + raise UnlinkedCourseError('No site found for course "{}"'.format(course_id)) + + # Any updated student module records? 
if so, then get the unique enrollments + # for each enrollment, check if LGCM is out of date or up to date + active_enrollments = the_course.enrollments_active_on_date(date_for) + return [EnrollmentData.objects.update_metrics(the_course.site, ce) + for ce in active_enrollments] + + +def calculate_course_progress(course_id): + """Return average progress percentage for all enrollments in the course + """ + results = EnrollmentData.objects.filter(course_id=str(course_id)).aggregate( + average_progress=Avg('progress_percent')) + + # This is a bit of a hack. When we overhaul progress data, we should really + # have None for progress if there's no data. But check how SQL AVG performs + if results['average_progress'] is None: + results['average_progress'] = 0.0 + return results diff --git a/figures/tasks.py b/figures/tasks.py index 5cb537e8..4cb59b4d 100644 --- a/figures/tasks.py +++ b/figures/tasks.py @@ -28,6 +28,7 @@ from figures.pipeline.mau_pipeline import collect_course_mau from figures.pipeline.helpers import DateForCannotBeFutureError from figures.pipeline.site_monthly_metrics import fill_last_month as fill_last_smm_month +from figures.pipeline.enrollment_metrics_next import update_enrollment_data_for_course logger = get_task_logger(__name__) @@ -47,7 +48,7 @@ @shared_task -def populate_single_cdm(course_id, date_for=None, force_update=False): +def populate_single_cdm(course_id, date_for=None, ed_next=False, force_update=False): """Populates a CourseDailyMetrics record for the given date and course The calling function is responsible for error handling calls to this @@ -66,7 +67,7 @@ def populate_single_cdm(course_id, date_for=None, force_update=False): start_time = time.time() cdm_obj, _created = CourseDailyMetricsLoader( - course_id).load(date_for=date_for, force_update=force_update) + course_id).load(date_for=date_for, ed_next=ed_next, force_update=force_update) elapsed_time = time.time() - start_time logger.debug('done. Elapsed time (seconds)={}. 
cdm_obj={}'.format( elapsed_time, cdm_obj)) @@ -90,7 +91,7 @@ def populate_single_sdm(site_id, date_for, force_update=False): @shared_task -def populate_daily_metrics_for_site(site_id, date_for, force_update=False): +def populate_daily_metrics_for_site(site_id, date_for, ed_next=False, force_update=False): """Collect metrics for the given site and date """ try: @@ -103,8 +104,12 @@ def populate_daily_metrics_for_site(site_id, date_for, force_update=False): for course_id in site_course_ids(site): try: + if ed_next: + update_enrollment_data_for_course(course_id) + populate_single_cdm(course_id=course_id, date_for=date_for, + ed_next=ed_next, force_update=force_update) except Exception as e: # pylint: disable=broad-except msg = ('{prefix}:SITE:COURSE:FAIL:populate_daily_metrics_for_site.' @@ -121,14 +126,17 @@ def populate_daily_metrics_for_site(site_id, date_for, force_update=False): @shared_task -def update_enrollment_data(site_id, **_kwargs): - """ - This can be an expensive task as it iterates over all the enrollments in a - site +def update_enrollment_data_for_site(site_id, **_kwargs): + """Original task to collect `EnrollmentData` records + + This tasks collects `EnrollmentData` records at the site level with the + context of 'backfill'. This means it goes through all enrollments on the + site and checks if the `EnrollmentData` record needs to be updated + This can be an expensive task as it iterates over all the enrollments in a + site. We can reduce the records for which we need to iterate if we filter on CourseEnrollment.objects.filter(is_actie=True) - However, we have to ensure that we don't exclude learners who have just completed a course and are awaiting post course activities, like being awarded a certificate @@ -138,20 +146,17 @@ def update_enrollment_data(site_id, **_kwargs): results = backfill_enrollment_data_for_site(site) if results.get('errors'): for rec in results['errors']: - logger.error('figures.tasks.update_enrollment_data. 
Error:{}'.format(rec)) + logger.error('figures.tasks.update_enrollment_data_for_site. Error:{}'.format(rec)) except Site.DoesNotExist: logger.error( - 'figurs.tasks.update_enrollment_data: site_id={} does not exist'.format( + 'figurs.tasks.update_enrollment_data_for_site: site_id={} does not exist'.format( site_id)) except Exception: # pylint: disable=broad-except - msg = ('FIGURES:DAILYLFAIL daily metrics:update_enrollment_data' + msg = ('FIGURES:DAILYLFAIL daily metrics:update_enrollment_data_for_site' ' for site_id={}'.format(site_id)) logger.exception(msg) -# TODO: Sites iterator with entry and exit logging - - @shared_task def populate_daily_metrics(site_id=None, date_for=None, force_update=False): """Runs Figures daily metrics collection @@ -237,9 +242,9 @@ def populate_daily_metrics(site_id=None, date_for=None, force_update=False): # Until we implement signal triggers if do_update_enrollment_data: try: - update_enrollment_data(site_id=site.id) + update_enrollment_data_for_site(site_id=site.id) except Exception: # pylint: disable=broad-except - msg = ('{prefix}:FAIL figures.tasks update_enrollment_data ' + msg = ('{prefix}:FAIL figures.tasks update_enrollment_data_for_site ' ' unhandled exception. site[{site_id}]:{domain}') logger.exception(msg.format(prefix=FPD_LOG_PREFIX, site_id=site.id, @@ -258,6 +263,64 @@ def populate_daily_metrics(site_id=None, date_for=None, force_update=False): site_count=sites_count)) +@shared_task +def populate_daily_metrics_next(site_id=None, force_update=False): + """Next iteration to collect daily metrics for all sites in a deployment + + This is a top level Celery task run every 24 hours to update Figures data. + + * It updates Figures per-enrollment data and collect daily aggregate metrics + * It's purpose is to collect new metrics on an ongoing basis and not serve + dual purpose of collecting ongoing data AND backfilling data. 
+ * The driver for this change is to improve performance of the daily Celery jobs + + What's different? + + * Figures collects the enrollment data first, then aggregates daily data. + + TODO: Draft up public architecture docs and reference them here + """ + if waffle.switch_is_active(WAFFLE_DISABLE_PIPELINE): + logger.warning('Figures pipeline is disabled due to %s being active.', + WAFFLE_DISABLE_PIPELINE) + return + + date_for = datetime.datetime.utcnow().date() + if site_id is not None: + sites = get_sites_by_id((site_id, )) + else: + sites = get_sites() + sites_count = sites.count() + # This is our task entry log message + msg = '{prefix}:START:date_for={date_for}, site_count={site_count}' + logger.info(msg.format(prefix=FPD_LOG_PREFIX, + date_for=date_for, + site_count=sites_count)) + for i, site in enumerate(sites): + msg = '{prefix}:SITE:START:{id}:{domain} - Site {i:04d} of {n:04d}' + logger.info(msg.format(prefix=FPD_LOG_PREFIX, + id=site.id, + domain=site.domain, + i=i, + n=sites_count)) + try: + populate_daily_metrics_for_site(site_id=site.id, + date_for=date_for, + ed_next=True, + force_update=force_update) + except Exception: # pylint: disable=broad-except + msg = ('{prefix}:FAIL populate_daily_metrics unhandled site level' + ' exception for site[{site_id}]={domain}') + logger.exception(msg.format(prefix=FPD_LOG_PREFIX, + site_id=site.id, + domain=site.domain)) + + msg = '{prefix}:END:date_for={date_for}, site_count={site_count}' + logger.info(msg.format(prefix=FPD_LOG_PREFIX, + date_for=date_for, + site_count=sites_count)) + + # # Daily Metrics Experimental Tasks # diff --git a/tests/helpers.py b/tests/helpers.py index 31d0c2ee..1a028f03 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -9,6 +9,7 @@ from django.utils.timezone import utc +from opaque_keys.edx.keys import CourseKey from organizations.models import Organization @@ -37,6 +38,10 @@ def make_course_key_str(org, number, run='test-run'): return 'course-v1:{}+{}+{}'.format(org, number, 
run) +def fake_course_key(num): + return CourseKey.from_string('course-v1:TestOrg+TO+{}'.format(num)) + + def create_metrics_model_timeseries(factory, first_day, last_day): """ Convenience method to create a set of time series factory objects diff --git a/tests/pipeline/test_course_daily_metrics.py b/tests/pipeline/test_course_daily_metrics.py index b51833ab..5e2058e0 100644 --- a/tests/pipeline/test_course_daily_metrics.py +++ b/tests/pipeline/test_course_daily_metrics.py @@ -239,34 +239,58 @@ def setup(self, db): self.student_module = StudentModuleFactory() self.date_for = datetime.datetime.utcnow().date() - def test_extract(self, monkeypatch): + def test_extract_default(self, monkeypatch): + """Default progress calculator is called when `ed_next` param not set + """ + expected_avg_prog = 'fake-average-progress' course_id = self.course_enrollments[0].course_id monkeypatch.setattr(figures.pipeline.course_daily_metrics, 'bulk_calculate_course_progress_data', - lambda **_kwargs: dict(average_progress=0.5)) + lambda **_kwargs: dict(average_progress=expected_avg_prog)) results = pipeline_cdm.CourseDailyMetricsExtractor().extract( course_id, self.date_for) + assert results['average_progress'] == expected_avg_prog + + @pytest.mark.parametrize('prog_func, ed_next', [ + ('bulk_calculate_course_progress_data', False), + ('calculate_course_progress_next', True) + ]) + def test_extract_ed_next(self, monkeypatch, prog_func, ed_next): + """Tests default and alternate progress calculators + """ + course_id = self.course_enrollments[0].course_id + prog_func_str = 'figures.pipeline.course_daily_metrics.{}'.format(prog_func) + with mock.patch(prog_func_str) as prog_mock: + results = pipeline_cdm.CourseDailyMetricsExtractor().extract( + course_id, self.date_for, ed_next=ed_next) + assert prog_mock.called assert results - def test_when_bulk_calculate_course_progress_data_fails(self, - monkeypatch, - caplog): + @pytest.mark.parametrize('prog_func, ed_next', [ + 
('bulk_calculate_course_progress_data', False), + ('calculate_course_progress_next', True) + ]) + def test_when_calculate_course_progress_data_fails(self, + monkeypatch, + caplog, + prog_func, + ed_next): course_id = self.course_enrollments[0].course_id - def mock_bulk(**_kwargs): + def prog_func_mock(**_kwargs): raise Exception('fake exception') monkeypatch.setattr(figures.pipeline.course_daily_metrics, - 'bulk_calculate_course_progress_data', - mock_bulk) + prog_func, + prog_func_mock) results = pipeline_cdm.CourseDailyMetricsExtractor().extract( - course_id, self.date_for) + course_id, self.date_for, ed_next) last_log = caplog.records[-1] assert last_log.message.startswith( - 'FIGURES:FAIL bulk_calculate_course_progress_data') + 'FIGURES:FAIL {}'.format(prog_func)) assert not results['average_progress'] @@ -296,7 +320,7 @@ def test_load(self, monkeypatch): else: course_id = self.course_enrollments[0].course.id - def get_data(self, date_for): + def get_data(self, date_for, ed_next=False): return { 'average_progress': 1.0, 'num_learners_completed': 2, diff --git a/tests/pipeline/test_enrollment_metrics_next.py b/tests/pipeline/test_enrollment_metrics_next.py new file mode 100644 index 00000000..b48fe44a --- /dev/null +++ b/tests/pipeline/test_enrollment_metrics_next.py @@ -0,0 +1,118 @@ +"""Test next iteration to collect enrollment metrics + +These tests exercises the module, `figures.pipeline.enrollment_metrics_next`. + +See the module docstring for details. 
+""" +import pytest +from mock import patch + +from django.contrib.sites.models import Site + +from figures.compat import CourseEnrollment +from figures.pipeline.enrollment_metrics_next import ( + update_enrollment_data_for_course, + calculate_course_progress, +) +from figures.sites import UnlinkedCourseError + +from tests.factories import ( + CourseEnrollmentFactory, + CourseOverviewFactory, + EnrollmentDataFactory, +) + + +@pytest.mark.django_db +class TestUpdateMetrics(object): + """Tests `update_enrollment_data_for_course` + + Since `figures.models.EnrollmentDataManager.update_metrics` is tested in + `tests/models/test_enrollment_data_update_metrics.py`, we can mock this + method in our tests here. + """ + + @pytest.fixture(autouse=True) + def setup(self, db, settings): + self.course_overview = CourseOverviewFactory() + self.site = Site.objects.first() + + def test_course_has_no_enrollments(self, monkeypatch): + """We have a new course with no enrollments + """ + monkeypatch.setattr('figures.course.get_site_for_course', lambda val: self.site) + result = update_enrollment_data_for_course(self.course_overview.id) + assert result == [] + + def test_course_has_enrollments_but_no_active_for_yesterday(self, monkeypatch): + """We have enrollments, but none were active yesterday + """ + monkeypatch.setattr('figures.course.get_site_for_course', lambda val: self.site) + [CourseEnrollmentFactory(course_id=self.course_overview.id) + for _ in range(2)] + result = update_enrollment_data_for_course(self.course_overview.id) + assert result == [] + + def test_course_has_active_enrollments_for_yesterday(self): + """We have enrollments who were active yesterday + """ + expected_ce = [CourseEnrollmentFactory(course_id=self.course_overview.id) + for _ in range(2)] + ce = CourseEnrollment.objects.filter(course_id=self.course_overview.id) + + def mock_update_metrics(site, ce): + return ce + + with patch('figures.pipeline.enrollment_metrics_next.Course') as course_class: + with 
patch('figures.pipeline.enrollment_metrics_next.EnrollmentData.objects') as edm: + course_class.return_value.enrollments_active_on_date.return_value = ce + course_class.return_value.site = self.site + edm.update_metrics = mock_update_metrics + result = update_enrollment_data_for_course(self.course_overview.id) + assert set(result) == set(expected_ce) + + def test_course_is_unlinked(self, monkeypatch): + """Function should raise `UnlinkedCourseError` if there's not a site match + + For Tahoe's multisite implementation, this can happen if the course's + organization is not linked to a site + For standalone sites, this should never happen + + To learn more, see the `figures.sites.get_site_for_course` function. + """ + monkeypatch.setattr('figures.course.get_site_for_course', lambda val: None) + with pytest.raises(UnlinkedCourseError) as excinfo: + update_enrollment_data_for_course(self.course_overview.id) + # with patch('figures.pipeline.enrollment_metrics_next.Course') as course_class: + # course_class.return_value.site = None + # with pytest.raises(UnlinkedCourseError) as excinfo: + # update_enrollment_data_for_course(self.course_overview.id) + expected_msg = 'No site found for course "{}"'.format(str(self.course_overview.id)) + assert str(excinfo.value) == expected_msg + + +@pytest.mark.django_db +class TestCalculateProgress(object): + """Tests `calculate_course_progress` + """ + @pytest.fixture(autouse=True) + def setup(self, db, settings): + self.course_overview = CourseOverviewFactory() + + def test_calc_course_progress_empty(self): + """The course doesn't have any EnrollmentData records + """ + results = calculate_course_progress(self.course_overview.id) + assert results['average_progress'] == pytest.approx(0.0) + + def test_calc_course_progress(self): + """The course has EnrollmentData records + """ + some_percentages = [0.0, 25.0, 50.0] + expected_average = sum(some_percentages)/len(some_percentages) + [ + 
EnrollmentDataFactory(course_id=str(self.course_overview.id), progress_percent=pp) + for pp in some_percentages + ] + results = calculate_course_progress(self.course_overview.id) + assert results['average_progress'] == pytest.approx(expected_average) diff --git a/tests/tasks/test_daily_tasks.py b/tests/tasks/test_daily_tasks.py index 904b30d1..41b29347 100644 --- a/tests/tasks/test_daily_tasks.py +++ b/tests/tasks/test_daily_tasks.py @@ -58,7 +58,6 @@ """ from __future__ import absolute_import from datetime import date -import logging import pytest from six.moves import range from django.contrib.sites.models import Site @@ -72,15 +71,17 @@ populate_single_cdm, populate_single_sdm, populate_daily_metrics_for_site, - populate_daily_metrics) + populate_daily_metrics, + populate_daily_metrics_next) from tests.factories import (CourseDailyMetricsFactory, CourseOverviewFactory, SiteDailyMetricsFactory, SiteFactory) -from tests.helpers import OPENEDX_RELEASE, GINKGO, FakeException +from tests.helpers import OPENEDX_RELEASE, GINKGO, FakeException, fake_course_key -def test_populate_single_cdm(transactional_db, monkeypatch): +@pytest.mark.parametrize('extra_params', [{}, {'ed_next': True}]) +def test_populate_single_cdm(transactional_db, monkeypatch, extra_params): """Test figures.tasks.populate_single_cdm nominal case This tests the normal execution to popluate a single CourseDailyMetrics @@ -100,31 +101,33 @@ def mock_cdm_load(self, date_for, **kwargs): 'figures.pipeline.course_daily_metrics.CourseDailyMetricsLoader.load', mock_cdm_load) - populate_single_cdm(course_id, date_for) + populate_single_cdm(course_id, date_for, **extra_params) assert CourseDailyMetrics.objects.count() == 1 assert as_date(CourseDailyMetrics.objects.first().date_for) == as_date(date_for) @pytest.mark.django_db -def test_disable_populate_daily_metrics(caplog): +@pytest.mark.parametrize('func', [populate_daily_metrics, populate_daily_metrics_next]) +def test_disable_populate_daily_metrics(caplog, 
func): """Test figures.tasks.populate_daily_metrics Tests that when WAFFLE_DISABLE_PIPELINE is active, the disabled warning msg is logged """ with override_switch('figures.disable_pipeline', active=True): - populate_daily_metrics() + func() assert 'disabled' in caplog.text @pytest.mark.django_db -def test_enable_populate_daily_metrics(caplog): +@pytest.mark.parametrize('func', [populate_daily_metrics, populate_daily_metrics_next]) +def test_enable_populate_daily_metrics(caplog, func): """Test figures.tasks.populate_daily_metrics Tests that when WAFFLE_DISABLE_PIPELINE is not active, the disabled warning msg is not logged """ with override_switch('figures.disable_pipeline', active=False): - populate_daily_metrics() + func() assert 'disabled' not in caplog.text @@ -156,11 +159,13 @@ def mock_sdm_load(self, site, date_for, **kwargs): as_date('2020-12-12'), as_datetime('2020-12-12') ]) +@pytest.mark.parametrize('extra_params', [{}, {'ed_next': True}]) def test_populate_daily_metrics_for_site_basic(transactional_db, monkeypatch, - date_for): + date_for, + extra_params): site = SiteFactory() - course_ids = ['fake-course-1', 'fake-course-2'] + course_ids = [fake_course_key(i) for i in range(2)] collected_course_ids = [] def fake_populate_single_cdm(course_id, **_kwargs): @@ -169,37 +174,49 @@ def fake_populate_single_cdm(course_id, **_kwargs): def fake_populate_single_sdm(site_id, **_kwargs): assert site_id == site.id + def fake_update_enrollment_data_for_course(course_id): + assert extra_params['ed_next'] + monkeypatch.setattr('figures.tasks.site_course_ids', lambda site: course_ids) monkeypatch.setattr('figures.tasks.populate_single_cdm', fake_populate_single_cdm) monkeypatch.setattr('figures.tasks.populate_single_sdm', fake_populate_single_sdm) + monkeypatch.setattr('figures.tasks.update_enrollment_data_for_course', + fake_update_enrollment_data_for_course) - populate_daily_metrics_for_site(site_id=site.id, date_for=date_for) + 
populate_daily_metrics_for_site(site_id=site.id, date_for=date_for, **extra_params) assert set(collected_course_ids) == set(course_ids) @pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, reason='Apparent Django 1.8 incompatibility') +@pytest.mark.parametrize('extra_params', [{}, {'ed_next': True}]) def test_populate_daily_metrics_for_site_error_on_cdm(transactional_db, monkeypatch, - caplog): + caplog, + extra_params): date_for = date.today() site = SiteFactory() - fake_course_ids = ['fake-course-id-1'] + fake_course_ids = [fake_course_key(1)] def fake_pop_single_cdm_fails(**kwargs): # TODO: test with different exceptions # At least one with and without `message_dict` raise FakeException('Hey!') + def fake_update_enrollment_data_for_course(course_id): + assert extra_params['ed_next'] + monkeypatch.setattr('figures.tasks.site_course_ids', lambda site: fake_course_ids) monkeypatch.setattr('figures.tasks.populate_single_cdm', fake_pop_single_cdm_fails) + monkeypatch.setattr('figures.tasks.update_enrollment_data_for_course', + fake_update_enrollment_data_for_course) - populate_daily_metrics_for_site(site_id=site.id, date_for=date_for) + populate_daily_metrics_for_site(site_id=site.id, date_for=date_for, **extra_params) last_log = caplog.records[-1] expected_msg = ('{prefix}:SITE:COURSE:FAIL:populate_daily_metrics_for_site. 
' @@ -215,9 +232,11 @@ def fake_pop_single_cdm_fails(**kwargs): @pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, reason='Apparent Django 1.8 incompatibility') +@pytest.mark.parametrize('extra_params', [{}, {'ed_next': True}]) def test_populate_daily_metrics_for_site_site_dne(transactional_db, monkeypatch, - caplog): + caplog, + extra_params): """ If there is an invalid site id, logs error and raises it """ @@ -226,7 +245,9 @@ def test_populate_daily_metrics_for_site_site_dne(transactional_db, assert not Site.objects.filter(id=bad_site_id).exists() with pytest.raises(Exception) as e: - populate_daily_metrics_for_site(site_id=bad_site_id, date_for=date_for) + populate_daily_metrics_for_site(site_id=bad_site_id, + date_for=date_for, + **extra_params) assert str(e.value) == 'Site matching query does not exist.' last_log = caplog.records[-1] @@ -237,9 +258,13 @@ def test_populate_daily_metrics_for_site_site_dne(transactional_db, @pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, reason='Apparent Django 1.8 incompatibility') +@pytest.mark.parametrize('func', [ + populate_daily_metrics, populate_daily_metrics_next +]) def test_populate_daily_metrics_site_level_error(transactional_db, monkeypatch, - caplog): + caplog, + func): """ Generic test that the first site fails but we can process the second site """ @@ -249,7 +274,6 @@ def test_populate_daily_metrics_site_level_error(transactional_db, bad_site = SiteFactory() populated_site_ids = [] failed_site_ids = [] - date_for = date.today() def fake_populate_daily_metrics_for_site(site_id, **_kwargs): """ @@ -263,7 +287,8 @@ def fake_populate_daily_metrics_for_site(site_id, **_kwargs): monkeypatch.setattr('figures.tasks.populate_daily_metrics_for_site', fake_populate_daily_metrics_for_site) - populate_daily_metrics(date_for=date_for) + func() + assert set(populated_site_ids) == set([good_site.id]) assert set(failed_site_ids) == set([bad_site.id]) @@ -297,13 +322,13 @@ def fake_update_enrollment_data_fails(**kwargs): 
monkeypatch.setattr('figures.tasks.populate_daily_metrics_for_site', fake_populate_daily_metrics_for_site) - monkeypatch.setattr('figures.tasks.update_enrollment_data', + monkeypatch.setattr('figures.tasks.update_enrollment_data_for_site', fake_update_enrollment_data_fails) populate_daily_metrics(date_for=date_for) last_log = caplog.records[-1] - expected_msg = ('{prefix}:FAIL figures.tasks update_enrollment_data ' + expected_msg = ('{prefix}:FAIL figures.tasks update_enrollment_data_for_site ' ' unhandled exception. site[{site_id}]:{domain}').format( prefix=FPD_LOG_PREFIX, site_id=site.id, @@ -313,9 +338,11 @@ def fake_update_enrollment_data_fails(**kwargs): @pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, reason='Broken test. Apparent Django 1.8 incompatibility') -def test_populate_daily_metrics_multisite(transactional_db, monkeypatch): +@pytest.mark.parametrize('func', [ + populate_daily_metrics, populate_daily_metrics_next +]) +def test_populate_daily_metrics_multisite(transactional_db, monkeypatch, caplog, func): # Stand up test data - date_for = '2019-01-02' site_links = [] for domain in ['alpha.domain', 'bravo.domain']: site_links.append(dict( @@ -323,4 +350,6 @@ def test_populate_daily_metrics_multisite(transactional_db, monkeypatch): courses=[CourseOverviewFactory() for i in range(2)], )) - populate_daily_metrics(date_for=date_for) + func() + + assert not caplog.records diff --git a/tests/test_course.py b/tests/test_course.py new file mode 100644 index 00000000..7415dfee --- /dev/null +++ b/tests/test_course.py @@ -0,0 +1,110 @@ +"""Test `figures.course` module + +""" +from __future__ import absolute_import +import pytest +from faker import Faker +from opaque_keys.edx.keys import CourseKey + +from figures.course import Course +from figures.helpers import days_from +from figures.sites import get_site_for_course + +from tests.factories import ( + CourseEnrollmentFactory, + CourseOverviewFactory, + StudentModuleFactory, +) + +fake = Faker() + + 
+@pytest.mark.django_db +class TestCourse: + """ + Starting with just basic tests + """ + @pytest.fixture(autouse=True) + def setup(self, db): + self.course_overview = CourseOverviewFactory() + # Little sanity check making sure our Factory has the right class type + # for the course key + assert isinstance(self.course_overview.id, CourseKey) + + def assert_construction(self, course): + assert course.course_key == self.course_overview.id + assert course.course_id == str(self.course_overview.id) + assert course.site == get_site_for_course(self.course_overview.id) + + def simple_test_property_course_id_association(self, factory_class, property_name): + """Helper method to DRY tests for property methods on simple associations + + This method can be used to test the Course class property methods return + expected results for property methods that return objects that can be + associated with a course with just the course id. Initially implemented + to test the enrollments and student modules property methods. Could also + be used to test for other models like CourseAccessGroup, CourseAccessRole, + Figures CourseDailyMetrics, + EnrollmentData, or LearnerCourseGradeMetrics if we wanted to implement + property methods for those + + If we did use this for models that use a string course id instead of a + CourseKey, then we'll need to make sure the handled course id form is + used. 
We could do this with a method parameter that either casts to the + expected form or defines which one to use and we can explicitly cast in + this method + """ + our_objects = [factory_class(course_id=self.course_overview.id) + for _ in range(2)] + other_object = factory_class() + assert not other_object.course_id == self.course_overview.id + course = Course(self.course_overview.id) + assert set(our_objects) == set(getattr(course, property_name)) + + def test_str_constructor(self): + self.assert_construction(Course(str(self.course_overview.id))) + + def test_course_key_constructor(self): + self.assert_construction(Course(self.course_overview.id)) + + def test_enrollments(self): + self.simple_test_property_course_id_association(CourseEnrollmentFactory, + 'enrollments') + + def test_student_modules(self): + self.simple_test_property_course_id_association(StudentModuleFactory, + 'student_modules') + + def test_student_modules_active_on_date(self): + our_date_for = fake.date_this_year() + our_created_sm = [StudentModuleFactory(course_id=self.course_overview.id, + created=our_date_for) for _ in range(2)] + our_modified_sm = [StudentModuleFactory(course_id=self.course_overview.id, + modified=our_date_for) for _ in range(2)] + # Create record with a different date + StudentModuleFactory(course_id=self.course_overview.id, + created=days_from(our_date_for, -2), + modified=days_from(our_date_for, -1)) + course = Course(self.course_overview.id) + found_sm = course.student_modules_active_on_date(our_date_for) + assert set(our_created_sm + our_modified_sm) == set(found_sm) + + def test_enrollments_active_on_date(self): + our_date_for = fake.date_this_year() + other_date_for = days_from(our_date_for, -1) + our_ce = [CourseEnrollmentFactory(course_id=self.course_overview.id) + for _ in range(2)] + our_sm = [] + for ce in our_ce: + our_sm.extend([ + StudentModuleFactory.from_course_enrollment(ce, modified=our_date_for), + StudentModuleFactory.from_course_enrollment(ce, 
created=our_date_for) + ]) + # Create enrollment we should not get in our results + other_ce = CourseEnrollmentFactory(course_id=self.course_overview.id) + StudentModuleFactory.from_course_enrollment(other_ce, + created=other_date_for, + modified=other_date_for) + course = Course(self.course_overview.id) + found_ce = course.enrollments_active_on_date(our_date_for) + assert set(found_ce) == set(our_ce)