From 651d29f551fbd96fbae1c776baf1d8d0bd645a85 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 17 Dec 2020 11:23:49 +0100 Subject: [PATCH 1/4] Add pipeline exception handling - course daily metrics The pipeline function, bulk_calculate_course_progress_data, does a lot of work and a potential point of failure to fail all the metrics for the course for the day on collecting just one metric. So this commit traps for exceptions when the function "bulk_calculate_course_progress_data" is called and sets the course daily metrics 'average_progress' to None if it fails. Being 'None' helps identify pipeline erors rather than setting progress to zero. Added test to make sure we are logging when it fails --- figures/pipeline/course_daily_metrics.py | 17 +++++++++++++---- tests/pipeline/test_course_daily_metrics.py | 19 +++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/figures/pipeline/course_daily_metrics.py b/figures/pipeline/course_daily_metrics.py index cfe28e5c..694b7c99 100644 --- a/figures/pipeline/course_daily_metrics.py +++ b/figures/pipeline/course_daily_metrics.py @@ -56,7 +56,7 @@ def get_enrolled_in_exclude_admins(course_id, date_for=None): staff = CourseStaffRole(course_locator).users_with_role() admins = CourseInstructorRole(course_locator).users_with_role() coaches = CourseCcxCoachRole(course_locator).users_with_role() - filter_args = dict(course_id=course_id, is_active=1) + filter_args = dict(course_id=course_locator, is_active=1) if date_for: filter_args.update(dict(created__lt=as_datetime(next_day(date_for)))) @@ -269,9 +269,18 @@ def extract(self, course_id, date_for=None, **_kwargs): data['active_learners_today'] = active_learners_today # Average progress - progress_data = bulk_calculate_course_progress_data(course_id=course_id, - date_for=date_for) - data['average_progress'] = progress_data['average_progress'] + try: + progress_data = bulk_calculate_course_progress_data(course_id=course_id, + date_for=date_for) + data['average_progress'] = progress_data['average_progress'] + except Exception: + # Broad exception for starters. Refine as we see what gets caught + # Make sure we set the average_progres to None so that upstream + # does not think things are normal + data['average_progress'] = None + msg = ('FIGURES:FAIL bulk_calculate_course_progress_data' + ' date_for={date_for}, course_id="{course_id}"') + logger.exception(msg.format(date_for=date_for, course_id=course_id)) data['average_days_to_complete'] = get_average_days_to_complete( course_id, date_for,) diff --git a/tests/pipeline/test_course_daily_metrics.py b/tests/pipeline/test_course_daily_metrics.py index a9a6fc91..100750d0 100644 --- a/tests/pipeline/test_course_daily_metrics.py +++ b/tests/pipeline/test_course_daily_metrics.py @@ -247,6 +247,25 @@ def test_extract(self, monkeypatch): results = pipeline_cdm.CourseDailyMetricsExtractor().extract(course_id) assert results + def test_when_bulk_calculate_course_progress_data_fails(self, + monkeypatch, + caplog): + course_id = self.course_enrollments[0].course_id + + def mock_bulk(**_kwargs): + raise Exception('fake exception') + + monkeypatch.setattr(figures.pipeline.course_daily_metrics, + 'bulk_calculate_course_progress_data', + mock_bulk) + + results = pipeline_cdm.CourseDailyMetricsExtractor().extract(course_id) + + last_log = caplog.records[-1] + assert last_log.message.startswith( + 'FIGURES:FAIL bulk_calculate_course_progress_data') + assert not results['average_progress'] + @pytest.mark.django_db class TestCourseDailyMetricsLoader(object): From ebb623152a6ec2202e38eb51bd128d356725939f Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 17 Dec 2020 11:57:58 +0100 Subject: [PATCH 2/4] Add figures.tasks exception handling cdm and enrollment data Added exception handling and logging to populate_single_cdm, the call to update_enrollment_data in 'populate_daily_metrics' and exception handling for each site so that one site does not fail the remaining sites in the chain * Updated tests to handle when update_enrollment_data raises an exception * Updated tests to handle when a site level exception is raised --- figures/tasks.py | 82 +++++++++++++++++++++++++++------------------ tests/test_tasks.py | 63 +++++++++++++++++++++++++++++++++- 2 files changed, 111 insertions(+), 34 deletions(-) diff --git a/figures/tasks.py b/figures/tasks.py index 335029ff..21e5370a 100644 --- a/figures/tasks.py +++ b/figures/tasks.py @@ -13,6 +13,7 @@ from celery.app import shared_task from celery.utils.log import get_task_logger +# TODO: import CourseOverview from figures.compat from openedx.core.djangoapps.content.course_overviews.models import CourseOverview # noqa pylint: disable=import-error from student.models import CourseEnrollment # pylint: disable=import-error @@ -43,6 +44,8 @@ @shared_task def populate_single_cdm(course_id, date_for=None, force_update=False): '''Populates a CourseDailyMetrics record for the given date and course + + TODO: cdm needs to handle course_id as the string ''' if date_for: date_for = as_date(date_for) @@ -93,6 +96,10 @@ def update_enrollment_data(site_id, **_kwargs): logger.error( 'figurs.tasks.update_enrollment_data: site_id={} does not exist'.format( site_id)) + except Exception: + msg = ('FIGURES:FAIL daily metrics:update_enrollment_data' + ' for site_id={}'.format(site_id)) + logger.exception(msg) @shared_task @@ -124,42 +131,51 @@ def populate_daily_metrics(date_for=None, force_update=False): sites_count = Site.objects.count() for i, site in enumerate(Site.objects.all()): - for course in figures.sites.get_courses_for_site(site): + try: + for course in figures.sites.get_courses_for_site(site): + try: + populate_single_cdm( + course_id=course.id, + date_for=date_for, + force_update=force_update) + except Exception as e: # pylint: disable=broad-except + logger.exception('figures.tasks.populate_daily_metrics failed') + # Always capture CDM load exceptions to the Figures pipeline + # error table + error_data = dict( + date_for=date_for, + msg='figures.tasks.populate_daily_metrics failed', + exception_class=e.__class__.__name__, + ) + if hasattr(e, 'message_dict'): + error_data['message_dict'] = e.message_dict # pylint: disable=no-member + log_error_to_db( + error_data=error_data, + error_type=PipelineError.COURSE_DATA, + course_id=str(course.id), + site=site, + logger=logger, + log_pipeline_errors_to_db=True, + ) + populate_site_daily_metrics( + site_id=site.id, + date_for=date_for, + force_update=force_update) + + # Until we implement signal triggers try: - populate_single_cdm( - course_id=course.id, - date_for=date_for, - force_update=force_update) - except Exception as e: # pylint: disable=broad-except - logger.exception('figures.tasks.populate_daily_metrics failed') - # Always capture CDM load exceptions to the Figures pipeline - # error table - error_data = dict( - date_for=date_for, - msg='figures.tasks.populate_daily_metrics failed', - exception_class=e.__class__.__name__, - ) - if hasattr(e, 'message_dict'): - error_data['message_dict'] = e.message_dict # pylint: disable=no-member - log_error_to_db( - error_data=error_data, - error_type=PipelineError.COURSE_DATA, - course_id=str(course.id), - site=site, - logger=logger, - log_pipeline_errors_to_db=True, - ) - populate_site_daily_metrics( - site_id=site.id, - date_for=date_for, - force_update=force_update) - - # Until we implement signal triggers - update_enrollment_data(site_id=site.id) - + update_enrollment_data(site_id=site.id) + except Exception: + msg = ('FIGURES:FAIL figures.tasks update_enrollment_data ' + ' unhandled exception. site[{}]:{}') + logger.exception(msg.format(site.id, site.domain)) + + except Exception: + msg = ('FIGURES:FAIL populate_daily_metrics unhandled site level' + ' exception for site[{}]={}') + logger.exception(msg.format(site.id, site.domain)) logger.info("figures.populate_daily_metrics: finished Site {:04d} of {:04d}".format( i, sites_count)) - logger.info('Finished task "figures.populate_daily_metrics" for date "{}"'.format( date_for)) diff --git a/tests/test_tasks.py b/tests/test_tasks.py index 9a426b71..e96993b8 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -70,6 +70,30 @@ def mock_sdm_load(self, site, date_for, **kwargs): assert SiteDailyMetrics.objects.count() == 1 +@pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, + reason='Broken test. Apparent Django 1.8 incompatibility') +def test_populate_daily_metrics_site_level_error(transactional_db, + monkeypatch, + caplog): + date_for = '2019-01-02' + error_message = dict(message=[u'expected failure']) + assert not CourseOverview.objects.count() + + def mock_get_courses_fail(site): + raise Exception(message=error_message) + + assert SiteDailyMetrics.objects.count() == 0 + assert CourseDailyMetrics.objects.count() == 0 + monkeypatch.setattr( + figures.sites, 'get_courses_for_site', mock_get_courses_fail) + + figures.tasks.populate_daily_metrics(date_for=date_for) + + last_log = caplog.records[-1] + assert last_log.message.startswith( + 'FIGURES:FAIL populate_daily_metrics unhandled site level exception for site') + + @pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, reason='Broken test. Apparent Django 1.8 incompatibility') def test_populate_daily_metrics_error(transactional_db, monkeypatch): @@ -99,6 +123,42 @@ def mock_pop_single_cdm_fails(**kwargs): assert error_data['message_dict']['message'] == error_message['message'] +@pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, + reason='Broken test. Apparent Django 1.8 incompatibility') +def test_populate_daily_metrics_enrollment_data_error(transactional_db, + monkeypatch, + caplog): + date_for = '2019-01-02' + error_message = dict(message=[u'expected failure']) + assert not CourseOverview.objects.count() + + def mock_get_courses(site): + CourseOverviewFactory() + return CourseOverview.objects.all() + + def mock_pop_single_cdm(**kwargs): + pass + + def mock_update_enrollment_data_fails(**kwargs): + # TODO: test with different exceptions + # At least one with and without `message_dict` + raise Exception(message=error_message) + + assert SiteDailyMetrics.objects.count() == 0 + assert CourseDailyMetrics.objects.count() == 0 + monkeypatch.setattr( + figures.sites, 'get_courses_for_site', mock_get_courses) + monkeypatch.setattr( + figures.tasks, 'populate_single_cdm', mock_pop_single_cdm) + monkeypatch.setattr( + figures.tasks, 'update_enrollment_data', mock_update_enrollment_data_fails) + + figures.tasks.populate_daily_metrics(date_for=date_for) + last_log = caplog.records[-1] + assert last_log.message.startswith( + 'FIGURES:FAIL figures.tasks update_enrollment_data') + + @pytest.mark.skipif(OPENEDX_RELEASE == GINKGO, reason='Broken test. Apparent Django 1.8 incompatibility') def test_populate_daily_metrics_multisite(transactional_db, monkeypatch): @@ -195,6 +255,7 @@ def test_populate_monthly_metrics_for_site(transactional_db, monkeypatch): """ expected_site = SiteFactory() sites_visited = [] + def mock_fill_last_smm_month(site): assert site == expected_site sites_visited.append(site) @@ -222,4 +283,4 @@ def mock_populate_monthly_metrics_for_site(site_id): mock_populate_monthly_metrics_for_site) figures.tasks.run_figures_monthly_metrics() - assert set(sites_visited) == set([expected_site.id]) \ No newline at end of file + assert set(sites_visited) == set([expected_site.id]) From 4e12e9d651d08056a5853cb27be1bb6ee2a22509 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 17 Dec 2020 12:29:12 +0100 Subject: [PATCH 3/4] backfill_enrollment_data_for_site - Fixed Typo in error handling This function is running into 'CourseNotFound' exceptions. Because of a poorly place paranthesis in the logger message, the function then threw a "TypeError: str() takes at most 1 argument (2 given)" which THEN caused the whole daily metrics pipeline to fail because I forgot to wrap the new code in exception handling --- figures/backfill.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/figures/backfill.py b/figures/backfill.py index 438ad0b5..645fcab3 100644 --- a/figures/backfill.py +++ b/figures/backfill.py @@ -67,7 +67,7 @@ def backfill_enrollment_data_for_site(site): enrollment_data.append((obj, created)) except CourseNotFound: errors.append('CourseNotFound for course "{}". ' - ' CourseEnrollment ID='.format(str(rec.course_id, - rec.id))) + ' CourseEnrollment ID='.format(str(rec.course_id), + rec.id)) return dict(results=enrollment_data, errors=errors) From 73f07b4f3662df44de121f75e3ee035b9ce88dd2 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Thu, 17 Dec 2020 13:57:03 +0100 Subject: [PATCH 4/4] Bump Figures 0.3.dev6, Fix error message string, pylint errors --- figures/backfill.py | 7 ++++--- figures/pipeline/course_daily_metrics.py | 2 +- figures/tasks.py | 6 +++--- setup.py | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/figures/backfill.py b/figures/backfill.py index 645fcab3..0a41e0b9 100644 --- a/figures/backfill.py +++ b/figures/backfill.py @@ -66,8 +66,9 @@ def backfill_enrollment_data_for_site(site): course_id=rec.course_id) enrollment_data.append((obj, created)) except CourseNotFound: - errors.append('CourseNotFound for course "{}". ' - ' CourseEnrollment ID='.format(str(rec.course_id), - rec.id)) + msg = ('CourseNotFound for course "{course}". ' + ' CourseEnrollment ID={ce_id}') + errors.append(msg.format(course=str(rec.course_id), + ce_id=rec.id)) return dict(results=enrollment_data, errors=errors) diff --git a/figures/pipeline/course_daily_metrics.py b/figures/pipeline/course_daily_metrics.py index 694b7c99..4842dfe9 100644 --- a/figures/pipeline/course_daily_metrics.py +++ b/figures/pipeline/course_daily_metrics.py @@ -273,7 +273,7 @@ def extract(self, course_id, date_for=None, **_kwargs): progress_data = bulk_calculate_course_progress_data(course_id=course_id, date_for=date_for) data['average_progress'] = progress_data['average_progress'] - except Exception: + except Exception: # pylint: disable=broad-except # Broad exception for starters. Refine as we see what gets caught # Make sure we set the average_progres to None so that upstream # does not think things are normal diff --git a/figures/tasks.py b/figures/tasks.py index 21e5370a..94f05187 100644 --- a/figures/tasks.py +++ b/figures/tasks.py @@ -96,7 +96,7 @@ def update_enrollment_data(site_id, **_kwargs): logger.error( 'figurs.tasks.update_enrollment_data: site_id={} does not exist'.format( site_id)) - except Exception: + except Exception: # pylint: disable=broad-except msg = ('FIGURES:FAIL daily metrics:update_enrollment_data' ' for site_id={}'.format(site_id)) logger.exception(msg) @@ -165,12 +165,12 @@ def populate_daily_metrics(date_for=None, force_update=False): # Until we implement signal triggers try: update_enrollment_data(site_id=site.id) - except Exception: + except Exception: # pylint: disable=broad-except msg = ('FIGURES:FAIL figures.tasks update_enrollment_data ' ' unhandled exception. site[{}]:{}') logger.exception(msg.format(site.id, site.domain)) - except Exception: + except Exception: # pylint: disable=broad-except msg = ('FIGURES:FAIL populate_daily_metrics unhandled site level' ' exception for site[{}]={}') logger.exception(msg.format(site.id, site.domain)) diff --git a/setup.py b/setup.py index 5d9bc8f6..6b620dc3 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setup( name='Figures', - version='0.4.dev5', + version='0.4.dev6', packages=find_packages(), include_package_data=True, license='MIT',