From 01e5fef11a59ac3caa1ca86778af11241b684211 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Wed, 14 Oct 2020 18:26:05 +0200 Subject: [PATCH] Reworked SiteMonthlyMetrics registered users metric * now use collected total user counts in SiteDailyMetrics.total_user_count for the registered users * This saves expensive database queries * This is more accurate because it preserves historical data. The previous approach captured live data for users for a site and users can be removed from sites * Very rudimentary test coverage added What can be improved: This does not report on users who have enrolled for the current day. We want to avoide querying the Django 'User' model for 'date_joined' because the field is not indexed Test coverage can be improved. This commit just includes very basic coverage --- figures/metrics.py | 30 +++----- tests/metrics/test_metrics.py | 40 +--------- tests/metrics/test_registered_users.py | 101 +++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 60 deletions(-) create mode 100644 tests/metrics/test_registered_users.py diff --git a/figures/metrics.py b/figures/metrics.py index bb99b2fa..7cc477b8 100644 --- a/figures/metrics.py +++ b/figures/metrics.py @@ -293,7 +293,7 @@ def get_active_users_for_time_period(site, start_date, end_date, course_ids=None **filter_args).values('student__id').distinct().count() -def get_total_site_users_for_time_period(site, start_date, end_date, **kwargs): +def get_total_site_users_for_time_period(site, start_date, end_date, **_kwargs): """ Returns the maximum number of users who joined before or on the end date @@ -304,28 +304,15 @@ def get_total_site_users_for_time_period(site, start_date, end_date, **kwargs): TODO: Consider first trying to get the data from the SiteDailyMetrics model. If there are no records, then get the data from the User model """ - def calc_from_user_model(): - filter_args = dict( - date_joined__lt=as_datetime(next_day(end_date)), - ) - users = figures.sites.get_users_for_site(site) - return users.filter(**filter_args).count() - - def calc_from_site_daily_metrics(): - filter_args = dict( - site=site, - date_for__gt=prev_day(start_date), - date_for__lt=next_day(end_date)) - qs = SiteDailyMetrics.objects.filter(**filter_args) - if qs: - return qs.aggregate(maxval=Max('total_user_count'))['maxval'] - else: - return 0 - if kwargs.get('calc_from_sdm'): - return calc_from_site_daily_metrics() + filter_args = dict(site=site, + date_for__gt=prev_day(start_date), + date_for__lt=next_day(end_date)) + qs = SiteDailyMetrics.objects.filter(**filter_args) + if qs: + return qs.aggregate(maxval=Max('total_user_count'))['maxval'] else: - return calc_from_user_model() + return 0 def get_total_site_users_joined_for_time_period(site, start_date, end_date, @@ -334,6 +321,7 @@ def get_total_site_users_joined_for_time_period(site, start_date, end_date, NOTE: Untested and not yet used in the general site metrics, but we'll want to add it TODO: Rename this function to be "new_users" for consistency with the API endpoint + TODO: When we implement this, add data to Figures model space for performance """ def calc_from_user_model(): filter_args = dict( diff --git a/tests/metrics/test_metrics.py b/tests/metrics/test_metrics.py index 2a072386..495641ad 100644 --- a/tests/metrics/test_metrics.py +++ b/tests/metrics/test_metrics.py @@ -55,7 +55,6 @@ get_total_course_completions_for_time_period, get_total_enrollments_for_time_period, get_total_site_courses_for_time_period, - get_total_site_users_for_time_period, get_total_site_users_joined_for_time_period, ) @@ -308,25 +307,7 @@ def test_get_active_users_for_month(self): count = get_active_users_for_time_period(site=self.site, start_date=start_date, end_date=end_date) - assert count == len(sm_in) -1 - - def test_get_total_site_users_for_time_period(self): - ''' - TODO: add users who joined before and after the time period, and - compare the count to the users created on or before the end date - - TODO: Create - ''' - users = create_users_joined_over_time( - site=self.site, - is_multisite=figures.helpers.is_multisite(), - start_date=self.data_start_date, - end_date=self.data_end_date) - count = get_total_site_users_for_time_period( - site=self.site, - start_date=self.data_start_date, - end_date=self.data_end_date) - assert count == len(users) + assert count == len(sm_in) - 1 def test_get_total_site_users_joined_for_time_period(self): ''' @@ -366,7 +347,6 @@ def test_get_total_site_courses_for_time_period(self): assert count == self.site_daily_metrics[-1].course_count - def test_get_monthly_site_metrics(self): ''' Since we are testing results for individual getters in other test @@ -442,23 +422,6 @@ def test_get_active_users_for_time_period(self): assert count == len(student_module_sets) - def test_get_total_site_users_for_time_period(self): - ''' - TODO: add users who joined before and after the time period, and - compare the count to the users created on or before the end date - - TODO: Create - ''' - users = create_users_joined_over_time( - site=self.alpha_site, - is_multisite=figures.helpers.is_multisite(), - start_date=self.data_start_date, - end_date=self.data_end_date) - count = get_total_site_users_for_time_period(site=self.alpha_site, - start_date=self.data_start_date, - end_date=self.data_end_date) - assert count == len(users) - def test_get_total_site_users_joined_for_time_period(self): ''' TODO: add users who joined before and after the time period, and @@ -494,7 +457,6 @@ def test_get_total_site_courses_for_time_period(self): end_date=self.data_end_date) assert count == self.alpha_site_daily_metrics[-1].course_count - def test_get_monthly_site_metrics(self): ''' Since we are testing results for individual getters in other test diff --git a/tests/metrics/test_registered_users.py b/tests/metrics/test_registered_users.py new file mode 100644 index 00000000..28065ec7 --- /dev/null +++ b/tests/metrics/test_registered_users.py @@ -0,0 +1,101 @@ +"""Tests for registered user metrics + +This module tests metrics functionality in `figures.metrics` that work with +the "registered user" metric + +The registered user metric provides the count of how many users have registered +for a given site for a time range + +TODO: Provide details here +* Figures UI registered users on a site +* The pipeline, storage, API retrieval, and any derivative metrics + +# Developer Note + +* Initially, we have basic tests for coverage + +* We've just changed the metric from live User model `date_joined` queries to + using stored data in `SiteDailyMetrics.total_user_count` +* We need to deploy quickly, so doing minimal testing to start + + +""" + +from datetime import datetime +import pytest + +from figures.metrics import get_total_site_users_for_time_period + +from tests.factories import ( + SiteDailyMetricsFactory, + SiteFactory, +) + + +@pytest.fixture +@pytest.mark.django_db +def sdm_test_data(db): + """Quick-n-dirty test data construction + + Expected count is the highest 'total_user_count' + """ + dates = [ + datetime(year=2020, month=3, day=15), + datetime(year=2020, month=4, day=1), + datetime(year=2020, month=4, day=15), + datetime(year=2020, month=4, day=30), + datetime(year=2020, month=5, day=1) + ] + + our_date_range = [ + datetime(year=2020, month=4, day=1), + datetime(year=2020, month=4, day=30), + ] + my_site = SiteFactory() + other_site = SiteFactory() + + my_sdm = [ + # month before + SiteDailyMetricsFactory(site=my_site, + date_for=dates[0], + total_user_count=10), + # in our date range + SiteDailyMetricsFactory(site=my_site, + date_for=dates[1], + total_user_count=20), + SiteDailyMetricsFactory(site=my_site, + date_for=dates[2], + total_user_count=30), + SiteDailyMetricsFactory(site=my_site, + date_for=dates[3], + total_user_count=25), + # month after + SiteDailyMetricsFactory(site=my_site, + date_for=dates[4], + total_user_count=500), + ] + # Create one SDM record for the other site in the month we query + other_sdm = [ + SiteDailyMetricsFactory(site=other_site, + date_for=dates[2], + total_user_count=13)] + return dict( + our_date_range=our_date_range, + my_site=my_site, + other_site=other_site, + my_sdm=my_sdm, + other_sdm=other_sdm, + expected_count=30, + ) + + +def test_get_total_site_users_for_month(sdm_test_data): + """ + """ + my_site = sdm_test_data['my_site'] + start_date, end_date = sdm_test_data['our_date_range'] + count = get_total_site_users_for_time_period( + site=my_site, + start_date=start_date, + end_date=end_date) + assert count == sdm_test_data['expected_count']