From 259d8f1e8a5e2cad3f6ed5a9faef235cef87a806 Mon Sep 17 00:00:00 2001 From: Dan Mihaila Date: Thu, 31 Oct 2024 13:05:43 +0200 Subject: [PATCH] HDX-10191 org stats download as xlsx --- .../helpers/organization_helper.py | 46 +++- .../ckanext/hdx_org_group/plugin.py | 2 +- .../hdx_org_group/views/organization.py | 74 +++++-- .../templates/organization/stats.html | 204 ++++++++++-------- .../ckanext/hdx_theme/util/jql.py | 92 ++++++-- .../ckanext/hdx_theme/util/jql_queries.py | 41 ++++ requirements.in | 1 + requirements.txt | 1 + 8 files changed, 328 insertions(+), 133 deletions(-) diff --git a/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py b/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py index bf4c78a64c..7b5f59e306 100644 --- a/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py +++ b/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py @@ -8,13 +8,14 @@ import logging import os import six - +import openpyxl import ckanext.hdx_search.cli.click_feature_search_command as lunr import ckanext.hdx_theme.helpers.helpers as h import ckanext.hdx_users.helpers.mailer as hdx_mailer from sqlalchemy import func import ckanext.hdx_org_group.helpers.static_lists as static_lists - +from flask import make_response +from tempfile import NamedTemporaryFile import ckan.lib.dictization as dictization import ckan.lib.dictization.model_dictize as model_dictize import ckan.lib.dictization.model_save as model_save @@ -30,6 +31,7 @@ from ckan.common import _, c, config import ckan.plugins.toolkit as toolkit import ckan.lib.base as base +import ckanext.hdx_theme.util.jql as jql BUCKET = str(uploader.get_storage_path()) + '/storage/uploads/group/' abort = base.abort @@ -799,3 +801,43 @@ def org_add_last_updated_field(displayed_orgs): def hdx_organization_type_get_value(org_type_key): return next((org_type[0] for org_type in static_lists.ORGANIZATION_TYPE_LIST if org_type[1] == org_type_key), org_type_key) + +def hdx_generate_organization_stats(org_dict): + # Define variable to load the dataframe + wb = openpyxl.Workbook() + + # Create SheetOne with Data + sheetOne = wb.create_sheet("Downloads and Page Views") + + result = jql.pageviews_downloads_per_organization_last_5_years(org_dict.get('id')) + data = [('Date', 'Page View - Unique', 'Page Views - Total', 'Resource Download - Unique', 'Resource Download - Total')] + for key, value in result.items(): + data.append((key, value.get('pageviews_unique'), value.get('pageviews_total'), value.get('downloads_unique'), value.get('downloads_total'))) + for item in data: + sheetOne.append(item) + + # Create SheetTwo with Data + sheetTwo = wb.create_sheet("README") + + data = [('Overview', 'This spreadsheet contains the number of downloads of files and page views of datasets of the organization, tracked monthly over the past 4 years.'), + ('Data Source', 'The data has been sourced from the analytics platform Mixpanel.'), + ('Contents', 'The spreadsheet includes the following information: 1. Page Views: Total page views by month. 2. Downloads: Total number of downloads by month.'), + ('Caveats', 'To ensure accurate data representation, we have excluded as much bot traffic as possible.'), + ('Update Frequency', 'The spreadsheet is refreshed automatically on the first day of each month.'), + ('Contact', 'For additional inquiries, please contact us at hdx@un.org')] + + for item in data: + sheetTwo.append(item) + + # Remove default Sheet + wb.remove(wb['Sheet']) + + # Iterate the loop to read the cell values + with NamedTemporaryFile() as tmp: + wb.save(tmp) + tmp.seek(0) + output = make_response(tmp.read()) + output.headers['Content-Type'] = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + output.headers['Content-Disposition'] = f'attachment; filename="{org_dict.get("name")}_stats.xlsx"' + + return output diff --git a/ckanext-hdx_org_group/ckanext/hdx_org_group/plugin.py b/ckanext-hdx_org_group/ckanext/hdx_org_group/plugin.py index 7f83cd6c11..97d4a224b6 100644 --- a/ckanext-hdx_org_group/ckanext/hdx_org_group/plugin.py +++ b/ckanext-hdx_org_group/ckanext/hdx_org_group/plugin.py @@ -92,7 +92,7 @@ def group_types(self): # IGroupForm def setup_template_variables(self, context, data_dict): - org.new_org_template_variables(context, data_dict) + org.new_org_template_variables(data_dict) # IValidators def get_validators(self): diff --git a/ckanext-hdx_org_group/ckanext/hdx_org_group/views/organization.py b/ckanext-hdx_org_group/ckanext/hdx_org_group/views/organization.py index 01f7c613c7..2f22dd6127 100644 --- a/ckanext-hdx_org_group/ckanext/hdx_org_group/views/organization.py +++ b/ckanext-hdx_org_group/ckanext/hdx_org_group/views/organization.py @@ -2,19 +2,19 @@ from flask import Blueprint from six.moves.urllib.parse import urlencode - +from ckan.types import Context +import ckan.lib.plugins as lib_plugins import ckan.model as model import ckan.plugins.toolkit as tk -import ckan.lib.plugins as lib_plugins - import ckanext.hdx_org_group.helpers.org_meta_dao as org_meta_dao import ckanext.hdx_org_group.helpers.organization_helper as helper import ckanext.hdx_org_group.helpers.static_lists as static_lists import ckanext.hdx_theme.helpers.helpers as hdx_helpers - -from ckan.views.group import _get_group_template, CreateGroupView, EditGroupView +from ckan.views.group import CreateGroupView, EditGroupView, _get_group_template from ckanext.hdx_org_group.controller_logic.organization_read_logic import OrgReadLogic -from ckanext.hdx_org_group.controller_logic.organization_stats_logic import OrganizationStatsLogic +from ckanext.hdx_org_group.controller_logic.organization_stats_logic import ( + OrganizationStatsLogic, +) from ckanext.hdx_org_group.views.light_organization import _index from ckanext.hdx_theme.util.light_redirect import check_redirect_needed from ckanext.hdx_theme.util.mail import NoRecipientException @@ -46,7 +46,7 @@ def index(): @check_redirect_needed def read(id): - context = { + context: Context = { 'model': model, 'session': model.Session, 'for_view': True, @@ -82,19 +82,19 @@ def read(id): } template_file = _get_group_template('read_template', 'organization') return render(template_file, template_data) - except NotFound as e: + except NotFound: abort(404, _('Page not found')) - except NotAuthorized as e: + except NotAuthorized: abort(403, _('Not authorized to see this page')) def _generate_template_data_for_custom_org(org_read_logic): - ''' + """ :param org_read_logic: :type org_read_logic: OrgReadLogic :returns: the template data dict :rtype: dict - ''' + """ org_meta = org_read_logic.org_meta org_dict = org_meta.org_dict org_id = org_dict['id'] @@ -147,7 +147,7 @@ def _generate_template_data_for_custom_org(org_read_logic): }, - # This is hear for compatibility with the custom_org_header.html template, which is still + # This is here for compatibility with the custom_org_header.html template, which is still # used from pylon controllers 'org_meta': { 'id': org_dict['name'], @@ -177,7 +177,7 @@ def _generate_template_data_for_custom_org(org_read_logic): def request_new(): - context = {'model': model, 'session': model.Session, 'user': g.user} + context: Context = {'model': model, 'session': model.Session, 'user': g.user} try: check_access('hdx_send_new_org_request', context) except NotAuthorized: @@ -231,7 +231,7 @@ def _process_new_org_request(): if hdx_org_type_code: hdx_org_type = next( - (type[0] for type in static_lists.ORGANIZATION_TYPE_LIST if type[1] == hdx_org_type_code), '-1') + (_type[0] for _type in static_lists.ORGANIZATION_TYPE_LIST if _type[1] == hdx_org_type_code), '-1') data = { 'name': request.form.get('name', ''), @@ -271,7 +271,7 @@ def _transform_dict_for_mailing(data_dict): return data_dict_for_mailing -def new_org_template_variables(context, data_dict): +def new_org_template_variables(data_dict): data_dict['hdx_org_type_list'] = [{'value': '-1', 'text': _('-- Please select --')}] + \ [{'value': t[1], 'text': _(t[0])} for t in static_lists.ORGANIZATION_TYPE_LIST] @@ -296,7 +296,7 @@ def stats(id): def restore(id): - context = { + context: Context = { 'model': model, 'session': model.Session, 'user': g.user, 'for_edit': True, @@ -304,7 +304,7 @@ def restore(id): try: check_access('organization_patch', context, {'id': id}) - except NotAuthorized as e: + except NotAuthorized: return abort(403, _('Unauthorized to restore this organization')) try: @@ -329,7 +329,7 @@ def activity(id): def activity_offset(id, offset=0): - ''' + """ Modified core functionality to use the new OrgMetaDao class for fetching information needed on all org-related pages. @@ -340,7 +340,7 @@ def activity_offset(id, offset=0): :param offset: :type offset: int :return: - ''' + """ org_meta = org_meta_dao.OrgMetaDao(id, g.user, g.userobj) org_meta.fetch_all() org_dict = org_meta.org_dict @@ -350,26 +350,53 @@ def activity_offset(id, offset=0): # Add the group's activity stream (already rendered to HTML) to the # template context for the group/read.html template to retrieve later. - context = {'model': model, 'session': model.Session, + context: Context = {'model': model, 'session': model.Session, 'user': g.user, 'for_view': True} group_activity_stream = get_action('organization_activity_list')( context, {'id': org_dict['id'], 'offset': offset}) - - extra_vars = { 'org_dict': org_dict, 'org_meta': org_meta, 'group_activity_stream': group_activity_stream, } - template = None if org_meta.is_custom: template = 'organization/custom_activity_stream.html' else: template = lib_plugins.lookup_group_plugin('organization').activity_template() return render(template, extra_vars) +def organization_stats(id): + """ + Handles downloading .xlsx organization stats + + :returns: xlsx + """ + + context: Context = { + 'model': model, + 'session': model.Session, + 'user': g.user or g.author, + 'auth_user_obj': g.userobj + } + + try: + check_access('organization_update', context, {'id': id}) + except NotAuthorized: + return abort(403, _('Unauthorized to restore this organization')) + + # check if organization exists + try: + org_dict = get_action('organization_show')(context, {'id': id}) + output = helper.hdx_generate_organization_stats(org_dict) + return output + + except NotFound: + return abort(404, _('Organization not found')) + except NotAuthorized: + return abort(404, _('Organization not found')) + hdx_org.add_url_rule(u'/', view_func=index, strict_slashes=False) hdx_org.add_url_rule( @@ -387,3 +414,4 @@ def activity_offset(id, offset=0): hdx_org.add_url_rule(u'/restore/', view_func=restore, methods=[u'POST']) hdx_org.add_url_rule(u'/activity/', view_func=activity) hdx_org.add_url_rule(u'/activity//', view_func=activity_offset, defaults={'offset': 0}) +hdx_org.add_url_rule(u'//download_stats', view_func=organization_stats) diff --git a/ckanext-hdx_theme/ckanext/hdx_theme/templates/organization/stats.html b/ckanext-hdx_theme/ckanext/hdx_theme/templates/organization/stats.html index 1dcd4804ba..75439e095e 100644 --- a/ckanext-hdx_theme/ckanext/hdx_theme/templates/organization/stats.html +++ b/ckanext-hdx_theme/ckanext/hdx_theme/templates/organization/stats.html @@ -3,124 +3,140 @@ {% block subtitle %}{{ _('Stats') }} - {{ org_meta.org_dict.title or org_meta.name }}{% endblock %} {% block primary_content_inner %} - - - - + + + + -

{% block page_heading %}{{ _('Stats') }}{% endblock %}

-
-
-
- Number of Downloads (last 24 weeks) - -
?
-
-
+

{% block page_heading %}{{ _('Stats') }}{% endblock %}

+
+
+ {% set margin_top="" %} + {% if h.check_access('organization_update', {'id': org_meta.org_dict.id}) %} +
+

Export more detailed usage statistics [?]: XLS

-
-
-
-
-
-
-
- Total number of downloads for this organisation's content -
-
-
- + {% set margin_top="mt0" %} + {% endif %} +
+ Number of Downloads (last 24 weeks) + +
?
+
+
+
+
+
+
+
+
+
+
+ Total number of downloads for this organisation's content +
+
+
+ {{ data.stats_total_downloads.formatted_value }} - {{ data.stats_total_downloads.units }} -
-
-
+ {{ data.stats_total_downloads.units }}
+
+
+
-
-
-
-
-
- Downloads and Total Page Views (last 24 weeks) - -
?
-
-
-
-
-
-
-
+
+
+
+
+
+ Downloads and Total Page Views (last 24 weeks) + +
?
+
+
-
-
- Users (last 30 days) - -
?
-
-
-
-
-
- Unique visitors who downloaded at least one dataset from this org -
-
-
- +
+
+
+
+
+
+
+ Users (last 30 days) + +
?
+
+
+
+
+
+ Unique visitors who downloaded at least one dataset from this org +
+
+
+ {{ data.stats_downloaders.formatted_value }} - {{ data.stats_downloaders.units }} users -
-
-
+ {{ data.stats_downloaders.units }} users
+
+
+
-
-
-
- Unique visitors to this organisation's content -
-
-
- +
+
+
+ Unique visitors to this organisation's content +
+
+
+ {{ data.stats_viewers.formatted_value }} - {{ data.stats_viewers.units }} users -
-
-
+ {{ data.stats_viewers.units }} users
+
+
-
-
-
-
+
+
+
+
+
{% endblock %} {% block primary_content_module_margin %}{% endblock %} {% block styles %} - {{ super() }} -{# {% resource 'hdx_theme/dashboard.css' %}#} -{# {% resource 'hdx_theme/hdx_dashboard.js' %}#} -{# {% resource 'hdx_theme/crisis/topline.css' %}#} -{# {% resource 'hdx_theme/organization/stats.js' %}#} -{# {% resource 'hdx_theme/organization/stats.css' %}#} -{# {% resource 'hdx_theme/d3' %}#} -{# {% resource 'hdx_theme/c3' %}#} -{# {% resource 'hdx_theme/datasets/stats-chart.js' %}#} + {{ super() }} + {# {% resource 'hdx_theme/dashboard.css' %}#} + {# {% resource 'hdx_theme/hdx_dashboard.js' %}#} + {# {% resource 'hdx_theme/crisis/topline.css' %}#} + {# {% resource 'hdx_theme/organization/stats.js' %}#} + {# {% resource 'hdx_theme/organization/stats.css' %}#} + {# {% resource 'hdx_theme/d3' %}#} + {# {% resource 'hdx_theme/c3' %}#} + {# {% resource 'hdx_theme/datasets/stats-chart.js' %}#} {% asset 'hdx_theme/base-dashboard-styles' %} {% asset 'hdx_theme/charting-scripts' %} diff --git a/ckanext-hdx_theme/ckanext/hdx_theme/util/jql.py b/ckanext-hdx_theme/ckanext/hdx_theme/util/jql.py index f6268d9bf2..02a74a04da 100644 --- a/ckanext-hdx_theme/ckanext/hdx_theme/util/jql.py +++ b/ckanext-hdx_theme/ckanext/hdx_theme/util/jql.py @@ -2,7 +2,7 @@ import logging from dogpile.cache import make_region -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from collections import OrderedDict from functools import wraps @@ -53,12 +53,12 @@ def _compile_query(self): } def _run_query(self, transformer): - ''' + """ :param transformer: transforms the request result :type transformer: MappingResultTransformer :return: a dict mapping the key to the values :rtype: dict - ''' + """ nose_test = True if config.get('ckan.site_id') == 'test.ckan.net' else False if nose_test: return {} @@ -75,12 +75,12 @@ def __init__(self, query, hours_since_now): @staticmethod def _compute_period(hours_since_now): - ''' + """ :param hours_since_now: for how many hours back should the mixpanel call be made :type hours_since_now: int :return: a list with 2 iso date strings representing the beginning and ending of the period :rtype: list[str] - ''' + """ until_date_str = datetime.utcnow().isoformat()[:10] from_date_str = (datetime.utcnow() - timedelta(hours=hours_since_now)).isoformat()[ @@ -91,27 +91,27 @@ def _compute_period(hours_since_now): class JqlQueryExecutorForWeeksSinceNow(JqlQueryExecutor): def __init__(self, query, weeks_since, since_date): - ''' + """ :param query: :type query: str :param weeks_since: :type weeks_since: int :param since_date: :type since_date: datetime - ''' + """ super(JqlQueryExecutorForWeeksSinceNow, self).__init__(query) self.args += self._compute_period(weeks_since, since_date) @staticmethod def _compute_period(weeks_since, since_date): - ''' + """ :param weeks_since_now: for how many weeks back should the mixpanel call be made ( a week starts monday ) :type weeks_since_now: int :param since_date: :type since_date: datetime :return: a list with 2 iso date strings representing the beginning and ending of the period :rtype: list[str] - ''' + """ until_date = since_date until_date_str = until_date.isoformat()[:10] @@ -120,10 +120,39 @@ def _compute_period(weeks_since, since_date): return [from_date_str, until_date_str] +class JqlQueryExecutorForLast5Years(JqlQueryExecutor): + def __init__(self, query, org_id): + """ + :param query: + :type query: str + """ + super(JqlQueryExecutorForLast5Years, self).__init__(query) + self.args += self._compute_period() + self.args += [org_id] + + @staticmethod + def _compute_period(): + """ + :return: a list with 2 iso date strings representing the beginning and ending of the period, + since 5 years ago on January 1st until last day of previous month + :rtype: list[str] + """ + today = datetime.now(timezone.utc) + + # Calculate the date 5 years ago on January 1st + from_date = today.replace(year=today.year - 5, month=1, day=1) + from_date_str = from_date.isoformat()[:10] + + # last day of previous month + until_date = today.replace(day=1) - timedelta(days=1) + until_date_str = until_date.isoformat()[:10] + + return [from_date_str, until_date_str] + class JqlQueryExecutorForWeeksSinceNowWithGroupFiltering(JqlQueryExecutorForWeeksSinceNow): def __init__(self, query, weeks_since, since_date, group): - ''' + """ :param query: :type query: str :param weeks_since: @@ -132,7 +161,7 @@ def __init__(self, query, weeks_since, since_date, group): :type since_date: datetime :param group: :type group: MixpanelDatasetGroups - ''' + """ super(JqlQueryExecutorForWeeksSinceNowWithGroupFiltering, self).__init__(query, weeks_since, since_date) self.args.append(group) @@ -142,16 +171,45 @@ def __init__(self, key_name): self.key_name = key_name def transform(self, response): - ''' + """ :param response: the HTTP response :type response: requests.Response :return: :rtype: dict - ''' + """ return {item.get(self.key_name): item.get('value') for item in response.json()} +class MappingCustomResultTransformer(object): + # def __init__(self, key_name): + # self.key_name = key_name + + def __init__(self): + # self.key_name = key_name + pass + + def transform(self, response): + """ + + :param response: the HTTP response + :type response: requests.Response + :return: + :rtype: dict + """ + # return {item.get(self.key_name): item.get('value') for item in response.json()} + result = OrderedDict() + for item in response.json(): + if item.get('date') not in result: + result[item.get('date')] = OrderedDict() + if item.get('event_name') == 'page view': + result[item.get('date')]['pageviews_unique'] = item.get('unique_count') + result[item.get('date')]['pageviews_total'] = item.get('total_count') + if item.get('event_name') == 'resource download': + result[item.get('date')]['downloads_unique'] = item.get('unique_count') + result[item.get('date')]['downloads_total'] = item.get('total_count') + return dict(sorted(result.items())) + class MultipleValueMappingResultTransformer(MappingResultTransformer): def __init__(self, key_name, secondary_key_name): super(MultipleValueMappingResultTransformer, self).__init__(key_name) @@ -372,3 +430,11 @@ def _generate_mandatory_dates(since, weeks): mandatory_dates.insert(0, since - timedelta(weeks=i, days=since.weekday())) mandatory_values = list(map(lambda x: x.isoformat()[:10], mandatory_dates)) return mandatory_values + +def pageviews_downloads_per_organization_last_5_years(org_id): + query_executor = JqlQueryExecutorForLast5Years(jql_queries.PAGEVIEWS_AND_DOWNLOADS_PER_ORGANIZATION, org_id = org_id) + + result = query_executor.run_query(MappingCustomResultTransformer()) + # MultipleValueMappingResultTransformer('org_id', 'dataset_id')) + + return result diff --git a/ckanext-hdx_theme/ckanext/hdx_theme/util/jql_queries.py b/ckanext-hdx_theme/ckanext/hdx_theme/util/jql_queries.py index 41f54257f4..d43c015b52 100644 --- a/ckanext-hdx_theme/ckanext/hdx_theme/util/jql_queries.py +++ b/ckanext-hdx_theme/ckanext/hdx_theme/util/jql_queries.py @@ -283,3 +283,44 @@ }}); }} ''' + + +PAGEVIEWS_AND_DOWNLOADS_PER_ORGANIZATION = ''' +/* 9. pageviews and downloads by organization and unique pageviews and downloads by organization +VER 1.1 + +Used for stats download by org admins +*/ + +''' + COMMON_HEADER + \ +''' +function main() {{ + return Events({{ + from_date: "{}", + to_date: "{}", + event_selectors: [ + {{event: "resource download"}}, + {{event: "page view"}} + ] + }}) + .filter(event => event.properties["org name"] == "{}") + .groupByUser(["name", mixpanel.numeric_bucket('time', mixpanel.monthly_time_buckets)], mixpanel.reducer.count()) // unique + .map(function(r){{ + return {{ + user_id: r.key[0], + event_name: r.key[1], + date: new Date(r.key[2]).toISOString().substring(0,10), + count: r.value, + }}; + }}) + .groupBy(["event_name", "date"], [mixpanel.reducer.count(), mixpanel.reducer.sum("count")]) + .map(function(r){{ + return {{ + event_name: r.key[0], + date: new Date(r.key[1]).toISOString().substring(0,10), + unique_count: r.value[0], + total_count: r.value[1], + }}; + }}) +}} +''' diff --git a/requirements.in b/requirements.in index 411b0d4538..5facd59bf8 100644 --- a/requirements.in +++ b/requirements.in @@ -88,3 +88,4 @@ pyotp~=2.6.0 # END ckanext-security python-json-logger~=2.0.7 +openpyxl==3.1.5 diff --git a/requirements.txt b/requirements.txt index 6a4be263a7..c03842f16c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -228,6 +228,7 @@ wtforms==3.1.1 # via flask-wtf zope-interface==5.4.0 # via -r requirements.in +openpyxl==3.1.5 # The following packages are considered to be unsafe in a requirements file: # setuptools