From 259d8f1e8a5e2cad3f6ed5a9faef235cef87a806 Mon Sep 17 00:00:00 2001 From: Dan Mihaila Date: Thu, 31 Oct 2024 13:05:43 +0200 Subject: [PATCH 1/2] HDX-10191 org stats download as xlsx --- .../helpers/organization_helper.py | 46 +++- .../ckanext/hdx_org_group/plugin.py | 2 +- .../hdx_org_group/views/organization.py | 74 +++++-- .../templates/organization/stats.html | 204 ++++++++++-------- .../ckanext/hdx_theme/util/jql.py | 92 ++++++-- .../ckanext/hdx_theme/util/jql_queries.py | 41 ++++ requirements.in | 1 + requirements.txt | 1 + 8 files changed, 328 insertions(+), 133 deletions(-) diff --git a/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py b/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py index bf4c78a64c..7b5f59e306 100644 --- a/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py +++ b/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py @@ -8,13 +8,14 @@ import logging import os import six - +import openpyxl import ckanext.hdx_search.cli.click_feature_search_command as lunr import ckanext.hdx_theme.helpers.helpers as h import ckanext.hdx_users.helpers.mailer as hdx_mailer from sqlalchemy import func import ckanext.hdx_org_group.helpers.static_lists as static_lists - +from flask import make_response +from tempfile import NamedTemporaryFile import ckan.lib.dictization as dictization import ckan.lib.dictization.model_dictize as model_dictize import ckan.lib.dictization.model_save as model_save @@ -30,6 +31,7 @@ from ckan.common import _, c, config import ckan.plugins.toolkit as toolkit import ckan.lib.base as base +import ckanext.hdx_theme.util.jql as jql BUCKET = str(uploader.get_storage_path()) + '/storage/uploads/group/' abort = base.abort @@ -799,3 +801,43 @@ def org_add_last_updated_field(displayed_orgs): def hdx_organization_type_get_value(org_type_key): return next((org_type[0] for org_type in static_lists.ORGANIZATION_TYPE_LIST if org_type[1] == org_type_key), org_type_key) + +def hdx_generate_organization_stats(org_dict): + # Define variable to load the dataframe + wb = openpyxl.Workbook() + + # Create SheetOne with Data + sheetOne = wb.create_sheet("Downloads and Page Views") + + result = jql.pageviews_downloads_per_organization_last_5_years(org_dict.get('id')) + data = [('Date', 'Page View - Unique', 'Page Views - Total', 'Resource Download - Unique', 'Resource Download - Total')] + for key, value in result.items(): + data.append((key, value.get('pageviews_unique'), value.get('pageviews_total'), value.get('downloads_unique'), value.get('downloads_total'))) + for item in data: + sheetOne.append(item) + + # Create SheetTwo with Data + sheetTwo = wb.create_sheet("README") + + data = [('Overview', 'This spreadsheet contains the number of downloads of files and page views of datasets of the organization, tracked monthly over the past 4 years.'), + ('Data Source', 'The data has been sourced from the analytics platform Mixpanel.'), + ('Contents', 'The spreadsheet includes the following information: 1. Page Views: Total page views by month. 2. Downloads: Total number of downloads by month.'), + ('Caveats', 'To ensure accurate data representation, we have excluded as much bot traffic as possible.'), + ('Update Frequency', 'The spreadsheet is refreshed automatically on the first day of each month.'), + ('Contact', 'For additional inquiries, please contact us at hdx@un.org')] + + for item in data: + sheetTwo.append(item) + + # Remove default Sheet + wb.remove(wb['Sheet']) + + # Iterate the loop to read the cell values + with NamedTemporaryFile() as tmp: + wb.save(tmp) + tmp.seek(0) + output = make_response(tmp.read()) + output.headers['Content-Type'] = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + output.headers['Content-Disposition'] = f'attachment; filename="{org_dict.get("name")}_stats.xlsx"' + + return output diff --git a/ckanext-hdx_org_group/ckanext/hdx_org_group/plugin.py b/ckanext-hdx_org_group/ckanext/hdx_org_group/plugin.py index 7f83cd6c11..97d4a224b6 100644 --- a/ckanext-hdx_org_group/ckanext/hdx_org_group/plugin.py +++ b/ckanext-hdx_org_group/ckanext/hdx_org_group/plugin.py @@ -92,7 +92,7 @@ def group_types(self): # IGroupForm def setup_template_variables(self, context, data_dict): - org.new_org_template_variables(context, data_dict) + org.new_org_template_variables(data_dict) # IValidators def get_validators(self): diff --git a/ckanext-hdx_org_group/ckanext/hdx_org_group/views/organization.py b/ckanext-hdx_org_group/ckanext/hdx_org_group/views/organization.py index 01f7c613c7..2f22dd6127 100644 --- a/ckanext-hdx_org_group/ckanext/hdx_org_group/views/organization.py +++ b/ckanext-hdx_org_group/ckanext/hdx_org_group/views/organization.py @@ -2,19 +2,19 @@ from flask import Blueprint from six.moves.urllib.parse import urlencode - +from ckan.types import Context +import ckan.lib.plugins as lib_plugins import ckan.model as model import ckan.plugins.toolkit as tk -import ckan.lib.plugins as lib_plugins - import ckanext.hdx_org_group.helpers.org_meta_dao as org_meta_dao import ckanext.hdx_org_group.helpers.organization_helper as helper import ckanext.hdx_org_group.helpers.static_lists as static_lists import ckanext.hdx_theme.helpers.helpers as hdx_helpers - -from ckan.views.group import _get_group_template, CreateGroupView, EditGroupView +from ckan.views.group import CreateGroupView, EditGroupView, _get_group_template from ckanext.hdx_org_group.controller_logic.organization_read_logic import OrgReadLogic -from ckanext.hdx_org_group.controller_logic.organization_stats_logic import OrganizationStatsLogic +from ckanext.hdx_org_group.controller_logic.organization_stats_logic import ( + OrganizationStatsLogic, +) from ckanext.hdx_org_group.views.light_organization import _index from ckanext.hdx_theme.util.light_redirect import check_redirect_needed from ckanext.hdx_theme.util.mail import NoRecipientException @@ -46,7 +46,7 @@ def index(): @check_redirect_needed def read(id): - context = { + context: Context = { 'model': model, 'session': model.Session, 'for_view': True, @@ -82,19 +82,19 @@ def read(id): } template_file = _get_group_template('read_template', 'organization') return render(template_file, template_data) - except NotFound as e: + except NotFound: abort(404, _('Page not found')) - except NotAuthorized as e: + except NotAuthorized: abort(403, _('Not authorized to see this page')) def _generate_template_data_for_custom_org(org_read_logic): - ''' + """ :param org_read_logic: :type org_read_logic: OrgReadLogic :returns: the template data dict :rtype: dict - ''' + """ org_meta = org_read_logic.org_meta org_dict = org_meta.org_dict org_id = org_dict['id'] @@ -147,7 +147,7 @@ def _generate_template_data_for_custom_org(org_read_logic): }, - # This is hear for compatibility with the custom_org_header.html template, which is still + # This is here for compatibility with the custom_org_header.html template, which is still # used from pylon controllers 'org_meta': { 'id': org_dict['name'], @@ -177,7 +177,7 @@ def _generate_template_data_for_custom_org(org_read_logic): def request_new(): - context = {'model': model, 'session': model.Session, 'user': g.user} + context: Context = {'model': model, 'session': model.Session, 'user': g.user} try: check_access('hdx_send_new_org_request', context) except NotAuthorized: @@ -231,7 +231,7 @@ def _process_new_org_request(): if hdx_org_type_code: hdx_org_type = next( - (type[0] for type in static_lists.ORGANIZATION_TYPE_LIST if type[1] == hdx_org_type_code), '-1') + (_type[0] for _type in static_lists.ORGANIZATION_TYPE_LIST if _type[1] == hdx_org_type_code), '-1') data = { 'name': request.form.get('name', ''), @@ -271,7 +271,7 @@ def _transform_dict_for_mailing(data_dict): return data_dict_for_mailing -def new_org_template_variables(context, data_dict): +def new_org_template_variables(data_dict): data_dict['hdx_org_type_list'] = [{'value': '-1', 'text': _('-- Please select --')}] + \ [{'value': t[1], 'text': _(t[0])} for t in static_lists.ORGANIZATION_TYPE_LIST] @@ -296,7 +296,7 @@ def stats(id): def restore(id): - context = { + context: Context = { 'model': model, 'session': model.Session, 'user': g.user, 'for_edit': True, @@ -304,7 +304,7 @@ def restore(id): try: check_access('organization_patch', context, {'id': id}) - except NotAuthorized as e: + except NotAuthorized: return abort(403, _('Unauthorized to restore this organization')) try: @@ -329,7 +329,7 @@ def activity(id): def activity_offset(id, offset=0): - ''' + """ Modified core functionality to use the new OrgMetaDao class for fetching information needed on all org-related pages. @@ -340,7 +340,7 @@ def activity_offset(id, offset=0): :param offset: :type offset: int :return: - ''' + """ org_meta = org_meta_dao.OrgMetaDao(id, g.user, g.userobj) org_meta.fetch_all() org_dict = org_meta.org_dict @@ -350,26 +350,53 @@ def activity_offset(id, offset=0): # Add the group's activity stream (already rendered to HTML) to the # template context for the group/read.html template to retrieve later. - context = {'model': model, 'session': model.Session, + context: Context = {'model': model, 'session': model.Session, 'user': g.user, 'for_view': True} group_activity_stream = get_action('organization_activity_list')( context, {'id': org_dict['id'], 'offset': offset}) - - extra_vars = { 'org_dict': org_dict, 'org_meta': org_meta, 'group_activity_stream': group_activity_stream, } - template = None if org_meta.is_custom: template = 'organization/custom_activity_stream.html' else: template = lib_plugins.lookup_group_plugin('organization').activity_template() return render(template, extra_vars) +def organization_stats(id): + """ + Handles downloading .xlsx organization stats + + :returns: xlsx + """ + + context: Context = { + 'model': model, + 'session': model.Session, + 'user': g.user or g.author, + 'auth_user_obj': g.userobj + } + + try: + check_access('organization_update', context, {'id': id}) + except NotAuthorized: + return abort(403, _('Unauthorized to restore this organization')) + + # check if organization exists + try: + org_dict = get_action('organization_show')(context, {'id': id}) + output = helper.hdx_generate_organization_stats(org_dict) + return output + + except NotFound: + return abort(404, _('Organization not found')) + except NotAuthorized: + return abort(404, _('Organization not found')) + hdx_org.add_url_rule(u'/', view_func=index, strict_slashes=False) hdx_org.add_url_rule( @@ -387,3 +414,4 @@ def activity_offset(id, offset=0): hdx_org.add_url_rule(u'/restore/', view_func=restore, methods=[u'POST']) hdx_org.add_url_rule(u'/activity/', view_func=activity) hdx_org.add_url_rule(u'/activity//', view_func=activity_offset, defaults={'offset': 0}) +hdx_org.add_url_rule(u'//download_stats', view_func=organization_stats) diff --git a/ckanext-hdx_theme/ckanext/hdx_theme/templates/organization/stats.html b/ckanext-hdx_theme/ckanext/hdx_theme/templates/organization/stats.html index 1dcd4804ba..75439e095e 100644 --- a/ckanext-hdx_theme/ckanext/hdx_theme/templates/organization/stats.html +++ b/ckanext-hdx_theme/ckanext/hdx_theme/templates/organization/stats.html @@ -3,124 +3,140 @@ {% block subtitle %}{{ _('Stats') }} - {{ org_meta.org_dict.title or org_meta.name }}{% endblock %} {% block primary_content_inner %} - - - - + + + + -

{% block page_heading %}{{ _('Stats') }}{% endblock %}

-
-
-
- Number of Downloads (last 24 weeks) - -
?
-
-
+

{% block page_heading %}{{ _('Stats') }}{% endblock %}

+
+
+ {% set margin_top="" %} + {% if h.check_access('organization_update', {'id': org_meta.org_dict.id}) %} +
+

Export more detailed usage statistics [?]: XLS

-
-
-
-
-
-
-
- Total number of downloads for this organisation's content -
-
-
- + {% set margin_top="mt0" %} + {% endif %} +
+ Number of Downloads (last 24 weeks) + +
?
+
+
+
+
+
+
+
+
+
+
+ Total number of downloads for this organisation's content +
+
+
+ {{ data.stats_total_downloads.formatted_value }} - {{ data.stats_total_downloads.units }} -
-
-
+ {{ data.stats_total_downloads.units }}
+
+
+
-
-
-
-
-
- Downloads and Total Page Views (last 24 weeks) - -
?
-
-
-
-
-
-
-
+
+
+
+
+
+ Downloads and Total Page Views (last 24 weeks) + +
?
+
+
-
-
- Users (last 30 days) - -
?
-
-
-
-
-
- Unique visitors who downloaded at least one dataset from this org -
-
-
- +
+
+
+
+
+
+
+ Users (last 30 days) + +
?
+
+
+
+
+
+ Unique visitors who downloaded at least one dataset from this org +
+
+
+ {{ data.stats_downloaders.formatted_value }} - {{ data.stats_downloaders.units }} users -
-
-
+ {{ data.stats_downloaders.units }} users
+
+
+
-
-
-
- Unique visitors to this organisation's content -
-
-
- +
+
+
+ Unique visitors to this organisation's content +
+
+
+ {{ data.stats_viewers.formatted_value }} - {{ data.stats_viewers.units }} users -
-
-
+ {{ data.stats_viewers.units }} users
+
+
-
-
-
-
+
+
+
+
+
{% endblock %} {% block primary_content_module_margin %}{% endblock %} {% block styles %} - {{ super() }} -{# {% resource 'hdx_theme/dashboard.css' %}#} -{# {% resource 'hdx_theme/hdx_dashboard.js' %}#} -{# {% resource 'hdx_theme/crisis/topline.css' %}#} -{# {% resource 'hdx_theme/organization/stats.js' %}#} -{# {% resource 'hdx_theme/organization/stats.css' %}#} -{# {% resource 'hdx_theme/d3' %}#} -{# {% resource 'hdx_theme/c3' %}#} -{# {% resource 'hdx_theme/datasets/stats-chart.js' %}#} + {{ super() }} + {# {% resource 'hdx_theme/dashboard.css' %}#} + {# {% resource 'hdx_theme/hdx_dashboard.js' %}#} + {# {% resource 'hdx_theme/crisis/topline.css' %}#} + {# {% resource 'hdx_theme/organization/stats.js' %}#} + {# {% resource 'hdx_theme/organization/stats.css' %}#} + {# {% resource 'hdx_theme/d3' %}#} + {# {% resource 'hdx_theme/c3' %}#} + {# {% resource 'hdx_theme/datasets/stats-chart.js' %}#} {% asset 'hdx_theme/base-dashboard-styles' %} {% asset 'hdx_theme/charting-scripts' %} diff --git a/ckanext-hdx_theme/ckanext/hdx_theme/util/jql.py b/ckanext-hdx_theme/ckanext/hdx_theme/util/jql.py index f6268d9bf2..02a74a04da 100644 --- a/ckanext-hdx_theme/ckanext/hdx_theme/util/jql.py +++ b/ckanext-hdx_theme/ckanext/hdx_theme/util/jql.py @@ -2,7 +2,7 @@ import logging from dogpile.cache import make_region -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from collections import OrderedDict from functools import wraps @@ -53,12 +53,12 @@ def _compile_query(self): } def _run_query(self, transformer): - ''' + """ :param transformer: transforms the request result :type transformer: MappingResultTransformer :return: a dict mapping the key to the values :rtype: dict - ''' + """ nose_test = True if config.get('ckan.site_id') == 'test.ckan.net' else False if nose_test: return {} @@ -75,12 +75,12 @@ def __init__(self, query, hours_since_now): @staticmethod def _compute_period(hours_since_now): - ''' + """ :param hours_since_now: for how many hours back should the mixpanel call be made :type hours_since_now: int :return: a list with 2 iso date strings representing the beginning and ending of the period :rtype: list[str] - ''' + """ until_date_str = datetime.utcnow().isoformat()[:10] from_date_str = (datetime.utcnow() - timedelta(hours=hours_since_now)).isoformat()[ @@ -91,27 +91,27 @@ def _compute_period(hours_since_now): class JqlQueryExecutorForWeeksSinceNow(JqlQueryExecutor): def __init__(self, query, weeks_since, since_date): - ''' + """ :param query: :type query: str :param weeks_since: :type weeks_since: int :param since_date: :type since_date: datetime - ''' + """ super(JqlQueryExecutorForWeeksSinceNow, self).__init__(query) self.args += self._compute_period(weeks_since, since_date) @staticmethod def _compute_period(weeks_since, since_date): - ''' + """ :param weeks_since_now: for how many weeks back should the mixpanel call be made ( a week starts monday ) :type weeks_since_now: int :param since_date: :type since_date: datetime :return: a list with 2 iso date strings representing the beginning and ending of the period :rtype: list[str] - ''' + """ until_date = since_date until_date_str = until_date.isoformat()[:10] @@ -120,10 +120,39 @@ def _compute_period(weeks_since, since_date): return [from_date_str, until_date_str] +class JqlQueryExecutorForLast5Years(JqlQueryExecutor): + def __init__(self, query, org_id): + """ + :param query: + :type query: str + """ + super(JqlQueryExecutorForLast5Years, self).__init__(query) + self.args += self._compute_period() + self.args += [org_id] + + @staticmethod + def _compute_period(): + """ + :return: a list with 2 iso date strings representing the beginning and ending of the period, + since 5 years ago on January 1st until last day of previous month + :rtype: list[str] + """ + today = datetime.now(timezone.utc) + + # Calculate the date 5 years ago on January 1st + from_date = today.replace(year=today.year - 5, month=1, day=1) + from_date_str = from_date.isoformat()[:10] + + # last day of previous month + until_date = today.replace(day=1) - timedelta(days=1) + until_date_str = until_date.isoformat()[:10] + + return [from_date_str, until_date_str] + class JqlQueryExecutorForWeeksSinceNowWithGroupFiltering(JqlQueryExecutorForWeeksSinceNow): def __init__(self, query, weeks_since, since_date, group): - ''' + """ :param query: :type query: str :param weeks_since: @@ -132,7 +161,7 @@ def __init__(self, query, weeks_since, since_date, group): :type since_date: datetime :param group: :type group: MixpanelDatasetGroups - ''' + """ super(JqlQueryExecutorForWeeksSinceNowWithGroupFiltering, self).__init__(query, weeks_since, since_date) self.args.append(group) @@ -142,16 +171,45 @@ def __init__(self, key_name): self.key_name = key_name def transform(self, response): - ''' + """ :param response: the HTTP response :type response: requests.Response :return: :rtype: dict - ''' + """ return {item.get(self.key_name): item.get('value') for item in response.json()} +class MappingCustomResultTransformer(object): + # def __init__(self, key_name): + # self.key_name = key_name + + def __init__(self): + # self.key_name = key_name + pass + + def transform(self, response): + """ + + :param response: the HTTP response + :type response: requests.Response + :return: + :rtype: dict + """ + # return {item.get(self.key_name): item.get('value') for item in response.json()} + result = OrderedDict() + for item in response.json(): + if item.get('date') not in result: + result[item.get('date')] = OrderedDict() + if item.get('event_name') == 'page view': + result[item.get('date')]['pageviews_unique'] = item.get('unique_count') + result[item.get('date')]['pageviews_total'] = item.get('total_count') + if item.get('event_name') == 'resource download': + result[item.get('date')]['downloads_unique'] = item.get('unique_count') + result[item.get('date')]['downloads_total'] = item.get('total_count') + return dict(sorted(result.items())) + class MultipleValueMappingResultTransformer(MappingResultTransformer): def __init__(self, key_name, secondary_key_name): super(MultipleValueMappingResultTransformer, self).__init__(key_name) @@ -372,3 +430,11 @@ def _generate_mandatory_dates(since, weeks): mandatory_dates.insert(0, since - timedelta(weeks=i, days=since.weekday())) mandatory_values = list(map(lambda x: x.isoformat()[:10], mandatory_dates)) return mandatory_values + +def pageviews_downloads_per_organization_last_5_years(org_id): + query_executor = JqlQueryExecutorForLast5Years(jql_queries.PAGEVIEWS_AND_DOWNLOADS_PER_ORGANIZATION, org_id = org_id) + + result = query_executor.run_query(MappingCustomResultTransformer()) + # MultipleValueMappingResultTransformer('org_id', 'dataset_id')) + + return result diff --git a/ckanext-hdx_theme/ckanext/hdx_theme/util/jql_queries.py b/ckanext-hdx_theme/ckanext/hdx_theme/util/jql_queries.py index 41f54257f4..d43c015b52 100644 --- a/ckanext-hdx_theme/ckanext/hdx_theme/util/jql_queries.py +++ b/ckanext-hdx_theme/ckanext/hdx_theme/util/jql_queries.py @@ -283,3 +283,44 @@ }}); }} ''' + + +PAGEVIEWS_AND_DOWNLOADS_PER_ORGANIZATION = ''' +/* 9. pageviews and downloads by organization and unique pageviews and downloads by organization +VER 1.1 + +Used for stats download by org admins +*/ + +''' + COMMON_HEADER + \ +''' +function main() {{ + return Events({{ + from_date: "{}", + to_date: "{}", + event_selectors: [ + {{event: "resource download"}}, + {{event: "page view"}} + ] + }}) + .filter(event => event.properties["org name"] == "{}") + .groupByUser(["name", mixpanel.numeric_bucket('time', mixpanel.monthly_time_buckets)], mixpanel.reducer.count()) // unique + .map(function(r){{ + return {{ + user_id: r.key[0], + event_name: r.key[1], + date: new Date(r.key[2]).toISOString().substring(0,10), + count: r.value, + }}; + }}) + .groupBy(["event_name", "date"], [mixpanel.reducer.count(), mixpanel.reducer.sum("count")]) + .map(function(r){{ + return {{ + event_name: r.key[0], + date: new Date(r.key[1]).toISOString().substring(0,10), + unique_count: r.value[0], + total_count: r.value[1], + }}; + }}) +}} +''' diff --git a/requirements.in b/requirements.in index 411b0d4538..5facd59bf8 100644 --- a/requirements.in +++ b/requirements.in @@ -88,3 +88,4 @@ pyotp~=2.6.0 # END ckanext-security python-json-logger~=2.0.7 +openpyxl==3.1.5 diff --git a/requirements.txt b/requirements.txt index 6a4be263a7..c03842f16c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -228,6 +228,7 @@ wtforms==3.1.1 # via flask-wtf zope-interface==5.4.0 # via -r requirements.in +openpyxl==3.1.5 # The following packages are considered to be unsafe in a requirements file: # setuptools From 29fa1d0a6b87a8dbbdf744e9751c500b2fde50bc Mon Sep 17 00:00:00 2001 From: Dan Mihaila Date: Tue, 5 Nov 2024 15:37:28 +0200 Subject: [PATCH 2/2] HDX-10191 org stats download as xlsx - formatting output --- .../helpers/organization_helper.py | 72 ++++++++++++------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py b/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py index 7b5f59e306..38f07acf7e 100644 --- a/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py +++ b/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py @@ -1,8 +1,8 @@ -''' +""" Created on Jan 14, 2015 @author: alexandru-m-g -''' +""" import json import logging @@ -10,7 +10,6 @@ import six import openpyxl import ckanext.hdx_search.cli.click_feature_search_command as lunr -import ckanext.hdx_theme.helpers.helpers as h import ckanext.hdx_users.helpers.mailer as hdx_mailer from sqlalchemy import func import ckanext.hdx_org_group.helpers.static_lists as static_lists @@ -32,6 +31,7 @@ import ckan.plugins.toolkit as toolkit import ckan.lib.base as base import ckanext.hdx_theme.util.jql as jql +from openpyxl.styles import Alignment, Font BUCKET = str(uploader.get_storage_path()) + '/storage/uploads/group/' abort = base.abort @@ -48,7 +48,7 @@ def filter_and_sort_results_case_insensitive(results, sort_by, q=None, has_datasets=False): - ''' + """ :param results: list of organizations to filter/sort :type results: list[dict] :param sort_by: @@ -59,7 +59,7 @@ def filter_and_sort_results_case_insensitive(results, sort_by, q=None, has_datas :type has_datasets: bool :return: sorted/filtered list :rtype: list[dict] - ''' + """ filtered_results = results if q: @@ -350,9 +350,9 @@ def hdx_organization_delete(context, data_dict): def _run_core_group_org_action(context, data_dict, core_action): - ''' + """ Runs core ckan action with lunr update - ''' + """ test = True if config.get('ckan.site_id') == 'test.ckan.net' else False result = core_action(context, data_dict) if not test: @@ -368,7 +368,7 @@ def hdx_group_or_org_update(context, data_dict, is_org=False): id = _get_or_bust(data_dict, 'id') group = model.Group.get(id) - context["group"] = group + context['group'] = group if group is None: raise NotFound('Group was not found.') @@ -606,7 +606,7 @@ def hdx_group_or_org_create(context, data_dict, is_org=False): # to ensure they still work try: group_plugin.check_data_dict(data_dict, schema) - except TypeError as e: + except TypeError: group_plugin.check_data_dict(data_dict) data, errors = lib_plugins.plugin_validate( @@ -668,8 +668,8 @@ def hdx_group_or_org_create(context, data_dict, is_org=False): if not context.get('defer_commit'): model.repo.commit() - context["group"] = group - context["id"] = group.id + context['group'] = group + context['id'] = group.id # creator of group/org becomes an admin # this needs to be after the repo.commit or else revisions break @@ -720,9 +720,9 @@ def notify_admins(data_dict): # for admin in data_dict.get('admins'): hdx_mailer.mail_recipient(data_dict.get('admins'), data_dict.get('subject'), data_dict.get('message')) except Exception as e: - log.error("Email server error: can not send email to admin users" + e.message) + log.error('Email server error: can not send email to admin users' + e.message) return False - log.info("admin users where notified by email") + log.info('admin users where notified by email') return True @@ -777,11 +777,11 @@ def _find_last_update_for_orgs(org_names): 'model': model, 'session': model.Session } - filter = 'organization:({}) +dataset_type:dataset'.format(' OR '.join(org_names)) + fq_filter = 'organization:({}) +dataset_type:dataset'.format(' OR '.join(org_names)) data_dict = { 'q': '', - 'fq': filter, + 'fq': fq_filter, 'fq_list': ['{!collapse field=organization nullPolicy=expand sort="metadata_modified desc"} '], 'rows': len(org_names), 'start': 0, @@ -803,34 +803,54 @@ def hdx_organization_type_get_value(org_type_key): org_type_key) def hdx_generate_organization_stats(org_dict): + # Define variable to load the dataframe wb = openpyxl.Workbook() + # Bold font style + bold_font = Font(bold=True) + # Create SheetOne with Data - sheetOne = wb.create_sheet("Downloads and Page Views") + sheet_one = wb.active + sheet_one.title = 'Downloads and Page Views' result = jql.pageviews_downloads_per_organization_last_5_years(org_dict.get('id')) data = [('Date', 'Page View - Unique', 'Page Views - Total', 'Resource Download - Unique', 'Resource Download - Total')] for key, value in result.items(): data.append((key, value.get('pageviews_unique'), value.get('pageviews_total'), value.get('downloads_unique'), value.get('downloads_total'))) - for item in data: - sheetOne.append(item) + + for row_num, row_data in enumerate(data, start=1): + for col_num, cell_value in enumerate(row_data, start=1): + cell = sheet_one.cell(row=row_num, column=col_num, value=cell_value) + if row_num == 1: + cell.font = bold_font # Apply bold to header row + cell.alignment = Alignment(horizontal='center', vertical='center') + + # Set the width of the columns for the second sheet + for col_letter in ['A', 'B', 'C', 'D', 'E']: + sheet_one.column_dimensions[col_letter].width = 25 # Create SheetTwo with Data - sheetTwo = wb.create_sheet("README") + sheet_two = wb.create_sheet(title='README') data = [('Overview', 'This spreadsheet contains the number of downloads of files and page views of datasets of the organization, tracked monthly over the past 4 years.'), - ('Data Source', 'The data has been sourced from the analytics platform Mixpanel.'), - ('Contents', 'The spreadsheet includes the following information: 1. Page Views: Total page views by month. 2. Downloads: Total number of downloads by month.'), + ('Data Source', 'The data has been sourced from the analytics platform Mixpanel [https://mixpanel.com/].'), + ('Contents', 'The spreadsheet includes the following information: \n1. Page Views: Total page views by month. \n2. Downloads: Total number of downloads by month.'), ('Caveats', 'To ensure accurate data representation, we have excluded as much bot traffic as possible.'), ('Update Frequency', 'The spreadsheet is refreshed automatically on the first day of each month.'), ('Contact', 'For additional inquiries, please contact us at hdx@un.org')] - for item in data: - sheetTwo.append(item) - - # Remove default Sheet - wb.remove(wb['Sheet']) + # Add data to the worksheet + for row_num, (header, text) in enumerate(data, start=1): + sheet_two[f'A{row_num}'] = header + sheet_two[f'A{row_num}'].font = bold_font # Apply bold to the first column + sheet_two[f'B{row_num}'] = text + sheet_two[f'A{row_num}'].alignment = Alignment(horizontal='left', vertical='top') + sheet_two[f'B{row_num}'].alignment = Alignment(horizontal='left', vertical='top', wrap_text=True) + + # Set the width of the columns + sheet_two.column_dimensions['A'].width = 20 + sheet_two.column_dimensions['B'].width = 100 # Iterate the loop to read the cell values with NamedTemporaryFile() as tmp: