HDX-10191 org stats download as xlsx - formatting output

OCHA-DAP · Nov 5, 2024 · 29fa1d0 · 29fa1d0
1 parent 259d8f1
commit 29fa1d0
Showing 1 changed file with 46 additions and 26 deletions.
diff --git a/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py b/ckanext-hdx_org_group/ckanext/hdx_org_group/helpers/organization_helper.py
@@ -1,16 +1,15 @@
-'''
+"""
 Created on Jan 14, 2015
 
 @author: alexandru-m-g
-'''
+"""
 
 import json
 import logging
 import os
 import six
 import openpyxl
 import ckanext.hdx_search.cli.click_feature_search_command as lunr
-import ckanext.hdx_theme.helpers.helpers as h
 import ckanext.hdx_users.helpers.mailer as hdx_mailer
 from sqlalchemy import func
 import ckanext.hdx_org_group.helpers.static_lists as static_lists
@@ -32,6 +31,7 @@
 import ckan.plugins.toolkit as toolkit
 import ckan.lib.base as base
 import ckanext.hdx_theme.util.jql as jql
+from openpyxl.styles import Alignment, Font
 
 BUCKET = str(uploader.get_storage_path()) + '/storage/uploads/group/'
 abort = base.abort
@@ -48,7 +48,7 @@
 
 
 def filter_and_sort_results_case_insensitive(results, sort_by, q=None, has_datasets=False):
-    '''
+    """
     :param results: list of organizations to filter/sort
     :type results: list[dict]
     :param sort_by:
@@ -59,7 +59,7 @@ def filter_and_sort_results_case_insensitive(results, sort_by, q=None, has_datas
     :type has_datasets: bool
     :return: sorted/filtered list
     :rtype: list[dict]
-    '''
+    """
 
     filtered_results = results
     if q:
@@ -350,9 +350,9 @@ def hdx_organization_delete(context, data_dict):
 
 
 def _run_core_group_org_action(context, data_dict, core_action):
-    '''
+    """
     Runs core ckan action with lunr update
-    '''
+    """
     test = True if config.get('ckan.site_id') == 'test.ckan.net' else False
     result = core_action(context, data_dict)
     if not test:
@@ -368,7 +368,7 @@ def hdx_group_or_org_update(context, data_dict, is_org=False):
     id = _get_or_bust(data_dict, 'id')
 
     group = model.Group.get(id)
-    context["group"] = group
+    context['group'] = group
     if group is None:
         raise NotFound('Group was not found.')
 
@@ -606,7 +606,7 @@ def hdx_group_or_org_create(context, data_dict, is_org=False):
         # to ensure they still work
         try:
             group_plugin.check_data_dict(data_dict, schema)
-        except TypeError as e:
+        except TypeError:
             group_plugin.check_data_dict(data_dict)
 
     data, errors = lib_plugins.plugin_validate(
@@ -668,8 +668,8 @@ def hdx_group_or_org_create(context, data_dict, is_org=False):
 
     if not context.get('defer_commit'):
         model.repo.commit()
-    context["group"] = group
-    context["id"] = group.id
+    context['group'] = group
+    context['id'] = group.id
 
     # creator of group/org becomes an admin
     # this needs to be after the repo.commit or else revisions break
@@ -720,9 +720,9 @@ def notify_admins(data_dict):
             # for admin in data_dict.get('admins'):
             hdx_mailer.mail_recipient(data_dict.get('admins'), data_dict.get('subject'), data_dict.get('message'))
     except Exception as e:
-        log.error("Email server error: can not send email to admin users" + e.message)
+        log.error('Email server error: can not send email to admin users' + e.message)
         return False
-    log.info("admin users where notified by email")
+    log.info('admin users where notified by email')
     return True
 
 
@@ -777,11 +777,11 @@ def _find_last_update_for_orgs(org_names):
             'model': model,
             'session': model.Session
         }
-        filter = 'organization:({}) +dataset_type:dataset'.format(' OR '.join(org_names))
+        fq_filter = 'organization:({}) +dataset_type:dataset'.format(' OR '.join(org_names))
 
         data_dict = {
             'q': '',
-            'fq': filter,
+            'fq': fq_filter,
             'fq_list': ['{!collapse field=organization nullPolicy=expand sort="metadata_modified desc"} '],
             'rows': len(org_names),
             'start': 0,
@@ -803,34 +803,54 @@ def hdx_organization_type_get_value(org_type_key):
                 org_type_key)
 
 def hdx_generate_organization_stats(org_dict):
+
     # Define variable to load the dataframe
     wb = openpyxl.Workbook()
 
+    # Bold font style
+    bold_font = Font(bold=True)
+
     # Create SheetOne with Data
-    sheetOne = wb.create_sheet("Downloads and Page Views")
+    sheet_one = wb.active
+    sheet_one.title = 'Downloads and Page Views'
 
     result = jql.pageviews_downloads_per_organization_last_5_years(org_dict.get('id'))
     data = [('Date', 'Page View - Unique', 'Page Views - Total', 'Resource Download - Unique', 'Resource Download - Total')]
     for key, value in result.items():
         data.append((key, value.get('pageviews_unique'), value.get('pageviews_total'), value.get('downloads_unique'), value.get('downloads_total')))
-    for item in data:
-        sheetOne.append(item)
+
+    for row_num, row_data in enumerate(data, start=1):
+        for col_num, cell_value in enumerate(row_data, start=1):
+            cell = sheet_one.cell(row=row_num, column=col_num, value=cell_value)
+            if row_num == 1:
+                cell.font = bold_font  # Apply bold to header row
+            cell.alignment = Alignment(horizontal='center', vertical='center')
+
+    # Set the width of the columns for the first sheet
+    for col_letter in ['A', 'B', 'C', 'D', 'E']:
+        sheet_one.column_dimensions[col_letter].width = 25
 
     # Create SheetTwo with Data
-    sheetTwo = wb.create_sheet("README")
+    sheet_two = wb.create_sheet(title='README')
 
     data = [('Overview', 'This spreadsheet contains the number of downloads of files and page views of datasets of the organization, tracked monthly over the past 4 years.'),
-            ('Data Source', 'The data has been sourced from the analytics platform Mixpanel.'),
-            ('Contents', 'The spreadsheet includes the following information: 1. Page Views: Total page views by month. 2. Downloads: Total number of downloads by month.'),
+            ('Data Source', 'The data has been sourced from the analytics platform Mixpanel [https://mixpanel.com/].'),
+            ('Contents', 'The spreadsheet includes the following information: \n1. Page Views: Total page views by month. \n2. Downloads: Total number of downloads by month.'),
             ('Caveats', 'To ensure accurate data representation, we have excluded as much bot traffic as possible.'),
             ('Update Frequency', 'The spreadsheet is refreshed automatically on the first day of each month.'),
             ('Contact', 'For additional inquiries, please contact us at [email protected]')]
 
-    for item in data:
-        sheetTwo.append(item)
-
-    # Remove default Sheet
-    wb.remove(wb['Sheet'])
+    # Add data to the worksheet
+    for row_num, (header, text) in enumerate(data, start=1):
+        sheet_two[f'A{row_num}'] = header
+        sheet_two[f'A{row_num}'].font = bold_font  # Apply bold to the first column
+        sheet_two[f'B{row_num}'] = text
+        sheet_two[f'A{row_num}'].alignment = Alignment(horizontal='left', vertical='top')
+        sheet_two[f'B{row_num}'].alignment = Alignment(horizontal='left', vertical='top', wrap_text=True)
+
+    # Set the width of the columns
+    sheet_two.column_dimensions['A'].width = 20
+    sheet_two.column_dimensions['B'].width = 100
 
     # Iterate the loop to read the cell values
     with NamedTemporaryFile() as tmp: