Skip to content

Commit

Permalink
HDX-10191 org stats download as xlsx - formatting output
Browse files Browse the repository at this point in the history
  • Loading branch information
danmihaila committed Nov 5, 2024
1 parent 259d8f1 commit 29fa1d0
Showing 1 changed file with 46 additions and 26 deletions.
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
'''
"""
Created on Jan 14, 2015
@author: alexandru-m-g
'''
"""

import json
import logging
import os
import six
import openpyxl
import ckanext.hdx_search.cli.click_feature_search_command as lunr
import ckanext.hdx_theme.helpers.helpers as h
import ckanext.hdx_users.helpers.mailer as hdx_mailer
from sqlalchemy import func
import ckanext.hdx_org_group.helpers.static_lists as static_lists
Expand All @@ -32,6 +31,7 @@
import ckan.plugins.toolkit as toolkit
import ckan.lib.base as base
import ckanext.hdx_theme.util.jql as jql
from openpyxl.styles import Alignment, Font

BUCKET = str(uploader.get_storage_path()) + '/storage/uploads/group/'
abort = base.abort
Expand All @@ -48,7 +48,7 @@


def filter_and_sort_results_case_insensitive(results, sort_by, q=None, has_datasets=False):
'''
"""
:param results: list of organizations to filter/sort
:type results: list[dict]
:param sort_by:
Expand All @@ -59,7 +59,7 @@ def filter_and_sort_results_case_insensitive(results, sort_by, q=None, has_datas
:type has_datasets: bool
:return: sorted/filtered list
:rtype: list[dict]
'''
"""

filtered_results = results
if q:
Expand Down Expand Up @@ -350,9 +350,9 @@ def hdx_organization_delete(context, data_dict):


def _run_core_group_org_action(context, data_dict, core_action):
'''
"""
Runs core ckan action with lunr update
'''
"""
test = True if config.get('ckan.site_id') == 'test.ckan.net' else False
result = core_action(context, data_dict)
if not test:
Expand All @@ -368,7 +368,7 @@ def hdx_group_or_org_update(context, data_dict, is_org=False):
id = _get_or_bust(data_dict, 'id')

group = model.Group.get(id)
context["group"] = group
context['group'] = group
if group is None:
raise NotFound('Group was not found.')

Expand Down Expand Up @@ -606,7 +606,7 @@ def hdx_group_or_org_create(context, data_dict, is_org=False):
# to ensure they still work
try:
group_plugin.check_data_dict(data_dict, schema)
except TypeError as e:
except TypeError:
group_plugin.check_data_dict(data_dict)

data, errors = lib_plugins.plugin_validate(
Expand Down Expand Up @@ -668,8 +668,8 @@ def hdx_group_or_org_create(context, data_dict, is_org=False):

if not context.get('defer_commit'):
model.repo.commit()
context["group"] = group
context["id"] = group.id
context['group'] = group
context['id'] = group.id

# creator of group/org becomes an admin
# this needs to be after the repo.commit or else revisions break
Expand Down Expand Up @@ -720,9 +720,9 @@ def notify_admins(data_dict):
# for admin in data_dict.get('admins'):
hdx_mailer.mail_recipient(data_dict.get('admins'), data_dict.get('subject'), data_dict.get('message'))
except Exception as e:
log.error("Email server error: can not send email to admin users" + e.message)
log.error('Email server error: can not send email to admin users' + e.message)
return False
log.info("admin users where notified by email")
log.info('admin users where notified by email')
return True


Expand Down Expand Up @@ -777,11 +777,11 @@ def _find_last_update_for_orgs(org_names):
'model': model,
'session': model.Session
}
filter = 'organization:({}) +dataset_type:dataset'.format(' OR '.join(org_names))
fq_filter = 'organization:({}) +dataset_type:dataset'.format(' OR '.join(org_names))

data_dict = {
'q': '',
'fq': filter,
'fq': fq_filter,
'fq_list': ['{!collapse field=organization nullPolicy=expand sort="metadata_modified desc"} '],
'rows': len(org_names),
'start': 0,
Expand All @@ -803,34 +803,54 @@ def hdx_organization_type_get_value(org_type_key):
org_type_key)

def hdx_generate_organization_stats(org_dict):

# Create the workbook that will hold the organization stats sheets
wb = openpyxl.Workbook()

# Bold font style
bold_font = Font(bold=True)

# Create SheetOne with Data
sheetOne = wb.create_sheet("Downloads and Page Views")
sheet_one = wb.active
sheet_one.title = 'Downloads and Page Views'

result = jql.pageviews_downloads_per_organization_last_5_years(org_dict.get('id'))
data = [('Date', 'Page View - Unique', 'Page Views - Total', 'Resource Download - Unique', 'Resource Download - Total')]
for key, value in result.items():
data.append((key, value.get('pageviews_unique'), value.get('pageviews_total'), value.get('downloads_unique'), value.get('downloads_total')))
for item in data:
sheetOne.append(item)

for row_num, row_data in enumerate(data, start=1):
for col_num, cell_value in enumerate(row_data, start=1):
cell = sheet_one.cell(row=row_num, column=col_num, value=cell_value)
if row_num == 1:
cell.font = bold_font # Apply bold to header row
cell.alignment = Alignment(horizontal='center', vertical='center')

# Set the width of the columns for the first sheet
for col_letter in ['A', 'B', 'C', 'D', 'E']:
sheet_one.column_dimensions[col_letter].width = 25

# Create SheetTwo with Data
sheetTwo = wb.create_sheet("README")
sheet_two = wb.create_sheet(title='README')

data = [('Overview', 'This spreadsheet contains the number of downloads of files and page views of datasets of the organization, tracked monthly over the past 4 years.'),
('Data Source', 'The data has been sourced from the analytics platform Mixpanel.'),
('Contents', 'The spreadsheet includes the following information: 1. Page Views: Total page views by month. 2. Downloads: Total number of downloads by month.'),
('Data Source', 'The data has been sourced from the analytics platform Mixpanel [https://mixpanel.com/].'),
('Contents', 'The spreadsheet includes the following information: \n1. Page Views: Total page views by month. \n2. Downloads: Total number of downloads by month.'),
('Caveats', 'To ensure accurate data representation, we have excluded as much bot traffic as possible.'),
('Update Frequency', 'The spreadsheet is refreshed automatically on the first day of each month.'),
('Contact', 'For additional inquiries, please contact us at [email protected]')]

for item in data:
sheetTwo.append(item)

# Remove default Sheet
wb.remove(wb['Sheet'])
# Add data to the worksheet
for row_num, (header, text) in enumerate(data, start=1):
sheet_two[f'A{row_num}'] = header
sheet_two[f'A{row_num}'].font = bold_font # Apply bold to the first column
sheet_two[f'B{row_num}'] = text
sheet_two[f'A{row_num}'].alignment = Alignment(horizontal='left', vertical='top')
sheet_two[f'B{row_num}'].alignment = Alignment(horizontal='left', vertical='top', wrap_text=True)

# Set the width of the columns
sheet_two.column_dimensions['A'].width = 20
sheet_two.column_dimensions['B'].width = 100

# Iterate the loop to read the cell values
with NamedTemporaryFile() as tmp:
Expand Down

0 comments on commit 29fa1d0

Please sign in to comment.