Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ancestor information to summary and session log exports #12782

Merged
merged 6 commits into from
Nov 6, 2024
Merged
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 81 additions & 17 deletions kolibri/core/logger/csv_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import math
import os
from collections import OrderedDict
from functools import partial

from dateutil import parser
from django.core.cache import cache
Expand All @@ -27,8 +26,39 @@
"summary": "{}_{}_content_summary_logs_from_{}_to_{}.csv",
}

CACHE_TIMEOUT = 60 * 10

def cache_channel_name(obj):

def add_content_to_cache(content_id, **kwargs):
title_key = "{content_id}_ContentNode_title".format(content_id=content_id)
ancestors_key = "{content_id}_ContentNode_ancestors".format(content_id=content_id)

cache.set(title_key, kwargs.get("title", ""), CACHE_TIMEOUT)
cache.set(ancestors_key, kwargs.get("ancestors", []), CACHE_TIMEOUT)


def get_cached_content_data(content_id):
title_key = f"{content_id}_ContentNode_title"
ancestors_key = f"{content_id}_ContentNode_ancestors"

title = cache.get(title_key)
ancestors = cache.get(ancestors_key)

if title is None or ancestors is None:
node = ContentNode.objects.filter(content_id=content_id).first()
if node:
title = node.title
ancestors = node.ancestors
else:
title = ""
ancestors = []

add_content_to_cache(content_id, title=title, ancestors=ancestors)

return title, ancestors


def get_cached_channel_name(obj):
channel_id = obj["channel_id"]
key = "{id}_ChannelMetadata_name".format(id=channel_id)
channel_name = cache.get(key)
Expand All @@ -37,27 +67,24 @@ def cache_channel_name(obj):
channel_name = ChannelMetadata.objects.get(id=channel_id)
except ChannelMetadata.DoesNotExist:
channel_name = ""
cache.set(key, channel_name, 60 * 10)
cache.set(key, channel_name, CACHE_TIMEOUT)
return channel_name


def cache_content_title(obj):
def get_cached_content_title(obj):
content_id = obj["content_id"]
key = "{id}_ContentNode_title".format(id=content_id)
title = cache.get(key)
if title is None:
node = ContentNode.objects.filter(content_id=content_id).first()
if node:
title = node.title
else:
title = ""
cache.set(key, title, 60 * 10)
title, _ = get_cached_content_data(content_id)
return title


def get_cached_ancestors(content_id):
_, ancestors = get_cached_content_data(content_id)
return ancestors


mappings = {
"channel_name": cache_channel_name,
"content_title": cache_content_title,
"channel_name": get_cached_channel_name,
"content_title": get_cached_content_title,
"time_spent": lambda x: "{:.1f}".format(round(x["time_spent"], 1)),
"progress": lambda x: "{:.4f}".format(math.floor(x["progress"] * 10000.0) / 10000),
}
Expand Down Expand Up @@ -103,7 +130,37 @@ def cache_content_title(obj):
)
)

map_object = partial(output_mapper, labels=labels, output_mappings=mappings)

def get_max_ancestor_depth():
max_depth = 0
content_ids = ContentSummaryLog.objects.values_list("content_id", flat=True)
nodes = ContentNode.objects.filter(content_id__in=content_ids).only(
"content_id", "title", "ancestors"
)
for node in nodes:
ancestors = node.ancestors
# cache it here so the retireival while adding ancestors info into csv is faster
add_content_to_cache(node.content_id, title=node.title, ancestors=ancestors)
max_depth = max(max_depth, len(ancestors))
return max_depth


def add_ancestors_info(row, ancestors, max_depth):
row.update(
{
f"Topic level {level + 1}": ancestors[level]["title"]
if level < len(ancestors)
else ""
for level in range(max_depth)
}
)


def map_object(item):
mapped_item = output_mapper(item, labels=labels, output_mappings=mappings)
ancestors = get_cached_ancestors(item["content_id"])
add_ancestors_info(mapped_item, ancestors, get_max_ancestor_depth())
return mapped_item


classes_info = {
Expand Down Expand Up @@ -171,11 +228,18 @@ def csv_file_generator(
queryset = queryset.filter(start_timestamp__lte=end)

# Exclude completion timestamp for the sessionlog CSV
header_labels = tuple(
header_labels = list(
label
for label in labels.values()
if log_type == "summary" or label != labels["completion_timestamp"]
)
# len of topic headers should be equal to the max depth of the content node
topic_headers = [
(f"Topic level {i+1}", _(f"Topic level {i+1}"))
for i in range(get_max_ancestor_depth())
]

header_labels += [label for _, label in topic_headers]

csv_file = open_csv_for_writing(filepath)

Expand Down