Merge pull request #12782 from thesujai/ancestor-info-in-logs
Add ancestor information to summary and session log exports
rtibbles authored Nov 6, 2024
2 parents 2d99adf + 93af375 commit 9e119ca
Showing 1 changed file with 86 additions and 17 deletions.
103 changes: 86 additions & 17 deletions kolibri/core/logger/csv_export.py
@@ -4,7 +4,6 @@
 import math
 import os
 from collections import OrderedDict
-from functools import partial
 
 from dateutil import parser
 from django.core.cache import cache
@@ -27,8 +26,39 @@
     "summary": "{}_{}_content_summary_logs_from_{}_to_{}.csv",
 }
 
+CACHE_TIMEOUT = 60 * 10
 
-def cache_channel_name(obj):
+
+def add_content_to_cache(content_id, **kwargs):
+    title_key = "{content_id}_ContentNode_title".format(content_id=content_id)
+    ancestors_key = "{content_id}_ContentNode_ancestors".format(content_id=content_id)
+
+    cache.set(title_key, kwargs.get("title", ""), CACHE_TIMEOUT)
+    cache.set(ancestors_key, kwargs.get("ancestors", []), CACHE_TIMEOUT)
+
+
+def get_cached_content_data(content_id):
+    title_key = f"{content_id}_ContentNode_title"
+    ancestors_key = f"{content_id}_ContentNode_ancestors"
+
+    title = cache.get(title_key)
+    ancestors = cache.get(ancestors_key)
+
+    if title is None or ancestors is None:
+        node = ContentNode.objects.filter(content_id=content_id).first()
+        if node:
+            title = node.title
+            ancestors = node.ancestors
+        else:
+            title = ""
+            ancestors = []
+
+        add_content_to_cache(content_id, title=title, ancestors=ancestors)
+
+    return title, ancestors
+
+
+def get_cached_channel_name(obj):
     channel_id = obj["channel_id"]
     key = "{id}_ChannelMetadata_name".format(id=channel_id)
     channel_name = cache.get(key)
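For context, a minimal sketch of how this read-through cache behaves (the content_id and titles below are hypothetical; on a miss, the title and ancestors are fetched from ContentNode and cached for CACHE_TIMEOUT, i.e. ten minutes):

# Hypothetical usage of the new helpers; the id and titles are made up.
content_id = "2b6fa1c0d3e94b6e8a7c5d4f1e0b9a8c"

# First call misses the cache, queries ContentNode, and populates both keys.
title, ancestors = get_cached_content_data(content_id)

# Calls within the next ten minutes are served straight from the cache.
title, ancestors = get_cached_content_data(content_id)
# e.g. title == "Adding Fractions"
# e.g. ancestors == [{"title": "Channel root"}, {"title": "Math"}]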
@@ -37,27 +67,24 @@ def cache_channel_name(obj):
             channel_name = ChannelMetadata.objects.get(id=channel_id)
         except ChannelMetadata.DoesNotExist:
             channel_name = ""
-        cache.set(key, channel_name, 60 * 10)
+        cache.set(key, channel_name, CACHE_TIMEOUT)
     return channel_name
 
 
-def cache_content_title(obj):
+def get_cached_content_title(obj):
     content_id = obj["content_id"]
-    key = "{id}_ContentNode_title".format(id=content_id)
-    title = cache.get(key)
-    if title is None:
-        node = ContentNode.objects.filter(content_id=content_id).first()
-        if node:
-            title = node.title
-        else:
-            title = ""
-        cache.set(key, title, 60 * 10)
+    title, _ = get_cached_content_data(content_id)
     return title
 
 
+def get_cached_ancestors(content_id):
+    _, ancestors = get_cached_content_data(content_id)
+    return ancestors
+
+
 mappings = {
-    "channel_name": cache_channel_name,
-    "content_title": cache_content_title,
+    "channel_name": get_cached_channel_name,
+    "content_title": get_cached_content_title,
     "time_spent": lambda x: "{:.1f}".format(round(x["time_spent"], 1)),
     "progress": lambda x: "{:.4f}".format(math.floor(x["progress"] * 10000.0) / 10000),
 }
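A quick worked example of the two numeric mappings (values hypothetical): time_spent is rounded to one decimal place, while progress is floored at the fourth decimal place, so a nearly finished 0.99999 is reported as 0.9999 rather than being rounded up to 1.0000.

import math

row = {"time_spent": 83.26, "progress": 0.99999}

"{:.1f}".format(round(row["time_spent"], 1))                    # -> "83.3"
"{:.4f}".format(math.floor(row["progress"] * 10000.0) / 10000)  # -> "0.9999"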
@@ -103,7 +130,39 @@ def cache_content_title(obj):
     )
 )
 
-map_object = partial(output_mapper, labels=labels, output_mappings=mappings)
+
+def get_max_ancestor_depth():
+    """Returns one less than the maximum depth of the ancestors of all content nodes"""
+    max_depth = 0
+    content_ids = ContentSummaryLog.objects.values_list("content_id", flat=True)
+    nodes = ContentNode.objects.filter(content_id__in=content_ids).only(
+        "content_id", "title", "ancestors"
+    )
+    for node in nodes:
+        ancestors = node.ancestors
+        # cache it here so the retrieval while adding ancestor info to the CSV is faster
+        add_content_to_cache(node.content_id, title=node.title, ancestors=ancestors)
+        max_depth = max(max_depth, len(ancestors))
+    return max_depth - 1
+
+
+def add_ancestors_info(row, ancestors, max_depth):
+    ancestors = ancestors[1:]
+    row.update(
+        {
+            f"Topic level {level + 1}": ancestors[level]["title"]
+            if level < len(ancestors)
+            else ""
+            for level in range(max_depth)
+        }
+    )
+
+
+def map_object(item):
+    mapped_item = output_mapper(item, labels=labels, output_mappings=mappings)
+    ancestors = get_cached_ancestors(item["content_id"])
+    add_ancestors_info(mapped_item, ancestors, get_max_ancestor_depth())
+    return mapped_item
+
 
 classes_info = {
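To see how these pieces fit together, a small sketch with a hypothetical ancestor chain: the first ancestor is the channel-level root, which add_ancestors_info slices off with ancestors[1:], hence the - 1 in get_max_ancestor_depth.

# Hypothetical ancestor chain for one logged node (titles are made up).
ancestors = [
    {"title": "Channel root"},  # always first; dropped by ancestors[1:]
    {"title": "Math"},
    {"title": "Fractions"},
]

# If this is the deepest chain, get_max_ancestor_depth() returns
# len(ancestors) - 1 == 2, i.e. two "Topic level" columns are needed.
row = {"content_title": "Adding Fractions"}
add_ancestors_info(row, ancestors, max_depth=2)
# row == {"content_title": "Adding Fractions",
#         "Topic level 1": "Math",
#         "Topic level 2": "Fractions"}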
@@ -171,11 +230,21 @@ def csv_file_generator(
         queryset = queryset.filter(start_timestamp__lte=end)
 
     # Exclude completion timestamp for the sessionlog CSV
-    header_labels = tuple(
+    header_labels = list(
         label
         for label in labels.values()
         if log_type == "summary" or label != labels["completion_timestamp"]
     )
+    # The number of topic headers equals the maximum ancestor depth of the logged content nodes
+    topic_headers = [
+        (f"Topic level {i+1}", _(f"Topic level {i+1}"))
+        for i in range(get_max_ancestor_depth())
+    ]
+
+    content_id_index = header_labels.index(labels["content_id"])
+    header_labels[content_id_index:content_id_index] = [
+        label for _, label in topic_headers
+    ]
 
     csv_file = open_csv_for_writing(filepath)
 
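For illustration, with a maximum ancestor depth of 2 the splice above inserts the topic columns immediately before the content id column (header labels abbreviated and hypothetical):

header_labels = ["Channel name", "Content title", "Content id", "Time spent"]
topic_labels = ["Topic level 1", "Topic level 2"]

content_id_index = header_labels.index("Content id")
header_labels[content_id_index:content_id_index] = topic_labels
# header_labels == ["Channel name", "Content title",
#                   "Topic level 1", "Topic level 2",
#                   "Content id", "Time spent"]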