From e835ce34e0e104290476e96b833f771d12565c3c Mon Sep 17 00:00:00 2001
From: Sujai Kumar Gupta
Date: Mon, 4 Nov 2024 12:59:19 +0530
Subject: [PATCH 1/6] add function to get the max_ancestor_depth

---
 kolibri/core/logger/csv_export.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kolibri/core/logger/csv_export.py b/kolibri/core/logger/csv_export.py
index d63f60acf42..208091e65d2 100644
--- a/kolibri/core/logger/csv_export.py
+++ b/kolibri/core/logger/csv_export.py
@@ -103,6 +103,16 @@ def cache_content_title(obj):
         )
     )
 
+
+def get_max_ancestor_depth():
+    max_depth = 0
+    for node in ContentNode.objects.filter(
+        content_id__in=ContentSummaryLog.objects.values_list("content_id", flat=True)
+    ):
+        max_depth = max(max_depth, len(node.ancestors))
+    return max_depth
+
+
 map_object = partial(output_mapper, labels=labels, output_mappings=mappings)

From c84ff836fe598f0d698103953e6e47c058c8d04b Mon Sep 17 00:00:00 2001
From: Sujai Kumar Gupta
Date: Mon, 4 Nov 2024 13:01:53 +0530
Subject: [PATCH 2/6] make header_labels mutable

---
 kolibri/core/logger/csv_export.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kolibri/core/logger/csv_export.py b/kolibri/core/logger/csv_export.py
index 208091e65d2..0d909ea33c7 100644
--- a/kolibri/core/logger/csv_export.py
+++ b/kolibri/core/logger/csv_export.py
@@ -181,7 +181,7 @@ def csv_file_generator(
         queryset = queryset.filter(start_timestamp__lte=end)
 
     # Exclude completion timestamp for the sessionlog CSV
-    header_labels = tuple(
+    header_labels = list(
         label
         for label in labels.values()
         if log_type == "summary" or label != labels["completion_timestamp"]

From caf881128391a1470b2b1add8c5a9ea5c9892606 Mon Sep 17 00:00:00 2001
From: Sujai Kumar Gupta
Date: Mon, 4 Nov 2024 13:02:35 +0530
Subject: [PATCH 3/6] add topic headers to header_labels

---
 kolibri/core/logger/csv_export.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kolibri/core/logger/csv_export.py b/kolibri/core/logger/csv_export.py
index 0d909ea33c7..5098ae85f31 100644
--- a/kolibri/core/logger/csv_export.py
+++ b/kolibri/core/logger/csv_export.py
@@ -186,6 +186,13 @@ def csv_file_generator(
         for label in labels.values()
         if log_type == "summary" or label != labels["completion_timestamp"]
     )
+    # the number of topic headers should equal the max ancestor depth of the content nodes
+    topic_headers = [
+        (f"Topic level {i+1}", _(f"Topic level {i+1}"))
+        for i in range(get_max_ancestor_depth())
+    ]
+
+    header_labels += [label for _, label in topic_headers]
 
     csv_file = open_csv_for_writing(filepath)

From de9b0366fa3355ff5c8981784a956969cbfac7ec Mon Sep 17 00:00:00 2001
From: Sujai Kumar Gupta
Date: Mon, 4 Nov 2024 13:04:22 +0530
Subject: [PATCH 4/6] update map_object to add the ancestor info before
 writing to the csv

---
 kolibri/core/logger/csv_export.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/kolibri/core/logger/csv_export.py b/kolibri/core/logger/csv_export.py
index 5098ae85f31..a6275b63159 100644
--- a/kolibri/core/logger/csv_export.py
+++ b/kolibri/core/logger/csv_export.py
@@ -4,7 +4,6 @@
 import math
 import os
 from collections import OrderedDict
-from functools import partial
 
 from dateutil import parser
 from django.core.cache import cache
@@ -113,7 +112,25 @@ def get_max_ancestor_depth():
     return max_depth
 
 
-map_object = partial(output_mapper, labels=labels, output_mappings=mappings)
+def add_ancestors_info(row, ancestors, max_depth):
+    row.update(
+        {
+            f"Topic level {level + 1}": ancestors[level]["title"]
+            if level < len(ancestors)
+            else ""
+            for level in range(max_depth)
+        }
+    )
+
+
+def map_object(item):
+    mapped_item = output_mapper(item, labels=labels, output_mappings=mappings)
+    node = ContentNode.objects.filter(content_id=item["content_id"]).first()
+    if node and node.ancestors:
+        add_ancestors_info(mapped_item, node.ancestors, get_max_ancestor_depth())
+    else:
+        add_ancestors_info(mapped_item, [], get_max_ancestor_depth())
+    return mapped_item
 
 
 classes_info = {

From 03113e058b7db1cb5bfe3f135f2dfa72562f60e5 Mon Sep 17 00:00:00 2001
From: Sujai Kumar Gupta
Date: Tue, 5 Nov 2024 01:44:31 +0530
Subject: [PATCH 5/6] optimize retrieval of ancestors by caching

---
 kolibri/core/logger/csv_export.py | 76 +++++++++++++++++++++----------
 1 file changed, 53 insertions(+), 23 deletions(-)

diff --git a/kolibri/core/logger/csv_export.py b/kolibri/core/logger/csv_export.py
index a6275b63159..afa7de1ef81 100644
--- a/kolibri/core/logger/csv_export.py
+++ b/kolibri/core/logger/csv_export.py
@@ -26,8 +26,39 @@
     "summary": "{}_{}_content_summary_logs_from_{}_to_{}.csv",
 }
 
+CACHE_TIMEOUT = 60 * 10
 
-def cache_channel_name(obj):
+
+def add_content_to_cache(content_id, **kwargs):
+    title_key = "{content_id}_ContentNode_title".format(content_id=content_id)
+    ancestors_key = "{content_id}_ContentNode_ancestors".format(content_id=content_id)
+
+    cache.set(title_key, kwargs.get("title", ""), CACHE_TIMEOUT)
+    cache.set(ancestors_key, kwargs.get("ancestors", []), CACHE_TIMEOUT)
+
+
+def get_cached_content_data(content_id):
+    title_key = f"{content_id}_ContentNode_title"
+    ancestors_key = f"{content_id}_ContentNode_ancestors"
+
+    title = cache.get(title_key)
+    ancestors = cache.get(ancestors_key)
+
+    if title is None or ancestors is None:
+        node = ContentNode.objects.filter(content_id=content_id).first()
+        if node:
+            title = node.title
+            ancestors = node.ancestors
+        else:
+            title = ""
+            ancestors = []
+
+        add_content_to_cache(content_id, title=title, ancestors=ancestors)
+
+    return title, ancestors
+
+
+def get_cached_channel_name(obj):
     channel_id = obj["channel_id"]
     key = "{id}_ChannelMetadata_name".format(id=channel_id)
     channel_name = cache.get(key)
@@ -36,27 +67,24 @@ def cache_channel_name(obj):
             channel_name = ChannelMetadata.objects.get(id=channel_id)
         except ChannelMetadata.DoesNotExist:
             channel_name = ""
-        cache.set(key, channel_name, 60 * 10)
+        cache.set(key, channel_name, CACHE_TIMEOUT)
     return channel_name
 
 
-def cache_content_title(obj):
+def get_cached_content_title(obj):
     content_id = obj["content_id"]
-    key = "{id}_ContentNode_title".format(id=content_id)
-    title = cache.get(key)
-    if title is None:
-        node = ContentNode.objects.filter(content_id=content_id).first()
-        if node:
-            title = node.title
-        else:
-            title = ""
-        cache.set(key, title, 60 * 10)
+    title, _ = get_cached_content_data(content_id)
     return title
 
 
+def get_cached_ancestors(content_id):
+    _, ancestors = get_cached_content_data(content_id)
+    return ancestors
+
+
 mappings = {
-    "channel_name": cache_channel_name,
-    "content_title": cache_content_title,
+    "channel_name": get_cached_channel_name,
+    "content_title": get_cached_content_title,
     "time_spent": lambda x: "{:.1f}".format(round(x["time_spent"], 1)),
     "progress": lambda x: "{:.4f}".format(math.floor(x["progress"] * 10000.0) / 10000),
 }
@@ -105,10 +133,15 @@ def cache_content_title(obj):
 
 def get_max_ancestor_depth():
     max_depth = 0
-    for node in ContentNode.objects.filter(
-        content_id__in=ContentSummaryLog.objects.values_list("content_id", flat=True)
-    ):
-        max_depth = max(max_depth, len(node.ancestors))
+    content_ids = ContentSummaryLog.objects.values_list("content_id", flat=True)
+    nodes = ContentNode.objects.filter(content_id__in=content_ids).only(
+        "content_id", "title", "ancestors"
+    )
+    for node in nodes:
+        ancestors = node.ancestors
+        # cache it here so the retrieval while adding the ancestor info to the csv is faster
+        add_content_to_cache(node.content_id, title=node.title, ancestors=ancestors)
+        max_depth = max(max_depth, len(ancestors))
     return max_depth
 
 
@@ -125,11 +158,8 @@ def add_ancestors_info(row, ancestors, max_depth):
 
 def map_object(item):
     mapped_item = output_mapper(item, labels=labels, output_mappings=mappings)
-    node = ContentNode.objects.filter(content_id=item["content_id"]).first()
-    if node and node.ancestors:
-        add_ancestors_info(mapped_item, node.ancestors, get_max_ancestor_depth())
-    else:
-        add_ancestors_info(mapped_item, [], get_max_ancestor_depth())
+    ancestors = get_cached_ancestors(item["content_id"])
+    add_ancestors_info(mapped_item, ancestors, get_max_ancestor_depth())
     return mapped_item

From 93af375f2dd4cfd215dde3d5a6233d468001f52f Mon Sep 17 00:00:00 2001
From: Sujai Kumar Gupta
Date: Wed, 6 Nov 2024 22:36:47 +0530
Subject: [PATCH 6/6] move the ancestor info into the middle of the csv and
 ignore the first ancestor

---
 kolibri/core/logger/csv_export.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kolibri/core/logger/csv_export.py b/kolibri/core/logger/csv_export.py
index afa7de1ef81..31a529142b0 100644
--- a/kolibri/core/logger/csv_export.py
+++ b/kolibri/core/logger/csv_export.py
@@ -132,6 +132,7 @@ def get_cached_ancestors(content_id):
 
 
 def get_max_ancestor_depth():
+    """Returns one less than the maximum depth of the ancestors of all content nodes"""
     max_depth = 0
     content_ids = ContentSummaryLog.objects.values_list("content_id", flat=True)
     nodes = ContentNode.objects.filter(content_id__in=content_ids).only(
@@ -142,10 +143,11 @@ def get_max_ancestor_depth():
         # cache it here so the retrieval while adding the ancestor info to the csv is faster
         add_content_to_cache(node.content_id, title=node.title, ancestors=ancestors)
         max_depth = max(max_depth, len(ancestors))
-    return max_depth
+    return max_depth - 1
 
 
 def add_ancestors_info(row, ancestors, max_depth):
+    ancestors = ancestors[1:]
     row.update(
         {
             f"Topic level {level + 1}": ancestors[level]["title"]
@@ -239,7 +241,10 @@ def csv_file_generator(
         for i in range(get_max_ancestor_depth())
     ]
 
-    header_labels += [label for _, label in topic_headers]
+    content_id_index = header_labels.index(labels["content_id"])
+    header_labels[content_id_index:content_id_index] = [
+        label for _, label in topic_headers
+    ]
 
     csv_file = open_csv_for_writing(filepath)
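
---

Below, a minimal standalone sketch of what the final add_ancestors_info helper does
to a CSV row after this series. The ancestors list here is hand-made sample data
standing in for what ContentNode.ancestors returns (dicts carrying a "title" key);
the row contents are hypothetical, not a real log entry:

    def add_ancestors_info(row, ancestors, max_depth):
        # as of PATCH 6/6: skip the first ancestor (the channel root)
        ancestors = ancestors[1:]
        row.update(
            {
                f"Topic level {level + 1}": ancestors[level]["title"]
                if level < len(ancestors)
                else ""
                for level in range(max_depth)
            }
        )

    row = {"content_title": "Intro to fractions"}  # hypothetical mapped row
    sample_ancestors = [
        {"id": "root", "title": "Sample Channel"},  # dropped by ancestors[1:]
        {"id": "t1", "title": "Math"},
        {"id": "t2", "title": "Arithmetic"},
    ]
    add_ancestors_info(row, sample_ancestors, max_depth=3)
    # row now also contains:
    #   "Topic level 1": "Math"
    #   "Topic level 2": "Arithmetic"
    #   "Topic level 3": ""   (padded so every row has the same columns)

Shallower nodes are padded with empty strings up to max_depth, which keeps the
column count identical across all rows of the export.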