From d0637883e4dee46a7303a8a0bc7c1ff52f9d2023 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sat, 29 May 2021 06:17:44 -0700 Subject: [PATCH 01/12] Add extensive testing to content node viewset. Slightly refactor to handle edge cases and reduce JOINs. --- kolibri/core/content/api.py | 205 ++++++++++-------- kolibri/core/content/test/test_content_app.py | 109 +++++++++- 2 files changed, 225 insertions(+), 89 deletions(-) diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index a84904f68de..d6d41d18176 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -210,25 +210,6 @@ class OptionalPageNumberPagination(ValuesViewsetPageNumberPagination): page_size_query_param = "page_size" -def map_lang(obj): - keys = ["id", "lang_code", "lang_subcode", "lang_name", "lang_direction"] - - lower_case = set(["id", "lang_code", "lang_subcode"]) - - output = {} - - for key in keys: - output[key] = obj.pop("lang__" + key) - if key in lower_case and output[key]: - output[key] = output[key].lower() - - if not any(output.values()): - # All keys are null so return None - return None - - return output - - def map_file(file): file["checksum"] = file.pop("local_file__id") file["available"] = file.pop("local_file__available") @@ -241,15 +222,17 @@ def map_file(file): "extension": file["extension"], } ) - file["lang"] = map_lang(file) return file -@method_decorator(cache_forever, name="dispatch") -class ContentNodeViewset(ReadOnlyValuesViewset): +class BaseContentNodeMixin(object): + """ + A base mixin for viewsets that need to return the same format of data + serialization for ContentNodes. + """ + filter_backends = (DjangoFilterBackend,) filter_class = ContentNodeFilter - pagination_class = OptionalPageNumberPagination values = ( "id", @@ -260,12 +243,7 @@ class ContentNodeViewset(ReadOnlyValuesViewset): "content_id", "description", "kind", - # Language keys - "lang__id", - "lang__lang_code", - "lang__lang_subcode", - "lang__lang_name", - "lang__lang_direction", + "lang_id", "license_description", "license_name", "license_owner", @@ -279,29 +257,28 @@ class ContentNodeViewset(ReadOnlyValuesViewset): "tree_id", ) - field_map = {"lang": map_lang} - - def consolidate(self, items, queryset): - output = [] + def get_queryset(self): + return models.ContentNode.objects.filter(available=True) - if items: - assessmentmetadata = { - a["contentnode"]: a - for a in models.AssessmentMetaData.objects.filter( - contentnode__in=queryset - ).values( - "assessment_item_ids", - "number_of_assessments", - "mastery_model", - "randomize", - "is_manipulable", - "contentnode", - ) - } + def get_related_data_maps(self, items, queryset): + assessmentmetadata_map = { + a["contentnode"]: a + for a in models.AssessmentMetaData.objects.filter( + contentnode__in=queryset + ).values( + "assessment_item_ids", + "number_of_assessments", + "mastery_model", + "randomize", + "is_manipulable", + "contentnode", + ) + } - files = {} + files_map = {} - for f in models.File.objects.filter(contentnode__in=queryset).values( + files = list( + models.File.objects.filter(contentnode__in=queryset).values( "id", "contentnode", "local_file__id", @@ -310,44 +287,102 @@ def consolidate(self, items, queryset): "local_file__file_size", "local_file__extension", "preset", - "lang__id", - "lang__lang_code", - "lang__lang_subcode", - "lang__lang_name", - "lang__lang_direction", + "lang_id", "supplementary", "thumbnail", - ): - if f["contentnode"] not in files: - files[f["contentnode"]] = [] - 
files[f["contentnode"]].append(map_file(f)) + ) + ) - ancestors = queryset.get_ancestors().values( - "id", "title", "lft", "rght", "tree_id" + lang_ids = set([obj["lang_id"] for obj in items + files]) + + languages_map = { + lang["id"]: lang + for lang in models.Language.objects.filter(id__in=lang_ids).values( + "id", "lang_code", "lang_subcode", "lang_name", "lang_direction" ) + } - tags = {} + for f in files: + contentnode_id = f.pop("contentnode") + if contentnode_id not in files_map: + files_map[contentnode_id] = [] + lang_id = f.pop("lang_id") + f["lang"] = languages_map.get(lang_id) + files_map[contentnode_id].append(map_file(f)) + + tags_map = {} + + for t in models.ContentTag.objects.filter(tagged_content__in=queryset).values( + "tag_name", + "tagged_content", + ): + if t["tagged_content"] not in tags_map: + tags_map[t["tagged_content"]] = [t["tag_name"]] + else: + tags_map[t["tagged_content"]].append(t["tag_name"]) - for t in models.ContentTag.objects.filter( - tagged_content__in=queryset - ).values( - "tag_name", - "tagged_content", - ): - if t["tagged_content"] not in tags: - tags[t["tagged_content"]] = [t["tag_name"]] - else: - tags[t["tagged_content"]].append(t["tag_name"]) - - for item in items: - item["assessmentmetadata"] = assessmentmetadata.get(item["id"]) - item["tags"] = tags.get(item["id"], []) - item["files"] = files.get(item["id"], []) - - lft = item.pop("lft") - rght = item.pop("rght") - tree_id = item.pop("tree_id") - item["ancestors"] = list( + return assessmentmetadata_map, files_map, languages_map, tags_map + + def process_items(self, items, queryset, ancestor_lookup_method=None): + output = [] + assessmentmetadata, files_map, languages_map, tags = self.get_related_data_maps( + items, queryset + ) + for item in items: + item["assessmentmetadata"] = assessmentmetadata.get(item["id"]) + item["tags"] = tags.get(item["id"], []) + item["files"] = files_map.get(item["id"], []) + lang_id = item.pop("lang_id") + item["lang"] = languages_map.get(lang_id) + if ancestor_lookup_method: + item["ancestors"] = ancestor_lookup_method(item) + item["is_leaf"] = item.get("kind") != content_kinds.TOPIC + output.append(item) + return output + + +@method_decorator(cache_forever, name="dispatch") +class ContentNodeViewset(BaseContentNodeMixin, ReadOnlyValuesViewset): + pagination_class = OptionalPageNumberPagination + + def consolidate(self, items, queryset): + if items: + + # We need to batch our queries for ancestors as the size of the expression tree + # depends on the number of nodes that we are querying for. 
+ ANCESTOR_BATCH_SIZE = 1000 + + if len(items) > ANCESTOR_BATCH_SIZE: + + ancestors_map = {} + + for i in range(0, len(items), ANCESTOR_BATCH_SIZE): + + for anc in ( + models.ContentNode.objects.filter( + id__in=[ + item["id"] + for item in items[i : i + ANCESTOR_BATCH_SIZE] + ] + ) + .get_ancestors() + .values("id", "title", "lft", "rght", "tree_id") + ): + ancestors_map[anc["id"]] = anc + + ancestors = sorted(ancestors_map.values(), key=lambda x: x["lft"]) + else: + ancestors = list( + queryset.get_ancestors() + .values("id", "title", "lft", "rght", "tree_id") + .order_by("lft") + ) + + def ancestor_lookup(item): + lft = item.get("lft") + rght = item.get("rght") + tree_id = item.get("tree_id") + return list( filter( lambda x: x["lft"] < lft and x["rght"] > rght @@ -355,12 +390,10 @@ def consolidate(self, items, queryset): ancestors, ) ) - item["is_leaf"] = item.get("kind") != content_kinds.TOPIC - output.append(item) - return output - def get_queryset(self): - return models.ContentNode.objects.filter(available=True) + return self.process_items(items, queryset, ancestor_lookup) + + return [] @list_route(methods=["get"]) def descendants(self, request): diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py index dd58962010d..798dd0de4fd 100644 --- a/kolibri/core/content/test/test_content_app.py +++ b/kolibri/core/content/test/test_content_app.py @@ -18,6 +18,7 @@ from kolibri.core.auth.models import FacilityUser from kolibri.core.auth.test.helpers import provision_device from kolibri.core.content import models as content +from kolibri.core.content.test.test_channel_upgrade import ChannelBuilder from kolibri.core.device.models import DevicePermissions from kolibri.core.device.models import DeviceSettings from kolibri.core.logger.models import ContentSessionLog @@ -233,13 +234,115 @@ def test_related_filter(self): ) self.assertEqual(response.data[0]["title"], "c2") + def _assert_nodes(self, data, nodes): + def map_language(lang): + if lang: + return { + f: getattr(lang, f) + for f in [ + "id", + "lang_code", + "lang_subcode", + "lang_name", + "lang_direction", + ] + } + + for actual, expected in zip(data, nodes): + assessmentmetadata = ( + expected.assessmentmetadata.all() + .values( + "assessment_item_ids", + "number_of_assessments", + "mastery_model", + "randomize", + "is_manipulable", + "contentnode", + ) + .first() + ) + files = [] + for f in expected.files.all(): + "local_file__id", + "local_file__available", + "local_file__file_size", + "local_file__extension", + "lang_id", + file = {} + for field in [ + "id", + "priority", + "preset", + "supplementary", + "thumbnail", + ]: + file[field] = getattr(f, field) + file["checksum"] = f.local_file_id + for field in [ + "available", + "file_size", + "extension", + ]: + file[field] = getattr(f.local_file, field) + file["lang"] = map_language(f.lang) + file["storage_url"] = f.get_storage_url() + files.append(file) + self.assertEqual( + actual, + { + "id": expected.id, + "available": expected.available, + "author": expected.author, + "channel_id": expected.channel_id, + "coach_content": expected.coach_content, + "content_id": expected.content_id, + "description": expected.description, + "kind": expected.kind, + "lang": map_language(expected.lang), + "license_description": expected.license_description, + "license_name": expected.license_name, + "license_owner": expected.license_owner, + "num_coach_contents": expected.num_coach_contents, + "options": expected.options, + "parent": 
expected.parent_id, + "sort_order": expected.sort_order, + "title": expected.title, + "lft": expected.lft, + "rght": expected.rght, + "tree_id": expected.tree_id, + "ancestors": list( + expected.get_ancestors().values( + "id", "title", "lft", "rght", "tree_id" + ) + ), + "tags": list( + expected.tags.all().values_list("tag_name", flat=True) + ), + "assessmentmetadata": assessmentmetadata, + "is_leaf": expected.kind != "topic", + "files": files, + }, + ) + def test_contentnode_list(self): root = content.ContentNode.objects.get(title="root") - expected_output = ( - root.get_descendants(include_self=True).filter(available=True).count() - ) + nodes = root.get_descendants(include_self=True).filter(available=True) + expected_output = len(nodes) + response = self.client.get(reverse("kolibri:core:contentnode-list")) + self.assertEqual(len(response.data), expected_output) + self._assert_nodes(response.data, nodes) + + def test_contentnode_list_long(self): + # This will make > 1000 nodes which should test our ancestor batching behaviour + builder = ChannelBuilder(levels=4) + builder.insert_into_default_db() + content.ContentNode.objects.update(available=True) + nodes = content.ContentNode.objects.filter(available=True) + expected_output = len(nodes) + self.assertGreater(expected_output, 1000) response = self.client.get(reverse("kolibri:core:contentnode-list")) self.assertEqual(len(response.data), expected_output) + self._assert_nodes(response.data, nodes) @mock.patch("kolibri.core.content.api.get_channel_stats_from_studio") def test_contentnode_granular_network_import(self, stats_mock): From 4e91168fa8e36ec4e50c6b0a0fb43b731b8808aa Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sat, 29 May 2021 06:18:42 -0700 Subject: [PATCH 02/12] Factor Retrieve and List out into mixins. --- kolibri/core/api.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/kolibri/core/api.py b/kolibri/core/api.py index 04a9f6d2533..531474341a3 100644 --- a/kolibri/core/api.py +++ b/kolibri/core/api.py @@ -117,7 +117,7 @@ def remove_invalid_fields(self, queryset, fields, view, request): return ordering -class ReadOnlyValuesViewset(viewsets.ReadOnlyModelViewSet): +class BaseValuesViewset(viewsets.GenericViewSet): """ A viewset that uses a values call to get all model/queryset data in a single database query, rather than delegating serialization to a @@ -125,7 +125,7 @@ class ReadOnlyValuesViewset(viewsets.ReadOnlyModelViewSet): """ # A tuple of values to get from the queryset - values = None + # values = None # A map of target_key, source_key where target_key is the final target_key that will be set # and source_key is the key on the object retrieved from the values call. 
# Alternatively, the source_key can be a callable that will be passed the object and return @@ -134,8 +134,8 @@ class ReadOnlyValuesViewset(viewsets.ReadOnlyModelViewSet): field_map = {} def __init__(self, *args, **kwargs): - viewset = super(ReadOnlyValuesViewset, self).__init__(*args, **kwargs) - if not isinstance(self.values, tuple): + viewset = super(BaseValuesViewset, self).__init__(*args, **kwargs) + if not hasattr(self, "values") or not isinstance(self.values, tuple): raise TypeError("values must be defined as a tuple") self._values = tuple(self.values) if not isinstance(self.field_map, dict): @@ -205,6 +205,18 @@ def serialize(self, queryset): list(map(self._map_fields, values_queryset or [])), queryset ) + def serialize_object(self, **filter_kwargs): + queryset = self.get_queryset() + try: + filter_kwargs = filter_kwargs or self._get_lookup_filter() + return self.serialize(queryset.filter(**filter_kwargs))[0] + except IndexError: + raise Http404( + "No %s matches the given query." % queryset.model._meta.object_name + ) + + +class ListModelMixin(object): def list(self, request, *args, **kwargs): queryset = self.filter_queryset(self.get_queryset()) @@ -218,20 +230,16 @@ def list(self, request, *args, **kwargs): return Response(self.serialize(queryset)) - def serialize_object(self, **filter_kwargs): - queryset = self.get_queryset() - try: - filter_kwargs = filter_kwargs or self._get_lookup_filter() - return self.serialize(queryset.filter(**filter_kwargs))[0] - except IndexError: - raise Http404( - "No %s matches the given query." % queryset.model._meta.object_name - ) +class RetrieveModelMixin(object): def retrieve(self, request, *args, **kwargs): return Response(self.serialize_object()) +class ReadOnlyValuesViewset(BaseValuesViewset, RetrieveModelMixin, ListModelMixin): + pass + + class CreateModelMixin(BaseCreateModelMixin): def create(self, request, *args, **kwargs): serializer = self.get_serializer(data=request.data) From 2ed1c7f8ea9f63c6b6a21d282cac1dd42a49c050 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sat, 29 May 2021 07:09:12 -0700 Subject: [PATCH 03/12] Add contentnode_tree endpoint. Add tests. 
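The new endpoint returns a parent node with a nested, paginated
"children" object. Each page holds at most 25 children; when a page
fills up, "children.more" carries the node id and the query parameters
(depth, lft__gt) needed to request the next page of siblings.

As a rough consumption sketch (hedged: the base URL below assumes a
local dev server and the default API prefix, which may differ by
deployment, and fetch_all_children is an illustrative helper, not part
of this change):

    import requests

    BASE_URL = "http://localhost:8080/api/content/contentnode_tree/{}/"

    def fetch_tree(node_id, params=None):
        # GET the serialized node with up to 25 children
        # (and up to 25 grandchildren per child when depth=2).
        return requests.get(BASE_URL.format(node_id), params=params).json()

    def fetch_all_children(node_id):
        # Collect every immediate child by replaying "more" descriptors.
        node = fetch_tree(node_id)
        children = node.get("children") or {"results": [], "more": None}
        results = list(children["results"])
        while children["more"]:
            more = children["more"]
            node = fetch_tree(more["id"], params=more["params"])
            children = node.get("children") or {"results": [], "more": None}
            results.extend(children["results"])
        return results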
--- kolibri/core/content/api.py | 176 +++++++++++++- kolibri/core/content/api_urls.py | 4 + .../core/content/test/test_channel_upgrade.py | 5 +- kolibri/core/content/test/test_content_app.py | 215 +++++++++++------- 4 files changed, 305 insertions(+), 95 deletions(-) diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index d6d41d18176..3dba0b5d46d 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -5,6 +5,7 @@ import requests from django.core.cache import cache +from django.core.exceptions import ValidationError from django.db.models import Exists from django.db.models import OuterRef from django.db.models import Q @@ -31,6 +32,7 @@ from rest_framework.generics import get_object_or_404 from rest_framework.response import Response +from kolibri.core.api import BaseValuesViewset from kolibri.core.api import ReadOnlyValuesViewset from kolibri.core.auth.middleware import session_exempt from kolibri.core.content import models @@ -383,11 +385,14 @@ def ancestor_lookup(item): rght = item.get("rght") tree_id = item.get("tree_id") return list( - filter( - lambda x: x["lft"] < lft - and x["rght"] > rght - and x["tree_id"] == tree_id, - ancestors, + map( + lambda x: {"id": x["id"], "title": x["title"]}, + filter( + lambda x: x["lft"] < lft + and x["rght"] > rght + and x["tree_id"] == tree_id, + ancestors, + ), ) ) @@ -719,6 +724,167 @@ def resume(self, request, **kwargs): return Response(self.serialize(queryset)) +@method_decorator(cache_forever, name="dispatch") +class ContentNodeTreeViewset(BaseContentNodeMixin, BaseValuesViewset): + # We fix the page size at 25 for a couple of reasons: + # 1. At this size the query appears to be relatively performant, and will deliver most of the tree + # data to the frontend in a single query. + # 2. In the case where the tree topology means that this will not produce the full query, the limit of + # 25 immediate children and 25 grand children means that we are at most using 1 + 25 + 25 * 25 = 651 + # SQL parameters in the query to get the nodes for serialization - this means that we should not ever + # run into an issue where we hit a SQL parameters limit in the queries in here. + # If we find that this page size is too high, we should lower it, but for the reasons noted above, we + # should not raise it. 
+ page_size = 25 + + def consolidate(self, items, queryset): + if items: + return self.process_items(items, queryset) + return [] + + def validate_and_return_params(self, request): + depth = request.query_params.get("depth", 2) + lft__gt = request.query_params.get("lft__gt") + + try: + depth = int(depth) + if 1 > depth or depth > 2: + raise ValueError + except ValueError: + raise ValidationError("Depth query parameter must have the value 1 or 2") + + if lft__gt is not None: + try: + lft__gt = int(lft__gt) + if 1 > lft__gt: + raise ValueError + except ValueError: + raise ValidationError( + "lft__gt query parameter must be a positive integer if specified" + ) + return depth, lft__gt + + def get_grandchild_ids(self, child_ids, depth): + if depth == 2: + # Use this to keep track of how many grand children we have accumulated per child of the parent node + gc_by_parent = {} + # Iterate through the grand children of the parent node in lft order so we follow the tree traversal order + for gc in ( + self.filter_queryset(self.get_queryset()) + .filter(parent_id__in=child_ids) + .values("id", "parent_id") + .order_by("lft") + ): + # If we have not already added a list of nodes to the gc_by_parent map, initialize it here + if gc["parent_id"] not in gc_by_parent: + gc_by_parent[gc["parent_id"]] = [] + # If the number of grand children for a specific child node is less than the page size + # then we keep on adding them to both lists + # If not, we just skip this node, as we have already hit the page limit for the node that is + # its parent. + if len(gc_by_parent[gc["parent_id"]]) < self.page_size: + gc_by_parent[gc["parent_id"]].append(gc["id"]) + yield gc["id"] + + def retrieve(self, request, **kwargs): + """ + A nested, paginated representation of the children and grandchildren of a specific node + + :param request: request object + :param pk: id parent node + :return: an object representing the parent with a pagination object as "children" + """ + + # Get the model for the parent node here - we do this so that we trigger a 404 immediately if the node + # does not exist (or exists but is not available), and so that we have the Django model object later to + # use for the `get_ancestors` MPTT method. + parent_model = self.get_object() + + depth, lft__gt = self.validate_and_return_params(request) + + # Get a list of child_ids of the parent node up to the pagination limit + child_qs = self.get_queryset().filter(parent=parent_model) + if lft__gt is not None: + child_qs = child_qs.filter(lft__gt=lft__gt) + child_ids = child_qs.values_list("id", flat=True).order_by("lft")[ + 0 : self.page_size + ] + + # Get a flat list of ids for grandchildren we will be returning + gc_ids = self.get_grandchild_ids(child_ids, depth) + + # We explicitly order by lft here, so that the nodes are in tree traversal order, so we can iterate over them and build + # out our nested representation, being sure that any ancestors have already been processed. + nodes = self.serialize( + self.filter_queryset(self.get_queryset()) + .filter(Q(id=parent_model.id) | Q(id__in=child_ids) | Q(id__in=gc_ids)) + .order_by("lft") + ) + + # The serialized parent representation is the first node in the lft order + parent = nodes[0] + + # Do a query to get the parent's ancestors - for all other nodes, we can manually build + # the ancestors from the tree topology that we already know! 
+        parent["ancestors"] = list(parent_model.get_ancestors().values("id", "title"))
+
+        # Use this to keep track of direct children of the parent node -
+        # this will allow us to do lookups for the grandchildren, in order
+        # to insert them into the "children" property
+        children_by_id = {}
+
+        # Iterate through all the descendants that we have serialized
+        for desc in nodes[1:]:
+            # First check to see whether it is a direct child of the
+            # parent node that we initially queried
+            if desc["parent"] == parent_model.id:
+                # If so, add it to the children_by_id map so that
+                # grandchildren descendants can reference it later
+                children_by_id[desc["id"]] = desc
+                # The parent of this descendant is the parent node
+                # for this query
+                desc_parent = parent
+                more_depth = 2
+            elif desc["parent"] in children_by_id:
+                # Otherwise, check to see if our descendant's parent is in the
+                # children_by_id map - if it failed the first condition,
+                # it really should not fail this
+                desc_parent = children_by_id[desc["parent"]]
+                more_depth = 1
+            else:
+                # If we get to here, we have a node that is not in the tree subsection we are
+                # trying to return, so we just ignore it. This shouldn't happen.
+                continue
+            if "children" not in desc_parent:
+                # If the parent of the descendant does not already have its `children` property
+                # initialized, do so here.
+                desc_parent["children"] = {"results": [], "more": None}
+            # The ancestors field for the descendant will be its parent's ancestors, plus its parent!
+            desc["ancestors"] = desc_parent["ancestors"] + [
+                {"id": desc_parent["id"], "title": desc_parent["title"]}
+            ]
+            # Add this descendant to the results for the children pagination object
+            desc_parent["children"]["results"].append(desc)
+            # Only bother updating the URL for more if we have hit the page size limit -
+            # otherwise it will just continue to be None
+            if len(desc_parent["children"]["results"]) == self.page_size:
+                # Any subsequent queries to get siblings of this node can restrict themselves
+                # to looking for nodes with lft greater than the rght value of this descendant
+                lft__gt = desc["rght"]
+                # If the rght value of this descendant is exactly 1 less than the rght value of
+                # its parent, then there are no more children that can be queried.
+ # So only in this instance do we update the more URL + if desc["rght"] + 1 < desc_parent["rght"]: + params = request.query_params.copy() + params["lft__gt"] = lft__gt + params["depth"] = more_depth + desc_parent["children"]["more"] = { + "id": desc_parent["id"], + "params": params, + } + return Response(parent) + + # return the result of and-ing a list of queries def intersection(queries): if queries: diff --git a/kolibri/core/content/api_urls.py b/kolibri/core/content/api_urls.py index edc2bc5ef5d..c02e653fc82 100644 --- a/kolibri/core/content/api_urls.py +++ b/kolibri/core/content/api_urls.py @@ -6,6 +6,7 @@ from .api import ContentNodeGranularViewset from .api import ContentNodeProgressViewset from .api import ContentNodeSearchViewset +from .api import ContentNodeTreeViewset from .api import ContentNodeViewset from .api import FileViewset from .api import RemoteChannelViewSet @@ -14,6 +15,9 @@ router.register("channel", ChannelMetadataViewSet, base_name="channel") router.register(r"contentnode", ContentNodeViewset, base_name="contentnode") +router.register( + r"contentnode_tree", ContentNodeTreeViewset, base_name="contentnode_tree" +) router.register( r"contentnode_search", ContentNodeSearchViewset, base_name="contentnode_search" ) diff --git a/kolibri/core/content/test/test_channel_upgrade.py b/kolibri/core/content/test/test_channel_upgrade.py index e814ba58b7e..834bb868c7a 100644 --- a/kolibri/core/content/test/test_channel_upgrade.py +++ b/kolibri/core/content/test/test_channel_upgrade.py @@ -52,8 +52,9 @@ class ChannelBuilder(object): "root_node", ) - def __init__(self, levels=3): + def __init__(self, levels=3, num_children=5): self.levels = levels + self.num_children = num_children self.modified = set() @@ -297,7 +298,7 @@ def data(self): def recurse_and_generate(self, parent_id, levels): children = [] - for i in range(0, 5): + for i in range(0, self.num_children): if levels == 0: node = self.generate_leaf(parent_id) else: diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py index 798dd0de4fd..0c6b601bec6 100644 --- a/kolibri/core/content/test/test_content_app.py +++ b/kolibri/core/content/test/test_content_app.py @@ -234,95 +234,92 @@ def test_related_filter(self): ) self.assertEqual(response.data[0]["title"], "c2") - def _assert_nodes(self, data, nodes): - def map_language(lang): - if lang: - return { - f: getattr(lang, f) - for f in [ - "id", - "lang_code", - "lang_subcode", - "lang_name", - "lang_direction", - ] - } - - for actual, expected in zip(data, nodes): - assessmentmetadata = ( - expected.assessmentmetadata.all() - .values( - "assessment_item_ids", - "number_of_assessments", - "mastery_model", - "randomize", - "is_manipulable", - "contentnode", - ) - .first() - ) - files = [] - for f in expected.files.all(): - "local_file__id", - "local_file__available", - "local_file__file_size", - "local_file__extension", - "lang_id", - file = {} - for field in [ + def map_language(self, lang): + if lang: + return { + f: getattr(lang, f) + for f in [ "id", - "priority", - "preset", - "supplementary", - "thumbnail", - ]: - file[field] = getattr(f, field) - file["checksum"] = f.local_file_id - for field in [ - "available", - "file_size", - "extension", - ]: - file[field] = getattr(f.local_file, field) - file["lang"] = map_language(f.lang) - file["storage_url"] = f.get_storage_url() - files.append(file) - self.assertEqual( - actual, - { - "id": expected.id, - "available": expected.available, - "author": expected.author, - 
"channel_id": expected.channel_id, - "coach_content": expected.coach_content, - "content_id": expected.content_id, - "description": expected.description, - "kind": expected.kind, - "lang": map_language(expected.lang), - "license_description": expected.license_description, - "license_name": expected.license_name, - "license_owner": expected.license_owner, - "num_coach_contents": expected.num_coach_contents, - "options": expected.options, - "parent": expected.parent_id, - "sort_order": expected.sort_order, - "title": expected.title, - "lft": expected.lft, - "rght": expected.rght, - "tree_id": expected.tree_id, - "ancestors": list( - expected.get_ancestors().values( - "id", "title", "lft", "rght", "tree_id" - ) - ), - "tags": list( - expected.tags.all().values_list("tag_name", flat=True) - ), - "assessmentmetadata": assessmentmetadata, - "is_leaf": expected.kind != "topic", - "files": files, - }, + "lang_code", + "lang_subcode", + "lang_name", + "lang_direction", + ] + } + + def _assert_node(self, actual, expected): + assessmentmetadata = ( + expected.assessmentmetadata.all() + .values( + "assessment_item_ids", + "number_of_assessments", + "mastery_model", + "randomize", + "is_manipulable", + "contentnode", ) + .first() + ) + files = [] + for f in expected.files.all(): + "local_file__id", + "local_file__available", + "local_file__file_size", + "local_file__extension", + "lang_id", + file = {} + for field in [ + "id", + "priority", + "preset", + "supplementary", + "thumbnail", + ]: + file[field] = getattr(f, field) + file["checksum"] = f.local_file_id + for field in [ + "available", + "file_size", + "extension", + ]: + file[field] = getattr(f.local_file, field) + file["lang"] = self.map_language(f.lang) + file["storage_url"] = f.get_storage_url() + files.append(file) + self.assertEqual( + actual, + { + "id": expected.id, + "available": expected.available, + "author": expected.author, + "channel_id": expected.channel_id, + "coach_content": expected.coach_content, + "content_id": expected.content_id, + "description": expected.description, + "kind": expected.kind, + "lang": self.map_language(expected.lang), + "license_description": expected.license_description, + "license_name": expected.license_name, + "license_owner": expected.license_owner, + "num_coach_contents": expected.num_coach_contents, + "options": expected.options, + "parent": expected.parent_id, + "sort_order": expected.sort_order, + "title": expected.title, + "lft": expected.lft, + "rght": expected.rght, + "tree_id": expected.tree_id, + "ancestors": list(expected.get_ancestors().values("id", "title")), + "tags": list(expected.tags.all().values_list("tag_name", flat=True)), + "assessmentmetadata": assessmentmetadata, + "is_leaf": expected.kind != "topic", + "files": files, + }, + ) + + def _assert_nodes(self, data, nodes): + for actual, expected in zip(data, nodes): + self._assert_node(actual, expected) def test_contentnode_list(self): root = content.ContentNode.objects.get(title="root") @@ -334,7 +331,7 @@ def test_contentnode_list(self): def test_contentnode_list_long(self): # This will make > 1000 nodes which should test our ancestor batching behaviour - builder = ChannelBuilder(levels=4) + builder = ChannelBuilder(num_children=10) builder.insert_into_default_db() content.ContentNode.objects.update(available=True) nodes = content.ContentNode.objects.filter(available=True) @@ -344,6 +341,48 @@ def test_contentnode_list_long(self): self.assertEqual(len(response.data), expected_output) self._assert_nodes(response.data, nodes) + def 
_recurse_and_assert(self, data, nodes, recursion_depth=0): + for actual, expected in zip(data, nodes): + children = actual.pop("children", None) + self._assert_node(actual, expected) + if children: + child_nodes = content.ContentNode.objects.filter( + available=True, parent=expected + ) + if children["more"] is None: + self.assertEqual(len(child_nodes), len(children["results"])) + else: + self.assertGreater(len(child_nodes), len(children["results"])) + self.assertEqual(children["more"]["id"], expected.id) + self.assertEqual( + children["more"]["params"]["lft__gt"], child_nodes[24].rght + ) + self.assertEqual( + children["more"]["params"]["depth"], 2 - recursion_depth + ) + self._recurse_and_assert( + children["results"], + child_nodes, + recursion_depth=recursion_depth + 1, + ) + + def test_contentnode_tree(self): + root = content.ContentNode.objects.get(title="root") + response = self.client.get( + reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}) + ) + self._recurse_and_assert([response.data], [root]) + + def test_contentnode_tree_long(self): + builder = ChannelBuilder(levels=2, num_children=30) + builder.insert_into_default_db() + content.ContentNode.objects.all().update(available=True) + root = content.ContentNode.objects.get(id=builder.root_node["id"]) + response = self.client.get( + reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}) + ) + self._recurse_and_assert([response.data], [root]) + @mock.patch("kolibri.core.content.api.get_channel_stats_from_studio") def test_contentnode_granular_network_import(self, stats_mock): c1 = content.ContentNode.objects.get(title="root") From f862df12dfabca8dd88c51a56a569d700ce97bc6 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sat, 29 May 2021 07:10:00 -0700 Subject: [PATCH 04/12] Add support for tree endpoint to contentnode resource. Simplify caching. 
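The frontend resource now keeps a simple in-memory cache keyed by node
id: fetchModel resolves from the cache when it can, and cacheData
recursively stores any nodes found in collection results and in nested
"children" payloads, so a single tree fetch warms the cache for later
model lookups. fetchTree(id, params) requests the new contentnode_tree
endpoint, and fetchMoreTree simply replays the {id, params} "more"
descriptor from a previous response.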
--- .../assets/src/api-resources/contentNode.js | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/kolibri/core/assets/src/api-resources/contentNode.js b/kolibri/core/assets/src/api-resources/contentNode.js index e8f5bdbca8a..f8eb0213260 100644 --- a/kolibri/core/assets/src/api-resources/contentNode.js +++ b/kolibri/core/assets/src/api-resources/contentNode.js @@ -1,5 +1,8 @@ +import isPlainObject from 'lodash/isPlainObject'; import { Resource } from 'kolibri.lib.apiResource'; import Store from 'kolibri.coreVue.vuex.store'; +import urls from 'kolibri.urls'; +import cloneDeep from '../cloneDeep'; export default new Resource({ name: 'contentnode', @@ -34,4 +37,51 @@ export default new Resource({ fetchNextSteps(getParams) { return this.fetchDetailCollection('next_steps', Store.getters.currentUserId, getParams); }, + cache: {}, + fetchModel({ id }) { + if (this.cache[id]) { + return Promise.resolve(this.cache[id]); + } + return this.client({ url: this.modelUrl(id) }).then(response => { + this.cacheData(response.data); + return response.data; + }); + }, + cacheData(data) { + if (Array.isArray(data)) { + for (let model of data) { + this.cacheData(model); + } + } else if (isPlainObject(data)) { + if (data[this.idKey]) { + this.cache[data[this.idKey]] = Object.assign( + this.cache[data[this.idKey]] || {}, + cloneDeep(data) + ); + if (data.children) { + this.cacheData(data.children); + } + } else if (data.results) { + for (let model of data.results) { + this.cacheData(model); + } + } + } + }, + fetchCollection(params) { + return this.client({ url: this.collectionUrl(), params }).then(response => { + this.cacheData(response.data); + return response.data; + }); + }, + fetchTree(id, params) { + const url = urls['kolibri:core:contentnode_tree_detail'](id); + return this.client({ url, params }).then(response => { + this.cacheData(response.data); + return response.data; + }); + }, + fetchMoreTree({ id, params }) { + return this.fetchTree(id, params); + }, }); From 9a0cc7079e62b4c1833b3fb7497bff093116aaa4 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Fri, 4 Jun 2021 14:06:04 -0700 Subject: [PATCH 05/12] Update ChannelBuilder caching to prevent weird cross-test issues. 
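The ChannelBuilder tree cache was previously keyed by levels alone, so
a builder constructed with the same number of levels but a different
num_children could load a stale tree cached by an earlier test. The
cache key now incorporates both dimensions.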
--- kolibri/core/content/test/test_channel_upgrade.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kolibri/core/content/test/test_channel_upgrade.py b/kolibri/core/content/test/test_channel_upgrade.py index 834bb868c7a..10f60f0ebc6 100644 --- a/kolibri/core/content/test/test_channel_upgrade.py +++ b/kolibri/core/content/test/test_channel_upgrade.py @@ -67,6 +67,10 @@ def __init__(self, levels=3, num_children=5): self.generate_nodes_from_root_node() + @property + def cache_key(self): + return "{}_{}".format(self.levels, self.num_children) + def generate_new_tree(self): self.channel = self.channel_data() self.files = {} @@ -83,7 +87,7 @@ def generate_new_tree(self): ) def load_data(self): - data = copy.deepcopy(self.__TREE_CACHE[self.levels]) + data = copy.deepcopy(self.__TREE_CACHE[self.cache_key]) for key in self.tree_keys: setattr(self, key, data[key]) @@ -94,7 +98,7 @@ def save_data(self): for key in self.tree_keys: data[key] = getattr(self, key) - self.__TREE_CACHE[self.levels] = copy.deepcopy(data) + self.__TREE_CACHE[self.cache_key] = copy.deepcopy(data) def generate_nodes_from_root_node(self): self._django_nodes = ContentNode.objects.build_tree_nodes(self.root_node) From a12b3da7645cf70c4be3c07b0cc3577f81977ab0 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Mon, 14 Jun 2021 17:35:04 -0700 Subject: [PATCH 06/12] Add rudimentary automatic serializer generation for read only values viewsets. Only used for DRF YASG documentation generation. --- kolibri/core/api.py | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/kolibri/core/api.py b/kolibri/core/api.py index 531474341a3..6a8514553e1 100644 --- a/kolibri/core/api.py +++ b/kolibri/core/api.py @@ -10,6 +10,7 @@ from rest_framework.mixins import DestroyModelMixin from rest_framework.mixins import UpdateModelMixin as BaseUpdateModelMixin from rest_framework.response import Response +from rest_framework.serializers import ModelSerializer from rest_framework.serializers import Serializer from rest_framework.serializers import UUIDField from rest_framework.serializers import ValidationError @@ -143,11 +144,53 @@ def __init__(self, *args, **kwargs): self._field_map = self.field_map.copy() return viewset + def generate_serializer(self): + queryset = getattr(self, "queryset", None) + if queryset is None: + try: + queryset = self.get_queryset() + except Exception: + pass + model = getattr(queryset, "model", None) + if model is None: + return Serializer + mapped_fields = {v: k for k, v in self.field_map.items() if isinstance(v, str)} + fields = [] + extra_kwargs = {} + for value in self.values: + try: + model._meta.get_field(value) + if value in mapped_fields: + extra_kwargs[mapped_fields[value]] = {"source": value} + value = mapped_fields[value] + fields.append(value) + except Exception: + pass + + meta = type( + "Meta", + (object,), + { + "fields": fields, + "read_only_fields": fields, + "model": model, + "extra_kwargs": extra_kwargs, + }, + ) + CustomSerializer = type( + "{}Serializer".format(self.__class__.__name__), + (ModelSerializer,), + {"Meta": meta}, + ) + + return CustomSerializer + def get_serializer_class(self): if self.serializer_class is not None: return self.serializer_class # Hack to prevent the renderer logic from breaking completely. 
- return Serializer + self.__class__.serializer_class = self.generate_serializer() + return self.__class__.serializer_class def _get_lookup_filter(self): lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field From 3199a591f22efebaeb103bf11bafb60e518dff51 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Mon, 14 Jun 2021 18:34:21 -0700 Subject: [PATCH 07/12] Add more documentation of returned content node metadata. --- .../assets/src/api-resources/contentNode.js | 90 +++++++++++++++++++ kolibri/core/content/api.py | 24 +++++ 2 files changed, 114 insertions(+) diff --git a/kolibri/core/assets/src/api-resources/contentNode.js b/kolibri/core/assets/src/api-resources/contentNode.js index f8eb0213260..a21c57bbc15 100644 --- a/kolibri/core/assets/src/api-resources/contentNode.js +++ b/kolibri/core/assets/src/api-resources/contentNode.js @@ -4,6 +4,81 @@ import Store from 'kolibri.coreVue.vuex.store'; import urls from 'kolibri.urls'; import cloneDeep from '../cloneDeep'; +/** + * Type definition for Language metadata + * @typedef {Object} Language + * @property {string} id - an IETF language tag + * @property {string} lang_code - the ISO 639‑1 language code + * @property {string} lang_subcode - the regional identifier + * @property {string} lang_name - the name of the language in that language + * @property {('ltr'|'rtl'|)} lang_direction - Direction of the language's script, + * top to bottom is not supported currently + */ + +/** + * Type definition for AssessmentMetadata + * @typedef {Object} AssessmentMetadata + * @property {string[]} assessment_item_ids - an array of ids for assessment items + * @property {number} number_of_assessments - the length of assessment_item_ids + * @property {Object} mastery_model - object describing the mastery criterion for finishing practice + * @property {boolean} randomize - whether to randomize the order of assessments + * @property {boolean} is_manipulable - Whether this assessment can be programmatically updated + */ + +/** + * Type definition for File + * @typedef {Object} File + * @property {string} id - id of the file object + * @property {string} checksum - md5 checksum of the file, used to generate the file name + * @property {boolean} available - whether the file is available on the server + * @property {number} file_size - file_size in bytes + * @property {string} extension - file extension, also used to generate the file name + * @property {string} preset - preset, the role that the file plays for this content node + * @property {Language|null} lang - The language of the File + * @property {boolean} supplementary - Whether this file is optional + * @property {boolean} thumbnail - Whether this file is a thumbnail + */ + +/** + * Type definition for ContentNode metadata + * @typedef {Object} ContentNode + * @property {string} id - unique id of the ContentNode + * @property {string} channel_id - unique channel_id of the channel that the ContentNode is in + * @property {string} content_id - identifier that is common across all instances of this resource + * @property {string} title - A title that summarizes this ContentNode for the user + * @property {string} description - detailed description of the ContentNode + * @property {string} author - author of the ContentNode + * @property {string} thumbnail_url - URL for the thumbnail for this ContentNode, + * this may be any valid URL format including base64 encoded or blob URL + * @property {boolean} available - Whether the ContentNode has all necessary files for rendering + * @property {boolean} 
coach_content - Whether the ContentNode is intended only for coach users + * @property {Language|null} lang - The primary language of the ContentNode + * @property {string} license_description - The description of the license, which may be localized + * @property {string} license_name - The human readable name of the license, localized + * @property {string} license_owner - The name of the person or organization that holds copyright + * @property {number} num_coach_contents - Number of coach contents that are descendants of this + * @property {string} parent - The unique id of the parent of this ContentNode + * @property {number} sort_order - The order of display for this node in its channel + * if depth recursion was not deep enough + * @property {string[]} tags - Tags that apply to this content + * @property {boolean} is_leaf - Whether is a leaf resource or not + * @property {AssessmentMetadata|null} assessmentmetadata - Additional metadata for assessments + * @property {File[]} files - array of file objects associated with this ContentNode + * @property {Object[]} ancestors - array of objects with 'id' and 'title' properties + * @property {Children} [children] - optional pagination object with children of this ContentNode + */ + +/** + * Type definition for children pagination object + * @typedef {Object} Children + * @property {Object} more - parameters for requesting more objects + * @property {string} more.id - the id of the parent of these child nodes + * @property {Object} more.params - the get parameters that should be used for requesting more nodes + * @property {number} more.params.depth - 1 or 2, how deep the nesting should be returned + * @property {number} more.params.lft__gt - integer value to return a lft value greater than + * @property {ContentNode[]} results - the array of ContentNodes for this page + */ + export default new Resource({ name: 'contentnode', useContentCacheKey: true, @@ -74,6 +149,13 @@ export default new Resource({ return response.data; }); }, + /** + * A method to request paginated tree data from the backend + * @param {string} id - the id of the parent node for this request + * @param {Object} params - the GET parameters to return more results, + * may be both pagination and non-pagination specific parameters + * @return {Promise} Promise that resolves with the model data + */ fetchTree(id, params) { const url = urls['kolibri:core:contentnode_tree_detail'](id); return this.client({ url, params }).then(response => { @@ -81,6 +163,14 @@ export default new Resource({ return response.data; }); }, + /** + * A method to simplify requesting more items from a previously paginated response from fetchTree + * @param {Object} more - the 'more' property of the 'children' pagination object from a response. 
+ * @param {string} more.id - the id of the parent node for this request + * @param {Object} more.params - the GET parameters to return more results, + * may be both pagination and non-pagination specific parameters + * @return {Promise} Promise that resolves with the model data + */ fetchMoreTree({ id, params }) { return this.fetchTree(id, params); }, diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index 3dba0b5d46d..cd735ac281b 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -790,6 +790,26 @@ def retrieve(self, request, **kwargs): """ A nested, paginated representation of the children and grandchildren of a specific node + GET parameters on request can be: + depth - a value of either 1 or 2 indicating the depth to recurse the tree, either 1 or 2 levels + if this parameter is missing it will default to 2. + lft__gt - a value to return child nodes with a lft value greater than this, if missing defaults to None + + The pagination object returned for "children" will have this form: + { + "results": [...], + "more": { + "id": , + "params": { + lft__gt: , + depth: {1,2}, + }, + } + } + + The "more" property describes the "id" required to do URL reversal on this endpoint, and the params that should + be passed as query parameters to get the next set of results for pagination. + :param request: request object :param pk: id parent node :return: an object representing the parent with a pagination object as "children" @@ -844,12 +864,16 @@ def retrieve(self, request, **kwargs): # The parent of this descendant is the parent node # for this query desc_parent = parent + # When we request more results for pagination, we want to return + # both nodes at this level, and the nodes at the lower level more_depth = 2 elif desc["parent"] in children_by_id: # Otherwise, check to see if our descendant's parent is in the # children_by_id map - if it failed the first condition, # it really should not fail this desc_parent = children_by_id[desc["parent"]] + # When we request more results for pagination, we only want to return + # nodes at this level, and not any of its children more_depth = 1 else: # If we get to here, we have a node that is not in the tree subsection we are From 7b82e9db5d857b0e8df6386172530ee623e79a30 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Tue, 15 Jun 2021 08:10:54 -0700 Subject: [PATCH 08/12] Always return a clone of the cache to prevent the cache being modified. --- kolibri/core/assets/src/api-resources/contentNode.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kolibri/core/assets/src/api-resources/contentNode.js b/kolibri/core/assets/src/api-resources/contentNode.js index a21c57bbc15..a98ce7c1d54 100644 --- a/kolibri/core/assets/src/api-resources/contentNode.js +++ b/kolibri/core/assets/src/api-resources/contentNode.js @@ -115,7 +115,7 @@ export default new Resource({ cache: {}, fetchModel({ id }) { if (this.cache[id]) { - return Promise.resolve(this.cache[id]); + return Promise.resolve(cloneDeep(this.cache[id])); } return this.client({ url: this.modelUrl(id) }).then(response => { this.cacheData(response.data); From 73fe54d184c4848b4442199be5091a67de537f1a Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Tue, 15 Jun 2021 08:38:56 -0700 Subject: [PATCH 09/12] Add tests for query parameters for contentnode tree viewset. 
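With the fixed page size of 25, a parent with 30 available children
should return 25 results plus a "more" descriptor whose lft__gt equals
the rght value of the 25th child; replaying those parameters returns
the remaining 5 children with "more" set to null. The descriptor's
depth is 2 when paginating the top-level children and 1 when paginating
grandchildren.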
--- .../core/content/test/test_channel_upgrade.py | 15 +++++-- kolibri/core/content/test/test_content_app.py | 45 +++++++++++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/kolibri/core/content/test/test_channel_upgrade.py b/kolibri/core/content/test/test_channel_upgrade.py index 10f60f0ebc6..87256c614f4 100644 --- a/kolibri/core/content/test/test_channel_upgrade.py +++ b/kolibri/core/content/test/test_channel_upgrade.py @@ -61,7 +61,6 @@ def __init__(self, levels=3, num_children=5): try: self.load_data() except KeyError: - print("key error {}".format(self.levels)) self.generate_new_tree() self.save_data() @@ -87,10 +86,18 @@ def generate_new_tree(self): ) def load_data(self): - data = copy.deepcopy(self.__TREE_CACHE[self.cache_key]) + try: + data = copy.deepcopy(self.__TREE_CACHE[self.cache_key]) - for key in self.tree_keys: - setattr(self, key, data[key]) + for key in self.tree_keys: + setattr(self, key, data[key]) + except KeyError: + print( + "No tree cache found for {} levels and {} children per level".format( + self.levels, self.num_children + ) + ) + raise def save_data(self): data = {} diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py index 0c6b601bec6..d79088b69a0 100644 --- a/kolibri/core/content/test/test_content_app.py +++ b/kolibri/core/content/test/test_content_app.py @@ -383,6 +383,51 @@ def test_contentnode_tree_long(self): ) self._recurse_and_assert([response.data], [root]) + def test_contentnode_tree_depth_1(self): + root = content.ContentNode.objects.get(title="root") + response = self.client.get( + reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}), + data={"depth": 1}, + ) + self._recurse_and_assert([response.data], [root]) + + def test_contentnode_tree_lft__gt(self): + builder = ChannelBuilder(levels=2, num_children=30) + builder.insert_into_default_db() + content.ContentNode.objects.all().update(available=True) + root = content.ContentNode.objects.get(id=builder.root_node["id"]) + lft__gt = content.ContentNode.objects.filter(parent=root)[24].rght + response = self.client.get( + reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}), + data={"lft__gt": lft__gt}, + ) + self.assertEqual(len(response.data["children"]["results"]), 5) + self.assertIsNone(response.data["children"]["more"]) + first_node = content.ContentNode.objects.filter(parent=root)[25] + self._recurse_and_assert( + [response.data["children"]["results"][0]], [first_node], recursion_depth=1 + ) + + def test_contentnode_tree_more(self): + builder = ChannelBuilder(levels=2, num_children=30) + builder.insert_into_default_db() + content.ContentNode.objects.all().update(available=True) + root = content.ContentNode.objects.get(id=builder.root_node["id"]) + response = self.client.get( + reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}) + ) + first_child = response.data["children"]["results"][0] + self.assertEqual(first_child["children"]["more"]["params"]["depth"], 1) + nested_page_response = self.client.get( + reverse( + "kolibri:core:contentnode_tree-detail", + kwargs={"pk": first_child["children"]["more"]["id"]}, + ), + data=first_child["children"]["more"]["params"], + ) + self.assertEqual(len(nested_page_response.data["children"]["results"]), 5) + self.assertIsNone(nested_page_response.data["children"]["more"]) + @mock.patch("kolibri.core.content.api.get_channel_stats_from_studio") def test_contentnode_granular_network_import(self, stats_mock): c1 = 
content.ContentNode.objects.get(title="root") From 5234a960b72a9289840b7b679dff8d7e0af8c093 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Tue, 15 Jun 2021 14:46:37 -0700 Subject: [PATCH 10/12] Properly limit ancestor batch size. --- kolibri/core/content/api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index cd735ac281b..20af6070b08 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -352,7 +352,9 @@ def consolidate(self, items, queryset): # We need to batch our queries for ancestors as the size of the expression tree # depends on the number of nodes that we are querying for. - ANCESTOR_BATCH_SIZE = 1000 + # On Windows, the SQL parameter limit is 999, and an ancestors call can produce + # 3 parameters per node in the queryset, so this should max out the parameters at 750. + ANCESTOR_BATCH_SIZE = 250 if len(items) > ANCESTOR_BATCH_SIZE: From 5b2de43bc9505bcbcf26980482a977dd55049e4c Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Tue, 15 Jun 2021 15:10:25 -0700 Subject: [PATCH 11/12] Order by tag name to ensure consistent ordering of tags in API results. --- kolibri/core/content/api.py | 10 +++++++--- kolibri/core/content/test/test_content_app.py | 6 +++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index 20af6070b08..526595dc35a 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -314,9 +314,13 @@ def get_related_data_maps(self, items, queryset): tags_map = {} - for t in models.ContentTag.objects.filter(tagged_content__in=queryset).values( - "tag_name", - "tagged_content", + for t in ( + models.ContentTag.objects.filter(tagged_content__in=queryset) + .values( + "tag_name", + "tagged_content", + ) + .order_by("tag_name") ): if t["tagged_content"] not in tags_map: tags_map[t["tagged_content"]] = [t["tag_name"]] diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py index d79088b69a0..b452af17a98 100644 --- a/kolibri/core/content/test/test_content_app.py +++ b/kolibri/core/content/test/test_content_app.py @@ -310,7 +310,11 @@ def _assert_node(self, actual, expected): "rght": expected.rght, "tree_id": expected.tree_id, "ancestors": list(expected.get_ancestors().values("id", "title")), - "tags": list(expected.tags.all().values_list("tag_name", flat=True)), + "tags": list( + expected.tags.all() + .order_by("tag_name") + .values_list("tag_name", flat=True) + ), "assessmentmetadata": assessmentmetadata, "is_leaf": expected.kind != "topic", "files": files, From 8225c03090dc42a249677ed7ce846c6eb49722f1 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Thu, 17 Jun 2021 13:36:06 -0700 Subject: [PATCH 12/12] Skip large data insertions on postgres. 
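The ancestor batching behaviour under test exists to stay within the
SQL parameter limits of SQLite (and Windows builds in particular),
which postgres is far less vulnerable to, and inserting the thousands
of nodes these tests need is noticeably slower on postgres, so the
large-data tests are skipped there.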
---
 kolibri/core/content/test/test_content_app.py | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py
index b452af17a98..85b263f3635 100644
--- a/kolibri/core/content/test/test_content_app.py
+++ b/kolibri/core/content/test/test_content_app.py
@@ -2,10 +2,12 @@
 To run this test, type this in command line
 """
 import datetime
+import unittest
 import uuid
 
 import mock
 import requests
+from django.conf import settings
 from django.core.cache import cache
 from django.core.urlresolvers import reverse
 from django.test import TestCase
@@ -333,6 +335,11 @@ def test_contentnode_list(self):
         self.assertEqual(len(response.data), expected_output)
         self._assert_nodes(response.data, nodes)
 
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipped on postgres, which is less vulnerable to large queries and where large insertions are less performant",
+    )
     def test_contentnode_list_long(self):
         # This will make > 1000 nodes which should test our ancestor batching behaviour
         builder = ChannelBuilder(num_children=10)
@@ -377,6 +384,11 @@ def test_contentnode_tree(self):
         )
         self._recurse_and_assert([response.data], [root])
 
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipped on postgres, which is less vulnerable to large queries and where large insertions are less performant",
+    )
     def test_contentnode_tree_long(self):
         builder = ChannelBuilder(levels=2, num_children=30)
         builder.insert_into_default_db()
@@ -395,6 +407,11 @@ def test_contentnode_tree_depth_1(self):
         )
         self._recurse_and_assert([response.data], [root])
 
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipped on postgres, which is less vulnerable to large queries and where large insertions are less performant",
+    )
     def test_contentnode_tree_lft__gt(self):
         builder = ChannelBuilder(levels=2, num_children=30)
         builder.insert_into_default_db()
@@ -412,6 +429,11 @@ def test_contentnode_tree_lft__gt(self):
             [response.data["children"]["results"][0]], [first_node], recursion_depth=1
         )
 
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipped on postgres, which is less vulnerable to large queries and where large insertions are less performant",
+    )
     def test_contentnode_tree_more(self):
         builder = ChannelBuilder(levels=2, num_children=30)
         builder.insert_into_default_db()