From d0637883e4dee46a7303a8a0bc7c1ff52f9d2023 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sat, 29 May 2021 06:17:44 -0700 Subject: [PATCH 01/12] Add extensive testing to content node viewset. Slightly refactor to handle edge cases and reduce JOINs. --- kolibri/core/content/api.py | 205 ++++++++++-------- kolibri/core/content/test/test_content_app.py | 109 +++++++++- 2 files changed, 225 insertions(+), 89 deletions(-) diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index a84904f68de..d6d41d18176 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -210,25 +210,6 @@ class OptionalPageNumberPagination(ValuesViewsetPageNumberPagination): page_size_query_param = "page_size" -def map_lang(obj): - keys = ["id", "lang_code", "lang_subcode", "lang_name", "lang_direction"] - - lower_case = set(["id", "lang_code", "lang_subcode"]) - - output = {} - - for key in keys: - output[key] = obj.pop("lang__" + key) - if key in lower_case and output[key]: - output[key] = output[key].lower() - - if not any(output.values()): - # All keys are null so return None - return None - - return output - - def map_file(file): file["checksum"] = file.pop("local_file__id") file["available"] = file.pop("local_file__available") @@ -241,15 +222,17 @@ def map_file(file): "extension": file["extension"], } ) - file["lang"] = map_lang(file) return file -@method_decorator(cache_forever, name="dispatch") -class ContentNodeViewset(ReadOnlyValuesViewset): +class BaseContentNodeMixin(object): + """ + A base mixin for viewsets that need to return the same format of data + serialization for ContentNodes. + """ + filter_backends = (DjangoFilterBackend,) filter_class = ContentNodeFilter - pagination_class = OptionalPageNumberPagination values = ( "id", @@ -260,12 +243,7 @@ class ContentNodeViewset(ReadOnlyValuesViewset): "content_id", "description", "kind", - # Language keys - "lang__id", - "lang__lang_code", - "lang__lang_subcode", - "lang__lang_name", - "lang__lang_direction", + "lang_id", "license_description", "license_name", "license_owner", @@ -279,29 +257,28 @@ class ContentNodeViewset(ReadOnlyValuesViewset): "tree_id", ) - field_map = {"lang": map_lang} - - def consolidate(self, items, queryset): - output = [] + def get_queryset(self): + return models.ContentNode.objects.filter(available=True) - if items: - assessmentmetadata = { - a["contentnode"]: a - for a in models.AssessmentMetaData.objects.filter( - contentnode__in=queryset - ).values( - "assessment_item_ids", - "number_of_assessments", - "mastery_model", - "randomize", - "is_manipulable", - "contentnode", - ) - } + def get_related_data_maps(self, items, queryset): + assessmentmetadata_map = { + a["contentnode"]: a + for a in models.AssessmentMetaData.objects.filter( + contentnode__in=queryset + ).values( + "assessment_item_ids", + "number_of_assessments", + "mastery_model", + "randomize", + "is_manipulable", + "contentnode", + ) + } - files = {} + files_map = {} - for f in models.File.objects.filter(contentnode__in=queryset).values( + files = list( + models.File.objects.filter(contentnode__in=queryset).values( "id", "contentnode", "local_file__id", @@ -310,44 +287,102 @@ def consolidate(self, items, queryset): "local_file__file_size", "local_file__extension", "preset", - "lang__id", - "lang__lang_code", - "lang__lang_subcode", - "lang__lang_name", - "lang__lang_direction", + "lang_id", "supplementary", "thumbnail", - ): - if f["contentnode"] not in files: - files[f["contentnode"]] = [] - 
files[f["contentnode"]].append(map_file(f)) + ) + ) - ancestors = queryset.get_ancestors().values( - "id", "title", "lft", "rght", "tree_id" + lang_ids = set([obj["lang_id"] for obj in items + files]) + + languages_map = { + lang["id"]: lang + for lang in models.Language.objects.filter(id__in=lang_ids).values( + "id", "lang_code", "lang_subcode", "lang_name", "lang_direction" ) + } - tags = {} + for f in files: + contentnode_id = f.pop("contentnode") + if contentnode_id not in files_map: + files_map[contentnode_id] = [] + lang_id = f.pop("lang_id") + f["lang"] = languages_map.get(lang_id) + files_map[contentnode_id].append(map_file(f)) + + tags_map = {} + + for t in models.ContentTag.objects.filter(tagged_content__in=queryset).values( + "tag_name", + "tagged_content", + ): + if t["tagged_content"] not in tags_map: + tags_map[t["tagged_content"]] = [t["tag_name"]] + else: + tags_map[t["tagged_content"]].append(t["tag_name"]) - for t in models.ContentTag.objects.filter( - tagged_content__in=queryset - ).values( - "tag_name", - "tagged_content", - ): - if t["tagged_content"] not in tags: - tags[t["tagged_content"]] = [t["tag_name"]] - else: - tags[t["tagged_content"]].append(t["tag_name"]) - - for item in items: - item["assessmentmetadata"] = assessmentmetadata.get(item["id"]) - item["tags"] = tags.get(item["id"], []) - item["files"] = files.get(item["id"], []) - - lft = item.pop("lft") - rght = item.pop("rght") - tree_id = item.pop("tree_id") - item["ancestors"] = list( + return assessmentmetadata_map, files_map, languages_map, tags_map + + def process_items(self, items, queryset, ancestor_lookup_method=None): + output = [] + assessmentmetadata, files_map, languages_map, tags = self.get_related_data_maps( + items, queryset + ) + for item in items: + item["assessmentmetadata"] = assessmentmetadata.get(item["id"]) + item["tags"] = tags.get(item["id"], []) + item["files"] = files_map.get(item["id"], []) + lang_id = item.pop("lang_id") + item["lang"] = languages_map.get(lang_id) + if ancestor_lookup_method: + item["ancestors"] = ancestor_lookup_method(item) + item["is_leaf"] = item.get("kind") != content_kinds.TOPIC + output.append(item) + return output + + +@method_decorator(cache_forever, name="dispatch") +class ContentNodeViewset(BaseContentNodeMixin, ReadOnlyValuesViewset): + pagination_class = OptionalPageNumberPagination + + def consolidate(self, items, queryset): + if items: + + # We need to batch our queries for ancestors as the size of the expression tree + # depends on the number of nodes that we are querying for. 
+ ANCESTOR_BATCH_SIZE = 1000 + + if len(items) > ANCESTOR_BATCH_SIZE: + + ancestors_map = {} + + for i in range(0, len(items), ANCESTOR_BATCH_SIZE): + + for anc in ( + models.ContentNode.objects.filter( + id__in=[ + item["id"] + for item in items[i : i + ANCESTOR_BATCH_SIZE] + ] + ) + .get_ancestors() + .values("id", "title", "lft", "rght", "tree_id") + ): + ancestors_map[anc["id"]] = anc + + ancestors = sorted(ancestors_map.values(), key=lambda x: x["lft"]) + else: + ancestors = list( + queryset.get_ancestors() + .values("id", "title", "lft", "rght", "tree_id") + .order_by("lft") + ) + + def ancestor_lookup(item): + lft = item.get("lft") + rght = item.get("rght") + tree_id = item.get("tree_id") + return list( filter( lambda x: x["lft"] < lft and x["rght"] > rght @@ -355,12 +390,10 @@ def consolidate(self, items, queryset): ancestors, ) ) - item["is_leaf"] = item.get("kind") != content_kinds.TOPIC - output.append(item) - return output - def get_queryset(self): - return models.ContentNode.objects.filter(available=True) + return self.process_items(items, queryset, ancestor_lookup) + + return [] @list_route(methods=["get"]) def descendants(self, request): diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py index dd58962010d..798dd0de4fd 100644 --- a/kolibri/core/content/test/test_content_app.py +++ b/kolibri/core/content/test/test_content_app.py @@ -18,6 +18,7 @@ from kolibri.core.auth.models import FacilityUser from kolibri.core.auth.test.helpers import provision_device from kolibri.core.content import models as content +from kolibri.core.content.test.test_channel_upgrade import ChannelBuilder from kolibri.core.device.models import DevicePermissions from kolibri.core.device.models import DeviceSettings from kolibri.core.logger.models import ContentSessionLog @@ -233,13 +234,115 @@ def test_related_filter(self): ) self.assertEqual(response.data[0]["title"], "c2") + def _assert_nodes(self, data, nodes): + def map_language(lang): + if lang: + return { + f: getattr(lang, f) + for f in [ + "id", + "lang_code", + "lang_subcode", + "lang_name", + "lang_direction", + ] + } + + for actual, expected in zip(data, nodes): + assessmentmetadata = ( + expected.assessmentmetadata.all() + .values( + "assessment_item_ids", + "number_of_assessments", + "mastery_model", + "randomize", + "is_manipulable", + "contentnode", + ) + .first() + ) + files = [] + for f in expected.files.all(): + "local_file__id", + "local_file__available", + "local_file__file_size", + "local_file__extension", + "lang_id", + file = {} + for field in [ + "id", + "priority", + "preset", + "supplementary", + "thumbnail", + ]: + file[field] = getattr(f, field) + file["checksum"] = f.local_file_id + for field in [ + "available", + "file_size", + "extension", + ]: + file[field] = getattr(f.local_file, field) + file["lang"] = map_language(f.lang) + file["storage_url"] = f.get_storage_url() + files.append(file) + self.assertEqual( + actual, + { + "id": expected.id, + "available": expected.available, + "author": expected.author, + "channel_id": expected.channel_id, + "coach_content": expected.coach_content, + "content_id": expected.content_id, + "description": expected.description, + "kind": expected.kind, + "lang": map_language(expected.lang), + "license_description": expected.license_description, + "license_name": expected.license_name, + "license_owner": expected.license_owner, + "num_coach_contents": expected.num_coach_contents, + "options": expected.options, + "parent": 
expected.parent_id, + "sort_order": expected.sort_order, + "title": expected.title, + "lft": expected.lft, + "rght": expected.rght, + "tree_id": expected.tree_id, + "ancestors": list( + expected.get_ancestors().values( + "id", "title", "lft", "rght", "tree_id" + ) + ), + "tags": list( + expected.tags.all().values_list("tag_name", flat=True) + ), + "assessmentmetadata": assessmentmetadata, + "is_leaf": expected.kind != "topic", + "files": files, + }, + ) + def test_contentnode_list(self): root = content.ContentNode.objects.get(title="root") - expected_output = ( - root.get_descendants(include_self=True).filter(available=True).count() - ) + nodes = root.get_descendants(include_self=True).filter(available=True) + expected_output = len(nodes) + response = self.client.get(reverse("kolibri:core:contentnode-list")) + self.assertEqual(len(response.data), expected_output) + self._assert_nodes(response.data, nodes) + + def test_contentnode_list_long(self): + # This will make > 1000 nodes which should test our ancestor batching behaviour + builder = ChannelBuilder(levels=4) + builder.insert_into_default_db() + content.ContentNode.objects.update(available=True) + nodes = content.ContentNode.objects.filter(available=True) + expected_output = len(nodes) + self.assertGreater(expected_output, 1000) response = self.client.get(reverse("kolibri:core:contentnode-list")) self.assertEqual(len(response.data), expected_output) + self._assert_nodes(response.data, nodes) @mock.patch("kolibri.core.content.api.get_channel_stats_from_studio") def test_contentnode_granular_network_import(self, stats_mock): From 4e91168fa8e36ec4e50c6b0a0fb43b731b8808aa Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sat, 29 May 2021 06:18:42 -0700 Subject: [PATCH 02/12] Factor Retrieve and List out into mixins. --- kolibri/core/api.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/kolibri/core/api.py b/kolibri/core/api.py index 04a9f6d2533..531474341a3 100644 --- a/kolibri/core/api.py +++ b/kolibri/core/api.py @@ -117,7 +117,7 @@ def remove_invalid_fields(self, queryset, fields, view, request): return ordering -class ReadOnlyValuesViewset(viewsets.ReadOnlyModelViewSet): +class BaseValuesViewset(viewsets.GenericViewSet): """ A viewset that uses a values call to get all model/queryset data in a single database query, rather than delegating serialization to a @@ -125,7 +125,7 @@ class ReadOnlyValuesViewset(viewsets.ReadOnlyModelViewSet): """ # A tuple of values to get from the queryset - values = None + # values = None # A map of target_key, source_key where target_key is the final target_key that will be set # and source_key is the key on the object retrieved from the values call. 
# Alternatively, the source_key can be a callable that will be passed the object and return @@ -134,8 +134,8 @@ class ReadOnlyValuesViewset(viewsets.ReadOnlyModelViewSet): field_map = {} def __init__(self, *args, **kwargs): - viewset = super(ReadOnlyValuesViewset, self).__init__(*args, **kwargs) - if not isinstance(self.values, tuple): + viewset = super(BaseValuesViewset, self).__init__(*args, **kwargs) + if not hasattr(self, "values") or not isinstance(self.values, tuple): raise TypeError("values must be defined as a tuple") self._values = tuple(self.values) if not isinstance(self.field_map, dict): @@ -205,6 +205,18 @@ def serialize(self, queryset): list(map(self._map_fields, values_queryset or [])), queryset ) + def serialize_object(self, **filter_kwargs): + queryset = self.get_queryset() + try: + filter_kwargs = filter_kwargs or self._get_lookup_filter() + return self.serialize(queryset.filter(**filter_kwargs))[0] + except IndexError: + raise Http404( + "No %s matches the given query." % queryset.model._meta.object_name + ) + + +class ListModelMixin(object): def list(self, request, *args, **kwargs): queryset = self.filter_queryset(self.get_queryset()) @@ -218,20 +230,16 @@ def list(self, request, *args, **kwargs): return Response(self.serialize(queryset)) - def serialize_object(self, **filter_kwargs): - queryset = self.get_queryset() - try: - filter_kwargs = filter_kwargs or self._get_lookup_filter() - return self.serialize(queryset.filter(**filter_kwargs))[0] - except IndexError: - raise Http404( - "No %s matches the given query." % queryset.model._meta.object_name - ) +class RetrieveModelMixin(object): def retrieve(self, request, *args, **kwargs): return Response(self.serialize_object()) +class ReadOnlyValuesViewset(BaseValuesViewset, RetrieveModelMixin, ListModelMixin): + pass + + class CreateModelMixin(BaseCreateModelMixin): def create(self, request, *args, **kwargs): serializer = self.get_serializer(data=request.data) From 2ed1c7f8ea9f63c6b6a21d282cac1dd42a49c050 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sat, 29 May 2021 07:09:12 -0700 Subject: [PATCH 03/12] Add contentnode_tree endpoint. Add tests. 
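The new endpoint returns a parent node with a nested, paginated
"children" object. Each page holds at most 25 children; when a page
fills up, "children.more" carries the node id and the query parameters
(depth, lft__gt) needed to request the next page of siblings.

As a rough consumption sketch (hedged: the base URL below assumes a
local dev server and the default API prefix, which may differ by
deployment, and fetch_all_children is an illustrative helper, not part
of this change):

    import requests

    BASE_URL = "http://localhost:8080/api/content/contentnode_tree/{}/"

    def fetch_tree(node_id, params=None):
        # GET the serialized node with up to 25 children
        # (and up to 25 grandchildren per child when depth=2).
        return requests.get(BASE_URL.format(node_id), params=params).json()

    def fetch_all_children(node_id):
        # Collect every immediate child by replaying "more" descriptors.
        node = fetch_tree(node_id)
        children = node.get("children") or {"results": [], "more": None}
        results = list(children["results"])
        while children["more"]:
            more = children["more"]
            node = fetch_tree(more["id"], params=more["params"])
            children = node.get("children") or {"results": [], "more": None}
            results.extend(children["results"])
        return results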
--- kolibri/core/content/api.py | 176 +++++++++++++- kolibri/core/content/api_urls.py | 4 + .../core/content/test/test_channel_upgrade.py | 5 +- kolibri/core/content/test/test_content_app.py | 215 +++++++++++------- 4 files changed, 305 insertions(+), 95 deletions(-) diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index d6d41d18176..3dba0b5d46d 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -5,6 +5,7 @@ import requests from django.core.cache import cache +from django.core.exceptions import ValidationError from django.db.models import Exists from django.db.models import OuterRef from django.db.models import Q @@ -31,6 +32,7 @@ from rest_framework.generics import get_object_or_404 from rest_framework.response import Response +from kolibri.core.api import BaseValuesViewset from kolibri.core.api import ReadOnlyValuesViewset from kolibri.core.auth.middleware import session_exempt from kolibri.core.content import models @@ -383,11 +385,14 @@ def ancestor_lookup(item): rght = item.get("rght") tree_id = item.get("tree_id") return list( - filter( - lambda x: x["lft"] < lft - and x["rght"] > rght - and x["tree_id"] == tree_id, - ancestors, + map( + lambda x: {"id": x["id"], "title": x["title"]}, + filter( + lambda x: x["lft"] < lft + and x["rght"] > rght + and x["tree_id"] == tree_id, + ancestors, + ), ) ) @@ -719,6 +724,167 @@ def resume(self, request, **kwargs): return Response(self.serialize(queryset)) +@method_decorator(cache_forever, name="dispatch") +class ContentNodeTreeViewset(BaseContentNodeMixin, BaseValuesViewset): + # We fix the page size at 25 for a couple of reasons: + # 1. At this size the query appears to be relatively performant, and will deliver most of the tree + # data to the frontend in a single query. + # 2. In the case where the tree topology means that this will not produce the full query, the limit of + # 25 immediate children and 25 grand children means that we are at most using 1 + 25 + 25 * 25 = 651 + # SQL parameters in the query to get the nodes for serialization - this means that we should not ever + # run into an issue where we hit a SQL parameters limit in the queries in here. + # If we find that this page size is too high, we should lower it, but for the reasons noted above, we + # should not raise it. 
+ page_size = 25 + + def consolidate(self, items, queryset): + if items: + return self.process_items(items, queryset) + return [] + + def validate_and_return_params(self, request): + depth = request.query_params.get("depth", 2) + lft__gt = request.query_params.get("lft__gt") + + try: + depth = int(depth) + if 1 > depth or depth > 2: + raise ValueError + except ValueError: + raise ValidationError("Depth query parameter must have the value 1 or 2") + + if lft__gt is not None: + try: + lft__gt = int(lft__gt) + if 1 > lft__gt: + raise ValueError + except ValueError: + raise ValidationError( + "lft__gt query parameter must be a positive integer if specified" + ) + return depth, lft__gt + + def get_grandchild_ids(self, child_ids, depth): + if depth == 2: + # Use this to keep track of how many grand children we have accumulated per child of the parent node + gc_by_parent = {} + # Iterate through the grand children of the parent node in lft order so we follow the tree traversal order + for gc in ( + self.filter_queryset(self.get_queryset()) + .filter(parent_id__in=child_ids) + .values("id", "parent_id") + .order_by("lft") + ): + # If we have not already added a list of nodes to the gc_by_parent map, initialize it here + if gc["parent_id"] not in gc_by_parent: + gc_by_parent[gc["parent_id"]] = [] + # If the number of grand children for a specific child node is less than the page size + # then we keep on adding them to both lists + # If not, we just skip this node, as we have already hit the page limit for the node that is + # its parent. + if len(gc_by_parent[gc["parent_id"]]) < self.page_size: + gc_by_parent[gc["parent_id"]].append(gc["id"]) + yield gc["id"] + + def retrieve(self, request, **kwargs): + """ + A nested, paginated representation of the children and grandchildren of a specific node + + :param request: request object + :param pk: id parent node + :return: an object representing the parent with a pagination object as "children" + """ + + # Get the model for the parent node here - we do this so that we trigger a 404 immediately if the node + # does not exist (or exists but is not available), and so that we have the Django model object later to + # use for the `get_ancestors` MPTT method. + parent_model = self.get_object() + + depth, lft__gt = self.validate_and_return_params(request) + + # Get a list of child_ids of the parent node up to the pagination limit + child_qs = self.get_queryset().filter(parent=parent_model) + if lft__gt is not None: + child_qs = child_qs.filter(lft__gt=lft__gt) + child_ids = child_qs.values_list("id", flat=True).order_by("lft")[ + 0 : self.page_size + ] + + # Get a flat list of ids for grandchildren we will be returning + gc_ids = self.get_grandchild_ids(child_ids, depth) + + # We explicitly order by lft here, so that the nodes are in tree traversal order, so we can iterate over them and build + # out our nested representation, being sure that any ancestors have already been processed. + nodes = self.serialize( + self.filter_queryset(self.get_queryset()) + .filter(Q(id=parent_model.id) | Q(id__in=child_ids) | Q(id__in=gc_ids)) + .order_by("lft") + ) + + # The serialized parent representation is the first node in the lft order + parent = nodes[0] + + # Do a query to get the parent's ancestors - for all other nodes, we can manually build + # the ancestors from the tree topology that we already know! 
+        parent["ancestors"] = list(parent_model.get_ancestors().values("id", "title"))
+
+        # Use this to keep track of direct children of the parent node -
+        # this will allow us to do lookups for the grandchildren, in order
+        # to insert them into the "children" property
+        children_by_id = {}
+
+        # Iterate through all the descendants that we have serialized
+        for desc in nodes[1:]:
+            # First check to see whether it is a direct child of the
+            # parent node that we initially queried
+            if desc["parent"] == parent_model.id:
+                # If so, add it to the children_by_id map so that
+                # grandchildren descendants can reference it later
+                children_by_id[desc["id"]] = desc
+                # The parent of this descendant is the parent node
+                # for this query
+                desc_parent = parent
+                more_depth = 2
+            elif desc["parent"] in children_by_id:
+                # Otherwise, check to see if our descendant's parent is in the
+                # children_by_id map - if it failed the first condition,
+                # it really should not fail this
+                desc_parent = children_by_id[desc["parent"]]
+                more_depth = 1
+            else:
+                # If we get to here, we have a node that is not in the tree subsection we are
+                # trying to return, so we just ignore it. This shouldn't happen.
+                continue
+            if "children" not in desc_parent:
+                # If the parent of the descendant does not already have its `children` property
+                # initialized, do so here.
+                desc_parent["children"] = {"results": [], "more": None}
+            # The ancestors field for the descendant will be its parent's ancestors, plus its parent!
+            desc["ancestors"] = desc_parent["ancestors"] + [
+                {"id": desc_parent["id"], "title": desc_parent["title"]}
+            ]
+            # Add this descendant to the results for the children pagination object
+            desc_parent["children"]["results"].append(desc)
+            # Only bother updating the URL for more if we have hit the page size limit -
+            # otherwise it will just continue to be None
+            if len(desc_parent["children"]["results"]) == self.page_size:
+                # Any subsequent queries to get siblings of this node can restrict themselves
+                # to looking for nodes with lft greater than the rght value of this descendant
+                lft__gt = desc["rght"]
+                # If the rght value of this descendant is exactly 1 less than the rght value of
+                # its parent, then there are no more children that can be queried.
+ # So only in this instance do we update the more URL + if desc["rght"] + 1 < desc_parent["rght"]: + params = request.query_params.copy() + params["lft__gt"] = lft__gt + params["depth"] = more_depth + desc_parent["children"]["more"] = { + "id": desc_parent["id"], + "params": params, + } + return Response(parent) + + # return the result of and-ing a list of queries def intersection(queries): if queries: diff --git a/kolibri/core/content/api_urls.py b/kolibri/core/content/api_urls.py index edc2bc5ef5d..c02e653fc82 100644 --- a/kolibri/core/content/api_urls.py +++ b/kolibri/core/content/api_urls.py @@ -6,6 +6,7 @@ from .api import ContentNodeGranularViewset from .api import ContentNodeProgressViewset from .api import ContentNodeSearchViewset +from .api import ContentNodeTreeViewset from .api import ContentNodeViewset from .api import FileViewset from .api import RemoteChannelViewSet @@ -14,6 +15,9 @@ router.register("channel", ChannelMetadataViewSet, base_name="channel") router.register(r"contentnode", ContentNodeViewset, base_name="contentnode") +router.register( + r"contentnode_tree", ContentNodeTreeViewset, base_name="contentnode_tree" +) router.register( r"contentnode_search", ContentNodeSearchViewset, base_name="contentnode_search" ) diff --git a/kolibri/core/content/test/test_channel_upgrade.py b/kolibri/core/content/test/test_channel_upgrade.py index e814ba58b7e..834bb868c7a 100644 --- a/kolibri/core/content/test/test_channel_upgrade.py +++ b/kolibri/core/content/test/test_channel_upgrade.py @@ -52,8 +52,9 @@ class ChannelBuilder(object): "root_node", ) - def __init__(self, levels=3): + def __init__(self, levels=3, num_children=5): self.levels = levels + self.num_children = num_children self.modified = set() @@ -297,7 +298,7 @@ def data(self): def recurse_and_generate(self, parent_id, levels): children = [] - for i in range(0, 5): + for i in range(0, self.num_children): if levels == 0: node = self.generate_leaf(parent_id) else: diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py index 798dd0de4fd..0c6b601bec6 100644 --- a/kolibri/core/content/test/test_content_app.py +++ b/kolibri/core/content/test/test_content_app.py @@ -234,95 +234,92 @@ def test_related_filter(self): ) self.assertEqual(response.data[0]["title"], "c2") - def _assert_nodes(self, data, nodes): - def map_language(lang): - if lang: - return { - f: getattr(lang, f) - for f in [ - "id", - "lang_code", - "lang_subcode", - "lang_name", - "lang_direction", - ] - } - - for actual, expected in zip(data, nodes): - assessmentmetadata = ( - expected.assessmentmetadata.all() - .values( - "assessment_item_ids", - "number_of_assessments", - "mastery_model", - "randomize", - "is_manipulable", - "contentnode", - ) - .first() - ) - files = [] - for f in expected.files.all(): - "local_file__id", - "local_file__available", - "local_file__file_size", - "local_file__extension", - "lang_id", - file = {} - for field in [ + def map_language(self, lang): + if lang: + return { + f: getattr(lang, f) + for f in [ "id", - "priority", - "preset", - "supplementary", - "thumbnail", - ]: - file[field] = getattr(f, field) - file["checksum"] = f.local_file_id - for field in [ - "available", - "file_size", - "extension", - ]: - file[field] = getattr(f.local_file, field) - file["lang"] = map_language(f.lang) - file["storage_url"] = f.get_storage_url() - files.append(file) - self.assertEqual( - actual, - { - "id": expected.id, - "available": expected.available, - "author": expected.author, - 
"channel_id": expected.channel_id, - "coach_content": expected.coach_content, - "content_id": expected.content_id, - "description": expected.description, - "kind": expected.kind, - "lang": map_language(expected.lang), - "license_description": expected.license_description, - "license_name": expected.license_name, - "license_owner": expected.license_owner, - "num_coach_contents": expected.num_coach_contents, - "options": expected.options, - "parent": expected.parent_id, - "sort_order": expected.sort_order, - "title": expected.title, - "lft": expected.lft, - "rght": expected.rght, - "tree_id": expected.tree_id, - "ancestors": list( - expected.get_ancestors().values( - "id", "title", "lft", "rght", "tree_id" - ) - ), - "tags": list( - expected.tags.all().values_list("tag_name", flat=True) - ), - "assessmentmetadata": assessmentmetadata, - "is_leaf": expected.kind != "topic", - "files": files, - }, + "lang_code", + "lang_subcode", + "lang_name", + "lang_direction", + ] + } + + def _assert_node(self, actual, expected): + assessmentmetadata = ( + expected.assessmentmetadata.all() + .values( + "assessment_item_ids", + "number_of_assessments", + "mastery_model", + "randomize", + "is_manipulable", + "contentnode", ) + .first() + ) + files = [] + for f in expected.files.all(): + "local_file__id", + "local_file__available", + "local_file__file_size", + "local_file__extension", + "lang_id", + file = {} + for field in [ + "id", + "priority", + "preset", + "supplementary", + "thumbnail", + ]: + file[field] = getattr(f, field) + file["checksum"] = f.local_file_id + for field in [ + "available", + "file_size", + "extension", + ]: + file[field] = getattr(f.local_file, field) + file["lang"] = self.map_language(f.lang) + file["storage_url"] = f.get_storage_url() + files.append(file) + self.assertEqual( + actual, + { + "id": expected.id, + "available": expected.available, + "author": expected.author, + "channel_id": expected.channel_id, + "coach_content": expected.coach_content, + "content_id": expected.content_id, + "description": expected.description, + "kind": expected.kind, + "lang": self.map_language(expected.lang), + "license_description": expected.license_description, + "license_name": expected.license_name, + "license_owner": expected.license_owner, + "num_coach_contents": expected.num_coach_contents, + "options": expected.options, + "parent": expected.parent_id, + "sort_order": expected.sort_order, + "title": expected.title, + "lft": expected.lft, + "rght": expected.rght, + "tree_id": expected.tree_id, + "ancestors": list(expected.get_ancestors().values("id", "title")), + "tags": list(expected.tags.all().values_list("tag_name", flat=True)), + "assessmentmetadata": assessmentmetadata, + "is_leaf": expected.kind != "topic", + "files": files, + }, + ) + + def _assert_nodes(self, data, nodes): + for actual, expected in zip(data, nodes): + self._assert_node(actual, expected) def test_contentnode_list(self): root = content.ContentNode.objects.get(title="root") @@ -334,7 +331,7 @@ def test_contentnode_list(self): def test_contentnode_list_long(self): # This will make > 1000 nodes which should test our ancestor batching behaviour - builder = ChannelBuilder(levels=4) + builder = ChannelBuilder(num_children=10) builder.insert_into_default_db() content.ContentNode.objects.update(available=True) nodes = content.ContentNode.objects.filter(available=True) @@ -344,6 +341,48 @@ def test_contentnode_list_long(self): self.assertEqual(len(response.data), expected_output) self._assert_nodes(response.data, nodes) + def 
_recurse_and_assert(self, data, nodes, recursion_depth=0): + for actual, expected in zip(data, nodes): + children = actual.pop("children", None) + self._assert_node(actual, expected) + if children: + child_nodes = content.ContentNode.objects.filter( + available=True, parent=expected + ) + if children["more"] is None: + self.assertEqual(len(child_nodes), len(children["results"])) + else: + self.assertGreater(len(child_nodes), len(children["results"])) + self.assertEqual(children["more"]["id"], expected.id) + self.assertEqual( + children["more"]["params"]["lft__gt"], child_nodes[24].rght + ) + self.assertEqual( + children["more"]["params"]["depth"], 2 - recursion_depth + ) + self._recurse_and_assert( + children["results"], + child_nodes, + recursion_depth=recursion_depth + 1, + ) + + def test_contentnode_tree(self): + root = content.ContentNode.objects.get(title="root") + response = self.client.get( + reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}) + ) + self._recurse_and_assert([response.data], [root]) + + def test_contentnode_tree_long(self): + builder = ChannelBuilder(levels=2, num_children=30) + builder.insert_into_default_db() + content.ContentNode.objects.all().update(available=True) + root = content.ContentNode.objects.get(id=builder.root_node["id"]) + response = self.client.get( + reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}) + ) + self._recurse_and_assert([response.data], [root]) + @mock.patch("kolibri.core.content.api.get_channel_stats_from_studio") def test_contentnode_granular_network_import(self, stats_mock): c1 = content.ContentNode.objects.get(title="root") From f862df12dfabca8dd88c51a56a569d700ce97bc6 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Sat, 29 May 2021 07:10:00 -0700 Subject: [PATCH 04/12] Add support for tree endpoint to contentnode resource. Simplify caching. 
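The frontend resource now keeps a simple in-memory cache keyed by node
id: fetchModel resolves from the cache when it can, and cacheData
recursively stores any nodes found in collection results and in nested
"children" payloads, so a single tree fetch warms the cache for later
model lookups. fetchTree(id, params) requests the new contentnode_tree
endpoint, and fetchMoreTree simply replays the {id, params} "more"
descriptor from a previous response.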
--- .../assets/src/api-resources/contentNode.js | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/kolibri/core/assets/src/api-resources/contentNode.js b/kolibri/core/assets/src/api-resources/contentNode.js index e8f5bdbca8a..f8eb0213260 100644 --- a/kolibri/core/assets/src/api-resources/contentNode.js +++ b/kolibri/core/assets/src/api-resources/contentNode.js @@ -1,5 +1,8 @@ +import isPlainObject from 'lodash/isPlainObject'; import { Resource } from 'kolibri.lib.apiResource'; import Store from 'kolibri.coreVue.vuex.store'; +import urls from 'kolibri.urls'; +import cloneDeep from '../cloneDeep'; export default new Resource({ name: 'contentnode', @@ -34,4 +37,51 @@ export default new Resource({ fetchNextSteps(getParams) { return this.fetchDetailCollection('next_steps', Store.getters.currentUserId, getParams); }, + cache: {}, + fetchModel({ id }) { + if (this.cache[id]) { + return Promise.resolve(this.cache[id]); + } + return this.client({ url: this.modelUrl(id) }).then(response => { + this.cacheData(response.data); + return response.data; + }); + }, + cacheData(data) { + if (Array.isArray(data)) { + for (let model of data) { + this.cacheData(model); + } + } else if (isPlainObject(data)) { + if (data[this.idKey]) { + this.cache[data[this.idKey]] = Object.assign( + this.cache[data[this.idKey]] || {}, + cloneDeep(data) + ); + if (data.children) { + this.cacheData(data.children); + } + } else if (data.results) { + for (let model of data.results) { + this.cacheData(model); + } + } + } + }, + fetchCollection(params) { + return this.client({ url: this.collectionUrl(), params }).then(response => { + this.cacheData(response.data); + return response.data; + }); + }, + fetchTree(id, params) { + const url = urls['kolibri:core:contentnode_tree_detail'](id); + return this.client({ url, params }).then(response => { + this.cacheData(response.data); + return response.data; + }); + }, + fetchMoreTree({ id, params }) { + return this.fetchTree(id, params); + }, }); From 9a0cc7079e62b4c1833b3fb7497bff093116aaa4 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Fri, 4 Jun 2021 14:06:04 -0700 Subject: [PATCH 05/12] Update ChannelBuilder caching to prevent weird cross-test issues. 
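The ChannelBuilder tree cache was previously keyed by levels alone, so
a builder constructed with the same number of levels but a different
num_children could load a stale tree cached by an earlier test. The
cache key now incorporates both dimensions.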
--- kolibri/core/content/test/test_channel_upgrade.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kolibri/core/content/test/test_channel_upgrade.py b/kolibri/core/content/test/test_channel_upgrade.py index 834bb868c7a..10f60f0ebc6 100644 --- a/kolibri/core/content/test/test_channel_upgrade.py +++ b/kolibri/core/content/test/test_channel_upgrade.py @@ -67,6 +67,10 @@ def __init__(self, levels=3, num_children=5): self.generate_nodes_from_root_node() + @property + def cache_key(self): + return "{}_{}".format(self.levels, self.num_children) + def generate_new_tree(self): self.channel = self.channel_data() self.files = {} @@ -83,7 +87,7 @@ def generate_new_tree(self): ) def load_data(self): - data = copy.deepcopy(self.__TREE_CACHE[self.levels]) + data = copy.deepcopy(self.__TREE_CACHE[self.cache_key]) for key in self.tree_keys: setattr(self, key, data[key]) @@ -94,7 +98,7 @@ def save_data(self): for key in self.tree_keys: data[key] = getattr(self, key) - self.__TREE_CACHE[self.levels] = copy.deepcopy(data) + self.__TREE_CACHE[self.cache_key] = copy.deepcopy(data) def generate_nodes_from_root_node(self): self._django_nodes = ContentNode.objects.build_tree_nodes(self.root_node) From a12b3da7645cf70c4be3c07b0cc3577f81977ab0 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Mon, 14 Jun 2021 17:35:04 -0700 Subject: [PATCH 06/12] Add rudimentary automatic serializer generation for read only values viewsets. Only used for DRF YASG documentation generation. --- kolibri/core/api.py | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/kolibri/core/api.py b/kolibri/core/api.py index 531474341a3..6a8514553e1 100644 --- a/kolibri/core/api.py +++ b/kolibri/core/api.py @@ -10,6 +10,7 @@ from rest_framework.mixins import DestroyModelMixin from rest_framework.mixins import UpdateModelMixin as BaseUpdateModelMixin from rest_framework.response import Response +from rest_framework.serializers import ModelSerializer from rest_framework.serializers import Serializer from rest_framework.serializers import UUIDField from rest_framework.serializers import ValidationError @@ -143,11 +144,53 @@ def __init__(self, *args, **kwargs): self._field_map = self.field_map.copy() return viewset + def generate_serializer(self): + queryset = getattr(self, "queryset", None) + if queryset is None: + try: + queryset = self.get_queryset() + except Exception: + pass + model = getattr(queryset, "model", None) + if model is None: + return Serializer + mapped_fields = {v: k for k, v in self.field_map.items() if isinstance(v, str)} + fields = [] + extra_kwargs = {} + for value in self.values: + try: + model._meta.get_field(value) + if value in mapped_fields: + extra_kwargs[mapped_fields[value]] = {"source": value} + value = mapped_fields[value] + fields.append(value) + except Exception: + pass + + meta = type( + "Meta", + (object,), + { + "fields": fields, + "read_only_fields": fields, + "model": model, + "extra_kwargs": extra_kwargs, + }, + ) + CustomSerializer = type( + "{}Serializer".format(self.__class__.__name__), + (ModelSerializer,), + {"Meta": meta}, + ) + + return CustomSerializer + def get_serializer_class(self): if self.serializer_class is not None: return self.serializer_class # Hack to prevent the renderer logic from breaking completely. 
- return Serializer + self.__class__.serializer_class = self.generate_serializer() + return self.__class__.serializer_class def _get_lookup_filter(self): lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field From 3199a591f22efebaeb103bf11bafb60e518dff51 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Mon, 14 Jun 2021 18:34:21 -0700 Subject: [PATCH 07/12] Add more documentation of returned content node metadata. --- .../assets/src/api-resources/contentNode.js | 90 +++++++++++++++++++ kolibri/core/content/api.py | 24 +++++ 2 files changed, 114 insertions(+) diff --git a/kolibri/core/assets/src/api-resources/contentNode.js b/kolibri/core/assets/src/api-resources/contentNode.js index f8eb0213260..a21c57bbc15 100644 --- a/kolibri/core/assets/src/api-resources/contentNode.js +++ b/kolibri/core/assets/src/api-resources/contentNode.js @@ -4,6 +4,81 @@ import Store from 'kolibri.coreVue.vuex.store'; import urls from 'kolibri.urls'; import cloneDeep from '../cloneDeep'; +/** + * Type definition for Language metadata + * @typedef {Object} Language + * @property {string} id - an IETF language tag + * @property {string} lang_code - the ISO 639‑1 language code + * @property {string} lang_subcode - the regional identifier + * @property {string} lang_name - the name of the language in that language + * @property {('ltr'|'rtl'|)} lang_direction - Direction of the language's script, + * top to bottom is not supported currently + */ + +/** + * Type definition for AssessmentMetadata + * @typedef {Object} AssessmentMetadata + * @property {string[]} assessment_item_ids - an array of ids for assessment items + * @property {number} number_of_assessments - the length of assessment_item_ids + * @property {Object} mastery_model - object describing the mastery criterion for finishing practice + * @property {boolean} randomize - whether to randomize the order of assessments + * @property {boolean} is_manipulable - Whether this assessment can be programmatically updated + */ + +/** + * Type definition for File + * @typedef {Object} File + * @property {string} id - id of the file object + * @property {string} checksum - md5 checksum of the file, used to generate the file name + * @property {boolean} available - whether the file is available on the server + * @property {number} file_size - file_size in bytes + * @property {string} extension - file extension, also used to generate the file name + * @property {string} preset - preset, the role that the file plays for this content node + * @property {Language|null} lang - The language of the File + * @property {boolean} supplementary - Whether this file is optional + * @property {boolean} thumbnail - Whether this file is a thumbnail + */ + +/** + * Type definition for ContentNode metadata + * @typedef {Object} ContentNode + * @property {string} id - unique id of the ContentNode + * @property {string} channel_id - unique channel_id of the channel that the ContentNode is in + * @property {string} content_id - identifier that is common across all instances of this resource + * @property {string} title - A title that summarizes this ContentNode for the user + * @property {string} description - detailed description of the ContentNode + * @property {string} author - author of the ContentNode + * @property {string} thumbnail_url - URL for the thumbnail for this ContentNode, + * this may be any valid URL format including base64 encoded or blob URL + * @property {boolean} available - Whether the ContentNode has all necessary files for rendering + * @property {boolean} 
coach_content - Whether the ContentNode is intended only for coach users + * @property {Language|null} lang - The primary language of the ContentNode + * @property {string} license_description - The description of the license, which may be localized + * @property {string} license_name - The human readable name of the license, localized + * @property {string} license_owner - The name of the person or organization that holds copyright + * @property {number} num_coach_contents - Number of coach contents that are descendants of this + * @property {string} parent - The unique id of the parent of this ContentNode + * @property {number} sort_order - The order of display for this node in its channel + * if depth recursion was not deep enough + * @property {string[]} tags - Tags that apply to this content + * @property {boolean} is_leaf - Whether is a leaf resource or not + * @property {AssessmentMetadata|null} assessmentmetadata - Additional metadata for assessments + * @property {File[]} files - array of file objects associated with this ContentNode + * @property {Object[]} ancestors - array of objects with 'id' and 'title' properties + * @property {Children} [children] - optional pagination object with children of this ContentNode + */ + +/** + * Type definition for children pagination object + * @typedef {Object} Children + * @property {Object} more - parameters for requesting more objects + * @property {string} more.id - the id of the parent of these child nodes + * @property {Object} more.params - the get parameters that should be used for requesting more nodes + * @property {number} more.params.depth - 1 or 2, how deep the nesting should be returned + * @property {number} more.params.lft__gt - integer value to return a lft value greater than + * @property {ContentNode[]} results - the array of ContentNodes for this page + */ + export default new Resource({ name: 'contentnode', useContentCacheKey: true, @@ -74,6 +149,13 @@ export default new Resource({ return response.data; }); }, + /** + * A method to request paginated tree data from the backend + * @param {string} id - the id of the parent node for this request + * @param {Object} params - the GET parameters to return more results, + * may be both pagination and non-pagination specific parameters + * @return {Promise} Promise that resolves with the model data + */ fetchTree(id, params) { const url = urls['kolibri:core:contentnode_tree_detail'](id); return this.client({ url, params }).then(response => { @@ -81,6 +163,14 @@ export default new Resource({ return response.data; }); }, + /** + * A method to simplify requesting more items from a previously paginated response from fetchTree + * @param {Object} more - the 'more' property of the 'children' pagination object from a response. 
+ * @param {string} more.id - the id of the parent node for this request + * @param {Object} more.params - the GET parameters to return more results, + * may be both pagination and non-pagination specific parameters + * @return {Promise} Promise that resolves with the model data + */ fetchMoreTree({ id, params }) { return this.fetchTree(id, params); }, diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index 3dba0b5d46d..cd735ac281b 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -790,6 +790,26 @@ def retrieve(self, request, **kwargs): """ A nested, paginated representation of the children and grandchildren of a specific node + GET parameters on request can be: + depth - a value of either 1 or 2 indicating the depth to recurse the tree, either 1 or 2 levels + if this parameter is missing it will default to 2. + lft__gt - a value to return child nodes with a lft value greater than this, if missing defaults to None + + The pagination object returned for "children" will have this form: + { + "results": [...], + "more": { + "id": , + "params": { + lft__gt: , + depth: {1,2}, + }, + } + } + + The "more" property describes the "id" required to do URL reversal on this endpoint, and the params that should + be passed as query parameters to get the next set of results for pagination. + :param request: request object :param pk: id parent node :return: an object representing the parent with a pagination object as "children" @@ -844,12 +864,16 @@ def retrieve(self, request, **kwargs): # The parent of this descendant is the parent node # for this query desc_parent = parent + # When we request more results for pagination, we want to return + # both nodes at this level, and the nodes at the lower level more_depth = 2 elif desc["parent"] in children_by_id: # Otherwise, check to see if our descendant's parent is in the # children_by_id map - if it failed the first condition, # it really should not fail this desc_parent = children_by_id[desc["parent"]] + # When we request more results for pagination, we only want to return + # nodes at this level, and not any of its children more_depth = 1 else: # If we get to here, we have a node that is not in the tree subsection we are From 7b82e9db5d857b0e8df6386172530ee623e79a30 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Tue, 15 Jun 2021 08:10:54 -0700 Subject: [PATCH 08/12] Always return a clone of the cache to prevent the cache being modified. --- kolibri/core/assets/src/api-resources/contentNode.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kolibri/core/assets/src/api-resources/contentNode.js b/kolibri/core/assets/src/api-resources/contentNode.js index a21c57bbc15..a98ce7c1d54 100644 --- a/kolibri/core/assets/src/api-resources/contentNode.js +++ b/kolibri/core/assets/src/api-resources/contentNode.js @@ -115,7 +115,7 @@ export default new Resource({ cache: {}, fetchModel({ id }) { if (this.cache[id]) { - return Promise.resolve(this.cache[id]); + return Promise.resolve(cloneDeep(this.cache[id])); } return this.client({ url: this.modelUrl(id) }).then(response => { this.cacheData(response.data); From 73fe54d184c4848b4442199be5091a67de537f1a Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Tue, 15 Jun 2021 08:38:56 -0700 Subject: [PATCH 09/12] Add tests for query parameters for contentnode tree viewset. 
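With the fixed page size of 25, a parent with 30 available children
should return 25 results plus a "more" descriptor whose lft__gt equals
the rght value of the 25th child; replaying those parameters returns
the remaining 5 children with "more" set to null. The descriptor's
depth is 2 when paginating the top-level children and 1 when paginating
grandchildren.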
--- .../core/content/test/test_channel_upgrade.py | 15 +++++-- kolibri/core/content/test/test_content_app.py | 45 +++++++++++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/kolibri/core/content/test/test_channel_upgrade.py b/kolibri/core/content/test/test_channel_upgrade.py index 10f60f0ebc6..87256c614f4 100644 --- a/kolibri/core/content/test/test_channel_upgrade.py +++ b/kolibri/core/content/test/test_channel_upgrade.py @@ -61,7 +61,6 @@ def __init__(self, levels=3, num_children=5): try: self.load_data() except KeyError: - print("key error {}".format(self.levels)) self.generate_new_tree() self.save_data() @@ -87,10 +86,18 @@ def generate_new_tree(self): ) def load_data(self): - data = copy.deepcopy(self.__TREE_CACHE[self.cache_key]) + try: + data = copy.deepcopy(self.__TREE_CACHE[self.cache_key]) - for key in self.tree_keys: - setattr(self, key, data[key]) + for key in self.tree_keys: + setattr(self, key, data[key]) + except KeyError: + print( + "No tree cache found for {} levels and {} children per level".format( + self.levels, self.num_children + ) + ) + raise def save_data(self): data = {} diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py index 0c6b601bec6..d79088b69a0 100644 --- a/kolibri/core/content/test/test_content_app.py +++ b/kolibri/core/content/test/test_content_app.py @@ -383,6 +383,51 @@ def test_contentnode_tree_long(self): ) self._recurse_and_assert([response.data], [root]) + def test_contentnode_tree_depth_1(self): + root = content.ContentNode.objects.get(title="root") + response = self.client.get( + reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}), + data={"depth": 1}, + ) + self._recurse_and_assert([response.data], [root]) + + def test_contentnode_tree_lft__gt(self): + builder = ChannelBuilder(levels=2, num_children=30) + builder.insert_into_default_db() + content.ContentNode.objects.all().update(available=True) + root = content.ContentNode.objects.get(id=builder.root_node["id"]) + lft__gt = content.ContentNode.objects.filter(parent=root)[24].rght + response = self.client.get( + reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}), + data={"lft__gt": lft__gt}, + ) + self.assertEqual(len(response.data["children"]["results"]), 5) + self.assertIsNone(response.data["children"]["more"]) + first_node = content.ContentNode.objects.filter(parent=root)[25] + self._recurse_and_assert( + [response.data["children"]["results"][0]], [first_node], recursion_depth=1 + ) + + def test_contentnode_tree_more(self): + builder = ChannelBuilder(levels=2, num_children=30) + builder.insert_into_default_db() + content.ContentNode.objects.all().update(available=True) + root = content.ContentNode.objects.get(id=builder.root_node["id"]) + response = self.client.get( + reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}) + ) + first_child = response.data["children"]["results"][0] + self.assertEqual(first_child["children"]["more"]["params"]["depth"], 1) + nested_page_response = self.client.get( + reverse( + "kolibri:core:contentnode_tree-detail", + kwargs={"pk": first_child["children"]["more"]["id"]}, + ), + data=first_child["children"]["more"]["params"], + ) + self.assertEqual(len(nested_page_response.data["children"]["results"]), 5) + self.assertIsNone(nested_page_response.data["children"]["more"]) + @mock.patch("kolibri.core.content.api.get_channel_stats_from_studio") def test_contentnode_granular_network_import(self, stats_mock): c1 = 
content.ContentNode.objects.get(title="root") From 5234a960b72a9289840b7b679dff8d7e0af8c093 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Tue, 15 Jun 2021 14:46:37 -0700 Subject: [PATCH 10/12] Properly limit ancestor batch size. --- kolibri/core/content/api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index cd735ac281b..20af6070b08 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -352,7 +352,9 @@ def consolidate(self, items, queryset): # We need to batch our queries for ancestors as the size of the expression tree # depends on the number of nodes that we are querying for. - ANCESTOR_BATCH_SIZE = 1000 + # On Windows, the SQL parameter limit is 999, and an ancestors call can produce + # 3 parameters per node in the queryset, so this should max out the parameters at 750. + ANCESTOR_BATCH_SIZE = 250 if len(items) > ANCESTOR_BATCH_SIZE: From 5b2de43bc9505bcbcf26980482a977dd55049e4c Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Tue, 15 Jun 2021 15:10:25 -0700 Subject: [PATCH 11/12] Order by tag name to ensure consistent ordering of tags in API results. --- kolibri/core/content/api.py | 10 +++++++--- kolibri/core/content/test/test_content_app.py | 6 +++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index 20af6070b08..526595dc35a 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -314,9 +314,13 @@ def get_related_data_maps(self, items, queryset): tags_map = {} - for t in models.ContentTag.objects.filter(tagged_content__in=queryset).values( - "tag_name", - "tagged_content", + for t in ( + models.ContentTag.objects.filter(tagged_content__in=queryset) + .values( + "tag_name", + "tagged_content", + ) + .order_by("tag_name") ): if t["tagged_content"] not in tags_map: tags_map[t["tagged_content"]] = [t["tag_name"]] diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py index d79088b69a0..b452af17a98 100644 --- a/kolibri/core/content/test/test_content_app.py +++ b/kolibri/core/content/test/test_content_app.py @@ -310,7 +310,11 @@ def _assert_node(self, actual, expected): "rght": expected.rght, "tree_id": expected.tree_id, "ancestors": list(expected.get_ancestors().values("id", "title")), - "tags": list(expected.tags.all().values_list("tag_name", flat=True)), + "tags": list( + expected.tags.all() + .order_by("tag_name") + .values_list("tag_name", flat=True) + ), "assessmentmetadata": assessmentmetadata, "is_leaf": expected.kind != "topic", "files": files, From 8225c03090dc42a249677ed7ce846c6eb49722f1 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Thu, 17 Jun 2021 13:36:06 -0700 Subject: [PATCH 12/12] Skip large data insertions on postgres. 
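The ancestor batching behaviour under test exists to stay within the
SQL parameter limits of SQLite (and Windows builds in particular),
which postgres is far less vulnerable to, and inserting the thousands
of nodes these tests need is noticeably slower on postgres, so the
large-data tests are skipped there.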
---
 kolibri/core/content/test/test_content_app.py | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py
index b452af17a98..85b263f3635 100644
--- a/kolibri/core/content/test/test_content_app.py
+++ b/kolibri/core/content/test/test_content_app.py
@@ -2,10 +2,12 @@
 To run this test, type this in command line
 """
 import datetime
+import unittest
 import uuid
 
 import mock
 import requests
+from django.conf import settings
 from django.core.cache import cache
 from django.core.urlresolvers import reverse
 from django.test import TestCase
@@ -333,6 +335,11 @@ def test_contentnode_list(self):
         self.assertEqual(len(response.data), expected_output)
         self._assert_nodes(response.data, nodes)
 
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipped on postgres, which is less vulnerable to large queries and where large insertions are less performant",
+    )
     def test_contentnode_list_long(self):
         # This will make > 1000 nodes which should test our ancestor batching behaviour
         builder = ChannelBuilder(num_children=10)
@@ -377,6 +384,11 @@ def test_contentnode_tree(self):
         )
         self._recurse_and_assert([response.data], [root])
 
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipped on postgres, which is less vulnerable to large queries and where large insertions are less performant",
+    )
     def test_contentnode_tree_long(self):
         builder = ChannelBuilder(levels=2, num_children=30)
         builder.insert_into_default_db()
@@ -395,6 +407,11 @@ def test_contentnode_tree_depth_1(self):
         )
         self._recurse_and_assert([response.data], [root])
 
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipped on postgres, which is less vulnerable to large queries and where large insertions are less performant",
+    )
     def test_contentnode_tree_lft__gt(self):
         builder = ChannelBuilder(levels=2, num_children=30)
         builder.insert_into_default_db()
@@ -412,6 +429,11 @@ def test_contentnode_tree_lft__gt(self):
             [response.data["children"]["results"][0]], [first_node], recursion_depth=1
         )
 
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipped on postgres, which is less vulnerable to large queries and where large insertions are less performant",
+    )
     def test_contentnode_tree_more(self):
         builder = ChannelBuilder(levels=2, num_children=30)
         builder.insert_into_default_db()