diff --git a/kolibri/core/api.py b/kolibri/core/api.py index 04a9f6d2533..6a8514553e1 100644 --- a/kolibri/core/api.py +++ b/kolibri/core/api.py @@ -10,6 +10,7 @@ from rest_framework.mixins import DestroyModelMixin from rest_framework.mixins import UpdateModelMixin as BaseUpdateModelMixin from rest_framework.response import Response +from rest_framework.serializers import ModelSerializer from rest_framework.serializers import Serializer from rest_framework.serializers import UUIDField from rest_framework.serializers import ValidationError @@ -117,7 +118,7 @@ def remove_invalid_fields(self, queryset, fields, view, request): return ordering -class ReadOnlyValuesViewset(viewsets.ReadOnlyModelViewSet): +class BaseValuesViewset(viewsets.GenericViewSet): """ A viewset that uses a values call to get all model/queryset data in a single database query, rather than delegating serialization to a @@ -125,7 +126,7 @@ class ReadOnlyValuesViewset(viewsets.ReadOnlyModelViewSet): """ # A tuple of values to get from the queryset - values = None + # values = None # A map of target_key, source_key where target_key is the final target_key that will be set # and source_key is the key on the object retrieved from the values call. # Alternatively, the source_key can be a callable that will be passed the object and return @@ -134,8 +135,8 @@ class ReadOnlyValuesViewset(viewsets.ReadOnlyModelViewSet): field_map = {} def __init__(self, *args, **kwargs): - viewset = super(ReadOnlyValuesViewset, self).__init__(*args, **kwargs) - if not isinstance(self.values, tuple): + viewset = super(BaseValuesViewset, self).__init__(*args, **kwargs) + if not hasattr(self, "values") or not isinstance(self.values, tuple): raise TypeError("values must be defined as a tuple") self._values = tuple(self.values) if not isinstance(self.field_map, dict): @@ -143,11 +144,53 @@ def __init__(self, *args, **kwargs): self._field_map = self.field_map.copy() return viewset + def generate_serializer(self): + queryset = getattr(self, "queryset", None) + if queryset is None: + try: + queryset = self.get_queryset() + except Exception: + pass + model = getattr(queryset, "model", None) + if model is None: + return Serializer + mapped_fields = {v: k for k, v in self.field_map.items() if isinstance(v, str)} + fields = [] + extra_kwargs = {} + for value in self.values: + try: + model._meta.get_field(value) + if value in mapped_fields: + extra_kwargs[mapped_fields[value]] = {"source": value} + value = mapped_fields[value] + fields.append(value) + except Exception: + pass + + meta = type( + "Meta", + (object,), + { + "fields": fields, + "read_only_fields": fields, + "model": model, + "extra_kwargs": extra_kwargs, + }, + ) + CustomSerializer = type( + "{}Serializer".format(self.__class__.__name__), + (ModelSerializer,), + {"Meta": meta}, + ) + + return CustomSerializer + def get_serializer_class(self): if self.serializer_class is not None: return self.serializer_class # Hack to prevent the renderer logic from breaking completely. 
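+        # Note: generate_serializer() only inspects class-level attributes
+        # (`values` and `field_map`), so the generated class should be identical
+        # for every instance; caching it on the class below avoids regenerating
+        # it on every request.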
- return Serializer + self.__class__.serializer_class = self.generate_serializer() + return self.__class__.serializer_class def _get_lookup_filter(self): lookup_url_kwarg = self.lookup_url_kwarg or self.lookup_field @@ -205,6 +248,18 @@ def serialize(self, queryset): list(map(self._map_fields, values_queryset or [])), queryset ) + def serialize_object(self, **filter_kwargs): + queryset = self.get_queryset() + try: + filter_kwargs = filter_kwargs or self._get_lookup_filter() + return self.serialize(queryset.filter(**filter_kwargs))[0] + except IndexError: + raise Http404( + "No %s matches the given query." % queryset.model._meta.object_name + ) + + +class ListModelMixin(object): def list(self, request, *args, **kwargs): queryset = self.filter_queryset(self.get_queryset()) @@ -218,20 +273,16 @@ def list(self, request, *args, **kwargs): return Response(self.serialize(queryset)) - def serialize_object(self, **filter_kwargs): - queryset = self.get_queryset() - try: - filter_kwargs = filter_kwargs or self._get_lookup_filter() - return self.serialize(queryset.filter(**filter_kwargs))[0] - except IndexError: - raise Http404( - "No %s matches the given query." % queryset.model._meta.object_name - ) +class RetrieveModelMixin(object): def retrieve(self, request, *args, **kwargs): return Response(self.serialize_object()) +class ReadOnlyValuesViewset(BaseValuesViewset, RetrieveModelMixin, ListModelMixin): + pass + + class CreateModelMixin(BaseCreateModelMixin): def create(self, request, *args, **kwargs): serializer = self.get_serializer(data=request.data) diff --git a/kolibri/core/assets/src/api-resources/contentNode.js b/kolibri/core/assets/src/api-resources/contentNode.js index e8f5bdbca8a..a98ce7c1d54 100644 --- a/kolibri/core/assets/src/api-resources/contentNode.js +++ b/kolibri/core/assets/src/api-resources/contentNode.js @@ -1,5 +1,83 @@ +import isPlainObject from 'lodash/isPlainObject'; import { Resource } from 'kolibri.lib.apiResource'; import Store from 'kolibri.coreVue.vuex.store'; +import urls from 'kolibri.urls'; +import cloneDeep from '../cloneDeep'; + +/** + * Type definition for Language metadata + * @typedef {Object} Language + * @property {string} id - an IETF language tag + * @property {string} lang_code - the ISO 639‑1 language code + * @property {string} lang_subcode - the regional identifier + * @property {string} lang_name - the name of the language in that language + * @property {('ltr'|'rtl'|)} lang_direction - Direction of the language's script, + * top to bottom is not supported currently + */ + +/** + * Type definition for AssessmentMetadata + * @typedef {Object} AssessmentMetadata + * @property {string[]} assessment_item_ids - an array of ids for assessment items + * @property {number} number_of_assessments - the length of assessment_item_ids + * @property {Object} mastery_model - object describing the mastery criterion for finishing practice + * @property {boolean} randomize - whether to randomize the order of assessments + * @property {boolean} is_manipulable - Whether this assessment can be programmatically updated + */ + +/** + * Type definition for File + * @typedef {Object} File + * @property {string} id - id of the file object + * @property {string} checksum - md5 checksum of the file, used to generate the file name + * @property {boolean} available - whether the file is available on the server + * @property {number} file_size - file_size in bytes + * @property {string} extension - file extension, also used to generate the file name + * @property {string} preset - 
preset, the role that the file plays for this content node
+ * @property {Language|null} lang - The language of the File
+ * @property {boolean} supplementary - Whether this file is optional
+ * @property {boolean} thumbnail - Whether this file is a thumbnail
+ */
+
+/**
+ * Type definition for ContentNode metadata
+ * @typedef {Object} ContentNode
+ * @property {string} id - unique id of the ContentNode
+ * @property {string} channel_id - unique channel_id of the channel that the ContentNode is in
+ * @property {string} content_id - identifier that is common across all instances of this resource
+ * @property {string} title - A title that summarizes this ContentNode for the user
+ * @property {string} description - detailed description of the ContentNode
+ * @property {string} author - author of the ContentNode
+ * @property {string} thumbnail_url - URL for the thumbnail for this ContentNode,
+ * this may be any valid URL format including base64 encoded or blob URL
+ * @property {boolean} available - Whether the ContentNode has all necessary files for rendering
+ * @property {boolean} coach_content - Whether the ContentNode is intended only for coach users
+ * @property {Language|null} lang - The primary language of the ContentNode
+ * @property {string} license_description - The description of the license, which may be localized
+ * @property {string} license_name - The human readable name of the license, localized
+ * @property {string} license_owner - The name of the person or organization that holds copyright
+ * @property {number} num_coach_contents - Number of coach contents that are descendants of this
+ * @property {string} parent - The unique id of the parent of this ContentNode
+ * @property {number} sort_order - The order of display for this node in its channel
+ * @property {string[]} tags - Tags that apply to this content
+ * @property {boolean} is_leaf - Whether this is a leaf resource or not
+ * @property {AssessmentMetadata|null} assessmentmetadata - Additional metadata for assessments
+ * @property {File[]} files - array of file objects associated with this ContentNode
+ * @property {Object[]} ancestors - array of objects with 'id' and 'title' properties
+ * @property {Children} [children] - optional pagination object with children of this ContentNode,
+ * omitted if depth recursion was not deep enough
+ */
+
+/**
+ * Type definition for children pagination object
+ * @typedef {Object} Children
+ * @property {Object} more - parameters for requesting more objects
+ * @property {string} more.id - the id of the parent of these child nodes
+ * @property {Object} more.params - the GET parameters that should be used for requesting more nodes
+ * @property {number} more.params.depth - 1 or 2, how deep the nesting should be returned
+ * @property {number} more.params.lft__gt - integer value to return a lft value greater than
+ * @property {ContentNode[]} results - the array of ContentNodes for this page
+ */

 export default new Resource({
   name: 'contentnode',
@@ -34,4 +112,66 @@
   fetchNextSteps(getParams) {
     return this.fetchDetailCollection('next_steps', Store.getters.currentUserId, getParams);
   },
+  cache: {},
+  fetchModel({ id }) {
+    if (this.cache[id]) {
+      return Promise.resolve(cloneDeep(this.cache[id]));
+    }
+    return this.client({ url: this.modelUrl(id) }).then(response => {
+      this.cacheData(response.data);
+      return response.data;
+    });
+  },
+  cacheData(data) {
+    if (Array.isArray(data)) {
+      for (let model of data) {
+        this.cacheData(model);
+      }
+    } else if
(isPlainObject(data)) { + if (data[this.idKey]) { + this.cache[data[this.idKey]] = Object.assign( + this.cache[data[this.idKey]] || {}, + cloneDeep(data) + ); + if (data.children) { + this.cacheData(data.children); + } + } else if (data.results) { + for (let model of data.results) { + this.cacheData(model); + } + } + } + }, + fetchCollection(params) { + return this.client({ url: this.collectionUrl(), params }).then(response => { + this.cacheData(response.data); + return response.data; + }); + }, + /** + * A method to request paginated tree data from the backend + * @param {string} id - the id of the parent node for this request + * @param {Object} params - the GET parameters to return more results, + * may be both pagination and non-pagination specific parameters + * @return {Promise} Promise that resolves with the model data + */ + fetchTree(id, params) { + const url = urls['kolibri:core:contentnode_tree_detail'](id); + return this.client({ url, params }).then(response => { + this.cacheData(response.data); + return response.data; + }); + }, + /** + * A method to simplify requesting more items from a previously paginated response from fetchTree + * @param {Object} more - the 'more' property of the 'children' pagination object from a response. + * @param {string} more.id - the id of the parent node for this request + * @param {Object} more.params - the GET parameters to return more results, + * may be both pagination and non-pagination specific parameters + * @return {Promise} Promise that resolves with the model data + */ + fetchMoreTree({ id, params }) { + return this.fetchTree(id, params); + }, }); diff --git a/kolibri/core/content/api.py b/kolibri/core/content/api.py index a84904f68de..526595dc35a 100644 --- a/kolibri/core/content/api.py +++ b/kolibri/core/content/api.py @@ -5,6 +5,7 @@ import requests from django.core.cache import cache +from django.core.exceptions import ValidationError from django.db.models import Exists from django.db.models import OuterRef from django.db.models import Q @@ -31,6 +32,7 @@ from rest_framework.generics import get_object_or_404 from rest_framework.response import Response +from kolibri.core.api import BaseValuesViewset from kolibri.core.api import ReadOnlyValuesViewset from kolibri.core.auth.middleware import session_exempt from kolibri.core.content import models @@ -210,25 +212,6 @@ class OptionalPageNumberPagination(ValuesViewsetPageNumberPagination): page_size_query_param = "page_size" -def map_lang(obj): - keys = ["id", "lang_code", "lang_subcode", "lang_name", "lang_direction"] - - lower_case = set(["id", "lang_code", "lang_subcode"]) - - output = {} - - for key in keys: - output[key] = obj.pop("lang__" + key) - if key in lower_case and output[key]: - output[key] = output[key].lower() - - if not any(output.values()): - # All keys are null so return None - return None - - return output - - def map_file(file): file["checksum"] = file.pop("local_file__id") file["available"] = file.pop("local_file__available") @@ -241,15 +224,17 @@ def map_file(file): "extension": file["extension"], } ) - file["lang"] = map_lang(file) return file -@method_decorator(cache_forever, name="dispatch") -class ContentNodeViewset(ReadOnlyValuesViewset): +class BaseContentNodeMixin(object): + """ + A base mixin for viewsets that need to return the same format of data + serialization for ContentNodes. 
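+
+    It supplies the shared `values` tuple, a `get_queryset` restricted to
+    available nodes, and bulk lookups of related assessment metadata, files,
+    languages, and tags via `get_related_data_maps`.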
+ """ + filter_backends = (DjangoFilterBackend,) filter_class = ContentNodeFilter - pagination_class = OptionalPageNumberPagination values = ( "id", @@ -260,12 +245,7 @@ class ContentNodeViewset(ReadOnlyValuesViewset): "content_id", "description", "kind", - # Language keys - "lang__id", - "lang__lang_code", - "lang__lang_subcode", - "lang__lang_name", - "lang__lang_direction", + "lang_id", "license_description", "license_name", "license_owner", @@ -279,29 +259,28 @@ class ContentNodeViewset(ReadOnlyValuesViewset): "tree_id", ) - field_map = {"lang": map_lang} - - def consolidate(self, items, queryset): - output = [] + def get_queryset(self): + return models.ContentNode.objects.filter(available=True) - if items: - assessmentmetadata = { - a["contentnode"]: a - for a in models.AssessmentMetaData.objects.filter( - contentnode__in=queryset - ).values( - "assessment_item_ids", - "number_of_assessments", - "mastery_model", - "randomize", - "is_manipulable", - "contentnode", - ) - } + def get_related_data_maps(self, items, queryset): + assessmentmetadata_map = { + a["contentnode"]: a + for a in models.AssessmentMetaData.objects.filter( + contentnode__in=queryset + ).values( + "assessment_item_ids", + "number_of_assessments", + "mastery_model", + "randomize", + "is_manipulable", + "contentnode", + ) + } - files = {} + files_map = {} - for f in models.File.objects.filter(contentnode__in=queryset).values( + files = list( + models.File.objects.filter(contentnode__in=queryset).values( "id", "contentnode", "local_file__id", @@ -310,57 +289,122 @@ def consolidate(self, items, queryset): "local_file__file_size", "local_file__extension", "preset", - "lang__id", - "lang__lang_code", - "lang__lang_subcode", - "lang__lang_name", - "lang__lang_direction", + "lang_id", "supplementary", "thumbnail", - ): - if f["contentnode"] not in files: - files[f["contentnode"]] = [] - files[f["contentnode"]].append(map_file(f)) + ) + ) - ancestors = queryset.get_ancestors().values( - "id", "title", "lft", "rght", "tree_id" + lang_ids = set([obj["lang_id"] for obj in items + files]) + + languages_map = { + lang["id"]: lang + for lang in models.Language.objects.filter(id__in=lang_ids).values( + "id", "lang_code", "lang_subcode", "lang_name", "lang_direction" ) + } - tags = {} + for f in files: + contentnode_id = f.pop("contentnode") + if contentnode_id not in files_map: + files_map[contentnode_id] = [] + lang_id = f.pop("lang_id") + f["lang"] = languages_map.get(lang_id) + files_map[contentnode_id].append(map_file(f)) - for t in models.ContentTag.objects.filter( - tagged_content__in=queryset - ).values( + tags_map = {} + + for t in ( + models.ContentTag.objects.filter(tagged_content__in=queryset) + .values( "tag_name", "tagged_content", - ): - if t["tagged_content"] not in tags: - tags[t["tagged_content"]] = [t["tag_name"]] - else: - tags[t["tagged_content"]].append(t["tag_name"]) - - for item in items: - item["assessmentmetadata"] = assessmentmetadata.get(item["id"]) - item["tags"] = tags.get(item["id"], []) - item["files"] = files.get(item["id"], []) - - lft = item.pop("lft") - rght = item.pop("rght") - tree_id = item.pop("tree_id") - item["ancestors"] = list( - filter( - lambda x: x["lft"] < lft - and x["rght"] > rght - and x["tree_id"] == tree_id, - ancestors, + ) + .order_by("tag_name") + ): + if t["tagged_content"] not in tags_map: + tags_map[t["tagged_content"]] = [t["tag_name"]] + else: + tags_map[t["tagged_content"]].append(t["tag_name"]) + + return assessmentmetadata_map, files_map, languages_map, tags_map + + def 
process_items(self, items, queryset, ancestor_lookup_method=None):
+        output = []
+        assessmentmetadata, files_map, languages_map, tags = self.get_related_data_maps(
+            items, queryset
+        )
+        for item in items:
+            item["assessmentmetadata"] = assessmentmetadata.get(item["id"])
+            item["tags"] = tags.get(item["id"], [])
+            item["files"] = files_map.get(item["id"], [])
+            lang_id = item.pop("lang_id")
+            item["lang"] = languages_map.get(lang_id)
+            if ancestor_lookup_method:
+                item["ancestors"] = ancestor_lookup_method(item)
+            item["is_leaf"] = item.get("kind") != content_kinds.TOPIC
+            output.append(item)
+        return output
+
+
+@method_decorator(cache_forever, name="dispatch")
+class ContentNodeViewset(BaseContentNodeMixin, ReadOnlyValuesViewset):
+    pagination_class = OptionalPageNumberPagination
+
+    def consolidate(self, items, queryset):
+        if items:
+
+            # We need to batch our queries for ancestors as the size of the expression tree
+            # depends on the number of nodes that we are querying for.
+            # On Windows, the SQL parameter limit is 999, and an ancestors call can produce
+            # 3 parameters per node in the queryset, so this should max out the parameters at 750.
+            ANCESTOR_BATCH_SIZE = 250
+
+            if len(items) > ANCESTOR_BATCH_SIZE:
+
+                ancestors_map = {}
+
+                for i in range(0, len(items), ANCESTOR_BATCH_SIZE):
+
+                    for anc in (
+                        models.ContentNode.objects.filter(
+                            id__in=[
+                                item["id"]
+                                for item in items[i : i + ANCESTOR_BATCH_SIZE]
+                            ]
+                        )
+                        .get_ancestors()
+                        .values("id", "title", "lft", "rght", "tree_id")
+                    ):
+                        ancestors_map[anc["id"]] = anc
+
+                ancestors = sorted(ancestors_map.values(), key=lambda x: x["lft"])
+            else:
+                ancestors = list(
+                    queryset.get_ancestors()
+                    .values("id", "title", "lft", "rght", "tree_id")
+                    .order_by("lft")
+                )
+
+            def ancestor_lookup(item):
+                lft = item.get("lft")
+                rght = item.get("rght")
+                tree_id = item.get("tree_id")
+                return list(
+                    map(
+                        lambda x: {"id": x["id"], "title": x["title"]},
+                        filter(
+                            lambda x: x["lft"] < lft
+                            and x["rght"] > rght
+                            and x["tree_id"] == tree_id,
+                            ancestors,
+                        ),
                     )
                 )
-            item["is_leaf"] = item.get("kind") != content_kinds.TOPIC
-            output.append(item)
-        return output
-    def get_queryset(self):
-        return models.ContentNode.objects.filter(available=True)
+            return self.process_items(items, queryset, ancestor_lookup)
+
+        return []

     @list_route(methods=["get"])
     def descendants(self, request):
@@ -686,6 +730,191 @@ def resume(self, request, **kwargs):
         return Response(self.serialize(queryset))


+@method_decorator(cache_forever, name="dispatch")
+class ContentNodeTreeViewset(BaseContentNodeMixin, BaseValuesViewset):
+    # We fix the page size at 25 for a couple of reasons:
+    # 1. At this size the query appears to be relatively performant, and will deliver most of the tree
+    # data to the frontend in a single query.
+    # 2. In the case where the tree topology means that this will not return the full tree, the limit of
+    # 25 immediate children and 25 grandchildren means that we are at most using 1 + 25 + 25 * 25 = 651
+    # SQL parameters in the query to get the nodes for serialization - this means that we should not ever
+    # run into an issue where we hit a SQL parameter limit in these queries.
+    # If we find that this page size is too high, we should lower it, but for the reasons noted above, we
+    # should not raise it.
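+    # A rough worked check (assuming SQLite's default ceiling of 999 bound
+    # parameters per statement, as referenced in the ancestor batching note above):
+    # 1 + 25 + 25 * 25 = 651 node ids, leaving headroom for any additional
+    # filter parameters contributed by the query itself.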
+    page_size = 25
+
+    def consolidate(self, items, queryset):
+        if items:
+            return self.process_items(items, queryset)
+        return []
+
+    def validate_and_return_params(self, request):
+        depth = request.query_params.get("depth", 2)
+        lft__gt = request.query_params.get("lft__gt")
+
+        try:
+            depth = int(depth)
+            if 1 > depth or depth > 2:
+                raise ValueError
+        except ValueError:
+            raise ValidationError("Depth query parameter must have the value 1 or 2")
+
+        if lft__gt is not None:
+            try:
+                lft__gt = int(lft__gt)
+                if 1 > lft__gt:
+                    raise ValueError
+            except ValueError:
+                raise ValidationError(
+                    "lft__gt query parameter must be a positive integer if specified"
+                )
+        return depth, lft__gt
+
+    def get_grandchild_ids(self, child_ids, depth):
+        if depth == 2:
+            # Use this to keep track of how many grandchildren we have accumulated per child of the parent node
+            gc_by_parent = {}
+            # Iterate through the grandchildren of the parent node in lft order so we follow the tree traversal order
+            for gc in (
+                self.filter_queryset(self.get_queryset())
+                .filter(parent_id__in=child_ids)
+                .values("id", "parent_id")
+                .order_by("lft")
+            ):
+                # If we have not already added a list of nodes to the gc_by_parent map, initialize it here
+                if gc["parent_id"] not in gc_by_parent:
+                    gc_by_parent[gc["parent_id"]] = []
+                # If the number of grandchildren for a specific child node is less than the page size
+                # then we keep adding them to its list and yielding them.
+                # If not, we just skip this node, as we have already hit the page limit for the node that is
+                # its parent.
+                if len(gc_by_parent[gc["parent_id"]]) < self.page_size:
+                    gc_by_parent[gc["parent_id"]].append(gc["id"])
+                    yield gc["id"]
+
+    def retrieve(self, request, **kwargs):
+        """
+        A nested, paginated representation of the children and grandchildren of a specific node
+
+        GET parameters on request can be:
+        depth - a value of either 1 or 2 indicating how many levels of the tree to recurse;
+        if this parameter is missing it will default to 2.
+        lft__gt - a value to return child nodes with a lft value greater than this, if missing defaults to None
+
+        The pagination object returned for "children" will have this form:
+        {
+            "results": [...],
+            "more": {
+                "id": <id of the parent node>,
+                "params": {
+                    lft__gt: <lft value of the last child returned>,
+                    depth: {1,2},
+                },
+            }
+        }
+
+        The "more" property describes the "id" required to do URL reversal on this endpoint, and the params that should
+        be passed as query parameters to get the next set of results for pagination.
+
+        :param request: request object
+        :param pk: id of the parent node
+        :return: an object representing the parent with a pagination object as "children"
+        """
+
+        # Get the model for the parent node here - we do this so that we trigger a 404 immediately if the node
+        # does not exist (or exists but is not available), and so that we have the Django model object later to
+        # use for the `get_ancestors` MPTT method.
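+        # As a usage sketch (hypothetical id and lft values), a client walks the
+        # paginated tree roughly like:
+        #   GET /api/content/contentnode_tree/<pk>/
+        #   GET /api/content/contentnode_tree/<pk>/?depth=1&lft__gt=57
+        # where the second request replays the "params" from the first
+        # response's children["more"] object.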
+        parent_model = self.get_object()
+
+        depth, lft__gt = self.validate_and_return_params(request)
+
+        # Get a list of child_ids of the parent node up to the pagination limit
+        child_qs = self.get_queryset().filter(parent=parent_model)
+        if lft__gt is not None:
+            child_qs = child_qs.filter(lft__gt=lft__gt)
+        child_ids = child_qs.values_list("id", flat=True).order_by("lft")[
+            0 : self.page_size
+        ]
+
+        # Get a flat list of ids for grandchildren we will be returning
+        gc_ids = self.get_grandchild_ids(child_ids, depth)
+
+        # We explicitly order by lft here so that the nodes are in tree traversal order, which lets us iterate
+        # over them and build out our nested representation, certain that any ancestors have already been processed.
+        nodes = self.serialize(
+            self.filter_queryset(self.get_queryset())
+            .filter(Q(id=parent_model.id) | Q(id__in=child_ids) | Q(id__in=gc_ids))
+            .order_by("lft")
+        )
+
+        # The serialized parent representation is the first node in the lft order
+        parent = nodes[0]
+
+        # Do a query to get the parent's ancestors - for all other nodes, we can manually build
+        # the ancestors from the tree topology that we already know!
+        parent["ancestors"] = list(parent_model.get_ancestors().values("id", "title"))
+
+        # Use this to keep track of direct children of the parent node
+        # this will allow us to do lookups for the grandchildren, in order
+        # to insert them into the "children" property
+        children_by_id = {}
+
+        # Iterate through all the descendants that we have serialized
+        for desc in nodes[1:]:
+            # First check to see whether it is a direct child of the
+            # parent node that we initially queried
+            if desc["parent"] == parent_model.id:
+                # If so, add it to the children_by_id map so that
+                # grandchildren descendants can reference it later
+                children_by_id[desc["id"]] = desc
+                # The parent of this descendant is the parent node
+                # for this query
+                desc_parent = parent
+                # When we request more results for pagination, we want to return
+                # both nodes at this level, and the nodes at the lower level
+                more_depth = 2
+            elif desc["parent"] in children_by_id:
+                # Otherwise, check to see if our descendant's parent is in the
+                # children_by_id map - if it failed the first condition,
+                # it really should not fail this
+                desc_parent = children_by_id[desc["parent"]]
+                # When we request more results for pagination, we only want to return
+                # nodes at this level, and not any of its children
+                more_depth = 1
+            else:
+                # If we get to here, we have a node that is not in the tree subsection we are
+                # trying to return, so we just ignore it. This shouldn't happen.
+                continue
+            if "children" not in desc_parent:
+                # If the parent of the descendant does not already have its `children` property
+                # initialized, do so here.
+                desc_parent["children"] = {"results": [], "more": None}
+            # The ancestors field for the descendant will be its parent's ancestors, plus its parent!
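+            # For example (hypothetical values): if desc_parent["ancestors"] is
+            # [{"id": "root_id", "title": "Channel root"}], then this descendant's
+            # ancestors become [{"id": "root_id", "title": "Channel root"},
+            # {"id": desc_parent["id"], "title": desc_parent["title"]}].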
+ desc["ancestors"] = desc_parent["ancestors"] + [ + {"id": desc_parent["id"], "title": desc_parent["title"]} + ] + # Add this descendant to the results for the children pagination object + desc_parent["children"]["results"].append(desc) + # Only bother updating the URL for more if we have hit the page size limit + # otherwise it will just continue to be None + if len(desc_parent["children"]["results"]) == self.page_size: + # Any subsequent queries to get siblings of this node can restrict themselves + # to looking for nodes with lft greater than the rght value of this descendant + lft__gt = desc["rght"] + # If the rght value of this descendant is exactly 1 less than the rght value of + # its parent, then there are no more children that can be queried. + # So only in this instance do we update the more URL + if desc["rght"] + 1 < desc_parent["rght"]: + params = request.query_params.copy() + params["lft__gt"] = lft__gt + params["depth"] = more_depth + desc_parent["children"]["more"] = { + "id": desc_parent["id"], + "params": params, + } + return Response(parent) + + # return the result of and-ing a list of queries def intersection(queries): if queries: diff --git a/kolibri/core/content/api_urls.py b/kolibri/core/content/api_urls.py index edc2bc5ef5d..c02e653fc82 100644 --- a/kolibri/core/content/api_urls.py +++ b/kolibri/core/content/api_urls.py @@ -6,6 +6,7 @@ from .api import ContentNodeGranularViewset from .api import ContentNodeProgressViewset from .api import ContentNodeSearchViewset +from .api import ContentNodeTreeViewset from .api import ContentNodeViewset from .api import FileViewset from .api import RemoteChannelViewSet @@ -14,6 +15,9 @@ router.register("channel", ChannelMetadataViewSet, base_name="channel") router.register(r"contentnode", ContentNodeViewset, base_name="contentnode") +router.register( + r"contentnode_tree", ContentNodeTreeViewset, base_name="contentnode_tree" +) router.register( r"contentnode_search", ContentNodeSearchViewset, base_name="contentnode_search" ) diff --git a/kolibri/core/content/test/test_channel_upgrade.py b/kolibri/core/content/test/test_channel_upgrade.py index e814ba58b7e..87256c614f4 100644 --- a/kolibri/core/content/test/test_channel_upgrade.py +++ b/kolibri/core/content/test/test_channel_upgrade.py @@ -52,20 +52,24 @@ class ChannelBuilder(object): "root_node", ) - def __init__(self, levels=3): + def __init__(self, levels=3, num_children=5): self.levels = levels + self.num_children = num_children self.modified = set() try: self.load_data() except KeyError: - print("key error {}".format(self.levels)) self.generate_new_tree() self.save_data() self.generate_nodes_from_root_node() + @property + def cache_key(self): + return "{}_{}".format(self.levels, self.num_children) + def generate_new_tree(self): self.channel = self.channel_data() self.files = {} @@ -82,10 +86,18 @@ def generate_new_tree(self): ) def load_data(self): - data = copy.deepcopy(self.__TREE_CACHE[self.levels]) + try: + data = copy.deepcopy(self.__TREE_CACHE[self.cache_key]) - for key in self.tree_keys: - setattr(self, key, data[key]) + for key in self.tree_keys: + setattr(self, key, data[key]) + except KeyError: + print( + "No tree cache found for {} levels and {} children per level".format( + self.levels, self.num_children + ) + ) + raise def save_data(self): data = {} @@ -93,7 +105,7 @@ def save_data(self): for key in self.tree_keys: data[key] = getattr(self, key) - self.__TREE_CACHE[self.levels] = copy.deepcopy(data) + self.__TREE_CACHE[self.cache_key] = copy.deepcopy(data) def 
generate_nodes_from_root_node(self):
         self._django_nodes = ContentNode.objects.build_tree_nodes(self.root_node)
@@ -297,7 +309,7 @@ def data(self):

     def recurse_and_generate(self, parent_id, levels):
         children = []
-        for i in range(0, 5):
+        for i in range(0, self.num_children):
             if levels == 0:
                 node = self.generate_leaf(parent_id)
             else:
diff --git a/kolibri/core/content/test/test_content_app.py b/kolibri/core/content/test/test_content_app.py
index dd58962010d..85b263f3635 100644
--- a/kolibri/core/content/test/test_content_app.py
+++ b/kolibri/core/content/test/test_content_app.py
@@ -2,10 +2,12 @@
 To run this test, type this in command line
 """
 import datetime
+import unittest
 import uuid

 import mock
 import requests
+from django.conf import settings
 from django.core.cache import cache
 from django.core.urlresolvers import reverse
 from django.test import TestCase
@@ -18,6 +20,7 @@
 from kolibri.core.auth.models import FacilityUser
 from kolibri.core.auth.test.helpers import provision_device
 from kolibri.core.content import models as content
+from kolibri.core.content.test.test_channel_upgrade import ChannelBuilder
 from kolibri.core.device.models import DevicePermissions
 from kolibri.core.device.models import DeviceSettings
 from kolibri.core.logger.models import ContentSessionLog
@@ -233,13 +236,223 @@ def test_related_filter(self):
         )
         self.assertEqual(response.data[0]["title"], "c2")

+    def map_language(self, lang):
+        if lang:
+            return {
+                f: getattr(lang, f)
+                for f in [
+                    "id",
+                    "lang_code",
+                    "lang_subcode",
+                    "lang_name",
+                    "lang_direction",
+                ]
+            }
+
+    def _assert_node(self, actual, expected):
+        assessmentmetadata = (
+            expected.assessmentmetadata.all()
+            .values(
+                "assessment_item_ids",
+                "number_of_assessments",
+                "mastery_model",
+                "randomize",
+                "is_manipulable",
+                "contentnode",
+            )
+            .first()
+        )
+        files = []
+        for f in expected.files.all():
+            file = {}
+            for field in [
+                "id",
+                "priority",
+                "preset",
+                "supplementary",
+                "thumbnail",
+            ]:
+                file[field] = getattr(f, field)
+            file["checksum"] = f.local_file_id
+            for field in [
+                "available",
+                "file_size",
+                "extension",
+            ]:
+                file[field] = getattr(f.local_file, field)
+            file["lang"] = self.map_language(f.lang)
+            file["storage_url"] = f.get_storage_url()
+            files.append(file)
+        self.assertEqual(
+            actual,
+            {
+                "id": expected.id,
+                "available": expected.available,
+                "author": expected.author,
+                "channel_id": expected.channel_id,
+                "coach_content": expected.coach_content,
+                "content_id": expected.content_id,
+                "description": expected.description,
+                "kind": expected.kind,
+                "lang": self.map_language(expected.lang),
+                "license_description": expected.license_description,
+                "license_name": expected.license_name,
+                "license_owner": expected.license_owner,
+                "num_coach_contents": expected.num_coach_contents,
+                "options": expected.options,
+                "parent": expected.parent_id,
+                "sort_order": expected.sort_order,
+                "title": expected.title,
+                "lft": expected.lft,
+                "rght": expected.rght,
+                "tree_id": expected.tree_id,
+                "ancestors": list(expected.get_ancestors().values("id", "title")),
+                "tags": list(
+                    expected.tags.all()
+                    .order_by("tag_name")
+                    .values_list("tag_name", flat=True)
+                ),
+                "assessmentmetadata": assessmentmetadata,
+                "is_leaf": expected.kind != "topic",
+                "files": files,
+            },
+        )
+
+    def _assert_nodes(self, data, nodes):
+        for actual, expected in zip(data, nodes):
+            self._assert_node(actual, expected)
+
     def test_contentnode_list(self):
         root = content.ContentNode.objects.get(title="root")
-        expected_output = (
-            root.get_descendants(include_self=True).filter(available=True).count()
-        )
+        nodes = root.get_descendants(include_self=True).filter(available=True)
+        expected_output = len(nodes)
         response = self.client.get(reverse("kolibri:core:contentnode-list"))
         self.assertEqual(len(response.data), expected_output)
+        self._assert_nodes(response.data, nodes)
+
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipping postgres: it is not as vulnerable to large queries, and large insertions are less performant",
+    )
+    def test_contentnode_list_long(self):
+        # This will make > 1000 nodes which should test our ancestor batching behaviour
+        builder = ChannelBuilder(num_children=10)
+        builder.insert_into_default_db()
+        content.ContentNode.objects.update(available=True)
+        nodes = content.ContentNode.objects.filter(available=True)
+        expected_output = len(nodes)
+        self.assertGreater(expected_output, 1000)
+        response = self.client.get(reverse("kolibri:core:contentnode-list"))
+        self.assertEqual(len(response.data), expected_output)
+        self._assert_nodes(response.data, nodes)
+
+    def _recurse_and_assert(self, data, nodes, recursion_depth=0):
+        for actual, expected in zip(data, nodes):
+            children = actual.pop("children", None)
+            self._assert_node(actual, expected)
+            if children:
+                child_nodes = content.ContentNode.objects.filter(
+                    available=True, parent=expected
+                )
+                if children["more"] is None:
+                    self.assertEqual(len(child_nodes), len(children["results"]))
+                else:
+                    self.assertGreater(len(child_nodes), len(children["results"]))
+                    self.assertEqual(children["more"]["id"], expected.id)
+                    self.assertEqual(
+                        children["more"]["params"]["lft__gt"], child_nodes[24].rght
+                    )
+                    self.assertEqual(
+                        children["more"]["params"]["depth"], 2 - recursion_depth
+                    )
+                self._recurse_and_assert(
+                    children["results"],
+                    child_nodes,
+                    recursion_depth=recursion_depth + 1,
+                )
+
+    def test_contentnode_tree(self):
+        root = content.ContentNode.objects.get(title="root")
+        response = self.client.get(
+            reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id})
+        )
+        self._recurse_and_assert([response.data], [root])
+
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipping postgres: it is not as vulnerable to large queries, and large insertions are less performant",
+    )
+    def test_contentnode_tree_long(self):
+        builder = ChannelBuilder(levels=2, num_children=30)
+        builder.insert_into_default_db()
+        content.ContentNode.objects.all().update(available=True)
+        root = content.ContentNode.objects.get(id=builder.root_node["id"])
+        response = self.client.get(
+            reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id})
+        )
+        self._recurse_and_assert([response.data], [root])
+
+    def test_contentnode_tree_depth_1(self):
+        root = content.ContentNode.objects.get(title="root")
+        response = self.client.get(
+            reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}),
+            data={"depth": 1},
+        )
+        self._recurse_and_assert([response.data], [root])
+
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipping postgres: it is not as vulnerable to large queries, and large insertions are less performant",
+    )
+    def test_contentnode_tree_lft__gt(self):
+        builder = ChannelBuilder(levels=2, num_children=30)
+        builder.insert_into_default_db()
+        content.ContentNode.objects.all().update(available=True)
+        root = content.ContentNode.objects.get(id=builder.root_node["id"])
+        lft__gt = content.ContentNode.objects.filter(parent=root)[24].rght
+        response = self.client.get(
+            reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id}),
+            data={"lft__gt": lft__gt},
+        )
+        self.assertEqual(len(response.data["children"]["results"]), 5)
+        self.assertIsNone(response.data["children"]["more"])
+        first_node = content.ContentNode.objects.filter(parent=root)[25]
+        self._recurse_and_assert(
+            [response.data["children"]["results"][0]], [first_node], recursion_depth=1
+        )
+
+    @unittest.skipIf(
+        getattr(settings, "DATABASES")["default"]["ENGINE"]
+        == "django.db.backends.postgresql",
+        "Skipping postgres: it is not as vulnerable to large queries, and large insertions are less performant",
+    )
+    def test_contentnode_tree_more(self):
+        builder = ChannelBuilder(levels=2, num_children=30)
+        builder.insert_into_default_db()
+        content.ContentNode.objects.all().update(available=True)
+        root = content.ContentNode.objects.get(id=builder.root_node["id"])
+        response = self.client.get(
+            reverse("kolibri:core:contentnode_tree-detail", kwargs={"pk": root.id})
+        )
+        first_child = response.data["children"]["results"][0]
+        self.assertEqual(first_child["children"]["more"]["params"]["depth"], 1)
+        nested_page_response = self.client.get(
+            reverse(
+                "kolibri:core:contentnode_tree-detail",
+                kwargs={"pk": first_child["children"]["more"]["id"]},
+            ),
+            data=first_child["children"]["more"]["params"],
+        )
+        self.assertEqual(len(nested_page_response.data["children"]["results"]), 5)
+        self.assertIsNone(nested_page_response.data["children"]["more"])

     @mock.patch("kolibri.core.content.api.get_channel_stats_from_studio")
     def test_contentnode_granular_network_import(self, stats_mock):