From 357204c936e9d905c567465e2423ef8df86296a1 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Thu, 2 Jun 2022 16:42:36 +0530
Subject: [PATCH 001/313] Optimized import from other channels search

---
 .../contentcuration/dev_settings.py           | 11 ++++
 .../contentcuration/tests/testdata.py         | 18 +++---
 .../contentcuration/viewsets/contentnode.py   |  3 +-
 contentcuration/search/tests.py               | 55 +++++++++++++++++++
 .../search/viewsets/contentnode.py            |  2 +-
 5 files changed, 79 insertions(+), 10 deletions(-)

diff --git a/contentcuration/contentcuration/dev_settings.py b/contentcuration/contentcuration/dev_settings.py
index d81d23a993..3dcbd5bf34 100644
--- a/contentcuration/contentcuration/dev_settings.py
+++ b/contentcuration/contentcuration/dev_settings.py
@@ -6,3 +6,14 @@
 ROOT_URLCONF = "contentcuration.dev_urls"

 INSTALLED_APPS += ("drf_yasg",)
+
+REST_FRAMEWORK = {
+    'DEFAULT_PERMISSION_CLASSES': (
+        'rest_framework.permissions.IsAuthenticated',
+    ),
+    'DEFAULT_AUTHENTICATION_CLASSES': (
+        'rest_framework.authentication.SessionAuthentication',
+        'rest_framework.authentication.BasicAuthentication',
+        'rest_framework.authentication.TokenAuthentication',
+    )
+}
diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py
index 50895337b1..5560e853a5 100644
--- a/contentcuration/contentcuration/tests/testdata.py
+++ b/contentcuration/contentcuration/tests/testdata.py
@@ -195,21 +195,23 @@ def node(data, parent=None):
     return new_node


-def tree(parent=None):
+def tree(parent=None, tree_data=None):
     # Read from json fixture
-    filepath = os.path.sep.join([os.path.dirname(__file__), "fixtures", "tree.json"])
-    with open(filepath, "rb") as jsonfile:
-        data = json.load(jsonfile)
+    if tree_data is None:
+        filepath = os.path.sep.join([os.path.dirname(__file__), "fixtures", "tree.json"])
+        with open(filepath, "rb") as jsonfile:
+            tree_data = json.load(jsonfile)

-    return node(data, parent)
+    return node(tree_data, parent)


-def channel(name="testchannel"):
+def channel(name="testchannel", create_main_tree=True, main_tree_data=None):
     channel = cc.Channel.objects.create(name=name)
     channel.save()

-    channel.main_tree = tree()
-    channel.save()
+    if create_main_tree:
+        channel.main_tree = tree(tree_data=main_tree_data)
+        channel.save()

     return channel
diff --git a/contentcuration/contentcuration/viewsets/contentnode.py b/contentcuration/contentcuration/viewsets/contentnode.py
index c7bb2e222a..44169a7532 100644
--- a/contentcuration/contentcuration/viewsets/contentnode.py
+++ b/contentcuration/contentcuration/viewsets/contentnode.py
@@ -610,7 +610,8 @@ def delete_from_changes(self, changes):


 def dict_if_none(obj, field_name=None):
-    return obj[field_name] if obj[field_name] else {}
+    value = obj.get(field_name)
+    return value if value else {}


 # Apply mixin first to override ValuesViewset
diff --git a/contentcuration/search/tests.py b/contentcuration/search/tests.py
index 2341fc9d1b..9d0aff4f4d 100644
--- a/contentcuration/search/tests.py
+++ b/contentcuration/search/tests.py
@@ -29,3 +29,58 @@ def test_filter_channels_by_edit(self):
         )
         self.assertEqual(response.status_code, 200, response.content)
         self.assertNotEqual(response.data["results"], [])
+
+    def test_search_result(self):
+        # Create two users
+        user_a = testdata.user(email="a@a.com")
+        user_b = testdata.user(email="b@b.com")
+
+        # Create two channels with two editors
+        test_tree_data = {
+            "node_id": "00000000000000000000000000000000",
+            "title": "Root topic node",
+            "kind_id": "topic",
+            "children": [
+                {
+                    "node_id": "00000000000000000000000000000001",
+                    "title": "Kolibri video",
+                    "kind_id": "video",
+                },
+            ]
+        }
+
+        channel_a = testdata.channel(name="user_a_channel", main_tree_data=test_tree_data)
+        channel_a.editors.add(user_a)
+
+        channel_b = testdata.channel(name="user_b_channel", create_main_tree=False)
+        channel_b.editors.add(user_b)
+
+        # Publish channel_a
+        channel_a.main_tree.publishing = False
+        channel_a.main_tree.changed = False
+        channel_a.main_tree.published = True
+        channel_a.main_tree.save()
+        channel_a.public = True
+        channel_a.save()
+
+        # Import resources from channel_a to channel_b
+        channel_a.main_tree.copy_to(channel_b.main_tree, batch_size=1)
+        channel_b.main_tree.refresh_from_db()
+
+        # Send request from user_b to the search endpoint
+        self.client.force_authenticate(user=user_b)
+        response = self.client.get(
+            reverse("search-list"),
+            data={
+                "channel_list": "public",
+                "keywords": "video"
+            },
+            format="json",
+        )
+
+        # Assert whether the location_ids are of accessible nodes or not
+        kolibri_video_node = channel_b.main_tree.get_descendants().filter(title="Kolibri video").first()
+
+        # The ids in location_ids should be of channel_b's ContentNode only
+        self.assertEqual(len(response.data["results"][0]["location_ids"]), 1)
+        self.assertEqual(response.data["results"][0]["location_ids"][0], kolibri_video_node.cloned_source_id)
diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py
index 54b5f437aa..7736b1f98a 100644
--- a/contentcuration/search/viewsets/contentnode.py
+++ b/contentcuration/search/viewsets/contentnode.py
@@ -162,7 +162,7 @@ def annotate_queryset(self, queryset):
         2. Annotate lists of content node and channel pks
         """
         # Get accessible content nodes that match the content id
-        content_id_query = ContentNode.filter_view_queryset(ContentNode.objects.all(), self.request.user).filter(
+        content_id_query = queryset.filter(
             content_id=OuterRef("content_id")
         )

From d24416cfbfd176d9dd0d3172b646a18629e8e449 Mon Sep 17 00:00:00 2001
From: Blaine Jester
Date: Wed, 15 Jun 2022 13:47:24 -0700
Subject: [PATCH 002/313] Add search test for channel filtering and location_ids handling

---
 .../contentcuration/tests/testdata.py |  2 +
 contentcuration/search/tests.py       | 75 +++++++++++++++++++
 2 files changed, 77 insertions(+)

diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py
index 5560e853a5..b857dfb513 100644
--- a/contentcuration/contentcuration/tests/testdata.py
+++ b/contentcuration/contentcuration/tests/testdata.py
@@ -145,6 +145,7 @@ def node(data, parent=None):
         sort_order=data.get('sort_order', 1),
         complete=True,
         extra_fields=data.get('extra_fields'),
+        grade_levels="{}",
     )
     new_node.save()
     video_file = fileobj_video(contents=b"Video File")
@@ -171,6 +172,7 @@ def node(data, parent=None):
         content_id=data.get('content_id') or data['node_id'],
         sort_order=data.get('sort_order', 1),
         complete=True,
+        grade_levels="{}",
     )
     new_node.save()

diff --git a/contentcuration/search/tests.py b/contentcuration/search/tests.py
index 9d0aff4f4d..bcb3b2aa99 100644
--- a/contentcuration/search/tests.py
+++ b/contentcuration/search/tests.py
@@ -2,6 +2,7 @@

 from django.urls import reverse

+from contentcuration.models import Channel
 from contentcuration.tests import testdata
 from contentcuration.tests.base import StudioAPITestCase

@@ -84,3 +85,77 @@ def test_search_result(self):

         # The ids in location_ids should be of channel_b's ContentNode only
         self.assertEqual(len(response.data["results"][0]["location_ids"]), 1)
         self.assertEqual(response.data["results"][0]["location_ids"][0], kolibri_video_node.cloned_source_id)
+
+    def test_channel_list_filter_and_location_ids(self):
+        users = []
+        channels = []
+        for i in range(4):
+            user = testdata.user(email="a{}@a.com".format(i))
+            users.append(user)
+            channel = Channel.objects.create(name="user_a{}_channel".format(i))
+            channel.save()
+            channels.append(channel)
+            channel.editors.add(user)
+
+        public_channel, editable_channel, viewable_channel, inaccessible_channel = channels
+
+        public_video_node = testdata.node({
+            "title": "Kolibri video",
+            "kind_id": "video",
+        }, parent=public_channel.main_tree)
+        public_video_node.complete = True
+        public_video_node.published = True
+        public_video_node.changed = False
+        public_video_node.save()
+
+        public_channel.main_tree.published = True
+        public_channel.main_tree.changed = False
+        public_channel.main_tree.save()
+
+        public_channel.public = True
+        public_channel.save()
+
+        user_b = users[1]
+        viewable_channel.viewers.add(user_b)
+
+        public_video_node.refresh_from_db()
+        public_video_node.copy_to(target=editable_channel.main_tree)
+        public_video_node.copy_to(target=viewable_channel.main_tree)
+        public_video_node.copy_to(target=inaccessible_channel.main_tree)
+
+        editable_channel.main_tree.refresh_from_db()
+        editable_video_node = editable_channel.main_tree.get_descendants().first()
+        viewable_channel.main_tree.refresh_from_db()
+        viewable_video_node = viewable_channel.main_tree.get_descendants().first()
+        inaccessible_channel.main_tree.refresh_from_db()
+        inaccessible_video_node = inaccessible_channel.main_tree.get_descendants().first()
+
+        # Send request from user_b to the search endpoint
+        self.client.force_authenticate(user=user_b)
+
+        for channel_list in ("public", "edit", "view"):
+            response = self.client.get(
+                reverse("search-list"),
+                data={
+                    "channel_list": channel_list,
+                    "keywords": "video"
+                },
+                format="json",
+            )
+
+            for result in response.data["results"]:
+                self.assertNotEqual(result["id"], inaccessible_video_node.id)
+
+                if channel_list == "public":
+                    self.assertEqual(result["id"], public_video_node.id)
+                elif channel_list == "edit":
+                    self.assertEqual(result["id"], editable_video_node.id)
+                elif channel_list == "view":
+                    self.assertEqual(result["id"], viewable_video_node.id)
+
+                location_ids = result["location_ids"]
+                self.assertEqual(len(location_ids), 3)
+                self.assertIn(editable_video_node.id, location_ids)
+                self.assertIn(viewable_video_node.id, location_ids)
+                self.assertIn(public_video_node.id, location_ids)
+                self.assertNotIn(inaccessible_video_node.id, location_ids)

From 676526e4f7de518066f3af8acc4aa9f539a66ba2 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Thu, 30 Jun 2022 13:18:26 +0530
Subject: [PATCH 003/313] Fix autodiscovery of search tests

---
 contentcuration/search/tests/__init__.py       |  0
 .../search/{tests.py => tests/test_search.py}  |  0
 contentcuration/search/viewsets/contentnode.py | 17 ++++++++++-------
 3 files changed, 10 insertions(+), 7 deletions(-)
 create mode 100644 contentcuration/search/tests/__init__.py
 rename contentcuration/search/{tests.py => tests/test_search.py} (100%)

diff --git a/contentcuration/search/tests/__init__.py b/contentcuration/search/tests/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/contentcuration/search/tests.py b/contentcuration/search/tests/test_search.py
similarity index 100%
rename from contentcuration/search/tests.py
rename to contentcuration/search/tests/test_search.py
diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py
index 7736b1f98a..9c61c6092a 100644
--- a/contentcuration/search/viewsets/contentnode.py
+++ b/contentcuration/search/viewsets/contentnode.py
@@ -13,18 +13,19 @@
 from django_filters.rest_framework import CharFilter
 from le_utils.constants import content_kinds
 from le_utils.constants import roles
+from rest_framework.permissions import IsAuthenticated

 from contentcuration.models import Channel
 from contentcuration.models import ContentNode
 from contentcuration.models import File
 from contentcuration.utils.pagination import CachedListPagination
 from contentcuration.viewsets.base import RequiredFilterSet
+from contentcuration.viewsets.base import ValuesViewset
 from contentcuration.viewsets.common import NotNullMapArrayAgg
 from contentcuration.viewsets.common import SQArrayAgg
 from contentcuration.viewsets.common import SQCount
 from contentcuration.viewsets.common import UUIDFilter
 from contentcuration.viewsets.common import UUIDInFilter
-from contentcuration.viewsets.contentnode import ContentNodeViewSet


 class ListPagination(CachedListPagination):
@@ -65,9 +66,11 @@ def filter_channel_list(self, queryset, name, value):

     def filter_keywords(self, queryset, name, value):
         filter_query = Q(title__icontains=value) | Q(description__icontains=value)
+
         tags_node_ids = ContentNode.tags.through.objects.filter(
             contenttag__tag_name__icontains=value
-        ).values_list("contentnode_id", flat=True)[:250]
+        ).values_list("contentnode_id", flat=True)
+
         # Check if we have a Kolibri node id or ids and add them to the search if so.
         # Add to, rather than replace, the filters so that we never misinterpret a search term as a UUID.
         # node_ids = uuid_re.findall(value) + list(tags_node_ids)
@@ -77,10 +80,8 @@ def filter_keywords(self, queryset, name, value):
             filter_query |= Q(node_id=node_id)
             filter_query |= Q(content_id=node_id)
             filter_query |= Q(id=node_id)
-        for node_id in tags_node_ids:
-            filter_query |= Q(id=node_id)

-        return queryset.filter(filter_query)
+        return queryset.filter(Q(id__in=list(tags_node_ids)) | filter_query)

     def filter_author(self, queryset, name, value):
         return queryset.filter(
@@ -130,9 +131,11 @@ class Meta:
         )


-class SearchContentNodeViewSet(ContentNodeViewSet):
+class SearchContentNodeViewSet(ValuesViewset):
+    queryset = ContentNode.objects.all()
     filterset_class = ContentNodeFilter
     pagination_class = ListPagination
+    permission_classes = [IsAuthenticated]
     values = (
         "id",
         "content_id",
@@ -162,7 +165,7 @@ def annotate_queryset(self, queryset):
         2. Annotate lists of content node and channel pks
         """
         # Get accessible content nodes that match the content id
-        content_id_query = queryset.filter(
+        content_id_query = ContentNode.filter_view_queryset(ContentNode.objects.all(), self.request.user).filter(
             content_id=OuterRef("content_id")
         )

From 6d40c5559556adb9894740270bc7e6bc3e67208d Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Sat, 9 Jul 2022 19:47:40 +0530
Subject: [PATCH 004/313] Remove location_ids, zero db queries on descendant resource count

---
 .../contentcuration/dev_settings.py            |  4 ++
 .../views/ImportFromChannels/BrowsingCard.vue  |  9 +--
 .../search/viewsets/contentnode.py             | 72 ++++++------------
 requirements-dev.in                            |  1 +
 requirements-dev.txt                           |  6 +-
 5 files changed, 32 insertions(+), 60 deletions(-)

diff --git a/contentcuration/contentcuration/dev_settings.py b/contentcuration/contentcuration/dev_settings.py
index 3dcbd5bf34..9df785afd1 100644
--- a/contentcuration/contentcuration/dev_settings.py
+++ b/contentcuration/contentcuration/dev_settings.py
@@ -1,4 +1,6 @@
 # flake8: noqa
+from dotenv import load_dotenv
+
 from .not_production_settings import *  # noqa

 DEBUG = True
@@ -17,3 +19,5 @@
         'rest_framework.authentication.TokenAuthentication',
     )
 }
+
+load_dotenv(override=True)  # take environment variables from .env.
diff --git a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue
index d8e3a30a30..f3451c96b8 100644
--- a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue
+++ b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue
@@ -155,13 +155,8 @@
       }
       return this.$tr('resourcesCount', { count });
     },
-    numLocations() {
-      return this.node.location_ids.length;
-    },
     goToLocationLabel() {
-      return this.numLocations > 1
-        ? this.$tr('goToPluralLocationsAction', { count: this.numLocations })
-        : this.$tr('goToSingleLocationAction');
+      return this.$tr('goToSingleLocationAction');
     },
     isTopic() {
       return this.node.kind === ContentKindsNames.TOPIC;
@@ -184,8 +179,6 @@
     $trs: {
       tagsList: 'Tags: {tags}',
       goToSingleLocationAction: 'Go to location',
-      goToPluralLocationsAction:
-        'In {count, number} {count, plural, one {location} other {locations}}',
       addToClipboardAction: 'Copy to clipboard',
       resourcesCount: '{count, number} {count, plural, one {resource} other {resources}}',
       coach: 'Resource for coaches',
diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py
index 9c61c6092a..8fab7f4bbe 100644
--- a/contentcuration/search/viewsets/contentnode.py
+++ b/contentcuration/search/viewsets/contentnode.py
@@ -1,14 +1,12 @@
 import re

-from django.db.models import Case
+from django.db.models import ExpressionWrapper
 from django.db.models import F
 from django.db.models import IntegerField
 from django.db.models import OuterRef
 from django.db.models import Q
 from django.db.models import Subquery
 from django.db.models import Value
-from django.db.models import When
-from django.db.models.functions import Coalesce
 from django_filters.rest_framework import BooleanFilter
 from django_filters.rest_framework import CharFilter
 from le_utils.constants import content_kinds
@@ -22,10 +20,9 @@
 from contentcuration.viewsets.base import RequiredFilterSet
 from contentcuration.viewsets.base import ValuesViewset
 from contentcuration.viewsets.common import NotNullMapArrayAgg
-from contentcuration.viewsets.common import SQArrayAgg
-from contentcuration.viewsets.common import SQCount
 from contentcuration.viewsets.common import UUIDFilter
 from contentcuration.viewsets.common import UUIDInFilter
+from contentcuration.viewsets.contentnode import get_title


 class ListPagination(CachedListPagination):
@@ -136,6 +133,7 @@ class SearchContentNodeViewSet(ValuesViewset):
         "modified",
         "parent_id",
         "changed",
-        "location_ids",
         "content_tags",
         "original_channel_name",
     )

+    field_map = {
+        "title": get_title,
+    }
+
+    def get_queryset(self):
+        queryset = super(SearchContentNodeViewSet, self).get_queryset()
+        return ContentNode._annotate_channel_id(queryset)
+
     def annotate_queryset(self, queryset):
         """
         1. Do a distinct by 'content_id,' using the original node if possible
         2. Annotate lists of content node and channel pks
         """
-        # Get accessible content nodes that match the content id
-        content_id_query = ContentNode.filter_view_queryset(ContentNode.objects.all(), self.request.user).filter(
-            content_id=OuterRef("content_id")
-        )
-
-        # Combine by unique content id
-        deduped_content_query = (
-            content_id_query.filter(content_id=OuterRef("content_id"))
-            .annotate(
-                is_original=Case(
-                    When(original_source_node_id=F("node_id"), then=Value(1)),
-                    default=Value(2),
-                    output_field=IntegerField(),
-                ),
-            )
-            .order_by("is_original", "created")
-        )
-        queryset = queryset.filter(
-            pk__in=Subquery(deduped_content_query.values_list("id", flat=True)[:1])
-        ).order_by()
         thumbnails = File.objects.filter(
             contentnode=OuterRef("id"), preset__thumbnail=True
         )
-        descendant_resources = (
-            ContentNode.objects.filter(
-                tree_id=OuterRef("tree_id"),
-                lft__gt=OuterRef("lft"),
-                rght__lt=OuterRef("rght"),
-            )
-            .exclude(kind_id=content_kinds.TOPIC)
-            .values("id", "role_visibility", "changed")
-            .order_by()
-        )
-        original_channel_name = Coalesce(
-            Subquery(
-                Channel.objects.filter(pk=OuterRef("original_channel_id")).values(
-                    "name"
-                )[:1]
-            ),
-            Subquery(
-                Channel.objects.filter(main_tree__tree_id=OuterRef("tree_id")).values(
-                    "name"
-                )[:1]
-            ),
+        descendant_resources_count = ExpressionWrapper(((F("rght") - F("lft") - Value(1)) / Value(2)), output_field=IntegerField())
+
+        channel_name = Subquery(
+            Channel.objects.filter(pk=OuterRef("channel_id")).values(
+                "name"
+            )[:1]
         )
+
         queryset = queryset.annotate(
-            location_ids=SQArrayAgg(content_id_query, field="id"),
-            resource_count=SQCount(descendant_resources, field="id"),
+            resource_count=descendant_resources_count,
             thumbnail_checksum=Subquery(thumbnails.values("checksum")[:1]),
             thumbnail_extension=Subquery(
                 thumbnails.values("file_format__extension")[:1]
             ),
             content_tags=NotNullMapArrayAgg("tags__tag_name"),
-            original_channel_name=original_channel_name,
+            original_channel_name=channel_name,
         )
+
         return queryset
diff --git a/requirements-dev.in b/requirements-dev.in
index 6d4a04ff49..ea6f900871 100644
--- a/requirements-dev.in
+++ b/requirements-dev.in
@@ -41,3 +41,4 @@ tabulate==0.8.2
 fonttools
 flower==0.9.4
 minio==7.1.1
+python-dotenv
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 2961f78cec..0c21ce1c18 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile
+# This file is autogenerated by pip-compile with python 3.8
 # To update, run:
 #
 #    pip-compile requirements-dev.in
@@ -256,6 +256,8 @@ python-dateutil==2.8.1
     # via
     #   -c requirements.txt
     #   faker
+python-dotenv==0.20.0
+    # via -r requirements-dev.in
 python-jsonrpc-server==0.4.0
     # via python-language-server
 python-language-server==0.36.2
@@ -286,6 +288,8 @@ rope==0.19.0
     # via -r requirements-dev.in
 ruamel-yaml==0.17.4
     # via drf-yasg
+ruamel-yaml-clib==0.2.6
+    # via ruamel-yaml
 service-factory==0.1.6
     # via -r requirements-dev.in
 six==1.16.0
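Patch 004's `descendant_resources_count` replaces a correlated `SQCount` subquery with pure arithmetic on the MPTT nested-set columns, which is what the commit message means by "zero db queries". A worked sketch of the invariant it leans on (note one behavioural nuance visible in the diff: unlike the removed subquery, the arithmetic counts all descendants, topics included):

    # In a nested-set tree a node owns the boundary interval (lft, rght);
    # every descendant contributes exactly two boundaries inside it, so:
    #     descendants = (rght - lft - 1) / 2
    def descendant_count(lft, rght):
        return (rght - lft - 1) // 2

    assert descendant_count(1, 2) == 0   # a leaf spans (1, 2)
    assert descendant_count(1, 8) == 3   # three descendants fill (1, 8)
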
From 094f05f7ef0e995ebccf2ba5da9aa41b8267a152 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Tue, 12 Jul 2022 14:19:28 +0530
Subject: [PATCH 005/313] Upgrade django debug toolbar and fix its settings

---
 .../contentcuration/debug/__init__.py       |  0
 .../contentcuration/debug/middleware.py     | 47 -------------------
 .../contentcuration/debug_panel_settings.py |  2 +-
 requirements-dev.in                         |  2 +-
 requirements-dev.txt                        |  2 +-
 5 files changed, 3 insertions(+), 50 deletions(-)
 delete mode 100644 contentcuration/contentcuration/debug/__init__.py
 delete mode 100644 contentcuration/contentcuration/debug/middleware.py

diff --git a/contentcuration/contentcuration/debug/__init__.py b/contentcuration/contentcuration/debug/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/contentcuration/contentcuration/debug/middleware.py b/contentcuration/contentcuration/debug/middleware.py
deleted file mode 100644
index 803a94f89b..0000000000
--- a/contentcuration/contentcuration/debug/middleware.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import threading
-import time
-
-import debug_panel.urls
-from debug_panel.cache import cache
-from debug_panel.middleware import DebugPanelMiddleware
-from django.urls import reverse
-
-
-class CustomDebugPanelMiddleware(DebugPanelMiddleware):
-    """
-    Custom version to fix SQL escaping:
-    https://github.com/recamshak/django-debug-panel/issues/17#issuecomment-366268893
-    """
-
-    def process_response(self, request, response):
-        """
-        Store the DebugToolbarMiddleware rendered toolbar into a cache store.
-        The data stored in the cache are then reachable from an URL that is appened
-        to the HTTP response header under the 'X-debug-data-url' key.
-        """
-        toolbar = self.__class__.debug_toolbars.get(
-            threading.current_thread().ident, None
-        )
-
-        response = super(DebugPanelMiddleware, self).process_response(request, response)
-
-        if toolbar:
-            # for django-debug-toolbar >= 1.4
-            for panel in reversed(toolbar.enabled_panels):
-                if (
-                    hasattr(panel, "generate_stats") and not panel.get_stats()
-                ):  # PATCH HERE
-                    panel.generate_stats(request, response)
-
-            cache_key = "%f" % time.time()
-            cache.set(cache_key, toolbar.render_toolbar())
-
-            response["X-debug-data-url"] = request.build_absolute_uri(
-                reverse(
-                    "debug_data",
-                    urlconf=debug_panel.urls,
-                    kwargs={"cache_key": cache_key},
-                )
-            )
-
-        return response
diff --git a/contentcuration/contentcuration/debug_panel_settings.py b/contentcuration/contentcuration/debug_panel_settings.py
index 79f9ddac6e..a61b573aab 100644
--- a/contentcuration/contentcuration/debug_panel_settings.py
+++ b/contentcuration/contentcuration/debug_panel_settings.py
@@ -17,7 +17,7 @@ def custom_show_toolbar(request):
 # if debug_panel exists, add it to our INSTALLED_APPS
 INSTALLED_APPS += ("debug_panel", "debug_toolbar", "pympler")  # noqa F405
 MIDDLEWARE += (  # noqa F405
-    "contentcuration.debug.middleware.CustomDebugPanelMiddleware",
+    "debug_toolbar.middleware.DebugToolbarMiddleware",
 )
 DEBUG_TOOLBAR_CONFIG = {
     "SHOW_TOOLBAR_CALLBACK": custom_show_toolbar,
diff --git a/requirements-dev.in b/requirements-dev.in
index ea6f900871..a398514373 100644
--- a/requirements-dev.in
+++ b/requirements-dev.in
@@ -2,7 +2,7 @@
 python-language-server
 django-concurrent-test-helper==0.7.0
 django-debug-panel==0.8.3
-django-debug-toolbar==1.9.1
+django-debug-toolbar==3.5.0
 flake8==3.4.1
 whitenoise
 Pympler
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 0c21ce1c18..1b98ce6bc3 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -81,7 +81,7 @@ django-concurrent-test-helper==0.7.0
     # via -r requirements-dev.in
 django-debug-panel==0.8.3
     # via -r requirements-dev.in
-django-debug-toolbar==1.9.1
+django-debug-toolbar==3.5.0
     # via
     #   -r requirements-dev.in
     #   django-debug-panel
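Patch 005 drops the custom middleware — which existed only to patch stats generation in django-debug-panel — in favour of the stock middleware from django-debug-toolbar 3.x. The body of `custom_show_toolbar` never appears in these hunks, so the following sketch of the resulting dev wiring is an assumption rather than the project's actual code:

    # Hypothetical sketch; EXCLUDED_DEBUG_URLS mirrors debug_panel_settings.py,
    # and the callback body is guessed from its name and usage.
    EXCLUDED_DEBUG_URLS = ["/content/storage"]

    def custom_show_toolbar(request):
        # Show the toolbar everywhere except the excluded endpoints.
        return not any(request.path.startswith(url) for url in EXCLUDED_DEBUG_URLS)

    MIDDLEWARE += ("debug_toolbar.middleware.DebugToolbarMiddleware",)
    DEBUG_TOOLBAR_CONFIG = {"SHOW_TOOLBAR_CALLBACK": custom_show_toolbar}
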
From 701ddc9a0176882c077839479e0bc0990367ddf1 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Fri, 22 Jul 2022 15:02:25 +0530
Subject: [PATCH 006/313] Remove unnecessary dev settings

---
 .../contentcuration/debug_panel_settings.py     |  8 ++++++--
 contentcuration/contentcuration/dev_settings.py | 15 ---------------
 requirements-dev.in                             |  1 -
 requirements-dev.txt                            |  2 --
 4 files changed, 6 insertions(+), 20 deletions(-)

diff --git a/contentcuration/contentcuration/debug_panel_settings.py b/contentcuration/contentcuration/debug_panel_settings.py
index a61b573aab..5dc3c6783f 100644
--- a/contentcuration/contentcuration/debug_panel_settings.py
+++ b/contentcuration/contentcuration/debug_panel_settings.py
@@ -1,8 +1,12 @@
 from .dev_settings import *  # noqa

-# These endpoints will throw an error on the django debug panel
+# These endpoints will throw an error on the django debug panel.
 EXCLUDED_DEBUG_URLS = [
     "/content/storage",
+
+    # Disabling task API because as soon as the task API gets polled
+    # the current request data gets overwritten.
+    "/api/task",
 ]

 DEBUG_PANEL_ACTIVE = True
@@ -14,7 +18,7 @@ def custom_show_toolbar(request):
     )  # noqa F405


-# if debug_panel exists, add it to our INSTALLED_APPS
+# if debug_panel exists, add it to our INSTALLED_APPS.
 INSTALLED_APPS += ("debug_panel", "debug_toolbar", "pympler")  # noqa F405
 MIDDLEWARE += (  # noqa F405
     "debug_toolbar.middleware.DebugToolbarMiddleware",
diff --git a/contentcuration/contentcuration/dev_settings.py b/contentcuration/contentcuration/dev_settings.py
index 9df785afd1..d81d23a993 100644
--- a/contentcuration/contentcuration/dev_settings.py
+++ b/contentcuration/contentcuration/dev_settings.py
@@ -1,6 +1,4 @@
 # flake8: noqa
-from dotenv import load_dotenv
-
 from .not_production_settings import *  # noqa

 DEBUG = True
@@ -8,16 +6,3 @@
 ROOT_URLCONF = "contentcuration.dev_urls"

 INSTALLED_APPS += ("drf_yasg",)
-
-REST_FRAMEWORK = {
-    'DEFAULT_PERMISSION_CLASSES': (
-        'rest_framework.permissions.IsAuthenticated',
-    ),
-    'DEFAULT_AUTHENTICATION_CLASSES': (
-        'rest_framework.authentication.SessionAuthentication',
-        'rest_framework.authentication.BasicAuthentication',
-        'rest_framework.authentication.TokenAuthentication',
-    )
-}
-
-load_dotenv(override=True)  # take environment variables from .env.
diff --git a/requirements-dev.in b/requirements-dev.in
index a398514373..8a83c768c5 100644
--- a/requirements-dev.in
+++ b/requirements-dev.in
@@ -41,4 +41,3 @@ tabulate==0.8.2
 fonttools
 flower==0.9.4
 minio==7.1.1
-python-dotenv
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 1b98ce6bc3..cf06183f09 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -256,8 +256,6 @@ python-dateutil==2.8.1
     # via
     #   -c requirements.txt
     #   faker
-python-dotenv==0.20.0
-    # via -r requirements-dev.in
 python-jsonrpc-server==0.4.0
     # via python-language-server
 python-language-server==0.36.2

From 1a14749e79047fcbe52670419650fd59c2cc3e40 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Fri, 22 Jul 2022 15:33:52 +0530
Subject: [PATCH 007/313] Add .envrc to .gitignore

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 19bfec72a5..63b5c6d221 100644
--- a/.gitignore
+++ b/.gitignore
@@ -65,7 +65,8 @@ docs/_build/
 # PyBuilder
 target/

-# virtualenv, pipenv
+# virtualenv, pipenv, direnv
+.envrc
 .env
 venv
 .venv

From bff595cdb78b0c7089bd931b82461e831077df26 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Fri, 12 Aug 2022 21:57:44 +0530
Subject: [PATCH 008/313] Add vector search column & indexes, also GiST trigram index

---
 .../0138_contentnode_search_vector.py       | 34 +++++++++++++++++++
 contentcuration/contentcuration/models.py   | 19 +++++++++++
 contentcuration/contentcuration/settings.py |  1 +
 .../contentcuration/utils/publish.py        |  7 +++-
 .../search/viewsets/contentnode.py          | 21 +++--------
 5 files changed, 64 insertions(+), 18 deletions(-)
 create mode 100644 contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py

diff --git a/contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py b/contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py
new file mode 100644
index 0000000000..e3b24b8905
--- /dev/null
+++ b/contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py
@@ -0,0 +1,34 @@
+# Generated by Django 3.2.13 on 2022-08-10 19:20
+import django.contrib.postgres.indexes
+import django.contrib.postgres.search
+from django.contrib.postgres.operations import AddIndexConcurrently
+from django.contrib.postgres.operations import TrigramExtension
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    atomic = False
+
+    dependencies = [
+        ('contentcuration', '0137_channelhistory'),
+    ]
+
+    operations = [
+        # Installs the pg_trgm module that comes pre-bundled with PostgreSQL 9.6.
+        TrigramExtension(),
+
+        migrations.AddField(
+            model_name='contentnode',
+            name='title_description_search_vector',
+            field=django.contrib.postgres.search.SearchVectorField(blank=True, null=True),
+        ),
+        AddIndexConcurrently(
+            model_name='contentnode',
+            index=django.contrib.postgres.indexes.GinIndex(fields=['title_description_search_vector'], name='node_search_vector_gin_idx'),
+        ),
+        AddIndexConcurrently(
+            model_name='contenttag',
+            index=django.contrib.postgres.indexes.GistIndex(fields=['tag_name'], name='contenttag_tag_name_gist_idx', opclasses=['gist_trgm_ops']),
+        ),
+    ]
diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py
index f7fea1aee2..e72e05b012 100644
--- a/contentcuration/contentcuration/models.py
+++ b/contentcuration/contentcuration/models.py
@@ -13,6 +13,10 @@
 from django.contrib.auth.base_user import AbstractBaseUser
 from django.contrib.auth.base_user import BaseUserManager
 from django.contrib.auth.models import PermissionsMixin
+from django.contrib.postgres.indexes import GinIndex
+from django.contrib.postgres.indexes import GistIndex
+from django.contrib.postgres.search import SearchVector
+from django.contrib.postgres.search import SearchVectorField
 from django.core.cache import cache
 from django.core.exceptions import MultipleObjectsReturned
 from django.core.exceptions import ObjectDoesNotExist
@@ -1082,6 +1086,9 @@ def delete(self, *args, **kwargs):
             self.secret_token.delete()


+CONTENT_TAG_NAME__INDEX_NAME = "contenttag_tag_name_gist_idx"
+
+
 class ContentTag(models.Model):
     id = UUIDField(primary_key=True, default=uuid.uuid4)
     tag_name = models.CharField(max_length=50)
@@ -1093,6 +1100,7 @@ def __str__(self):

     class Meta:
         unique_together = ['tag_name', 'channel']
+        indexes = [GistIndex(fields=["tag_name"], name=CONTENT_TAG_NAME__INDEX_NAME, opclasses=["gist_trgm_ops"])]


 def delegate_manager(method):
@@ -1136,6 +1144,12 @@ def __str__(self):
 NODE_ID_INDEX_NAME = "node_id_idx"
 NODE_MODIFIED_INDEX_NAME = "node_modified_idx"
 NODE_MODIFIED_DESC_INDEX_NAME = "node_modified_desc_idx"
+NODE_SEARCH_VECTOR_GIN_INDEX_NAME = "node_search_vector_gin_idx"
+
+# Our postgres full text search configuration.
+POSTGRES_FTS_CONFIG = "simple"
+# Search vector to create tsvector of title and description concatenated.
+POSTGRES_SEARCH_VECTOR = SearchVector("title", "description", config=POSTGRES_FTS_CONFIG)


 class ContentNode(MPTTModel, models.Model):
@@ -1231,6 +1245,10 @@ class ContentNode(MPTTModel, models.Model):
     # this duration should be in seconds.
     suggested_duration = models.IntegerField(blank=True, null=True, help_text="Suggested duration for the content node (in seconds)")

+    # A field to store the ts_vector form of (title + ' ' + description).
+    # This significantly increases the search performance.
+    title_description_search_vector = SearchVectorField(blank=True, null=True)
+
     objects = CustomContentNodeTreeManager()

     # Track all updates and ignore a blacklist of attributes
@@ -1830,6 +1848,7 @@ class Meta:
         indexes = [
             models.Index(fields=["node_id"], name=NODE_ID_INDEX_NAME),
             models.Index(fields=["-modified"], name=NODE_MODIFIED_DESC_INDEX_NAME),
+            GinIndex(fields=["title_description_search_vector"], name=NODE_SEARCH_VECTOR_GIN_INDEX_NAME),
         ]
diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py
index ae637542ba..b79c3b641e 100644
--- a/contentcuration/contentcuration/settings.py
+++ b/contentcuration/contentcuration/settings.py
@@ -86,6 +86,7 @@
     'webpack_loader',
     'django_filters',
     'mathfilters',
+    'django.contrib.postgres',
 )

 SESSION_ENGINE = "django.contrib.sessions.backends.cached_db"
diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py
index 8dfd5ef428..88b48be574 100644
--- a/contentcuration/contentcuration/utils/publish.py
+++ b/contentcuration/contentcuration/utils/publish.py
@@ -187,6 +187,10 @@ def queue_get_return_none_when_empty():
             logging.debug("Mapping node with id {id}".format(
                 id=node.pk))

+            # Update tsvector for this node.
+            node.title_description_search_vector = ccmodels.POSTGRES_SEARCH_VECTOR
+            node.save(update_fields=["title_description_search_vector"])
+
             if node.get_descendants(include_self=True).exclude(kind_id=content_kinds.TOPIC).exists() and node.complete:
                 children = (node.children.all())
                 node_queue.extend(children)
@@ -428,7 +432,8 @@ def process_assessment_metadata(ccnode, kolibrinode):
     exercise_data_type = ""
     if exercise_data.get('mastery_model'):
         exercise_data_type = exercise_data.get('mastery_model')
-    if exercise_data.get('option') and exercise_data.get('option').get('completion_criteria') and exercise_data.get('option').get('completion_criteria').get('mastery_model'):
+    if exercise_data.get('option') and exercise_data.get('option').get('completion_criteria') \
+            and exercise_data.get('option').get('completion_criteria').get('mastery_model'):
         exercise_data_type = exercise_data.get('option').get('completion_criteria').get('mastery_model')

     mastery_model = {'type': exercise_data_type or exercises.M_OF_N}
diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py
index 8fab7f4bbe..864897db25 100644
--- a/contentcuration/search/viewsets/contentnode.py
+++ b/contentcuration/search/viewsets/contentnode.py
@@ -1,5 +1,6 @@
 import re

+from django.contrib.postgres.search import SearchQuery
 from django.db.models import ExpressionWrapper
 from django.db.models import F
 from django.db.models import IntegerField
@@ -16,6 +17,7 @@
 from contentcuration.models import Channel
 from contentcuration.models import ContentNode
 from contentcuration.models import File
+from contentcuration.models import POSTGRES_FTS_CONFIG
 from contentcuration.utils.pagination import CachedListPagination
 from contentcuration.viewsets.base import RequiredFilterSet
 from contentcuration.viewsets.base import ValuesViewset
@@ -62,23 +64,8 @@ def filter_channel_list(self, queryset, name, value):
         return queryset.filter(channel_id__in=list(channel_ids))

     def filter_keywords(self, queryset, name, value):
-        filter_query = Q(title__icontains=value) | Q(description__icontains=value)
-
-        tags_node_ids = ContentNode.tags.through.objects.filter(
-            contenttag__tag_name__icontains=value
-        ).values_list("contentnode_id", flat=True)
-
-        # Check if we have a Kolibri node id or ids and add them to the search if so.
-        # Add to, rather than replace, the filters so that we never misinterpret a search term as a UUID.
-        # node_ids = uuid_re.findall(value) + list(tags_node_ids)
-        node_ids = uuid_re.findall(value)
-        for node_id in node_ids:
-            # check for the major ID types
-            filter_query |= Q(node_id=node_id)
-            filter_query |= Q(content_id=node_id)
-            filter_query |= Q(id=node_id)
-
-        return queryset.filter(Q(id__in=list(tags_node_ids)) | filter_query)
+        search_tsquery = SearchQuery(value=value, config=POSTGRES_FTS_CONFIG, search_type="plain")
+        return queryset.filter(title_description_search_vector=search_tsquery)

     def filter_author(self, queryset, name, value):
         return queryset.filter(
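Patch 008 is the pivot of the series: keyword matching moves from stacked `icontains` filters onto a stored, GIN-indexed tsvector, while the new GiST trigram index keeps `tag_name__icontains` (an `ILIKE '%term%'` that a btree cannot serve) cheap. A minimal sketch of how the two sides of the tsvector fit together, assuming a `ContentNode` queryset named `nodes` (field and config names follow the patch):

    from django.contrib.postgres.search import SearchQuery, SearchVector

    POSTGRES_FTS_CONFIG = "simple"  # no stemming; must match on both sides

    # Write side: persist to_tsvector(title || ' ' || description) per row,
    # which the GIN index then serves.
    nodes.update(
        title_description_search_vector=SearchVector("title", "description", config=POSTGRES_FTS_CONFIG)
    )

    # Read side: an exact lookup against a SearchQuery compiles to the
    # indexable predicate: search_vector @@ plainto_tsquery('simple', term).
    # search_type="plain" treats the user's input as literal words, never
    # as tsquery syntax.
    hits = nodes.filter(
        title_description_search_vector=SearchQuery("kolibri video", config=POSTGRES_FTS_CONFIG, search_type="plain")
    )
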
From c0e55ee704ecf995ebccf2ba5da9aa41b8267a152 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Fri, 12 Aug 2022 22:21:46 +0530
Subject: [PATCH 009/313] Remove cyclic migration conflicts

---
 ...de_search_vector.py => 0141_contentnode_search_vector.py} | 2 +-
 contentcuration/search/viewsets/contentnode.py                | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)
 rename contentcuration/contentcuration/migrations/{0138_contentnode_search_vector.py => 0141_contentnode_search_vector.py} (95%)

diff --git a/contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py b/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py
similarity index 95%
rename from contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py
rename to contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py
index e3b24b8905..551f42d5b7 100644
--- a/contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py
+++ b/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py
@@ -11,7 +11,7 @@ class Migration(migrations.Migration):
     atomic = False

     dependencies = [
-        ('contentcuration', '0137_channelhistory'),
+        ('contentcuration', '0140_delete_task'),
     ]

     operations = [
diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py
index 864897db25..e192a3ccb7 100644
--- a/contentcuration/search/viewsets/contentnode.py
+++ b/contentcuration/search/viewsets/contentnode.py
@@ -24,7 +24,6 @@
 from contentcuration.viewsets.common import NotNullMapArrayAgg
 from contentcuration.viewsets.common import UUIDFilter
 from contentcuration.viewsets.common import UUIDInFilter
-from contentcuration.viewsets.contentnode import get_title


 class ListPagination(CachedListPagination):
@@ -143,10 +142,6 @@ class SearchContentNodeViewSet(ValuesViewset):
         "original_channel_name",
     )

-    field_map = {
-        "title": get_title,
-    }
-
     def get_queryset(self):
         queryset = super(SearchContentNodeViewSet, self).get_queryset()
         return ContentNode._annotate_channel_id(queryset)

From 34e8436886516e92478dce5f364a2774ce63f113 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Fri, 12 Aug 2022 22:25:57 +0530
Subject: [PATCH 010/313] Fix wrong indentation caused by a merge conflict

---
 .../contentcuration/utils/publish.py | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py
index 213682b539..732d92d125 100644
--- a/contentcuration/contentcuration/utils/publish.py
+++ b/contentcuration/contentcuration/utils/publish.py
@@ -198,17 +198,17 @@ def queue_get_return_none_when_empty():
                 children = (node.children.all())
                 node_queue.extend(children)

-                kolibrinode = create_bare_contentnode(node, default_language, channel_id, channel_name)
-
-                if node.kind.kind == content_kinds.EXERCISE:
-                    exercise_data = process_assessment_metadata(node, kolibrinode)
-                    if force_exercises or node.changed or not \
-                            node.files.filter(preset_id=format_presets.EXERCISE).exists():
-                        create_perseus_exercise(node, kolibrinode, exercise_data, user_id=user_id)
-                elif node.kind.kind == content_kinds.SLIDESHOW:
-                    create_slideshow_manifest(node, kolibrinode, user_id=user_id)
-                create_associated_file_objects(kolibrinode, node)
-                map_tags_to_node(kolibrinode, node)
+            kolibrinode = create_bare_contentnode(node, default_language, channel_id, channel_name)
+
+            if node.kind.kind == content_kinds.EXERCISE:
+                exercise_data = process_assessment_metadata(node, kolibrinode)
+                if force_exercises or node.changed or not \
+                        node.files.filter(preset_id=format_presets.EXERCISE).exists():
+                    create_perseus_exercise(node, kolibrinode, exercise_data, user_id=user_id)
+            elif node.kind.kind == content_kinds.SLIDESHOW:
+                create_slideshow_manifest(node, kolibrinode, user_id=user_id)
+            create_associated_file_objects(kolibrinode, node)
+            map_tags_to_node(kolibrinode, node)

             if progress_tracker:
                 progress_tracker.increment(increment=percent_per_node)

From e00c512f40718858c154e6e3b0584287a836cef8 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Sun, 14 Aug 2022 19:07:20 +0530
Subject: [PATCH 011/313] Add a command for setting tsvectors and fix tests

---
 .../management/commands/set_tsvectors.py    | 71 ++++++++++++++++++
 contentcuration/contentcuration/models.py   | 12 +++
 contentcuration/contentcuration/settings.py |  3 +-
 contentcuration/search/tests/test_search.py | 73 ++-----------------
 .../search/viewsets/contentnode.py          | 15 +---
 5 files changed, 96 insertions(+), 78 deletions(-)
 create mode 100644 contentcuration/contentcuration/management/commands/set_tsvectors.py

diff --git a/contentcuration/contentcuration/management/commands/set_tsvectors.py b/contentcuration/contentcuration/management/commands/set_tsvectors.py
new file mode 100644
index 0000000000..6ed9819ff5
--- /dev/null
+++ b/contentcuration/contentcuration/management/commands/set_tsvectors.py
@@ -0,0 +1,71 @@
+"""
+This command sets tsvector in title_description_search_vector field in batches.
+The batches are created on the basis of channel_id. This enables resumption. Also helps
+in cases of failure or memory overflow.
+"""
+import logging as logmodule
+
+from django.core.cache import cache
+from django.core.management.base import BaseCommand
+
+from contentcuration.models import Channel
+from contentcuration.models import ContentNode
+from contentcuration.models import POSTGRES_SEARCH_VECTOR
+
+
+logmodule.basicConfig(level=logmodule.INFO)
+logging = logmodule.getLogger(__name__)
+
+
+UPDATED_TS_VECTORS_CACHE_KEY = "tsvectors_updated_for_channel_ids"
+UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY = "tsvectors_updated_for_null_channels"
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--public",
+            action="store_true",
+            help="Set tsvector for only the public channel nodes instead of all nodes.",
+        )
+        parser.add_argument(
+            "--no-cache",
+            action="store_true",
+            help="Disables the cache. This updates all previously updated nodes.",
+        )
+
+    def handle(self, *args, **options):
+        if options["no_cache"]:
+            updated_channel_ids = []
+            do_update_nodes_with_null_channel_id = True
+        else:
+            updated_channel_ids = [] if cache.get(UPDATED_TS_VECTORS_CACHE_KEY) is None else cache.get(UPDATED_TS_VECTORS_CACHE_KEY)
+            do_update_nodes_with_null_channel_id = not cache.get(UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY)
+
+        if options["public"]:
+            to_update_channel_ids = list(Channel.get_public_channels().exclude(id__in=updated_channel_ids).values_list("id", flat=True))
+            do_update_nodes_with_null_channel_id = False
+            logging.info("Started setting tsvector for public channel nodes.")
+        else:
+            to_update_channel_ids = list(Channel.objects.exclude(id__in=updated_channel_ids).values_list("id", flat=True))
+            logging.info("Started setting tsvector for all nodes.")
+
+        annotated_contentnode_qs = ContentNode._annotate_channel_id(ContentNode.objects)
+
+        for channel_id in to_update_channel_ids:
+            logging.info("Setting tsvector for nodes of channel {}.".format(channel_id))
+            annotated_contentnode_qs.filter(channel_id=channel_id).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR)
+            updated_channel_ids.append(channel_id)
+            cache.set(UPDATED_TS_VECTORS_CACHE_KEY, updated_channel_ids, None)
+            logging.info("Finished setting tsvector for nodes of channel {}.".format(channel_id))
+
+        if do_update_nodes_with_null_channel_id:
+            logging.info("Setting tsvector for nodes with NULL channel_id.")
+            annotated_contentnode_qs.filter(channel_id__isnull=True).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR)
+            cache.set(UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY, True, None)
+            logging.info("Finished setting tsvector for nodes with NULL channel_id.")
+
+        if options["public"]:
+            logging.info("Finished setting tsvector for public channel nodes.")
+        else:
+            logging.info("Finished setting tsvector for all nodes.")
diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py
index cebbfea959..8b1c331097 100644
--- a/contentcuration/contentcuration/models.py
+++ b/contentcuration/contentcuration/models.py
@@ -14,6 +14,7 @@
 from django.contrib.auth.models import PermissionsMixin
 from django.contrib.postgres.indexes import GinIndex
 from django.contrib.postgres.indexes import GistIndex
+from django.contrib.postgres.search import SearchQuery
 from django.contrib.postgres.search import SearchVector
 from django.contrib.postgres.search import SearchVectorField
 from django.contrib.sessions.models import Session
@@ -1356,6 +1357,12 @@ def filter_view_queryset(cls, queryset, user):
             | Q(public=True)
         )

+    @classmethod
+    def search(self, queryset, search_term):
+        search_query = Q(title_description_search_vector=SearchQuery(value=search_term, config=POSTGRES_FTS_CONFIG, search_type="plain"))
+        tags_query = Q(tags__tag_name__icontains=search_term)
+        return queryset.filter(search_query | tags_query)
+
     @raise_if_unsaved
     def get_root(self):
         # Only topics can be root nodes
@@ -1839,8 +1846,10 @@ def set_default_learning_activity(self):

     def save(self, skip_lock=False, *args, **kwargs):
         if self._state.adding:
+            is_create = True
             self.on_create()
         else:
+            is_create = False
             self.on_update()

         # Logic borrowed from mptt - do a simple check to see if we have changed
@@ -1884,6 +1893,9 @@ def save(self, skip_lock=False, *args, **kwargs):
         if changed_ids:
             ContentNode.objects.filter(id__in=changed_ids).update(changed=True)

+        if is_create:
+            ContentNode.filter_by_pk(pk=self.id).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR)
+
     # Copied from MPTT
     save.alters_data = True
diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py
index 92ed582a57..1e3cdb1e2c 100644
--- a/contentcuration/contentcuration/settings.py
+++ b/contentcuration/contentcuration/settings.py
@@ -221,7 +221,8 @@
 }

 IS_CONTENTNODE_TABLE_PARTITIONED = os.getenv("IS_CONTENTNODE_TABLE_PARTITIONED") or False
-
+TSVECTOR_SET_FOR_ALL_PUBLIC_CHANNEL_NODES = os.getenv("TSVECTOR_SET_FOR_ALL_PUBLIC_CHANNEL_NODES") or False
+TSVECTOR_SET_FOR_ALL_NODES = os.getenv("TSVECTOR_SET_FOR_ALL_NODES") or False

 DATABASE_ROUTERS = [
     "kolibri_content.router.ContentDBRouter",
diff --git a/contentcuration/search/tests/test_search.py b/contentcuration/search/tests/test_search.py
index bcb3b2aa99..6b3d34cc41 100644
--- a/contentcuration/search/tests/test_search.py
+++ b/contentcuration/search/tests/test_search.py
@@ -31,67 +31,14 @@ def test_filter_channels_by_edit(self):
         self.assertEqual(response.status_code, 200, response.content)
         self.assertNotEqual(response.data["results"], [])

-    def test_search(self):
+    def test_search(self):
         users = []
         channels = []
+
         for i in range(4):
             user = testdata.user(email="a{}@a.com".format(i))
             users.append(user)
+
             channel = Channel.objects.create(name="user_a{}_channel".format(i))
             channel.save()
             channels.append(channel)
             channel.editors.add(user)

         public_channel, editable_channel, viewable_channel, inaccessible_channel = channels

+        # Create public video node and publish it.
         public_video_node = testdata.node({
             "title": "Kolibri video",
             "kind_id": "video",
         }, parent=public_channel.main_tree)
         public_video_node.complete = True
         public_video_node.published = True
         public_video_node.changed = False
         public_video_node.save()

+        # Publish the public_channel.
         public_channel.main_tree.published = True
         public_channel.main_tree.changed = False
         public_channel.main_tree.save()
-
         public_channel.public = True
         public_channel.save()

+        # Set user_b viewable channel.
         user_b = users[1]
         viewable_channel.viewers.add(user_b)

         public_video_node.refresh_from_db()
         public_video_node.copy_to(target=editable_channel.main_tree)
         public_video_node.copy_to(target=viewable_channel.main_tree)
         public_video_node.copy_to(target=inaccessible_channel.main_tree)

+        # Get different nodes based on access.
         editable_channel.main_tree.refresh_from_db()
         editable_video_node = editable_channel.main_tree.get_descendants().first()
         viewable_channel.main_tree.refresh_from_db()
         viewable_video_node = viewable_channel.main_tree.get_descendants().first()
         inaccessible_channel.main_tree.refresh_from_db()
         inaccessible_video_node = inaccessible_channel.main_tree.get_descendants().first()

-        # Send request from user_b to the search endpoint
+        # Send request from user_b to the search endpoint.
         self.client.force_authenticate(user=user_b)

         for channel_list in ("public", "edit", "view"):
             response = self.client.get(
                 reverse("search-list"),
                 data={
                     "channel_list": channel_list,
                     "keywords": "video"
                 },
                 format="json",
             )

             for result in response.data["results"]:
                 self.assertNotEqual(result["id"], inaccessible_video_node.id)

                 if channel_list == "public":
                     self.assertEqual(result["id"], public_video_node.id)
                 elif channel_list == "edit":
                     self.assertEqual(result["id"], editable_video_node.id)
                 elif channel_list == "view":
                     self.assertEqual(result["id"], viewable_video_node.id)
-
-                location_ids = result["location_ids"]
-                self.assertEqual(len(location_ids), 3)
-                self.assertIn(editable_video_node.id, location_ids)
-                self.assertIn(viewable_video_node.id, location_ids)
-                self.assertIn(public_video_node.id, location_ids)
-                self.assertNotIn(inaccessible_video_node.id, location_ids)
diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py
index e192a3ccb7..df5cf831cb 100644
--- a/contentcuration/search/viewsets/contentnode.py
+++ b/contentcuration/search/viewsets/contentnode.py
@@ -1,6 +1,5 @@
 import re

-from django.contrib.postgres.search import SearchQuery
 from django.db.models import ExpressionWrapper
 from django.db.models import F
 from django.db.models import IntegerField
@@ -17,7 +16,6 @@
 from contentcuration.models import Channel
 from contentcuration.models import ContentNode
 from contentcuration.models import File
-from contentcuration.models import POSTGRES_FTS_CONFIG
 from contentcuration.utils.pagination import CachedListPagination
 from contentcuration.viewsets.base import RequiredFilterSet
 from contentcuration.viewsets.base import ValuesViewset
@@ -32,9 +30,6 @@ class ListPagination(CachedListPagination):
     max_page_size = 100


-uuid_re = re.compile("([a-f0-9]{32})")
-
-
 class ContentNodeFilter(RequiredFilterSet):
     keywords = CharFilter(method="filter_keywords")
     languages = CharFilter(method="filter_languages")
@@ -63,8 +58,7 @@ def filter_channel_list(self, queryset, name, value):
         return queryset.filter(channel_id__in=list(channel_ids))

     def filter_keywords(self, queryset, name, value):
-        search_tsquery = SearchQuery(value=value, config=POSTGRES_FTS_CONFIG, search_type="plain")
-        return queryset.filter(title_description_search_vector=search_tsquery)
+        return ContentNode.search(queryset=queryset, search_term=value)

     def filter_author(self, queryset, name, value):
         return queryset.filter(
@@ -115,7 +109,6 @@ class Meta:


 class SearchContentNodeViewSet(ValuesViewset):
-    queryset = ContentNode.objects.all()
     filterset_class = ContentNodeFilter
     pagination_class = ListPagination
     permission_classes = [IsAuthenticated]
@@ -143,13 +136,11 @@ class SearchContentNodeViewSet(ValuesViewset):
         "content_tags",
         "original_channel_name",
     )

     def get_queryset(self):
-        queryset = super(SearchContentNodeViewSet, self).get_queryset()
-        return ContentNode._annotate_channel_id(queryset)
+        return ContentNode._annotate_channel_id(ContentNode.objects)

     def annotate_queryset(self, queryset):
         """
-        1. Do a distinct by 'content_id,' using the original node if possible
-        2. Annotate lists of content node and channel pks
+        Annotates thumbnails, resources count and channel name.
         """
         thumbnails = File.objects.filter(
             contentnode=OuterRef("id"), preset__thumbnail=True
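Patch 011 centralizes keyword matching in `ContentNode.search`, which ORs the tsvector predicate with a tag `icontains` (served by the trigram GiST index from patch 008), so the viewset's `filter_keywords` shrinks to a single call. A hedged usage sketch of how it composes with access control (`user` and the surrounding view are assumed; both method names appear in the patches above):

    # filter_view_queryset narrows to nodes the user may see; search() then
    # applies (tsvector @@ plainto_tsquery) OR (tag_name ILIKE '%term%').
    accessible = ContentNode.filter_view_queryset(ContentNode.objects.all(), user)
    results = ContentNode.search(queryset=accessible, search_term="kolibri video")
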
From f3280d94432d8d54fdb1070b286b6b35ad849cf5 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Sun, 14 Aug 2022 20:43:13 +0530
Subject: [PATCH 012/313] Remove grade_level default to pass failing tests

---
 contentcuration/contentcuration/settings.py       | 2 --
 contentcuration/contentcuration/tests/testdata.py | 2 --
 2 files changed, 4 deletions(-)

diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py
index 1e3cdb1e2c..a8e5e8678b 100644
--- a/contentcuration/contentcuration/settings.py
+++ b/contentcuration/contentcuration/settings.py
@@ -221,8 +221,6 @@
 }

 IS_CONTENTNODE_TABLE_PARTITIONED = os.getenv("IS_CONTENTNODE_TABLE_PARTITIONED") or False
-TSVECTOR_SET_FOR_ALL_PUBLIC_CHANNEL_NODES = os.getenv("TSVECTOR_SET_FOR_ALL_PUBLIC_CHANNEL_NODES") or False
-TSVECTOR_SET_FOR_ALL_NODES = os.getenv("TSVECTOR_SET_FOR_ALL_NODES") or False

 DATABASE_ROUTERS = [
     "kolibri_content.router.ContentDBRouter",
diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py
index b857dfb513..5560e853a5 100644
--- a/contentcuration/contentcuration/tests/testdata.py
+++ b/contentcuration/contentcuration/tests/testdata.py
@@ -145,7 +145,6 @@ def node(data, parent=None):
         sort_order=data.get('sort_order', 1),
         complete=True,
         extra_fields=data.get('extra_fields'),
-        grade_levels="{}",
     )
     new_node.save()
     video_file = fileobj_video(contents=b"Video File")
@@ -172,7 +171,6 @@ def node(data, parent=None):
         content_id=data.get('content_id') or data['node_id'],
         sort_order=data.get('sort_order', 1),
         complete=True,
-        grade_levels="{}",
     )
     new_node.save()

From 3ff0edd040d1ba5a2ab0f958b4e173152821c85a Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Thu, 8 Sep 2022 13:02:32 +0530
Subject: [PATCH 013/313] Full text search models and data migrations

---
 .../contentcuration/debug_panel_settings.py    |  5 +-
 .../management/commands/set_tsvectors.py       | 71 -------------------
 .../0141_contentnode_search_vector.py          | 34 ---------
 contentcuration/contentcuration/models.py      | 27 -------
 .../contentcuration/viewsets/contentnode.py    |  3 +-
 contentcuration/search/admin.py                |  1 -
 contentcuration/search/management/__init__.py  |  0
 .../search/management/commands/__init__.py     |  0
 .../commands/set_channel_tsvectors.py          | 57 +++++++++++++++
 .../commands/set_contentnode_tsvectors.py      | 60 ++++++++++++++++
 .../search/migrations/0003_fulltextsearch.py   | 54 ++++++++++++++
 contentcuration/search/models.py               | 53 ++++++++++++++
 12 files changed, 228 insertions(+), 137 deletions(-)
 delete mode 100644 contentcuration/contentcuration/management/commands/set_tsvectors.py
 delete mode 100644 contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py
 delete mode 100644 contentcuration/search/admin.py
 create mode 100644 contentcuration/search/management/__init__.py
 create mode 100644 contentcuration/search/management/commands/__init__.py
 create mode 100644 contentcuration/search/management/commands/set_channel_tsvectors.py
 create mode 100644 contentcuration/search/management/commands/set_contentnode_tsvectors.py
 create mode 100644 contentcuration/search/migrations/0003_fulltextsearch.py

diff --git a/contentcuration/contentcuration/debug_panel_settings.py b/contentcuration/contentcuration/debug_panel_settings.py
index 5dc3c6783f..c097acbbc6 100644
--- a/contentcuration/contentcuration/debug_panel_settings.py
+++ b/contentcuration/contentcuration/debug_panel_settings.py
@@ -4,9 +4,10 @@
 EXCLUDED_DEBUG_URLS = [
     "/content/storage",

-    # Disabling task API because as soon as the task API gets polled
+    # Disabling sync API because as soon as the sync API gets polled
     # the current request data gets overwritten.
-    "/api/task",
+    # Can be removed after websockets deployment.
+    "/api/sync",
 ]

 DEBUG_PANEL_ACTIVE = True
diff --git a/contentcuration/contentcuration/management/commands/set_tsvectors.py b/contentcuration/contentcuration/management/commands/set_tsvectors.py
deleted file mode 100644
index 6ed9819ff5..0000000000
--- a/contentcuration/contentcuration/management/commands/set_tsvectors.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""
-This command sets tsvector in title_description_search_vector field in batches.
-The batches are created on the basis of channel_id. This enables resumption. Also helps
-in cases of failure or memory overflow.
-"""
-import logging as logmodule
-
-from django.core.cache import cache
-from django.core.management.base import BaseCommand
-
-from contentcuration.models import Channel
-from contentcuration.models import ContentNode
-from contentcuration.models import POSTGRES_SEARCH_VECTOR
-
-
-logmodule.basicConfig(level=logmodule.INFO)
-logging = logmodule.getLogger(__name__)
-
-
-UPDATED_TS_VECTORS_CACHE_KEY = "tsvectors_updated_for_channel_ids"
-UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY = "tsvectors_updated_for_null_channels"
-
-
-class Command(BaseCommand):
-    def add_arguments(self, parser):
-        parser.add_argument(
-            "--public",
-            action="store_true",
-            help="Set tsvector for only the public channel nodes instead of all nodes.",
-        )
-        parser.add_argument(
-            "--no-cache",
-            action="store_true",
-            help="Disables the cache. This updates all previously updated nodes.",
-        )
-
-    def handle(self, *args, **options):
-        if options["no_cache"]:
-            updated_channel_ids = []
-            do_update_nodes_with_null_channel_id = True
-        else:
-            updated_channel_ids = [] if cache.get(UPDATED_TS_VECTORS_CACHE_KEY) is None else cache.get(UPDATED_TS_VECTORS_CACHE_KEY)
-            do_update_nodes_with_null_channel_id = not cache.get(UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY)
-
-        if options["public"]:
-            to_update_channel_ids = list(Channel.get_public_channels().exclude(id__in=updated_channel_ids).values_list("id", flat=True))
-            do_update_nodes_with_null_channel_id = False
-            logging.info("Started setting tsvector for public channel nodes.")
-        else:
-            to_update_channel_ids = list(Channel.objects.exclude(id__in=updated_channel_ids).values_list("id", flat=True))
-            logging.info("Started setting tsvector for all nodes.")
-
-        annotated_contentnode_qs = ContentNode._annotate_channel_id(ContentNode.objects)
-
-        for channel_id in to_update_channel_ids:
-            logging.info("Setting tsvector for nodes of channel {}.".format(channel_id))
-            annotated_contentnode_qs.filter(channel_id=channel_id).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR)
-            updated_channel_ids.append(channel_id)
-            cache.set(UPDATED_TS_VECTORS_CACHE_KEY, updated_channel_ids, None)
-            logging.info("Finished setting tsvector for nodes of channel {}.".format(channel_id))
-
-        if do_update_nodes_with_null_channel_id:
-            logging.info("Setting tsvector for nodes with NULL channel_id.")
-            annotated_contentnode_qs.filter(channel_id__isnull=True).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR)
-            cache.set(UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY, True, None)
-            logging.info("Finished setting tsvector for nodes with NULL channel_id.")
-
-        if options["public"]:
-            logging.info("Finished setting tsvector for public channel nodes.")
-        else:
-            logging.info("Finished setting tsvector for all nodes.")
diff --git a/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py b/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py
deleted file mode 100644
index 551f42d5b7..0000000000
--- a/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Generated by Django 3.2.13 on 2022-08-10 19:20
-import django.contrib.postgres.indexes
-import django.contrib.postgres.search
-from django.contrib.postgres.operations import AddIndexConcurrently
-from django.contrib.postgres.operations import TrigramExtension
-from django.db import migrations
-
-
-class Migration(migrations.Migration):
-
-    atomic = False
-
-    dependencies = [
-        ('contentcuration', '0140_delete_task'),
-    ]
-
-    operations = [
-        # Installs the pg_trgm module that comes pre-bundled with PostgreSQL 9.6.
-        TrigramExtension(),
-
-        migrations.AddField(
-            model_name='contentnode',
-            name='title_description_search_vector',
-            field=django.contrib.postgres.search.SearchVectorField(blank=True, null=True),
-        ),
-        AddIndexConcurrently(
-            model_name='contentnode',
-            index=django.contrib.postgres.indexes.GinIndex(fields=['title_description_search_vector'], name='node_search_vector_gin_idx'),
-        ),
-        AddIndexConcurrently(
-            model_name='contenttag',
-            index=django.contrib.postgres.indexes.GistIndex(fields=['tag_name'], name='contenttag_tag_name_gist_idx', opclasses=['gist_trgm_ops']),
-        ),
-    ]
diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py
index 8b1c331097..a2a4af786d 100644
--- a/contentcuration/contentcuration/models.py
+++ b/contentcuration/contentcuration/models.py
@@ -12,11 +12,6 @@
 from django.contrib.auth.base_user import AbstractBaseUser
 from django.contrib.auth.base_user import BaseUserManager
 from django.contrib.auth.models import PermissionsMixin
-from django.contrib.postgres.indexes import GinIndex
-from django.contrib.postgres.indexes import GistIndex
-from django.contrib.postgres.search import SearchQuery
-from django.contrib.postgres.search import SearchVector
-from django.contrib.postgres.search import SearchVectorField
 from django.contrib.sessions.models import Session
 from django.core.cache import cache
 from django.core.exceptions import MultipleObjectsReturned
@@ -1111,7 +1106,6 @@ def __str__(self):

     class Meta:
         unique_together = ['tag_name', 'channel']
-        indexes = [GistIndex(fields=["tag_name"], name=CONTENT_TAG_NAME__INDEX_NAME, opclasses=["gist_trgm_ops"])]


 def delegate_manager(method):
@@ -1156,11 +1150,6 @@ def __str__(self):
 NODE_MODIFIED_INDEX_NAME = "node_modified_idx"
 NODE_MODIFIED_DESC_INDEX_NAME = "node_modified_desc_idx"
 NODE_SEARCH_VECTOR_GIN_INDEX_NAME = "node_search_vector_gin_idx"
-
-# Our postgres full text search configuration.
-POSTGRES_FTS_CONFIG = "simple"
-# Search vector to create tsvector of title and description concatenated.
-POSTGRES_SEARCH_VECTOR = SearchVector("title", "description", config=POSTGRES_FTS_CONFIG)

 CONTENTNODE_TREE_ID_CACHE_KEY = "contentnode_{pk}__tree_id"
@@ -1257,10 +1246,6 @@ class ContentNode(MPTTModel, models.Model):
     # this duration should be in seconds.
     suggested_duration = models.IntegerField(blank=True, null=True, help_text="Suggested duration for the content node (in seconds)")

-    # A field to store the ts_vector form of (title + ' ' + description).
-    # This significantly increases the search performance.
- title_description_search_vector = SearchVectorField(blank=True, null=True) - objects = CustomContentNodeTreeManager() # Track all updates and ignore a blacklist of attributes @@ -1357,12 +1342,6 @@ def filter_view_queryset(cls, queryset, user): | Q(public=True) ) - @classmethod - def search(self, queryset, search_term): - search_query = Q(title_description_search_vector=SearchQuery(value=search_term, config=POSTGRES_FTS_CONFIG, search_type="plain")) - tags_query = Q(tags__tag_name__icontains=search_term) - return queryset.filter(search_query | tags_query) - @raise_if_unsaved def get_root(self): # Only topics can be root nodes @@ -1846,10 +1825,8 @@ def set_default_learning_activity(self): def save(self, skip_lock=False, *args, **kwargs): if self._state.adding: - is_create = True self.on_create() else: - is_create = False self.on_update() # Logic borrowed from mptt - do a simple check to see if we have changed @@ -1893,9 +1870,6 @@ def save(self, skip_lock=False, *args, **kwargs): if changed_ids: ContentNode.objects.filter(id__in=changed_ids).update(changed=True) - if is_create: - ContentNode.filter_by_pk(pk=self.id).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR) - # Copied from MPTT save.alters_data = True @@ -1938,7 +1912,6 @@ class Meta: indexes = [ models.Index(fields=["node_id"], name=NODE_ID_INDEX_NAME), models.Index(fields=["-modified"], name=NODE_MODIFIED_DESC_INDEX_NAME), - GinIndex(fields=["title_description_search_vector"], name=NODE_SEARCH_VECTOR_GIN_INDEX_NAME), ] diff --git a/contentcuration/contentcuration/viewsets/contentnode.py b/contentcuration/contentcuration/viewsets/contentnode.py index dd942b161e..cc6ad0727f 100644 --- a/contentcuration/contentcuration/viewsets/contentnode.py +++ b/contentcuration/contentcuration/viewsets/contentnode.py @@ -633,8 +633,7 @@ def delete_from_changes(self, changes): def dict_if_none(obj, field_name=None): - value = obj.get(field_name) - return value if value else {} + return obj[field_name] if obj[field_name] else {} # Apply mixin first to override ValuesViewset diff --git a/contentcuration/search/admin.py b/contentcuration/search/admin.py deleted file mode 100644 index 846f6b4061..0000000000 --- a/contentcuration/search/admin.py +++ /dev/null @@ -1 +0,0 @@ -# Register your models here. diff --git a/contentcuration/search/management/__init__.py b/contentcuration/search/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contentcuration/search/management/commands/__init__.py b/contentcuration/search/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contentcuration/search/management/commands/set_channel_tsvectors.py b/contentcuration/search/management/commands/set_channel_tsvectors.py new file mode 100644 index 0000000000..4d27927f53 --- /dev/null +++ b/contentcuration/search/management/commands/set_channel_tsvectors.py @@ -0,0 +1,57 @@ +""" +This command inserts in bulk channel tsvectors to the ChannelFullTextSearch table. 
+""" +import logging as logmodule +import time + +from django.core.management.base import BaseCommand +from django.db.models import Exists +from django.db.models import OuterRef +from search.models import CHANNEL_KEYWORDS_TSVECTOR +from search.models import ChannelFullTextSearch + +from contentcuration.models import Channel +from contentcuration.viewsets.channel import primary_token_subquery + + +logmodule.basicConfig(level=logmodule.INFO) +logging = logmodule.getLogger("command") + +CHUNKSIZE = 5000 + + +class Command(BaseCommand): + + def handle(self, *args, **options): + start = time.time() + + channel_not_already_inserted_query = ~Exists(ChannelFullTextSearch.objects.filter(channel_id=OuterRef("id"))) + + channel_query = (Channel.objects.select_related("main_tree") + .filter(channel_not_already_inserted_query, deleted=False, main_tree__published=True) + .annotate(primary_channel_token=primary_token_subquery, + keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) + .values("id", "keywords_tsvector")) + + insertable_channels = list(channel_query[:CHUNKSIZE]) + total_channel_tsvectors_inserted = 0 + + while insertable_channels: + logging.info("Inserting channel tsvectors.") + + insert_objs = list() + for channel in insertable_channels: + obj = ChannelFullTextSearch(channel_id=channel["id"], keywords_tsvector=channel["keywords_tsvector"]) + insert_objs.append(obj) + + inserted_objs_list = ChannelFullTextSearch.objects.bulk_create(insert_objs) + + current_inserts_count = len(inserted_objs_list) + total_channel_tsvectors_inserted = total_channel_tsvectors_inserted + current_inserts_count + + logging.info("Inserted {} channel tsvectors.".format(current_inserts_count)) + + insertable_channels = list(channel_query[:CHUNKSIZE]) + + logging.info("Completed! successfully inserted total of {} channel tsvectors in {} seconds.".format( + total_channel_tsvectors_inserted, time.time() - start)) diff --git a/contentcuration/search/management/commands/set_contentnode_tsvectors.py b/contentcuration/search/management/commands/set_contentnode_tsvectors.py new file mode 100644 index 0000000000..58cf9350e8 --- /dev/null +++ b/contentcuration/search/management/commands/set_contentnode_tsvectors.py @@ -0,0 +1,60 @@ +""" +This command inserts in bulk contentnode tsvectors to the ContentNodeFullTextSearch table. 
+""" +import logging as logmodule +import time + +from django.contrib.postgres.aggregates import StringAgg +from django.core.management.base import BaseCommand +from django.db.models import Exists +from django.db.models import OuterRef +from search.models import CONTENTNODE_AUTHOR_TSVECTOR +from search.models import CONTENTNODE_KEYWORDS_TSVECTOR +from search.models import ContentNodeFullTextSearch + +from contentcuration.models import ContentNode + + +logmodule.basicConfig(level=logmodule.INFO) +logging = logmodule.getLogger("command") + +CHUNKSIZE = 10000 + + +class Command(BaseCommand): + + def handle(self, *args, **options): + start = time.time() + + tsvector_not_already_inserted_query = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"))) + + tsvector_node_query = (ContentNode._annotate_channel_id(ContentNode.objects) + .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), + keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, + author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) + .filter(tsvector_not_already_inserted_query, published=True) + .values("id", "channel_id", "keywords_tsvector", "author_tsvector").order_by()) + + insertable_nodes_tsvector = list(tsvector_node_query[:CHUNKSIZE]) + total_tsvectors_inserted = 0 + + while insertable_nodes_tsvector: + logging.info("Inserting contentnode tsvectors.") + + insert_objs = list() + for node in insertable_nodes_tsvector: + if node["channel_id"]: + obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], + keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) + insert_objs.append(obj) + + inserted_objs_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs) + + current_inserts_count = len(inserted_objs_list) + total_tsvectors_inserted = total_tsvectors_inserted + current_inserts_count + + logging.info("Inserted {} contentnode tsvectors.".format(current_inserts_count)) + + insertable_nodes_tsvector = list(tsvector_node_query[:CHUNKSIZE]) + + logging.info("Completed! 
Successfully inserted total of {} contentnode tsvectors in {} seconds.".format(total_tsvectors_inserted, time.time() - start)) diff --git a/contentcuration/search/migrations/0003_fulltextsearch.py b/contentcuration/search/migrations/0003_fulltextsearch.py new file mode 100644 index 0000000000..2885a4655f --- /dev/null +++ b/contentcuration/search/migrations/0003_fulltextsearch.py @@ -0,0 +1,54 @@ +# Generated by Django 3.2.14 on 2022-09-08 07:19 +import uuid + +import django.contrib.postgres.indexes +import django.contrib.postgres.search +import django.db.models.deletion +from django.contrib.postgres.operations import AddIndexConcurrently +from django.db import migrations +from django.db import models + +import contentcuration.models + + +class Migration(migrations.Migration): + + atomic = False + + dependencies = [ + ('contentcuration', '0140_delete_task'), + ('search', '0002_auto_20201215_2110'), + ] + + operations = [ + migrations.CreateModel( + name='ContentNodeFullTextSearch', + fields=[ + ('id', contentcuration.models.UUIDField(default=uuid.uuid4, max_length=32, primary_key=True, serialize=False)), + ('keywords_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), + ('author_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), + ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='channel_nodes_fts', to='contentcuration.channel')), + ('contentnode', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='node_fts', to='contentcuration.contentnode')), + ], + ), + migrations.CreateModel( + name='ChannelFullTextSearch', + fields=[ + ('id', contentcuration.models.UUIDField(default=uuid.uuid4, max_length=32, primary_key=True, serialize=False)), + ('keywords_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), + ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='channel_fts', to='contentcuration.channel')), + ], + ), + AddIndexConcurrently( + model_name='contentnodefulltextsearch', + index=django.contrib.postgres.indexes.GinIndex(fields=['keywords_tsvector'], name='node_keywords_tsv__gin_idx'), + ), + AddIndexConcurrently( + model_name='contentnodefulltextsearch', + index=django.contrib.postgres.indexes.GinIndex(fields=['author_tsvector'], name='node_author_tsv__gin_idx'), + ), + AddIndexConcurrently( + model_name='channelfulltextsearch', + index=django.contrib.postgres.indexes.GinIndex(fields=['keywords_tsvector'], name='channel_keywords_tsv__gin_idx'), + ), + ] diff --git a/contentcuration/search/models.py b/contentcuration/search/models.py index e1e550576b..64b0472c8a 100644 --- a/contentcuration/search/models.py +++ b/contentcuration/search/models.py @@ -1,8 +1,15 @@ import uuid from django.conf import settings +from django.contrib.postgres.indexes import GinIndex +from django.contrib.postgres.search import SearchVector +from django.contrib.postgres.search import SearchVectorField from django.db import models +from contentcuration.models import Channel +from contentcuration.models import ContentNode +from contentcuration.models import UUIDField as StudioUUIDField + class SavedSearch(models.Model): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) @@ -13,3 +20,49 @@ class SavedSearch(models.Model): saved_by = models.ForeignKey( settings.AUTH_USER_MODEL, related_name="searches", on_delete=models.CASCADE ) + + +POSTGRES_FTS_CONFIG = "simple" + 
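The "simple" configuration is what keeps matching language agnostic: unlike a language-specific configuration such as "english", it applies no stemming and strips no stop words, so tokens match exactly as stored. A minimal sketch of the difference (the title value is invented for illustration):

    from django.contrib.postgres.search import SearchQuery, SearchVector

    # config="simple" keeps tokens verbatim (lowercased):
    #   "Running Fast" -> 'running':1 'fast':2
    # config="english" stems and drops stop words:
    #   "Running Fast" -> 'run':1 'fast':2
    title_vector = SearchVector("title", config="simple")

    # Consequently a "simple" query for "run" will not match a node titled
    # "Running"; only the exact token "running" will.
    query = SearchQuery("running", config="simple")
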
+CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS = ("id", "channel_id", "node_id", "content_id", "tree_id", "title", "description", "contentnode_tags") +CONTENTNODE_KEYWORDS_TSVECTOR = SearchVector(*CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +CONTENTNODE_AUTHOR_TSVECTOR_FIELDS = ("author", "aggregator", "provider") +CONTENTNODE_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +CHANNEL_KEYWORDS_TSVECTOR_FIELDS = ("id", "main_tree__tree_id", "name", "description", "tagline", "primary_channel_token") +CHANNEL_KEYWORDS_TSVECTOR = SearchVector(*CHANNEL_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + + +class ContentNodeFullTextSearch(models.Model): + id = StudioUUIDField(primary_key=True, default=uuid.uuid4) + + # The contentnode that this record points to. + contentnode = models.ForeignKey(ContentNode, on_delete=models.CASCADE, related_name="node_fts") + + # The channel to which the contentnode belongs. Channel cannot be NULL because we only allow + # searches to be made inside channels. + channel = models.ForeignKey(Channel, on_delete=models.CASCADE, related_name="channel_nodes_fts") + + # This stores the keywords as tsvector. + keywords_tsvector = SearchVectorField(null=True, blank=True) + + # This stores the author as tsvector. + author_tsvector = SearchVectorField(null=True, blank=True) + + class Meta: + indexes = [GinIndex(fields=["keywords_tsvector"], name="node_keywords_tsv__gin_idx"), + GinIndex(fields=["author_tsvector"], name="node_author_tsv__gin_idx")] + + +class ChannelFullTextSearch(models.Model): + id = StudioUUIDField(primary_key=True, default=uuid.uuid4) + + # The channel to which this record points. + channel = models.ForeignKey(Channel, on_delete=models.CASCADE, related_name="channel_fts") + + # This stores the channel keywords as tsvector for super fast searches. + keywords_tsvector = SearchVectorField(null=True, blank=True) + + class Meta: + indexes = [GinIndex(fields=["keywords_tsvector"], name="channel_keywords_tsv__gin_idx")] From 974b69e8edae9d039edce8b603ea8f5001ce70e0 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 8 Sep 2022 13:18:32 +0530 Subject: [PATCH 014/313] Resolve conflicts and remove old index refs --- contentcuration/contentcuration/models.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index a2a4af786d..33ef23355a 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -1092,9 +1092,6 @@ def delete(self, *args, **kwargs): self.secret_token.delete() -CONTENT_TAG_NAME__INDEX_NAME = "contenttag_tag_name_gist_idx" - - class ContentTag(models.Model): id = UUIDField(primary_key=True, default=uuid.uuid4) tag_name = models.CharField(max_length=50) @@ -1149,7 +1146,6 @@ def __str__(self): NODE_ID_INDEX_NAME = "node_id_idx" NODE_MODIFIED_INDEX_NAME = "node_modified_idx" NODE_MODIFIED_DESC_INDEX_NAME = "node_modified_desc_idx" -NODE_SEARCH_VECTOR_GIN_INDEX_NAME = "node_search_vector_gin_idx" CONTENTNODE_TREE_ID_CACHE_KEY = "contentnode_{pk}__tree_id" From 717effa958c81047ab853819ae1565376175fa94 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Thu, 8 Sep 2022 13:05:59 -0700 Subject: [PATCH 015/313] Implement webpack performance suggestions from docs. Only use full hashes for production builds. Turn off pathinfo for development. 
--- webpack.config.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/webpack.config.js b/webpack.config.js index 6179e50b13..be0d1cc069 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -51,10 +51,11 @@ module.exports = (env = {}) => { htmlScreenshot: ['./shared/utils/htmlScreenshot.js'], }, output: { - filename: '[name]-[fullhash].js', - chunkFilename: '[name]-[id]-[fullhash].js', + filename: dev ? '[name].js' : '[name]-[fullhash].js', + chunkFilename: dev ? '[name]-[id].js' : '[name]-[id]-[fullhash].js', path: bundleOutputDir, publicPath: dev ? 'http://127.0.0.1:4000/dist/' : '/static/studio/', + pathinfo: !dev, }, devServer: { port: 4000, @@ -97,8 +98,8 @@ module.exports = (env = {}) => { filename: path.resolve(djangoProjectDir, 'build', 'webpack-stats.json'), }), new MiniCssExtractPlugin({ - filename: '[name]-[fullhash].css', - chunkFilename: '[name]-[fullhash]-[id].css', + filename: dev ? '[name].css' :'[name]-[fullhash].css', + chunkFilename: dev ? '[name]-[id].css' :'[name]-[fullhash]-[id].css', }), new WebpackRTLPlugin({ minify: false, From e15b015f17f11b703f98fcf667b2464e1aaf0576 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Sat, 10 Sep 2022 15:25:19 +0530 Subject: [PATCH 016/313] feat: full text search! --- .../contentcuration/utils/publish.py | 75 +++++++++++ .../contentcuration/viewsets/channel.py | 26 ++-- contentcuration/search/constants.py | 23 ++++ .../commands/set_channel_tsvectors.py | 2 +- .../commands/set_contentnode_tsvectors.py | 4 +- contentcuration/search/models.py | 13 -- contentcuration/search/serializers.py | 18 --- contentcuration/search/utils.py | 9 ++ .../search/viewsets/contentnode.py | 123 ++++++++++-------- 9 files changed, 185 insertions(+), 108 deletions(-) create mode 100644 contentcuration/search/constants.py delete mode 100644 contentcuration/search/serializers.py create mode 100644 contentcuration/search/utils.py diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index 66e8b37e49..afe4abdef4 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -14,13 +14,17 @@ from itertools import chain from django.conf import settings +from django.contrib.postgres.aggregates import StringAgg from django.core.files import File from django.core.files.storage import default_storage as storage from django.core.management import call_command from django.db.models import Count +from django.db.models import Exists from django.db.models import Max +from django.db.models import OuterRef from django.db.models import Q from django.db.models import Sum +from django.db.models import Value from django.db.utils import IntegrityError from django.template.loader import render_to_string from django.utils import timezone @@ -37,6 +41,13 @@ from le_utils.constants import roles from past.builtins import basestring from past.utils import old_div +from search.constants import CHANNEL_KEYWORDS_TSVECTOR +from search.constants import CONTENTNODE_AUTHOR_TSVECTOR +from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR +from search.constants import CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR +from search.constants import CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR +from search.models import ChannelFullTextSearch +from search.models import ContentNodeFullTextSearch from contentcuration import models as ccmodels from contentcuration.decorators import delay_user_storage_calculation @@ -808,6 +819,64 @@ def fill_published_fields(channel, 
version_notes): channel.save() +def create_or_update_tsvectors(channel_id): + """ + Create or update tsvectors for the channel and all its content nodes. + """ + # Update or create channel tsvector entry. + logging.info("Starting to set tsvectors for channel with id {}.".format(channel_id)) + + from contentcuration.viewsets.channel import primary_token_subquery + + channel = (ccmodels.Channel.objects + .annotate(primary_channel_token=primary_token_subquery, + keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) + .get(pk=channel_id)) + + if ChannelFullTextSearch.objects.filter(channel_id=channel_id).exists(): + update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel.keywords_tsvector) + logging.info("Updated {} channel tsvector.".format(update_count)) + else: + obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel.keywords_tsvector) + obj.save() + logging.info("Created 1 channel tsvector.") + + # Update or create contentnodes tsvector entry for channel_id. + logging.info("Setting tsvectors for all contentnodes in channel {}.".format(channel_id)) + + if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists(): + + # First, delete nodes that are no longer in main_tree. + nodes_no_longer_in_main_tree = ~Exists(channel.main_tree.get_family().filter(id=OuterRef("contentnode_id"))) + ContentNodeFullTextSearch.objects.filter(nodes_no_longer_in_main_tree, channel_id=channel_id).delete() + + # Now, all remaining nodes are in main_tree, so let's update them. + update_count = (ContentNodeFullTextSearch.objects.filter(channel_id=channel_id) + .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" ")) + .update(keywords_tsvector=CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR, author_tsvector=CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR)) + + # Insert newly created nodes. + nodes_not_having_tsvector_record = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"))) + nodes_to_insert = (channel.main_tree.get_family() + .filter(nodes_not_having_tsvector_record) + .annotate(channel_id=Value(channel_id), + contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), + keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, + author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) + .values("id", "channel_id", "keywords_tsvector", "author_tsvector")) + + insert_objs = list() + + for node in nodes_to_insert: + obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], + keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) + insert_objs.append(obj) + + inserted_nodes_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs) + + logging.info("Successfully inserted {} and updated {} contentnode tsvectors.".format(len(inserted_nodes_list), update_count)) + + @delay_user_storage_calculation def publish_channel( user_id, @@ -818,6 +887,8 @@ def publish_channel( send_email=False, progress_tracker=None, language=settings.LANGUAGE_CODE, + + ): """ :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None @@ -843,6 +914,10 @@ def publish_channel( if channel.public: delete_public_channel_cache_keys() + # Enqueue tsvector task to update or create channel tsvectors and all its + # contentnodes tsvector entries. 
+ create_or_update_tsvectors(channel_id=channel_id) + if send_email: with override(language): send_emails(channel, user_id, version_notes=version_notes) diff --git a/contentcuration/contentcuration/viewsets/channel.py b/contentcuration/contentcuration/viewsets/channel.py index 517e66f0fe..ca5c487e0a 100644 --- a/contentcuration/contentcuration/viewsets/channel.py +++ b/contentcuration/contentcuration/viewsets/channel.py @@ -24,6 +24,9 @@ from rest_framework.serializers import CharField from rest_framework.serializers import FloatField from rest_framework.serializers import IntegerField +from search.models import ChannelFullTextSearch +from search.models import ContentNodeFullTextSearch +from search.utils import get_fts_search_query from contentcuration.decorators import cache_no_user_data from contentcuration.models import Change @@ -119,23 +122,12 @@ def filter_deleted(self, queryset, name, value): return queryset.filter(deleted=value) def filter_keywords(self, queryset, name, value): - # TODO: Wait until we show more metadata on cards to add this back in - # keywords_query = self.main_tree_query.filter( - # Q(tags__tag_name__icontains=value) - # | Q(author__icontains=value) - # | Q(aggregator__icontains=value) - # | Q(provider__icontains=value) - # ) - return queryset.annotate( - # keyword_match_count=SQCount(keywords_query, field="content_id"), - primary_token=primary_token_subquery, - ).filter( - Q(name__icontains=value) - | Q(description__icontains=value) - | Q(pk__istartswith=value) - | Q(primary_token=value.replace("-", "")) - # | Q(keyword_match_count__gt=0) - ) + channel_keywords_query = (Exists(ChannelFullTextSearch.objects.filter( + keywords_tsvector=get_fts_search_query(value.replace("-", "")), channel_id=OuterRef("id")))) + contentnode_search_query = (Exists(ContentNodeFullTextSearch.objects.filter( + Q(keywords_tsvector=get_fts_search_query(value)) | Q(author_tsvector=get_fts_search_query(value)), channel_id=OuterRef("id")))) + + return queryset.filter(Q(channel_keywords_query) | Q(contentnode_search_query)) def filter_languages(self, queryset, name, value): languages = value.split(",") diff --git a/contentcuration/search/constants.py b/contentcuration/search/constants.py new file mode 100644 index 0000000000..9b4a29d246 --- /dev/null +++ b/contentcuration/search/constants.py @@ -0,0 +1,23 @@ +from django.contrib.postgres.search import SearchVector + +# Postgres full text search configuration. We use "simple" to make search +# language agnostic. +POSTGRES_FTS_CONFIG = "simple" + +# ContentNode vectors and search fields. 
+CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS = ("id", "channel_id", "node_id", "content_id", "tree_id", "title", "description", "contentnode_tags") +CONTENTNODE_KEYWORDS_TSVECTOR = SearchVector(*CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +CONTENTNODE_AUTHOR_TSVECTOR_FIELDS = ("author", "aggregator", "provider") +CONTENTNODE_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR_FIELDS = ("contentnode__id", "channel_id", "contentnode__node_id", "contentnode__content_id", + "contentnode__tree_id", "contentnode__title", "contentnode__description", "contentnode_tags") +CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR = SearchVector(*CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR_FIELDS = ("contentnode__author", "contentnode__aggregator", "contentnode__provider") +CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +# Channel vector and search fields. +CHANNEL_KEYWORDS_TSVECTOR_FIELDS = ("id", "main_tree__tree_id", "name", "description", "tagline", "primary_channel_token") +CHANNEL_KEYWORDS_TSVECTOR = SearchVector(*CHANNEL_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) diff --git a/contentcuration/search/management/commands/set_channel_tsvectors.py b/contentcuration/search/management/commands/set_channel_tsvectors.py index 4d27927f53..305e9a8adb 100644 --- a/contentcuration/search/management/commands/set_channel_tsvectors.py +++ b/contentcuration/search/management/commands/set_channel_tsvectors.py @@ -7,7 +7,7 @@ from django.core.management.base import BaseCommand from django.db.models import Exists from django.db.models import OuterRef -from search.models import CHANNEL_KEYWORDS_TSVECTOR +from search.constants import CHANNEL_KEYWORDS_TSVECTOR from search.models import ChannelFullTextSearch from contentcuration.models import Channel diff --git a/contentcuration/search/management/commands/set_contentnode_tsvectors.py b/contentcuration/search/management/commands/set_contentnode_tsvectors.py index 58cf9350e8..fbc862fae6 100644 --- a/contentcuration/search/management/commands/set_contentnode_tsvectors.py +++ b/contentcuration/search/management/commands/set_contentnode_tsvectors.py @@ -8,8 +8,8 @@ from django.core.management.base import BaseCommand from django.db.models import Exists from django.db.models import OuterRef -from search.models import CONTENTNODE_AUTHOR_TSVECTOR -from search.models import CONTENTNODE_KEYWORDS_TSVECTOR +from search.constants import CONTENTNODE_AUTHOR_TSVECTOR +from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR from search.models import ContentNodeFullTextSearch from contentcuration.models import ContentNode diff --git a/contentcuration/search/models.py b/contentcuration/search/models.py index 64b0472c8a..31793d4de7 100644 --- a/contentcuration/search/models.py +++ b/contentcuration/search/models.py @@ -2,7 +2,6 @@ from django.conf import settings from django.contrib.postgres.indexes import GinIndex -from django.contrib.postgres.search import SearchVector from django.contrib.postgres.search import SearchVectorField from django.db import models @@ -22,18 +21,6 @@ class SavedSearch(models.Model): ) -POSTGRES_FTS_CONFIG = "simple" - -CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS = ("id", "channel_id", "node_id", "content_id", "tree_id", "title", "description", "contentnode_tags") -CONTENTNODE_KEYWORDS_TSVECTOR = 
SearchVector(*CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) - -CONTENTNODE_AUTHOR_TSVECTOR_FIELDS = ("author", "aggregator", "provider") -CONTENTNODE_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) - -CHANNEL_KEYWORDS_TSVECTOR_FIELDS = ("id", "main_tree__tree_id", "name", "description", "tagline", "primary_channel_token") -CHANNEL_KEYWORDS_TSVECTOR = SearchVector(*CHANNEL_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) - - class ContentNodeFullTextSearch(models.Model): id = StudioUUIDField(primary_key=True, default=uuid.uuid4) diff --git a/contentcuration/search/serializers.py b/contentcuration/search/serializers.py deleted file mode 100644 index 4137c0e47f..0000000000 --- a/contentcuration/search/serializers.py +++ /dev/null @@ -1,18 +0,0 @@ -from contentcuration import models as cc_models -from rest_framework import serializers - - -class ContentSearchResultSerializer(serializers.ModelSerializer): - - class Meta: - model = cc_models.ContentNode - fields = ( - 'id', - 'original_channel_id', - 'source_channel_id', - 'title', - 'kind', - 'tags', - 'children', - 'tree_id' - ) diff --git a/contentcuration/search/utils.py b/contentcuration/search/utils.py new file mode 100644 index 0000000000..9cb205bb47 --- /dev/null +++ b/contentcuration/search/utils.py @@ -0,0 +1,9 @@ +from django.contrib.postgres.search import SearchQuery +from search.constants import POSTGRES_FTS_CONFIG + + +def get_fts_search_query(value): + """ + Returns a `SearchQuery` with our postgres full text search config set on it. + """ + return SearchQuery(value=value, config=POSTGRES_FTS_CONFIG) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index df5cf831cb..72c50aea3c 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -7,18 +7,20 @@ from django.db.models import Q from django.db.models import Subquery from django.db.models import Value +from django.db.models.functions import Coalesce from django_filters.rest_framework import BooleanFilter from django_filters.rest_framework import CharFilter from le_utils.constants import content_kinds from le_utils.constants import roles from rest_framework.permissions import IsAuthenticated +from search.models import ContentNodeFullTextSearch +from search.utils import get_fts_search_query from contentcuration.models import Channel -from contentcuration.models import ContentNode from contentcuration.models import File from contentcuration.utils.pagination import CachedListPagination +from contentcuration.viewsets.base import ReadOnlyValuesViewset from contentcuration.viewsets.base import RequiredFilterSet -from contentcuration.viewsets.base import ValuesViewset from contentcuration.viewsets.common import NotNullMapArrayAgg from contentcuration.viewsets.common import UUIDFilter from contentcuration.viewsets.common import UUIDInFilter @@ -48,110 +50,117 @@ def filter_channel_list(self, queryset, name, value): user = not self.request.user.is_anonymous and self.request.user channel_ids = [] if value == "public": - channel_ids = Channel.objects.filter(public=True, deleted=False).values_list("id", flat=True) + channel_ids = Channel.objects.filter(public=True, deleted=False, main_tree__published=True).values_list("id", flat=True) elif value == "edit" and user: - channel_ids = user.editable_channels.values_list("id", flat=True) + channel_ids = 
user.editable_channels.filter(main_tree__published=True).values_list("id", flat=True) elif value == "bookmark" and user: - channel_ids = user.bookmarked_channels.values_list("id", flat=True) + channel_ids = user.bookmarked_channels.filter(main_tree__published=True).values_list("id", flat=True) elif value == "view" and user: - channel_ids = user.view_only_channels.values_list("id", flat=True) + channel_ids = user.view_only_channels.filter(main_tree__published=True).values_list("id", flat=True) return queryset.filter(channel_id__in=list(channel_ids)) def filter_keywords(self, queryset, name, value): - return ContentNode.search(queryset=queryset, search_term=value) + return queryset.filter(Q(keywords_tsvector=get_fts_search_query(value)) + | Q(author_tsvector=get_fts_search_query(value))) def filter_author(self, queryset, name, value): - return queryset.filter( - Q(author__icontains=value) - | Q(aggregator__icontains=value) - | Q(provider__icontains=value) - ) + return queryset.filter(author_tsvector=get_fts_search_query(value)) def filter_languages(self, queryset, name, value): - return queryset.filter(language__lang_code__in=value.split(",")) + return queryset.filter(contentnode__language__lang_code__in=value.split(",")) def filter_licenses(self, queryset, name, value): licenses = [int(li) for li in value.split(",")] - return queryset.filter(license__in=licenses) + return queryset.filter(contentnode__license__in=licenses) def filter_kinds(self, queryset, name, value): - return queryset.filter(kind_id__in=value.split(",")) + return queryset.filter(contentnode__kind_id__in=value.split(",")) def filter_coach(self, queryset, name, value): - return queryset.filter(role_visibility=roles.COACH) + return queryset.filter(contentnode__role_visibility=roles.COACH) def filter_resources(self, queryset, name, value): - return queryset.exclude(kind_id=content_kinds.TOPIC) + return queryset.exclude(contentnode__kind_id=content_kinds.TOPIC) def filter_assessments(self, queryset, name, value): - return queryset.filter(kind_id=content_kinds.EXERCISE) + return queryset.filter(contentnode__kind_id=content_kinds.EXERCISE) def filter_created_after(self, queryset, name, value): date = re.search(r"(\d{4})-0?(\d+)-(\d+)", value) return queryset.filter( - created__year__gte=date.group(1), - created__month__gte=date.group(2), - created__day__gte=date.group(3), - ) - - class Meta: - model = ContentNode - fields = ( - "keywords", - "languages", - "licenses", - "kinds", - "coach", - "author", - "resources", - "assessments", + contentnode__created__year__gte=date.group(1), + contentnode__created__month__gte=date.group(2), + contentnode__created__day__gte=date.group(3), ) -class SearchContentNodeViewSet(ValuesViewset): +class SearchContentNodeViewSet(ReadOnlyValuesViewset): filterset_class = ContentNodeFilter pagination_class = ListPagination permission_classes = [IsAuthenticated] + field_map = { + "id": "contentnode__id", + "content_id": "contentnode__content_id", + "node_id": "contentnode__node_id", + "title": "contentnode__title", + "description": "contentnode__description", + "author": "contentnode__author", + "provider": "contentnode__provider", + "kind__kind": "contentnode__kind__kind", + "thumbnail_encoding": "contentnode__thumbnail_encoding", + "published": "contentnode__published", + "modified": "contentnode__modified", + "parent_id": "contentnode__parent_id", + "changed": "contentnode__changed", + } + values = ( - "id", - "content_id", - "node_id", - "title", - "description", - "author", - "provider", - 
"kind__kind", + "contentnode__id", + "contentnode__content_id", + "contentnode__node_id", + "contentnode__title", + "contentnode__description", + "contentnode__author", + "contentnode__provider", + "contentnode__kind__kind", + "contentnode__thumbnail_encoding", + "contentnode__published", + "contentnode__modified", + "contentnode__parent_id", + "contentnode__changed", "channel_id", "resource_count", "thumbnail_checksum", "thumbnail_extension", - "thumbnail_encoding", - "published", - "modified", - "parent_id", - "changed", "content_tags", "original_channel_name", ) def get_queryset(self): - return ContentNode._annotate_channel_id(ContentNode.objects) + return ContentNodeFullTextSearch.objects.select_related("contentnode") def annotate_queryset(self, queryset): """ Annotates thumbnails, resources count and channel name. """ thumbnails = File.objects.filter( - contentnode=OuterRef("id"), preset__thumbnail=True + contentnode=OuterRef("contentnode__id"), preset__thumbnail=True ) - descendant_resources_count = ExpressionWrapper(((F("rght") - F("lft") - Value(1)) / Value(2)), output_field=IntegerField()) + descendant_resources_count = ExpressionWrapper(((F("contentnode__rght") - F("contentnode__lft") - Value(1)) / Value(2)), output_field=IntegerField()) - channel_name = Subquery( - Channel.objects.filter(pk=OuterRef("channel_id")).values( - "name" - )[:1] + original_channel_name = Coalesce( + Subquery( + Channel.objects.filter(pk=OuterRef("contentnode__original_channel_id")).values( + "name" + )[:1] + ), + Subquery( + Channel.objects.filter(main_tree__tree_id=OuterRef("contentnode__tree_id")).values( + "name" + )[:1] + ), ) queryset = queryset.annotate( @@ -160,8 +169,8 @@ def annotate_queryset(self, queryset): thumbnail_extension=Subquery( thumbnails.values("file_format__extension")[:1] ), - content_tags=NotNullMapArrayAgg("tags__tag_name"), - original_channel_name=channel_name, + content_tags=NotNullMapArrayAgg("contentnode__tags__tag_name"), + original_channel_name=original_channel_name, ) return queryset From aa62dc2017a334d7c32710e99834f57cf1754fff Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 14 Sep 2022 18:07:29 +0530 Subject: [PATCH 017/313] Sync tsvectors on publish! 
--- .../views/ImportFromChannels/ChannelList.vue | 1 + .../contentcuration/utils/publish.py | 65 ++++++++++--------- .../contentcuration/viewsets/channel.py | 7 +- contentcuration/search/constants.py | 7 -- .../search/viewsets/contentnode.py | 21 +++--- 5 files changed, 52 insertions(+), 49 deletions(-) diff --git a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue index 76dc9e1f8c..43de8e8ce4 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue +++ b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue @@ -112,6 +112,7 @@ [this.channelFilter]: true, page: this.$route.query.page || 1, exclude: this.currentChannelId, + published: true, }).then(page => { this.pageCount = page.total_pages; this.channels = page.results; diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index afe4abdef4..2b49e13b24 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -44,8 +44,6 @@ from search.constants import CHANNEL_KEYWORDS_TSVECTOR from search.constants import CONTENTNODE_AUTHOR_TSVECTOR from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR -from search.constants import CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR -from search.constants import CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR from search.models import ChannelFullTextSearch from search.models import ContentNodeFullTextSearch @@ -819,9 +817,10 @@ def fill_published_fields(channel, version_notes): channel.save() -def create_or_update_tsvectors(channel_id): +def sync_contentnode_and_channel_tsvectors(channel_id): """ - Create or update tsvectors for the channel and all its content nodes. + Creates, deletes and updates tsvectors of the channel and all its content nodes + to reflect the current state of channel's main tree. """ # Update or create channel tsvector entry. logging.info("Starting to set tsvectors for channel with id {}.".format(channel_id)) @@ -829,52 +828,63 @@ def create_or_update_tsvectors(channel_id): from contentcuration.viewsets.channel import primary_token_subquery channel = (ccmodels.Channel.objects + .filter(pk=channel_id) .annotate(primary_channel_token=primary_token_subquery, keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) - .get(pk=channel_id)) + .values("keywords_tsvector", "main_tree__tree_id") + .get()) if ChannelFullTextSearch.objects.filter(channel_id=channel_id).exists(): - update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel.keywords_tsvector) + update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel["keywords_tsvector"]) logging.info("Updated {} channel tsvector.".format(update_count)) else: - obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel.keywords_tsvector) + obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel["keywords_tsvector"]) obj.save() logging.info("Created 1 channel tsvector.") # Update or create contentnodes tsvector entry for channel_id. 
- logging.info("Setting tsvectors for all contentnodes in channel {}.".format(channel_id)) + logging.info("Starting to set tsvectors for all contentnodes in channel {}.".format(channel_id)) - if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists(): + nodes_tsvector_query = (ccmodels.ContentNode.objects + .filter(tree_id=channel["main_tree__tree_id"]) + .annotate(channel_id=Value(channel_id), + contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), + keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, + author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) + .order_by()) + if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists(): # First, delete nodes that are no longer in main_tree. - nodes_no_longer_in_main_tree = ~Exists(channel.main_tree.get_family().filter(id=OuterRef("contentnode_id"))) + nodes_no_longer_in_main_tree = ~Exists(ccmodels.ContentNode.objects.filter(id=OuterRef("contentnode_id"), tree_id=channel["main_tree__tree_id"])) ContentNodeFullTextSearch.objects.filter(nodes_no_longer_in_main_tree, channel_id=channel_id).delete() # Now, all remaining nodes are in main_tree, so let's update them. - update_count = (ContentNodeFullTextSearch.objects.filter(channel_id=channel_id) - .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" ")) - .update(keywords_tsvector=CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR, author_tsvector=CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR)) + # Update only changed nodes. + nodes_to_update = ContentNodeFullTextSearch.objects.filter(channel_id=channel_id, contentnode__changed=True) + + update_objs = list() + for node in nodes_to_update: + corresponding_contentnode = nodes_tsvector_query.filter(pk=node.contentnode_id).values("keywords_tsvector", "author_tsvector").first() + if corresponding_contentnode: + node.keywords_tsvector = corresponding_contentnode["keywords_tsvector"] + node.author_tsvector = corresponding_contentnode["author_tsvector"] + update_objs.append(node) + ContentNodeFullTextSearch.objects.bulk_update(update_objs, ["keywords_tsvector", "author_tsvector"]) + del update_objs # Insert newly created nodes. 
- nodes_not_having_tsvector_record = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"))) - nodes_to_insert = (channel.main_tree.get_family() + nodes_not_having_tsvector_record = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"), channel_id=channel_id)) + nodes_to_insert = (nodes_tsvector_query .filter(nodes_not_having_tsvector_record) - .annotate(channel_id=Value(channel_id), - contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), - keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, - author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) .values("id", "channel_id", "keywords_tsvector", "author_tsvector")) insert_objs = list() - for node in nodes_to_insert: obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) insert_objs.append(obj) - inserted_nodes_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs) - - logging.info("Successfully inserted {} and updated {} contentnode tsvectors.".format(len(inserted_nodes_list), update_count)) + logging.info("Successfully inserted {} contentnode tsvectors.".format(len(inserted_nodes_list))) @delay_user_storage_calculation @@ -887,8 +897,6 @@ def publish_channel( send_email=False, progress_tracker=None, language=settings.LANGUAGE_CODE, - - ): """ :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None @@ -900,8 +908,9 @@ def publish_channel( set_channel_icon_encoding(channel) kolibri_temp_db = create_content_database(channel, force, user_id, force_exercises, progress_tracker=progress_tracker) increment_channel_version(channel) - mark_all_nodes_as_published(channel) add_tokens_to_channel(channel) + sync_contentnode_and_channel_tsvectors(channel_id=channel.id) + mark_all_nodes_as_published(channel) fill_published_fields(channel, version_notes) # Attributes not getting set for some reason, so just save it here @@ -914,10 +923,6 @@ def publish_channel( if channel.public: delete_public_channel_cache_keys() - # Enqueue tsvector task to update or create channel tsvectors and all its - # contentnodes tsvector entries. 
- create_or_update_tsvectors(channel_id=channel_id) - if send_email: with override(language): send_emails(channel, user_id, version_notes=version_notes) diff --git a/contentcuration/contentcuration/viewsets/channel.py b/contentcuration/contentcuration/viewsets/channel.py index ca5c487e0a..90b9a8f84d 100644 --- a/contentcuration/contentcuration/viewsets/channel.py +++ b/contentcuration/contentcuration/viewsets/channel.py @@ -122,10 +122,13 @@ def filter_deleted(self, queryset, name, value): return queryset.filter(deleted=value) def filter_keywords(self, queryset, name, value): + search_query = get_fts_search_query(value) + dash_replaced_search_query = get_fts_search_query(value.replace("-", "")) + channel_keywords_query = (Exists(ChannelFullTextSearch.objects.filter( - keywords_tsvector=get_fts_search_query(value.replace("-", "")), channel_id=OuterRef("id")))) + Q(keywords_tsvector=search_query) | Q(keywords_tsvector=dash_replaced_search_query), channel_id=OuterRef("id")))) contentnode_search_query = (Exists(ContentNodeFullTextSearch.objects.filter( - Q(keywords_tsvector=get_fts_search_query(value)) | Q(author_tsvector=get_fts_search_query(value)), channel_id=OuterRef("id")))) + Q(keywords_tsvector=search_query) | Q(author_tsvector=search_query), channel_id=OuterRef("id")))) return queryset.filter(Q(channel_keywords_query) | Q(contentnode_search_query)) diff --git a/contentcuration/search/constants.py b/contentcuration/search/constants.py index 9b4a29d246..1ac316c3ae 100644 --- a/contentcuration/search/constants.py +++ b/contentcuration/search/constants.py @@ -11,13 +11,6 @@ CONTENTNODE_AUTHOR_TSVECTOR_FIELDS = ("author", "aggregator", "provider") CONTENTNODE_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) -CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR_FIELDS = ("contentnode__id", "channel_id", "contentnode__node_id", "contentnode__content_id", - "contentnode__tree_id", "contentnode__title", "contentnode__description", "contentnode_tags") -CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR = SearchVector(*CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) - -CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR_FIELDS = ("contentnode__author", "contentnode__aggregator", "contentnode__provider") -CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) - # Channel vector and search fields. 
CHANNEL_KEYWORDS_TSVECTOR_FIELDS = ("id", "main_tree__tree_id", "name", "description", "tagline", "primary_channel_token") CHANNEL_KEYWORDS_TSVECTOR = SearchVector(*CHANNEL_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 72c50aea3c..691dfbe70e 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -49,19 +49,22 @@ class ContentNodeFilter(RequiredFilterSet): def filter_channel_list(self, queryset, name, value): user = not self.request.user.is_anonymous and self.request.user channel_ids = [] + if value == "public": - channel_ids = Channel.objects.filter(public=True, deleted=False, main_tree__published=True).values_list("id", flat=True) + channel_ids = Channel.get_public_channels().values_list("id", flat=True) elif value == "edit" and user: - channel_ids = user.editable_channels.filter(main_tree__published=True).values_list("id", flat=True) + channel_ids = user.editable_channels.values_list("id", flat=True) elif value == "bookmark" and user: - channel_ids = user.bookmarked_channels.filter(main_tree__published=True).values_list("id", flat=True) + channel_ids = user.bookmarked_channels.values_list("id", flat=True) elif value == "view" and user: - channel_ids = user.view_only_channels.filter(main_tree__published=True).values_list("id", flat=True) + channel_ids = user.view_only_channels.values_list("id", flat=True) + return queryset.filter(channel_id__in=list(channel_ids)) def filter_keywords(self, queryset, name, value): - return queryset.filter(Q(keywords_tsvector=get_fts_search_query(value)) - | Q(author_tsvector=get_fts_search_query(value))) + search_query = get_fts_search_query(value) + return queryset.filter(Q(keywords_tsvector=search_query) + | Q(author_tsvector=search_query)) def filter_author(self, queryset, name, value): return queryset.filter(author_tsvector=get_fts_search_query(value)) @@ -98,6 +101,7 @@ class SearchContentNodeViewSet(ReadOnlyValuesViewset): filterset_class = ContentNodeFilter pagination_class = ListPagination permission_classes = [IsAuthenticated] + queryset = ContentNodeFullTextSearch.objects.all() field_map = { "id": "contentnode__id", @@ -137,12 +141,9 @@ class SearchContentNodeViewSet(ReadOnlyValuesViewset): "original_channel_name", ) - def get_queryset(self): - return ContentNodeFullTextSearch.objects.select_related("contentnode") - def annotate_queryset(self, queryset): """ - Annotates thumbnails, resources count and channel name. + Annotates thumbnails, resources count and original channel name. """ thumbnails = File.objects.filter( contentnode=OuterRef("contentnode__id"), preset__thumbnail=True From 2672512de766ba0970a4be9bdc622b656521961a Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 14 Sep 2022 20:19:40 +0530 Subject: [PATCH 018/313] fix: tests and ready for merge! 
<3 --- .../contentcuration/tests/testdata.py | 18 +++--- contentcuration/search/tests/test_search.py | 62 +++++++++++-------- 2 files changed, 43 insertions(+), 37 deletions(-) diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py index 5560e853a5..50895337b1 100644 --- a/contentcuration/contentcuration/tests/testdata.py +++ b/contentcuration/contentcuration/tests/testdata.py @@ -195,23 +195,21 @@ def node(data, parent=None): return new_node -def tree(parent=None, tree_data=None): +def tree(parent=None): # Read from json fixture - if tree_data is None: - filepath = os.path.sep.join([os.path.dirname(__file__), "fixtures", "tree.json"]) - with open(filepath, "rb") as jsonfile: - tree_data = json.load(jsonfile) + filepath = os.path.sep.join([os.path.dirname(__file__), "fixtures", "tree.json"]) + with open(filepath, "rb") as jsonfile: + data = json.load(jsonfile) - return node(tree_data, parent) + return node(data, parent) -def channel(name="testchannel", create_main_tree=True, main_tree_data=None): +def channel(name="testchannel"): channel = cc.Channel.objects.create(name=name) channel.save() - if create_main_tree: - channel.main_tree = tree(tree_data=main_tree_data) - channel.save() + channel.main_tree = tree() + channel.save() return channel diff --git a/contentcuration/search/tests/test_search.py b/contentcuration/search/tests/test_search.py index 6b3d34cc41..8489ece577 100644 --- a/contentcuration/search/tests/test_search.py +++ b/contentcuration/search/tests/test_search.py @@ -3,28 +3,39 @@ from django.urls import reverse from contentcuration.models import Channel +from contentcuration.models import ContentNode from contentcuration.tests import testdata from contentcuration.tests.base import StudioAPITestCase +from contentcuration.utils.publish import sync_contentnode_and_channel_tsvectors + + +def dummy_publish(channel): + channel_nodes = ContentNode.objects.filter(tree_id=channel.main_tree.tree_id) + for node in channel_nodes: + node.published = True + node.changed = False + node.save() + sync_contentnode_and_channel_tsvectors(channel_id=channel.id) class SearchViewsetTestCase(StudioAPITestCase): + def setUp(self): + super().setUp() + self.channel = testdata.channel() + self.user = testdata.user() + self.channel.editors.add(self.user) + dummy_publish(self.channel) def test_filter_exclude_channels(self): - user = testdata.user() - self.client.force_authenticate(user=user) - channel = testdata.channel() - channel.editors.add(user) + self.client.force_authenticate(user=self.user) response = self.client.get( - reverse("search-list"), data={"exclude_channel": channel.id}, format="json", + reverse("search-list"), data={"exclude_channel": self.channel.id}, format="json", ) self.assertEqual(response.status_code, 200, response.content) self.assertEqual(response.data["results"], []) def test_filter_channels_by_edit(self): - user = testdata.user() - self.client.force_authenticate(user=user) - channel = testdata.channel() - channel.editors.add(user) + self.client.force_authenticate(user=self.user) response = self.client.get( reverse("search-list"), data={"channel_list": "edit"}, format="json", ) @@ -35,6 +46,7 @@ def test_search(self): users = [] channels = [] + # Create channels, users. 
for i in range(4): user = testdata.user(email="a{}@a.com".format(i)) users.append(user) @@ -46,20 +58,11 @@ def test_search(self): public_channel, editable_channel, viewable_channel, inaccessible_channel = channels - # Create public video node and publish it. + # Create public video node. public_video_node = testdata.node({ "title": "Kolibri video", "kind_id": "video", }, parent=public_channel.main_tree) - public_video_node.complete = True - public_video_node.published = True - public_video_node.changed = False - public_video_node.save() - - # Publish the public_channel. - public_channel.main_tree.published = True - public_channel.main_tree.changed = False - public_channel.main_tree.save() public_channel.public = True public_channel.save() @@ -72,6 +75,10 @@ def test_search(self): public_video_node.copy_to(target=viewable_channel.main_tree) public_video_node.copy_to(target=inaccessible_channel.main_tree) + # Publish all channels to make them searchable. + for channel in channels: + dummy_publish(channel) + # Get different nodes based on access. editable_channel.main_tree.refresh_from_db() editable_video_node = editable_channel.main_tree.get_descendants().first() @@ -93,12 +100,13 @@ def test_search(self): format="json", ) - for result in response.data["results"]: - self.assertNotEqual(result["id"], inaccessible_video_node.id) + result = response.data["results"][0] + + self.assertNotEqual(result["id"], inaccessible_video_node.id) - if channel_list == "public": - self.assertEqual(result["id"], public_video_node.id) - elif channel_list == "edit": - self.assertEqual(result["id"], editable_video_node.id) - elif channel_list == "view": - self.assertEqual(result["id"], viewable_video_node.id) + if channel_list == "public": + self.assertEqual(result["id"], public_video_node.id) + elif channel_list == "edit": + self.assertEqual(result["id"], editable_video_node.id) + elif channel_list == "view": + self.assertEqual(result["id"], viewable_video_node.id) From 1ffa30d7348a2bd2cd79961caf5b362cf0e1e39c Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 15 Sep 2022 01:41:51 +0530 Subject: [PATCH 019/313] fix: node command edge case; when published nodes go to trash tree, they remain as published --- .../management/commands/set_contentnode_tsvectors.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/contentcuration/search/management/commands/set_contentnode_tsvectors.py b/contentcuration/search/management/commands/set_contentnode_tsvectors.py index fbc862fae6..4e5673d9ec 100644 --- a/contentcuration/search/management/commands/set_contentnode_tsvectors.py +++ b/contentcuration/search/management/commands/set_contentnode_tsvectors.py @@ -32,7 +32,7 @@ def handle(self, *args, **options): .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) - .filter(tsvector_not_already_inserted_query, published=True) + .filter(tsvector_not_already_inserted_query, published=True, channel_id__isnull=False) .values("id", "channel_id", "keywords_tsvector", "author_tsvector").order_by()) insertable_nodes_tsvector = list(tsvector_node_query[:CHUNKSIZE]) @@ -43,10 +43,9 @@ def handle(self, *args, **options): insert_objs = list() for node in insertable_nodes_tsvector: - if node["channel_id"]: - obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], - keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) - 
insert_objs.append(obj) + obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], + keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) + insert_objs.append(obj) inserted_objs_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs) From 57724e0787a08a77d9ee28f6be064aa8c3cf1e0d Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 16 Sep 2022 16:01:51 +0530 Subject: [PATCH 020/313] Enforce only-one search entries --- contentcuration/search/migrations/0003_fulltextsearch.py | 6 +++--- contentcuration/search/models.py | 4 ++-- contentcuration/search/viewsets/contentnode.py | 5 +---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/contentcuration/search/migrations/0003_fulltextsearch.py b/contentcuration/search/migrations/0003_fulltextsearch.py index 2885a4655f..632df6a39e 100644 --- a/contentcuration/search/migrations/0003_fulltextsearch.py +++ b/contentcuration/search/migrations/0003_fulltextsearch.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.14 on 2022-09-08 07:19 +# Generated by Django 3.2.14 on 2022-09-16 08:55 import uuid import django.contrib.postgres.indexes @@ -28,7 +28,7 @@ class Migration(migrations.Migration): ('keywords_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), ('author_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='channel_nodes_fts', to='contentcuration.channel')), - ('contentnode', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='node_fts', to='contentcuration.contentnode')), + ('contentnode', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='node_fts', to='contentcuration.contentnode')), ], ), migrations.CreateModel( @@ -36,7 +36,7 @@ class Migration(migrations.Migration): fields=[ ('id', contentcuration.models.UUIDField(default=uuid.uuid4, max_length=32, primary_key=True, serialize=False)), ('keywords_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), - ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='channel_fts', to='contentcuration.channel')), + ('channel', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='channel_fts', to='contentcuration.channel')), ], ), AddIndexConcurrently( diff --git a/contentcuration/search/models.py b/contentcuration/search/models.py index 31793d4de7..9e121af509 100644 --- a/contentcuration/search/models.py +++ b/contentcuration/search/models.py @@ -25,7 +25,7 @@ class ContentNodeFullTextSearch(models.Model): id = StudioUUIDField(primary_key=True, default=uuid.uuid4) # The contentnode that this record points to. - contentnode = models.ForeignKey(ContentNode, on_delete=models.CASCADE, related_name="node_fts") + contentnode = models.OneToOneField(ContentNode, on_delete=models.CASCADE, related_name="node_fts") # The channel to which the contentnode belongs. Channel cannot be NULL because we only allow # searches to be made inside channels. @@ -46,7 +46,7 @@ class ChannelFullTextSearch(models.Model): id = StudioUUIDField(primary_key=True, default=uuid.uuid4) # The channel to which this record points. 
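     # (Hedged aside: switching ForeignKey to OneToOneField below adds a UNIQUE
     # constraint on the column, which is what enforces at most one
     # full-text-search row per channel and, in the model above, per
     # contentnode, matching this patch's subject.)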
- channel = models.ForeignKey(Channel, on_delete=models.CASCADE, related_name="channel_fts") + channel = models.OneToOneField(Channel, on_delete=models.CASCADE, related_name="channel_fts") # This stores the channel keywords as tsvector for super fast searches. keywords_tsvector = SearchVectorField(null=True, blank=True) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 691dfbe70e..aa6c7fe9b7 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -4,7 +4,6 @@ from django.db.models import F from django.db.models import IntegerField from django.db.models import OuterRef -from django.db.models import Q from django.db.models import Subquery from django.db.models import Value from django.db.models.functions import Coalesce @@ -62,9 +61,7 @@ def filter_channel_list(self, queryset, name, value): return queryset.filter(channel_id__in=list(channel_ids)) def filter_keywords(self, queryset, name, value): - search_query = get_fts_search_query(value) - return queryset.filter(Q(keywords_tsvector=search_query) - | Q(author_tsvector=search_query)) + return queryset.filter(keywords_tsvector=get_fts_search_query(value)) def filter_author(self, queryset, name, value): return queryset.filter(author_tsvector=get_fts_search_query(value)) From e53e56a2e300ea31c10f994dfe3cf5ff0ec25186 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 16 Sep 2022 16:41:27 +0530 Subject: [PATCH 021/313] Remove unnecessary select_related --- .../search/management/commands/set_channel_tsvectors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contentcuration/search/management/commands/set_channel_tsvectors.py b/contentcuration/search/management/commands/set_channel_tsvectors.py index 305e9a8adb..d82f4848f5 100644 --- a/contentcuration/search/management/commands/set_channel_tsvectors.py +++ b/contentcuration/search/management/commands/set_channel_tsvectors.py @@ -27,8 +27,8 @@ def handle(self, *args, **options): channel_not_already_inserted_query = ~Exists(ChannelFullTextSearch.objects.filter(channel_id=OuterRef("id"))) - channel_query = (Channel.objects.select_related("main_tree") - .filter(channel_not_already_inserted_query, deleted=False, main_tree__published=True) + channel_query = (Channel.objects.filter(channel_not_already_inserted_query, + deleted=False, main_tree__published=True) .annotate(primary_channel_token=primary_token_subquery, keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) .values("id", "keywords_tsvector")) From 4b3d4c7d0a586afce5236de13bfb8ce12064e52e Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 16 Sep 2022 17:00:04 +0530 Subject: [PATCH 022/313] fix cache tests mock by setting ContentNodeFullTextSearch --- contentcuration/contentcuration/tests/utils/test_cache.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/contentcuration/contentcuration/tests/utils/test_cache.py b/contentcuration/contentcuration/tests/utils/test_cache.py index 1bbf69d580..22a2a6a62b 100644 --- a/contentcuration/contentcuration/tests/utils/test_cache.py +++ b/contentcuration/contentcuration/tests/utils/test_cache.py @@ -1,5 +1,6 @@ import mock from django.test import SimpleTestCase +from search.models import ContentNodeFullTextSearch from ..helpers import mock_class_instance from contentcuration.models import ContentNode @@ -9,7 +10,9 @@ class ResourceSizeCacheTestCase(SimpleTestCase): def setUp(self): super(ResourceSizeCacheTestCase, self).setUp() - self.node = 
mock.Mock(spec_set=ContentNode()) + c = ContentNode() + c.node_fts = ContentNodeFullTextSearch() + self.node = mock.Mock(spec_set=c) self.node.pk = "abcdefghijklmnopqrstuvwxyz" self.redis_client = mock_class_instance("redis.client.StrictRedis") self.cache_client = mock_class_instance("django_redis.client.DefaultClient") From 44ab74c05b06881dabacb9c7bdd9d6bdbc8f4a65 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 16 Sep 2022 21:33:54 +0530 Subject: [PATCH 023/313] fix cache & nodes tests by using db-friendly TestCase --- .../contentcuration/tests/utils/test_cache.py | 9 +++------ .../contentcuration/tests/utils/test_nodes.py | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/contentcuration/contentcuration/tests/utils/test_cache.py b/contentcuration/contentcuration/tests/utils/test_cache.py index 22a2a6a62b..5327da19ac 100644 --- a/contentcuration/contentcuration/tests/utils/test_cache.py +++ b/contentcuration/contentcuration/tests/utils/test_cache.py @@ -1,18 +1,15 @@ import mock -from django.test import SimpleTestCase -from search.models import ContentNodeFullTextSearch +from django.test import TestCase from ..helpers import mock_class_instance from contentcuration.models import ContentNode from contentcuration.utils.cache import ResourceSizeCache -class ResourceSizeCacheTestCase(SimpleTestCase): +class ResourceSizeCacheTestCase(TestCase): def setUp(self): super(ResourceSizeCacheTestCase, self).setUp() - c = ContentNode() - c.node_fts = ContentNodeFullTextSearch() - self.node = mock.Mock(spec_set=c) + self.node = mock.Mock(spec_set=ContentNode()) self.node.pk = "abcdefghijklmnopqrstuvwxyz" self.redis_client = mock_class_instance("redis.client.StrictRedis") self.cache_client = mock_class_instance("django_redis.client.DefaultClient") diff --git a/contentcuration/contentcuration/tests/utils/test_nodes.py b/contentcuration/contentcuration/tests/utils/test_nodes.py index 3b96c30a3c..83171288d6 100644 --- a/contentcuration/contentcuration/tests/utils/test_nodes.py +++ b/contentcuration/contentcuration/tests/utils/test_nodes.py @@ -6,7 +6,7 @@ from dateutil.parser import isoparse from django.db.models import F from django.db.models import Max -from django.test import SimpleTestCase +from django.test import TestCase from ..base import StudioTestCase from contentcuration.models import ContentNode @@ -42,7 +42,7 @@ def test_modified_since(self): @mock.patch("contentcuration.utils.nodes.ResourceSizeHelper") @mock.patch("contentcuration.utils.nodes.ResourceSizeCache") -class CalculateResourceSizeTestCase(SimpleTestCase): +class CalculateResourceSizeTestCase(TestCase): def setUp(self): super(CalculateResourceSizeTestCase, self).setUp() self.node = mock.Mock(spec_set=ContentNode()) From d3002e8e71c22bbe559c1d0f71435e1fc8290922 Mon Sep 17 00:00:00 2001 From: Prathamesh Desai Date: Wed, 21 Sep 2022 18:03:31 +0530 Subject: [PATCH 024/313] Added validation for file_formats --- contentcuration/contentcuration/models.py | 7 ++++++- contentcuration/contentcuration/tests/test_models.py | 10 ++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index cc68b58855..0e26e36c86 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -8,7 +8,6 @@ from datetime import datetime import pytz -from celery import states from django.conf import settings from django.contrib.auth.base_user import AbstractBaseUser from 
django.contrib.auth.base_user import BaseUserManager @@ -2197,6 +2196,12 @@ def save(self, set_by_file_on_disk=True, *args, **kwargs): 2. fill the other fields accordingly """ from contentcuration.utils.user import calculate_user_storage + + # check if the file format exists in file_formats.choices + if self.file_format_id: + if self.file_format_id not in dict(file_formats.choices): + raise ValidationError("Invalid file_format") + if set_by_file_on_disk and self.file_on_disk: # if file_on_disk is supplied, hash out the file if self.checksum is None or self.checksum == "": md5 = hashlib.md5() diff --git a/contentcuration/contentcuration/tests/test_models.py b/contentcuration/contentcuration/tests/test_models.py index 97d3359ce7..3c0caa9967 100644 --- a/contentcuration/contentcuration/tests/test_models.py +++ b/contentcuration/contentcuration/tests/test_models.py @@ -4,6 +4,7 @@ import pytest from django.conf import settings from django.core.cache import cache +from django.core.exceptions import ValidationError from django.db.utils import IntegrityError from django.utils import timezone from le_utils.constants import content_kinds @@ -669,6 +670,15 @@ def test_duration_check_constraint__not_media(self): duration=10, ) + def test_invalid_file_format(self): + channel = testdata.channel() + with self.assertRaises(ValidationError, msg="Invalid file_format"): + File.objects.create( + contentnode=create_contentnode(channel.main_tree_id), + preset_id=format_presets.EPUB, + file_format_id='pptx', + ) + class AssessmentItemFilePermissionTestCase(PermissionQuerysetTestCase): @property From 001e788f38a486b883009a988faf9082e74eec5d Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 21 Sep 2022 22:53:33 +0530 Subject: [PATCH 025/313] Use command for tsv insertion & simpler tsv update on publish --- Makefile | 2 +- .../views/ImportFromChannels/BrowsingCard.vue | 2 +- .../contentcuration/tests/helpers.py | 8 ++ .../contentcuration/tests/utils/test_cache.py | 7 +- .../contentcuration/tests/utils/test_nodes.py | 8 +- .../contentcuration/utils/publish.py | 78 +++++++------------ .../commands/set_channel_tsvectors.py | 11 +-- .../commands/set_contentnode_tsvectors.py | 45 +++++++---- contentcuration/search/utils.py | 39 ++++++++++ .../search/viewsets/contentnode.py | 4 +- 10 files changed, 118 insertions(+), 86 deletions(-) diff --git a/Makefile b/Makefile index 29fe984285..99b55d3762 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ learningactivities: set-tsvectors: python contentcuration/manage.py set_channel_tsvectors - python contentcuration/manage.py set_contentnode_tsvectors + python contentcuration/manage.py set_contentnode_tsvectors --published ############################################################### # END PRODUCTION COMMANDS ##################################### diff --git a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue index 70f1ccafb7..5d0447df50 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue +++ b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue @@ -149,7 +149,7 @@ if (this.isTopic) { return `${baseUrl}#/${this.node.id}`; } - return `${baseUrl}#/${this.node.parent}/${this.node.id}`; + return `${baseUrl}#/${this.node.parent_id}/${this.node.id}`; }, resourcesMsg() { let count; diff --git 
a/contentcuration/contentcuration/tests/helpers.py b/contentcuration/contentcuration/tests/helpers.py
index 8e3172fcd4..cf1d54130e 100644
--- a/contentcuration/contentcuration/tests/helpers.py
+++ b/contentcuration/contentcuration/tests/helpers.py
@@ -2,7 +2,9 @@
 from importlib import import_module
 
 import mock
+from search.models import ContentNodeFullTextSearch
 
+from contentcuration.models import ContentNode
 from contentcuration.models import TaskResult
 
 
@@ -39,6 +41,12 @@ def mock_class_instance(target):
     else:
         target_cls = target
 
+    # ContentNode's node_fts field can be handled by Django when tests
+    # access the database, but we mock it so that we don't need to query
+    # the database. By doing so we get faster test execution.
+    if target_cls is ContentNode:
+        target_cls.node_fts = ContentNodeFullTextSearch()
+
     class MockClass(target_cls):
         def __new__(cls, *args, **kwargs):
             return mock.Mock(spec_set=cls)
diff --git a/contentcuration/contentcuration/tests/utils/test_cache.py b/contentcuration/contentcuration/tests/utils/test_cache.py
index 5327da19ac..d16570648a 100644
--- a/contentcuration/contentcuration/tests/utils/test_cache.py
+++ b/contentcuration/contentcuration/tests/utils/test_cache.py
@@ -1,15 +1,14 @@
 import mock
-from django.test import TestCase
+from django.test import SimpleTestCase
 
 from ..helpers import mock_class_instance
-from contentcuration.models import ContentNode
 from contentcuration.utils.cache import ResourceSizeCache
 
 
-class ResourceSizeCacheTestCase(TestCase):
+class ResourceSizeCacheTestCase(SimpleTestCase):
     def setUp(self):
         super(ResourceSizeCacheTestCase, self).setUp()
-        self.node = mock.Mock(spec_set=ContentNode())
+        self.node = mock_class_instance("contentcuration.models.ContentNode")
         self.node.pk = "abcdefghijklmnopqrstuvwxyz"
         self.redis_client = mock_class_instance("redis.client.StrictRedis")
         self.cache_client = mock_class_instance("django_redis.client.DefaultClient")
diff --git a/contentcuration/contentcuration/tests/utils/test_nodes.py b/contentcuration/contentcuration/tests/utils/test_nodes.py
index 83171288d6..be43d295dd 100644
--- a/contentcuration/contentcuration/tests/utils/test_nodes.py
+++ b/contentcuration/contentcuration/tests/utils/test_nodes.py
@@ -6,10 +6,10 @@
 from dateutil.parser import isoparse
 from django.db.models import F
 from django.db.models import Max
-from django.test import TestCase
+from django.test import SimpleTestCase
 
 from ..base import StudioTestCase
-from contentcuration.models import ContentNode
+from contentcuration.tests.helpers import mock_class_instance
 from contentcuration.utils.nodes import calculate_resource_size
 from contentcuration.utils.nodes import ResourceSizeHelper
 from contentcuration.utils.nodes import SlowCalculationError
@@ -42,10 +42,10 @@ def test_modified_since(self):
 
 @mock.patch("contentcuration.utils.nodes.ResourceSizeHelper")
 @mock.patch("contentcuration.utils.nodes.ResourceSizeCache")
-class CalculateResourceSizeTestCase(TestCase):
+class CalculateResourceSizeTestCase(SimpleTestCase):
     def setUp(self):
         super(CalculateResourceSizeTestCase, self).setUp()
-        self.node = mock.Mock(spec_set=ContentNode())
+        self.node = mock_class_instance("contentcuration.models.ContentNode")
 
     def assertCalculation(self, cache, helper, force=False):
         helper().get_size.return_value = 456
diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py
index 2b49e13b24..017bf4a561 100644
--- a/contentcuration/contentcuration/utils/publish.py
+++
b/contentcuration/contentcuration/utils/publish.py @@ -14,7 +14,6 @@ from itertools import chain from django.conf import settings -from django.contrib.postgres.aggregates import StringAgg from django.core.files import File from django.core.files.storage import default_storage as storage from django.core.management import call_command @@ -23,8 +22,8 @@ from django.db.models import Max from django.db.models import OuterRef from django.db.models import Q +from django.db.models import Subquery from django.db.models import Sum -from django.db.models import Value from django.db.utils import IntegrityError from django.template.loader import render_to_string from django.utils import timezone @@ -41,11 +40,10 @@ from le_utils.constants import roles from past.builtins import basestring from past.utils import old_div -from search.constants import CHANNEL_KEYWORDS_TSVECTOR -from search.constants import CONTENTNODE_AUTHOR_TSVECTOR -from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR from search.models import ChannelFullTextSearch from search.models import ContentNodeFullTextSearch +from search.utils import get_fts_annotated_channel_qs +from search.utils import get_fts_annotated_contentnode_qs from contentcuration import models as ccmodels from contentcuration.decorators import delay_user_storage_calculation @@ -823,35 +821,22 @@ def sync_contentnode_and_channel_tsvectors(channel_id): to reflect the current state of channel's main tree. """ # Update or create channel tsvector entry. - logging.info("Starting to set tsvectors for channel with id {}.".format(channel_id)) + logging.info("Setting tsvector for channel with id {}.".format(channel_id)) - from contentcuration.viewsets.channel import primary_token_subquery - - channel = (ccmodels.Channel.objects - .filter(pk=channel_id) - .annotate(primary_channel_token=primary_token_subquery, - keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) + channel = (get_fts_annotated_channel_qs() .values("keywords_tsvector", "main_tree__tree_id") - .get()) + .get(pk=channel_id)) - if ChannelFullTextSearch.objects.filter(channel_id=channel_id).exists(): - update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel["keywords_tsvector"]) - logging.info("Updated {} channel tsvector.".format(update_count)) - else: - obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel["keywords_tsvector"]) - obj.save() + obj, is_created = ChannelFullTextSearch.objects.update_or_create(channel_id=channel_id, defaults={"keywords_tsvector": channel["keywords_tsvector"]}) + del obj + + if is_created: logging.info("Created 1 channel tsvector.") + else: + logging.info("Updated 1 channel tsvector.") # Update or create contentnodes tsvector entry for channel_id. - logging.info("Starting to set tsvectors for all contentnodes in channel {}.".format(channel_id)) - - nodes_tsvector_query = (ccmodels.ContentNode.objects - .filter(tree_id=channel["main_tree__tree_id"]) - .annotate(channel_id=Value(channel_id), - contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), - keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, - author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) - .order_by()) + logging.info("Setting tsvectors for all main tree contentnodes in channel {}.".format(channel_id)) if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists(): # First, delete nodes that are no longer in main_tree. 
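A minimal sketch of the update_or_create() call the hunk above switches to,
using the names from this patch (illustrative only, not additional patch
content):

    obj, is_created = ChannelFullTextSearch.objects.update_or_create(
        channel_id=channel_id,
        defaults={"keywords_tsvector": channel["keywords_tsvector"]},
    )

update_or_create() looks the row up by the non-defaults keyword arguments and
applies `defaults` on both the update and the create path, which is why the
separate exists()/update()/create() branches could collapse into a single call.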
@@ -860,31 +845,22 @@ def sync_contentnode_and_channel_tsvectors(channel_id): # Now, all remaining nodes are in main_tree, so let's update them. # Update only changed nodes. - nodes_to_update = ContentNodeFullTextSearch.objects.filter(channel_id=channel_id, contentnode__changed=True) - - update_objs = list() - for node in nodes_to_update: - corresponding_contentnode = nodes_tsvector_query.filter(pk=node.contentnode_id).values("keywords_tsvector", "author_tsvector").first() - if corresponding_contentnode: - node.keywords_tsvector = corresponding_contentnode["keywords_tsvector"] - node.author_tsvector = corresponding_contentnode["author_tsvector"] - update_objs.append(node) - ContentNodeFullTextSearch.objects.bulk_update(update_objs, ["keywords_tsvector", "author_tsvector"]) - del update_objs + changed_nodes_subquery = (get_fts_annotated_contentnode_qs(channel_id) + .filter(id=OuterRef("contentnode_id"), + tree_id=channel["main_tree__tree_id"], + complete=True, + changed=True) + .order_by()) + ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).update( + keywords_tsvector=Subquery(changed_nodes_subquery.values_list("keywords_tsvector")[:1]), + author_tsvector=Subquery(changed_nodes_subquery.values_list("author_tsvector")[:1]) + ) # Insert newly created nodes. - nodes_not_having_tsvector_record = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"), channel_id=channel_id)) - nodes_to_insert = (nodes_tsvector_query - .filter(nodes_not_having_tsvector_record) - .values("id", "channel_id", "keywords_tsvector", "author_tsvector")) - - insert_objs = list() - for node in nodes_to_insert: - obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], - keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) - insert_objs.append(obj) - inserted_nodes_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs) - logging.info("Successfully inserted {} contentnode tsvectors.".format(len(inserted_nodes_list))) + call_command("set_contentnode_tsvectors", + "--channel-id={}".format(channel_id), + "--tree-id={}".format(channel["main_tree__tree_id"]), + "--complete") @delay_user_storage_calculation diff --git a/contentcuration/search/management/commands/set_channel_tsvectors.py b/contentcuration/search/management/commands/set_channel_tsvectors.py index d82f4848f5..68d7e17b51 100644 --- a/contentcuration/search/management/commands/set_channel_tsvectors.py +++ b/contentcuration/search/management/commands/set_channel_tsvectors.py @@ -7,11 +7,8 @@ from django.core.management.base import BaseCommand from django.db.models import Exists from django.db.models import OuterRef -from search.constants import CHANNEL_KEYWORDS_TSVECTOR from search.models import ChannelFullTextSearch - -from contentcuration.models import Channel -from contentcuration.viewsets.channel import primary_token_subquery +from search.utils import get_fts_annotated_channel_qs logmodule.basicConfig(level=logmodule.INFO) @@ -27,10 +24,8 @@ def handle(self, *args, **options): channel_not_already_inserted_query = ~Exists(ChannelFullTextSearch.objects.filter(channel_id=OuterRef("id"))) - channel_query = (Channel.objects.filter(channel_not_already_inserted_query, - deleted=False, main_tree__published=True) - .annotate(primary_channel_token=primary_token_subquery, - keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) + channel_query = (get_fts_annotated_channel_qs().filter(channel_not_already_inserted_query, + deleted=False, main_tree__published=True) 
.values("id", "keywords_tsvector")) insertable_channels = list(channel_query[:CHUNKSIZE]) diff --git a/contentcuration/search/management/commands/set_contentnode_tsvectors.py b/contentcuration/search/management/commands/set_contentnode_tsvectors.py index 4e5673d9ec..c5e78ca8d1 100644 --- a/contentcuration/search/management/commands/set_contentnode_tsvectors.py +++ b/contentcuration/search/management/commands/set_contentnode_tsvectors.py @@ -4,15 +4,11 @@ import logging as logmodule import time -from django.contrib.postgres.aggregates import StringAgg from django.core.management.base import BaseCommand from django.db.models import Exists from django.db.models import OuterRef -from search.constants import CONTENTNODE_AUTHOR_TSVECTOR -from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR from search.models import ContentNodeFullTextSearch - -from contentcuration.models import ContentNode +from search.utils import get_fts_annotated_contentnode_qs logmodule.basicConfig(level=logmodule.INFO) @@ -22,20 +18,39 @@ class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument("--channel-id", type=str, dest="channel_id", + help="The channel_id to annotate to the nodes. If not specified then each node's channel_id is queried and then annotated.") + parser.add_argument("--tree-id", type=int, dest="tree_id", + help="Set tsvectors for a specific tree_id nodes only. If not specified then tsvectors for all nodes of ContentNode table are set.") + parser.add_argument("--published", dest="published", action="store_true", help="Filters on whether node is published or not.") + parser.add_argument("--complete", dest="complete", action="store_true", help="Filters on whether node is complete or not.") - def handle(self, *args, **options): - start = time.time() + def get_tsvector_nodes_queryset(self, *args, **options): + tsvector_nodes_queryset = get_fts_annotated_contentnode_qs(channel_id=options["channel_id"]) + + if options["tree_id"]: + tsvector_nodes_queryset = tsvector_nodes_queryset.filter(tree_id=options["tree_id"]) + + if options["complete"]: + tsvector_nodes_queryset = tsvector_nodes_queryset.filter(complete=True) + + if options["published"]: + tsvector_nodes_queryset = tsvector_nodes_queryset.filter(published=True) tsvector_not_already_inserted_query = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"))) + tsvector_nodes_queryset = (tsvector_nodes_queryset + .filter(tsvector_not_already_inserted_query, channel_id__isnull=False) + .values("id", "channel_id", "keywords_tsvector", "author_tsvector").order_by()) + + return tsvector_nodes_queryset + + def handle(self, *args, **options): + start = time.time() - tsvector_node_query = (ContentNode._annotate_channel_id(ContentNode.objects) - .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), - keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, - author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) - .filter(tsvector_not_already_inserted_query, published=True, channel_id__isnull=False) - .values("id", "channel_id", "keywords_tsvector", "author_tsvector").order_by()) + tsvector_nodes_queryset = self.get_tsvector_nodes_queryset(*args, **options) - insertable_nodes_tsvector = list(tsvector_node_query[:CHUNKSIZE]) + insertable_nodes_tsvector = list(tsvector_nodes_queryset[:CHUNKSIZE]) total_tsvectors_inserted = 0 while insertable_nodes_tsvector: @@ -54,6 +69,6 @@ def handle(self, *args, **options): logging.info("Inserted {} contentnode tsvectors.".format(current_inserts_count)) - 
insertable_nodes_tsvector = list(tsvector_node_query[:CHUNKSIZE]) + insertable_nodes_tsvector = list(tsvector_nodes_queryset[:CHUNKSIZE]) logging.info("Completed! Successfully inserted total of {} contentnode tsvectors in {} seconds.".format(total_tsvectors_inserted, time.time() - start)) diff --git a/contentcuration/search/utils.py b/contentcuration/search/utils.py index 9cb205bb47..4f6768f650 100644 --- a/contentcuration/search/utils.py +++ b/contentcuration/search/utils.py @@ -1,4 +1,9 @@ +from django.contrib.postgres.aggregates import StringAgg from django.contrib.postgres.search import SearchQuery +from django.db.models import Value +from search.constants import CHANNEL_KEYWORDS_TSVECTOR +from search.constants import CONTENTNODE_AUTHOR_TSVECTOR +from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR from search.constants import POSTGRES_FTS_CONFIG @@ -7,3 +12,37 @@ def get_fts_search_query(value): Returns a `SearchQuery` with our postgres full text search config set on it. """ return SearchQuery(value=value, config=POSTGRES_FTS_CONFIG) + + +def get_fts_annotated_contentnode_qs(channel_id=None): + """ + Returns a `ContentNode` queryset annotated with fields required for full text search. + + If `channel_id` is provided, annotates that specific `channel_id` else annotates + the `channel_id` to which the contentnode belongs. + """ + from contentcuration.models import ContentNode + + if channel_id: + queryset = ContentNode.objects.annotate(channel_id=Value(channel_id)) + else: + queryset = ContentNode._annotate_channel_id(ContentNode.objects) + + queryset = queryset.annotate( + contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), + keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, + author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR + ) + + return queryset + + +def get_fts_annotated_channel_qs(): + """ + Returns a `Channel` queryset annotated with fields required for full text search. 
+ """ + from contentcuration.models import Channel + from contentcuration.viewsets.channel import primary_token_subquery + + return Channel.objects.annotate(primary_channel_token=primary_token_subquery, + keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index aa6c7fe9b7..58660fce5b 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -17,7 +17,7 @@ from contentcuration.models import Channel from contentcuration.models import File -from contentcuration.utils.pagination import CachedListPagination +from contentcuration.utils.pagination import ValuesViewsetPageNumberPagination from contentcuration.viewsets.base import ReadOnlyValuesViewset from contentcuration.viewsets.base import RequiredFilterSet from contentcuration.viewsets.common import NotNullMapArrayAgg @@ -25,7 +25,7 @@ from contentcuration.viewsets.common import UUIDInFilter -class ListPagination(CachedListPagination): +class ListPagination(ValuesViewsetPageNumberPagination): page_size = 25 page_size_query_param = "page_size" max_page_size = 100 From 034ddffd6b71b6a4369e3b06b52d0b48685dbcd8 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 21 Sep 2022 12:41:57 -0700 Subject: [PATCH 026/313] Add intellij run configuration for the devserver, allowing in IDE debugging --- .run/devserver.run.xml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .run/devserver.run.xml diff --git a/.run/devserver.run.xml b/.run/devserver.run.xml new file mode 100644 index 0000000000..1c94ee6402 --- /dev/null +++ b/.run/devserver.run.xml @@ -0,0 +1,24 @@ + + + + + From 36a42f46eb4ff312fa4b9a2aaec13a7db6d2a7dd Mon Sep 17 00:00:00 2001 From: Prathamesh Desai Date: Thu, 22 Sep 2022 01:35:30 +0530 Subject: [PATCH 027/313] validate file_format in upload_url --- .../contentcuration/tests/viewsets/test_file.py | 16 ++++++++++++++++ contentcuration/contentcuration/viewsets/file.py | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/contentcuration/contentcuration/tests/viewsets/test_file.py b/contentcuration/contentcuration/tests/viewsets/test_file.py index dca5083d5e..3e0fe475e2 100644 --- a/contentcuration/contentcuration/tests/viewsets/test_file.py +++ b/contentcuration/contentcuration/tests/viewsets/test_file.py @@ -413,6 +413,22 @@ def test_duration_invalid(self): self.assertEqual(response.status_code, 400) + def test_invalid_file_format_upload(self): + self.client.force_authenticate(user=self.user) + file = { + "size": 1000, + "checksum": uuid.uuid4().hex, + "name": "le_studio", + "file_format": "ppx", + "preset": format_presets.AUDIO, + "duration": 10.123 + } + response = self.client.post( + reverse("file-upload-url"), file, format="json", + ) + + self.assertEqual(response.status_code, 400) + def test_insufficient_storage(self): self.file["size"] = 100000000000000 diff --git a/contentcuration/contentcuration/viewsets/file.py b/contentcuration/contentcuration/viewsets/file.py index 9b1b67ccaa..78e82d6d8d 100644 --- a/contentcuration/contentcuration/viewsets/file.py +++ b/contentcuration/contentcuration/viewsets/file.py @@ -3,6 +3,7 @@ from django.core.exceptions import PermissionDenied from django.http import HttpResponseBadRequest +from le_utils.constants import file_formats from rest_framework.decorators import action from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response @@ -164,6 +165,9 @@ def 
upload_url(self, request): filepath, checksum_base64, 600, content_length=size ) + if file_format not in dict(file_formats.choices): + return HttpResponseBadRequest("Invalid file_format!") + file = File( file_size=size, checksum=checksum, From a904f47cbfc43e0c4e58b13932eaa22f37054ebb Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Thu, 22 Sep 2022 07:23:41 -0700 Subject: [PATCH 028/313] Add run configuration to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b5e0261f09..5c77ad049d 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,8 @@ var/ # IntelliJ IDE, except project config .idea/* !.idea/studio.iml +# ignore future updates to run configuration +.run/devserver.run.xml # PyInstaller # Usually these files are written by a python script from a template From 9373f966e6fd3b3f271b1178128b416b8406a6fe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 23 Sep 2022 22:44:16 +0000 Subject: [PATCH 029/313] Bump protobuf from 3.17.0 to 3.18.3 Bumps [protobuf](https://github.com/protocolbuffers/protobuf) from 3.17.0 to 3.18.3. - [Release notes](https://github.com/protocolbuffers/protobuf/releases) - [Changelog](https://github.com/protocolbuffers/protobuf/blob/main/generate_changelog.py) - [Commits](https://github.com/protocolbuffers/protobuf/compare/v3.17.0...v3.18.3) --- updated-dependencies: - dependency-name: protobuf dependency-type: indirect ... Signed-off-by: dependabot[bot] --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 27c01a16e9..7af7fb673e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -191,7 +191,7 @@ proto-plus==1.18.1 # via # google-cloud-error-reporting # google-cloud-logging -protobuf==3.17.0 +protobuf==3.18.3 # via # google-api-core # googleapis-common-protos @@ -257,7 +257,6 @@ six==1.16.0 # html5lib # oauth2client # progressbar2 - # protobuf # python-dateutil # python-utils sqlparse==0.4.1 From 9ec60cf200e8f445556e2d89f6eee1a2d9e1bb52 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 29 Sep 2022 00:09:07 +0530 Subject: [PATCH 030/313] fixes the strict update subquery, lightens it up --- contentcuration/contentcuration/utils/publish.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index 017bf4a561..5b38d85576 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -845,18 +845,14 @@ def sync_contentnode_and_channel_tsvectors(channel_id): # Now, all remaining nodes are in main_tree, so let's update them. # Update only changed nodes. 
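         # (Hedged reading of the change below: the old code filtered on
         # tree_id/complete/changed inside the correlated subquery while the
         # outer UPDATE still touched every row for the channel; when the
         # subquery matched no row, UPDATE ... SET col = (SELECT ...) writes
         # NULL into that row's tsvector. Moving complete/changed to the outer
         # filter updates only rows that actually get a recomputed value, and
         # OuterRef("contentnode_id") plus the [:1] slice keep the subquery
         # correlated and scalar.)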
- changed_nodes_subquery = (get_fts_annotated_contentnode_qs(channel_id) - .filter(id=OuterRef("contentnode_id"), - tree_id=channel["main_tree__tree_id"], - complete=True, - changed=True) - .order_by()) - ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).update( - keywords_tsvector=Subquery(changed_nodes_subquery.values_list("keywords_tsvector")[:1]), - author_tsvector=Subquery(changed_nodes_subquery.values_list("author_tsvector")[:1]) + node_tsv_subquery = get_fts_annotated_contentnode_qs(channel_id).filter(id=OuterRef("contentnode_id")).order_by() + ContentNodeFullTextSearch.objects.filter(channel_id=channel_id, contentnode__complete=True, contentnode__changed=True).update( + keywords_tsvector=Subquery(node_tsv_subquery.values("keywords_tsvector")[:1]), + author_tsvector=Subquery(node_tsv_subquery.values("author_tsvector")[:1]) ) # Insert newly created nodes. + # "set_contentnode_tsvectors" command is defined in "search/management/commands" directory. call_command("set_contentnode_tsvectors", "--channel-id={}".format(channel_id), "--tree-id={}".format(channel["main_tree__tree_id"]), From aae3be1891e635a8d87ab58814f0b625dbee4d77 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 29 Sep 2022 01:51:33 +0530 Subject: [PATCH 031/313] Do not output deleted channel nodes on search --- contentcuration/search/viewsets/contentnode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 58660fce5b..676698031d 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -52,11 +52,11 @@ def filter_channel_list(self, queryset, name, value): if value == "public": channel_ids = Channel.get_public_channels().values_list("id", flat=True) elif value == "edit" and user: - channel_ids = user.editable_channels.values_list("id", flat=True) + channel_ids = user.editable_channels.filter(deleted=False).values_list("id", flat=True) elif value == "bookmark" and user: - channel_ids = user.bookmarked_channels.values_list("id", flat=True) + channel_ids = user.bookmarked_channels.filter(deleted=False).values_list("id", flat=True) elif value == "view" and user: - channel_ids = user.view_only_channels.values_list("id", flat=True) + channel_ids = user.view_only_channels.filter(deleted=False).values_list("id", flat=True) return queryset.filter(channel_id__in=list(channel_ids)) From 17b3dfcf3b331b76fa5a416002faaf9866ed681a Mon Sep 17 00:00:00 2001 From: Prathamesh Desai Date: Mon, 3 Oct 2022 19:31:37 +0530 Subject: [PATCH 032/313] Work in progress --- .../frontend/shared/languageSwitcher/mixin.js | 1 + .../frontend/shared/views/AppBar.vue | 27 +++- contentcuration/contentcuration/urls.py | 2 +- contentcuration/contentcuration/views/base.py | 149 +++++++++++++----- 4 files changed, 133 insertions(+), 46 deletions(-) diff --git a/contentcuration/contentcuration/frontend/shared/languageSwitcher/mixin.js b/contentcuration/contentcuration/frontend/shared/languageSwitcher/mixin.js index 8b75b03191..682c676eae 100644 --- a/contentcuration/contentcuration/frontend/shared/languageSwitcher/mixin.js +++ b/contentcuration/contentcuration/frontend/shared/languageSwitcher/mixin.js @@ -1,4 +1,5 @@ import { availableLanguages, currentLanguage, sortLanguages } from '../i18n'; + import client from 'shared/client'; export default { diff --git a/contentcuration/contentcuration/frontend/shared/views/AppBar.vue 
b/contentcuration/contentcuration/frontend/shared/views/AppBar.vue index e018472528..f437e06e51 100644 --- a/contentcuration/contentcuration/frontend/shared/views/AppBar.vue +++ b/contentcuration/contentcuration/frontend/shared/views/AppBar.vue @@ -75,9 +75,30 @@ - - {{ $tr('logIn') }} - + @@ -84,11 +100,13 @@ import { mapActions, mapState } from 'vuex'; import KolibriLogo from './KolibriLogo'; + import LanguageSwitcherModal from 'shared/languageSwitcher/LanguageSwitcherModal'; export default { name: 'MainNavigationDrawer', components: { KolibriLogo, + LanguageSwitcherModal, }, props: { value: { @@ -96,6 +114,11 @@ default: false, }, }, + data() { + return { + showLanguageModal: false, + }; + }, computed: { ...mapState({ user: state => state.session.currentUser, @@ -131,6 +154,7 @@ channelsLink: 'Channels', administrationLink: 'Administration', settingsLink: 'Settings', + changeLanguage: 'Change language', helpLink: 'Help and support', logoutLink: 'Sign out', copyright: '© {year} Learning Equality', diff --git a/contentcuration/contentcuration/templates/pwa/service_worker.js b/contentcuration/contentcuration/templates/pwa/service_worker.js index 583a43bdf2..f69abd79bf 100644 --- a/contentcuration/contentcuration/templates/pwa/service_worker.js +++ b/contentcuration/contentcuration/templates/pwa/service_worker.js @@ -1,6 +1,5 @@ {% load js_reverse %} -{% js_reverse_inline %} {% autoescape off %} {{ webpack_service_worker }} diff --git a/contentcuration/contentcuration/views/base.py b/contentcuration/contentcuration/views/base.py index bbdce92823..d415778aab 100644 --- a/contentcuration/contentcuration/views/base.py +++ b/contentcuration/contentcuration/views/base.py @@ -353,7 +353,6 @@ def set_language(request): payload = json.loads(request.body) lang_code = payload.get(LANGUAGE_QUERY_PARAMETER) next_url = payload.get("next") - # next_url = urlsplit(payload.get("next")) if payload.get("next") else None if ( (next_url or request.accepts('text/html')) and From 2a42f1895cbc1ec4cf3c7ffaeaf0b1d093cb7ffc Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 19 Oct 2022 16:29:21 +0530 Subject: [PATCH 043/313] User soft delete via garbage collection --- .../contentcuration/constants/user_history.py | 11 +++ contentcuration/contentcuration/forms.py | 2 +- .../management/commands/garbage_collect.py | 13 ++- .../migrations/0141_user_deleted.py | 18 ---- .../migrations/0141_user_soft_delete.py | 31 +++++++ contentcuration/contentcuration/models.py | 83 +++++++++++++------ contentcuration/contentcuration/settings.py | 6 +- .../contentcuration/tests/test_models.py | 28 ++++--- .../contentcuration/utils/garbage_collect.py | 73 +++++++++------- 9 files changed, 177 insertions(+), 88 deletions(-) create mode 100644 contentcuration/contentcuration/constants/user_history.py delete mode 100644 contentcuration/contentcuration/migrations/0141_user_deleted.py create mode 100644 contentcuration/contentcuration/migrations/0141_user_soft_delete.py diff --git a/contentcuration/contentcuration/constants/user_history.py b/contentcuration/contentcuration/constants/user_history.py new file mode 100644 index 0000000000..1eecf79c17 --- /dev/null +++ b/contentcuration/contentcuration/constants/user_history.py @@ -0,0 +1,11 @@ +from django.utils.translation import ugettext_lazy as _ + +DELETION = "deletion" +RECOVERY = "recovery" +RELATED_DATA_HARD_DELETION = "related-data-hard-deletion" + +choices = ( + (DELETION, _("User soft deletion")), + (RECOVERY, _("User soft deletion recovery")), + (RELATED_DATA_HARD_DELETION, 
_("User related data hard deletion")), +) diff --git a/contentcuration/contentcuration/forms.py b/contentcuration/contentcuration/forms.py index d9dc781f61..d0ae49893f 100644 --- a/contentcuration/contentcuration/forms.py +++ b/contentcuration/contentcuration/forms.py @@ -46,7 +46,7 @@ class RegistrationForm(UserCreationForm, ExtraFormMixin): def clean_email(self): email = self.cleaned_data['email'].strip().lower() - if User.objects.filter(Q(is_active=True) | Q(deleted=True), email__iexact=email).exists(): + if User.objects.filter(Q(is_active=True) | Q(deleted_at__isnull=False), email__iexact=email).exists(): raise UserWarning return email diff --git a/contentcuration/contentcuration/management/commands/garbage_collect.py b/contentcuration/contentcuration/management/commands/garbage_collect.py index f31db7ad5c..f22f70dd4b 100644 --- a/contentcuration/contentcuration/management/commands/garbage_collect.py +++ b/contentcuration/contentcuration/management/commands/garbage_collect.py @@ -11,6 +11,7 @@ from contentcuration.utils.garbage_collect import clean_up_contentnodes from contentcuration.utils.garbage_collect import clean_up_deleted_chefs from contentcuration.utils.garbage_collect import clean_up_feature_flags +from contentcuration.utils.garbage_collect import clean_up_soft_deleted_users from contentcuration.utils.garbage_collect import clean_up_stale_files from contentcuration.utils.garbage_collect import clean_up_tasks @@ -26,15 +27,23 @@ def handle(self, *args, **options): Actual logic for garbage collection. """ - # clean up contentnodes, files and file objects on storage that are associated - # with the orphan tree + # Clean up users that are soft deleted and are older than ACCOUNT_DELETION_BUFFER (90 days). + # Also clean contentnodes, files and file objects on storage that are associated + # with the orphan tree. 
+ logging.info("Cleaning up soft deleted users older than ACCOUNT_DELETION_BUFFER (90 days)") + clean_up_soft_deleted_users() + logging.info("Cleaning up contentnodes from the orphan tree") clean_up_contentnodes() + logging.info("Cleaning up deleted chef nodes") clean_up_deleted_chefs() + logging.info("Cleaning up feature flags") clean_up_feature_flags() + logging.info("Cleaning up stale file objects") clean_up_stale_files() + logging.info("Cleaning up tasks") clean_up_tasks() diff --git a/contentcuration/contentcuration/migrations/0141_user_deleted.py b/contentcuration/contentcuration/migrations/0141_user_deleted.py deleted file mode 100644 index 25444e6577..0000000000 --- a/contentcuration/contentcuration/migrations/0141_user_deleted.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 3.2.14 on 2022-10-06 11:18 -from django.db import migrations -from django.db import models - - -class Migration(migrations.Migration): - - dependencies = [ - ('contentcuration', '0140_delete_task'), - ] - - operations = [ - migrations.AddField( - model_name='user', - name='deleted', - field=models.BooleanField(db_index=True, default=False), - ), - ] diff --git a/contentcuration/contentcuration/migrations/0141_user_soft_delete.py b/contentcuration/contentcuration/migrations/0141_user_soft_delete.py new file mode 100644 index 0000000000..1f58e4076a --- /dev/null +++ b/contentcuration/contentcuration/migrations/0141_user_soft_delete.py @@ -0,0 +1,31 @@ +# Generated by Django 3.2.14 on 2022-10-19 09:41 +import django.db.models.deletion +import django.utils.timezone +from django.conf import settings +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [ + ('contentcuration', '0140_delete_task'), + ] + + operations = [ + migrations.AddField( + model_name='user', + name='deleted_at', + field=models.DateTimeField(blank=True, null=True), + ), + migrations.CreateModel( + name='UserHistory', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('action', models.CharField(choices=[('deletion', 'User soft deletion'), ('recovery', 'User soft deletion recovery'), + ('related-data-hard-deletion', 'User related data hard deletion')], max_length=32)), + ('performed_at', models.DateTimeField(default=django.utils.timezone.now)), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='history', to=settings.AUTH_USER_MODEL)), + ], + ), + ] diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 079833c278..537f867978 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -66,6 +66,7 @@ from contentcuration.constants import channel_history from contentcuration.constants import completion_criteria +from contentcuration.constants import user_history from contentcuration.constants.contentnode import kind_activity_map from contentcuration.db.models.expressions import Array from contentcuration.db.models.functions import ArrayRemove @@ -199,7 +200,7 @@ class User(AbstractBaseUser, PermissionsMixin): content_defaults = JSONField(default=dict) policies = JSONField(default=dict, null=True) feature_flags = JSONField(default=dict, null=True) - deleted = models.BooleanField(default=False, db_index=True) + deleted_at = models.DateTimeField(null=True, blank=True) _field_updates = FieldTracker(fields=[ # Field to watch for changes @@ -215,44 +216,66 @@ def __unicode__(self): 
def delete(self): """ - Hard deletes invitation associated to this account, hard deletes channel and channet sets - only if user is the only editor. Then soft deletes the user account. + Soft deletes the user account. + """ + self.deleted_at = timezone.now() + # Deactivate the user to disallow authentication and also + # to let the user verify the email again after recovery. + self.is_active = False + self.save() + self.history.create(user_id=self.pk, action=user_history.DELETION) + + def recover(self): + """ + Use this method when we want to recover a user. + """ + self.deleted_at = None + self.save() + self.history.create(user_id=self.pk, action=user_history.RECOVERY) + + def hard_delete_user_related_data(self): + """ + Hard delete all user related data. But keeps the user record itself intact. + + User related data that gets hard deleted are: + - sole editor non-public channels. + - sole editor non-public channelsets. + - sole editor non-public channels' content nodes and its underlying files that are not + used by any other channel. + - all user invitations. """ from contentcuration.viewsets.common import SQCount # Hard delete invitations associated to this account. self.sent_to.all().delete() + self.sent_by.all().delete() - # Hard delete channels associated with this user (if user is the only editor). - user_query = ( + editable_channels_user_query = ( User.objects.filter(editable_channels__id=OuterRef('id')) .values_list('id', flat=True) .distinct() ) - self.editable_channels.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1).delete() + non_public_channels_sole_editor = self.editable_channels.annotate(num_editors=SQCount( + editable_channels_user_query, field="id")).filter(num_editors=1, public=False) + + # Point sole editor non-public channels' contentnodes to orphan tree to let + # our garbage collection delete the nodes and underlying files. + ContentNode._annotate_channel_id(ContentNode.objects).filter(channel_id__in=list( + non_public_channels_sole_editor)).update(parent_id=settings.ORPHANAGE_ROOT_ID) - # Hard delete channel collections associated with this user (if user is the only editor). + # Hard delete non-public channels associated with this user (if user is the only editor). + non_public_channels_sole_editor.delete() + + # Hard delete non-public channel collections associated with this user (if user is the only editor). user_query = ( User.objects.filter(channel_sets__id=OuterRef('id')) .values_list('id', flat=True) .distinct() ) - self.channel_sets.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1).delete() + self.channel_sets.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1, public=False).delete() - # Soft delete user. - self.deleted = True - # Deactivate the user to disallow authentication and also - # to let the user verify the email again after recovery. - self.is_active = False - - self.save() - - def recover(self): - """ - Use this method when we want to recover a user. - """ - self.deleted = False - self.save() + # Create history! 
+        self.history.create(user_id=self.pk, action=user_history.RELATED_DATA_HARD_DELETION)
 
     def can_edit(self, channel_id):
         return Channel.filter_edit_queryset(Channel.objects.all(), self).filter(pk=channel_id).exists()
@@ -424,20 +447,20 @@ def filter_edit_queryset(cls, queryset, user):
         return queryset.filter(pk=user.pk)
 
     @classmethod
-    def get_for_email(cls, email, deleted=False, **filters):
+    def get_for_email(cls, email, deleted_at__isnull=True, **filters):
         """
         Returns the appropriate User record given an email, ordered by:
        - those with is_active=True first, of which there should only ever be one
        - otherwise by ID DESC so the most recent inactive one should be returned
 
         Filters out deleted User records by default. Can be overridden with
-        deleted argument.
+        deleted_at__isnull argument.
 
         :param email: A string of the user's email
         :param filters: Additional filters to filter the User queryset
         :return: User or None
         """
-        return User.objects.filter(email__iexact=email.strip(), deleted=deleted, **filters)\
+        return User.objects.filter(email__iexact=email.strip(), deleted_at__isnull=deleted_at__isnull, **filters)\
             .order_by("-is_active", "-id").first()
 
 
@@ -1060,6 +1083,16 @@ class Meta:
         ]
 
 
+class UserHistory(models.Model):
+    """
+    Model that stores the user's action history.
+    """
+    user = models.ForeignKey(settings.AUTH_USER_MODEL, null=False, blank=False, related_name="history", on_delete=models.CASCADE)
+    action = models.CharField(max_length=32, choices=user_history.choices)
+
+    performed_at = models.DateTimeField(default=timezone.now)
+
+
 class ChannelSet(models.Model):
     # NOTE: this is referred to as "channel collections" on the front-end, but we need to call it
     # something else as there is already a ChannelCollection model on the front-end
diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py
index b9c3a73ca1..de05fb484a 100644
--- a/contentcuration/contentcuration/settings.py
+++ b/contentcuration/contentcuration/settings.py
@@ -329,8 +329,10 @@ def gettext(s):
 HELP_EMAIL = 'content@learningequality.org'
 DEFAULT_FROM_EMAIL = 'Kolibri Studio '
 POLICY_EMAIL = 'legal@learningequality.org'
-ACCOUNT_DELETION_BUFFER = 5 # Used to determine how many days a user
-# has to undo accidentally deleting account
+
+# Used to determine how many days a user
+# has to undo an accidental account deletion.
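+# (Consumed by clean_up_soft_deleted_users() in utils/garbage_collect.py,
+# shown later in this patch, which hard-deletes the related data once
+# deleted_at is older than this buffer.)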
+ACCOUNT_DELETION_BUFFER = 90 DEFAULT_LICENSE = 1 diff --git a/contentcuration/contentcuration/tests/test_models.py b/contentcuration/contentcuration/tests/test_models.py index 66720a0985..e6fbe38a35 100644 --- a/contentcuration/contentcuration/tests/test_models.py +++ b/contentcuration/contentcuration/tests/test_models.py @@ -1,3 +1,4 @@ +import datetime import uuid import mock @@ -11,6 +12,7 @@ from le_utils.constants import format_presets from contentcuration.constants import channel_history +from contentcuration.constants import user_history from contentcuration.models import AssessmentItem from contentcuration.models import Channel from contentcuration.models import ChannelHistory @@ -22,6 +24,7 @@ from contentcuration.models import Invitation from contentcuration.models import object_storage_name from contentcuration.models import User +from contentcuration.models import UserHistory from contentcuration.tests import testdata from contentcuration.tests.base import StudioTestCase @@ -806,27 +809,30 @@ def test_get_for_email(self): user4.delete() self.assertIsNone(User.get_for_email("testing@test.com")) - def test_delete__sets_deleted_true(self): + def test_delete(self): user = self._create_user("tester@tester.com") user.delete() - self.assertEqual(user.deleted, True) - def test_delete__sets_is_active_false(self): - user = self._create_user("tester@tester.com") - user.delete() + # Sets deleted_at? + self.assertIsInstance(user.deleted_at, datetime.datetime) + # Sets is_active to False? self.assertEqual(user.is_active, False) + # Creates user history? + user_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).first() + self.assertIsNotNone(user_delete_history) - def test_recover__sets_deleted_false(self): + def test_recover(self): user = self._create_user("tester@tester.com") user.delete() user.recover() - self.assertEqual(user.deleted, False) - def test_recover__keeps_is_active_false(self): - user = self._create_user("tester@tester.com") - user.delete() - user.recover() + # Sets deleted_at to None? + self.assertEqual(user.deleted_at, None) + # Keeps is_active to False? self.assertEqual(user.is_active, False) + # Creates user history? 
+        user_recover_history = UserHistory.objects.filter(user_id=user.id, action=user_history.RECOVERY).first()
+        self.assertIsNotNone(user_recover_history)


 class ChannelHistoryTestCase(StudioTestCase):
diff --git a/contentcuration/contentcuration/utils/garbage_collect.py b/contentcuration/contentcuration/utils/garbage_collect.py
index 3343013b7c..de5a29921f 100755
--- a/contentcuration/contentcuration/utils/garbage_collect.py
+++ b/contentcuration/contentcuration/utils/garbage_collect.py
@@ -7,7 +7,9 @@
 from celery import states
 from django.conf import settings
+from django.core.files.storage import default_storage
 from django.db.models.expressions import CombinedExpression
+from django.db.models.expressions import Exists
 from django.db.models.expressions import F
 from django.db.models.expressions import Value
 from django.db.models.signals import post_delete
@@ -37,18 +39,57 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         self.receivers = None


-def get_deleted_chefs_root():
+def _get_deleted_chefs_root():
     deleted_chefs_node, _new = ContentNode.objects.get_or_create(pk=settings.DELETED_CHEFS_ROOT_ID, kind_id=content_kinds.TOPIC)
     return deleted_chefs_node


+def _clean_up_files(contentnode_ids):
+    """
+    Clean up the files (both in the DB and in object storage)
+    associated with the `contentnode_ids` iterable that are
+    not referenced by any other contentnode.
+    """
+    files = File.objects.filter(contentnode__in=contentnode_ids)
+    files_on_storage = files.values_list("file_on_disk", flat=True)
+
+    for disk_file_path in files_on_storage:
+        # evaluate the check with .exists(); a bare Exists() expression is only
+        # meaningful inside a queryset and would always be truthy here
+        is_other_node_pointing = File.objects.filter(file_on_disk=disk_file_path).exclude(contentnode__in=contentnode_ids).exists()
+        if not is_other_node_pointing:
+            default_storage.delete(disk_file_path)
+
+    # use _raw_delete for much faster file deletions
+    files._raw_delete(files.db)
+
+
+def clean_up_soft_deleted_users():
+    """
+    Hard deletes user-related data for soft-deleted users whose deletion is older than ACCOUNT_DELETION_BUFFER days.
+
+    Note: the User record itself is not hard deleted.
+
+    User-related data that gets hard deleted:
+    - sole editor non-public channels.
+    - sole editor non-public channelsets.
+    - sole editor non-public channels' content nodes and their underlying files that are not
+    used by any other channel.
+    - all user invitations.
+    """
+    account_deletion_buffer_delta = now() - datetime.timedelta(days=settings.ACCOUNT_DELETION_BUFFER)
+    users_to_delete = User.objects.filter(deleted_at__lt=account_deletion_buffer_delta)
+
+    for user in users_to_delete:
+        user.hard_delete_user_related_data()
+        logging.info("Hard deleted user related data for user {}".format(user.email))
+
+
 def clean_up_deleted_chefs():
     """
     Clean up all deleted chefs attached to the deleted chefs tree, including all
     child nodes in that tree.
     """
-    deleted_chefs_node = get_deleted_chefs_root()
+    deleted_chefs_node = _get_deleted_chefs_root()
     # we cannot use MPTT methods like get_descendants() or use tree_id because for performance reasons
     # we are avoiding MPTT entirely.
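The loop in `_clean_up_files` above is the subtle part of this patch: a path on object storage can be shared by several File rows, so it may only be deleted once no File outside the batch still references it, and that check has to be evaluated per path with `.exists()` rather than with a bare `Exists()` expression. A minimal self-contained sketch of the check (the helper name is illustrative, not part of the patch):

    from django.core.files.storage import default_storage

    from contentcuration.models import File


    def delete_from_storage_if_unreferenced(disk_file_path, contentnode_ids):
        # Evaluate the queryset immediately; Exists() is a query expression
        # meant for annotate()/filter() and is always truthy as a plain object.
        still_referenced = (
            File.objects.filter(file_on_disk=disk_file_path)
            .exclude(contentnode__in=contentnode_ids)
            .exists()
        )
        if not still_referenced:
            default_storage.delete(disk_file_path)
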
nodes_to_clean_up = ContentNode.objects.filter(parent=deleted_chefs_node) @@ -81,7 +122,7 @@ def clean_up_contentnodes(delete_older_than=settings.ORPHAN_DATE_CLEAN_UP_THRESH # delete all files first with DisablePostDeleteSignal(): - clean_up_files(nodes_to_clean_up) + _clean_up_files(nodes_to_clean_up) # Use _raw_delete for fast bulk deletions try: @@ -92,32 +133,6 @@ def clean_up_contentnodes(delete_older_than=settings.ORPHAN_DATE_CLEAN_UP_THRESH pass -def clean_up_files(contentnode_ids): - """ - Clean up the files (both in the DB and in object storage) - associated with the contentnode_ids given in the `contentnode_ids` - iterable. - """ - - # get all file objects associated with these contentnodes - files = File.objects.filter(contentnode__in=contentnode_ids) - # get all their associated real files in object storage - files_on_storage = files.values_list("file_on_disk") - for f in files_on_storage: - # values_list returns each set of items in a tuple, even - # if there's only one item in there. Extract the file_on_disk - # string value from inside that singleton tuple - f[0] - # NOTE (aron):call the storage's delete method on each file, one by one - # disabled for now until we implement logic to not delete files - # that are referenced by non-orphan nodes - # storage.delete(file_path) - - # finally, remove the entries from object storage - # use _raw_delete for much fast file deletions - files._raw_delete(files.db) - - def clean_up_feature_flags(): """ Removes lingering feature flag settings in User records that aren't currently present in the From 4be9df4359a9638a21c11582e613f3d75dc38786 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 21 Oct 2022 13:49:59 +0530 Subject: [PATCH 044/313] fix deleted chefs root references --- contentcuration/contentcuration/tests/test_user.py | 6 ++++-- contentcuration/contentcuration/utils/garbage_collect.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/contentcuration/contentcuration/tests/test_user.py b/contentcuration/contentcuration/tests/test_user.py index f9995fb48c..36ae42f1c7 100644 --- a/contentcuration/contentcuration/tests/test_user.py +++ b/contentcuration/contentcuration/tests/test_user.py @@ -161,10 +161,12 @@ def test_user_csv_export(self): self.assertIn(videos[index - 1].original_filename, row) self.assertIn(_format_size(videos[index - 1].file_size), row) self.assertEqual(index, len(videos)) + """ + Write and refactor for related data hard delete test cases below. + """ def test_account_deletion(self): - self.user.delete() - self.assertFalse(Channel.objects.filter(pk=self.channel.pk).exists()) + pass def test_account_deletion_shared_channels_preserved(self): # Deleting a user account shouldn't delete shared channels diff --git a/contentcuration/contentcuration/utils/garbage_collect.py b/contentcuration/contentcuration/utils/garbage_collect.py index de5a29921f..206ad74865 100755 --- a/contentcuration/contentcuration/utils/garbage_collect.py +++ b/contentcuration/contentcuration/utils/garbage_collect.py @@ -39,7 +39,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.receivers = None -def _get_deleted_chefs_root(): +def get_deleted_chefs_root(): deleted_chefs_node, _new = ContentNode.objects.get_or_create(pk=settings.DELETED_CHEFS_ROOT_ID, kind_id=content_kinds.TOPIC) return deleted_chefs_node @@ -89,7 +89,7 @@ def clean_up_deleted_chefs(): child nodes in that tree. 
""" - deleted_chefs_node = _get_deleted_chefs_root() + deleted_chefs_node = get_deleted_chefs_root() # we cannot use MPTT methods like get_descendants() or use tree_id because for performance reasons # we are avoiding MPTT entirely. nodes_to_clean_up = ContentNode.objects.filter(parent=deleted_chefs_node) From d3a4263ee3d0669f2c1c03c50373051dca12821b Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Sun, 23 Oct 2022 01:20:36 +0530 Subject: [PATCH 045/313] User UserHistory as single source of truth for timestamps! --- .../contentcuration/constants/user_history.py | 4 +- contentcuration/contentcuration/forms.py | 2 +- ...oft_delete.py => 0141_soft_delete_user.py} | 10 +-- contentcuration/contentcuration/models.py | 20 +++-- .../contentcuration/tests/test_models.py | 84 +++++++++++++++++-- .../contentcuration/tests/test_user.py | 15 ---- .../tests/utils/test_garbage_collect.py | 34 ++++++++ .../contentcuration/utils/garbage_collect.py | 12 ++- 8 files changed, 140 insertions(+), 41 deletions(-) rename contentcuration/contentcuration/migrations/{0141_user_soft_delete.py => 0141_soft_delete_user.py} (63%) diff --git a/contentcuration/contentcuration/constants/user_history.py b/contentcuration/contentcuration/constants/user_history.py index 1eecf79c17..76655993ef 100644 --- a/contentcuration/contentcuration/constants/user_history.py +++ b/contentcuration/contentcuration/constants/user_history.py @@ -1,7 +1,7 @@ from django.utils.translation import ugettext_lazy as _ -DELETION = "deletion" -RECOVERY = "recovery" +DELETION = "soft-deletion" +RECOVERY = "soft-recovery" RELATED_DATA_HARD_DELETION = "related-data-hard-deletion" choices = ( diff --git a/contentcuration/contentcuration/forms.py b/contentcuration/contentcuration/forms.py index d0ae49893f..d9dc781f61 100644 --- a/contentcuration/contentcuration/forms.py +++ b/contentcuration/contentcuration/forms.py @@ -46,7 +46,7 @@ class RegistrationForm(UserCreationForm, ExtraFormMixin): def clean_email(self): email = self.cleaned_data['email'].strip().lower() - if User.objects.filter(Q(is_active=True) | Q(deleted_at__isnull=False), email__iexact=email).exists(): + if User.objects.filter(Q(is_active=True) | Q(deleted=True), email__iexact=email).exists(): raise UserWarning return email diff --git a/contentcuration/contentcuration/migrations/0141_user_soft_delete.py b/contentcuration/contentcuration/migrations/0141_soft_delete_user.py similarity index 63% rename from contentcuration/contentcuration/migrations/0141_user_soft_delete.py rename to contentcuration/contentcuration/migrations/0141_soft_delete_user.py index 1f58e4076a..df66bafcc0 100644 --- a/contentcuration/contentcuration/migrations/0141_user_soft_delete.py +++ b/contentcuration/contentcuration/migrations/0141_soft_delete_user.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.14 on 2022-10-19 09:41 +# Generated by Django 3.2.14 on 2022-10-22 18:30 import django.db.models.deletion import django.utils.timezone from django.conf import settings @@ -15,15 +15,15 @@ class Migration(migrations.Migration): operations = [ migrations.AddField( model_name='user', - name='deleted_at', - field=models.DateTimeField(blank=True, null=True), + name='deleted', + field=models.BooleanField(db_index=True, default=False), ), migrations.CreateModel( name='UserHistory', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('action', models.CharField(choices=[('deletion', 'User soft deletion'), ('recovery', 'User soft deletion recovery'), - 
-                    ('related-data-hard-deletion', 'User related data hard deletion')], max_length=32)),
+                ('action', models.CharField(choices=[('soft-deletion', 'User soft deletion'), ('soft-recovery',
+                    'User soft deletion recovery'), ('related-data-hard-deletion', 'User related data hard deletion')], max_length=32)),
                 ('performed_at', models.DateTimeField(default=django.utils.timezone.now)),
                 ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='history', to=settings.AUTH_USER_MODEL)),
             ],
diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py
index 537f867978..07aa39f2b1 100644
--- a/contentcuration/contentcuration/models.py
+++ b/contentcuration/contentcuration/models.py
@@ -200,7 +200,7 @@ class User(AbstractBaseUser, PermissionsMixin):
     content_defaults = JSONField(default=dict)
     policies = JSONField(default=dict, null=True)
     feature_flags = JSONField(default=dict, null=True)
-    deleted_at = models.DateTimeField(null=True, blank=True)
+    deleted = models.BooleanField(default=False, db_index=True)

     _field_updates = FieldTracker(fields=[
         # Field to watch for changes
@@ -218,7 +218,7 @@ def delete(self):
         """
         Soft deletes the user account.
         """
-        self.deleted_at = timezone.now()
+        self.deleted = True
         # Deactivate the user to disallow authentication and also
         # to let the user verify the email again after recovery.
         self.is_active = False
@@ -229,7 +229,7 @@ def recover(self):
         """
         Use this method when we want to recover a user.
         """
-        self.deleted_at = None
+        self.deleted = False
         self.save()
         self.history.create(user_id=self.pk, action=user_history.RECOVERY)
@@ -261,7 +261,7 @@ def hard_delete_user_related_data(self):
         # Point sole editor non-public channels' contentnodes to orphan tree to let
         # our garbage collection delete the nodes and underlying files.
         ContentNode._annotate_channel_id(ContentNode.objects).filter(channel_id__in=list(
-            non_public_channels_sole_editor)).update(parent_id=settings.ORPHANAGE_ROOT_ID)
+            non_public_channels_sole_editor.values_list("id", flat=True))).update(parent_id=settings.ORPHANAGE_ROOT_ID)

         # Hard delete non-public channels associated with this user (if user is the only editor).
         non_public_channels_sole_editor.delete()
@@ -447,21 +447,23 @@ def filter_edit_queryset(cls, queryset, user):
         return queryset.filter(pk=user.pk)

     @classmethod
-    def get_for_email(cls, email, deleted_at__isnull=True, **filters):
+    def get_for_email(cls, email, deleted=False, **filters):
         """
         Returns the appropriate User record given an email, ordered by:
         - those with is_active=True first, of which there should only ever be one
         - otherwise by ID DESC so the most recent inactive should be returned

-        Filters out deleted User records by default. Can be overridden with
-        deleted_at__isnull argument.
+        Filters out deleted User records by default. To include both deleted and
+        undeleted user records, pass None to the deleted argument.
:param email: A string of the user's email :param filters: Additional filters to filter the User queryset :return: User or None """ - return User.objects.filter(email__iexact=email.strip(), deleted_at__isnull=deleted_at__isnull, **filters)\ - .order_by("-is_active", "-id").first() + user_qs = User.objects.filter(email__iexact=email.strip()) + if deleted is not None: + user_qs = user_qs.filter(deleted=deleted) + return user_qs.filter(**filters).order_by("-is_active", "-id").first() class UUIDField(models.CharField): diff --git a/contentcuration/contentcuration/tests/test_models.py b/contentcuration/contentcuration/tests/test_models.py index e6fbe38a35..9734ef1309 100644 --- a/contentcuration/contentcuration/tests/test_models.py +++ b/contentcuration/contentcuration/tests/test_models.py @@ -1,4 +1,3 @@ -import datetime import uuid import mock @@ -6,6 +5,7 @@ from django.conf import settings from django.core.cache import cache from django.core.exceptions import ValidationError +from django.db.models import Q from django.db.utils import IntegrityError from django.utils import timezone from le_utils.constants import content_kinds @@ -16,6 +16,7 @@ from contentcuration.models import AssessmentItem from contentcuration.models import Channel from contentcuration.models import ChannelHistory +from contentcuration.models import ChannelSet from contentcuration.models import ContentNode from contentcuration.models import CONTENTNODE_TREE_ID_CACHE_KEY from contentcuration.models import File @@ -781,6 +782,51 @@ def _create_user(self, email, password='password', is_active=True): user.save() return user + def _setup_user_related_data(self): + user_a = self._create_user("a@tester.com") + user_b = self._create_user("b@tester.com") + + # Create a sole editor non-public channel. + sole_editor_channel = Channel.objects.create(name="sole-editor") + sole_editor_channel.editors.add(user_a) + + # Create sole-editor channel nodes. + for i in range(0, 3): + testdata.node({ + "title": "sole-editor-channel-node", + "kind_id": "video", + }, parent=sole_editor_channel.main_tree) + + # Create a sole editor public channel. + public_channel = testdata.channel("public") + public_channel.editors.add(user_a) + public_channel.public = True + public_channel.save() + + # Create a shared channel. + shared_channel = testdata.channel("shared-channel") + shared_channel.editors.add(user_a) + shared_channel.editors.add(user_b) + + # Invitations. + Invitation.objects.create(sender_id=user_a.id, invited_id=user_b.id) + Invitation.objects.create(sender_id=user_b.id, invited_id=user_a.id) + + # Channel sets. + channel_set = ChannelSet.objects.create(name="sole-editor") + channel_set.editors.add(user_a) + + channel_set = ChannelSet.objects.create(name="public") + channel_set.editors.add(user_a) + channel_set.public = True + channel_set.save() + + channel_set = ChannelSet.objects.create(name="shared-channelset") + channel_set.editors.add(user_a) + channel_set.editors.add(user_b) + + return user_a + def test_unique_lower_email(self): self._create_user("tester@tester.com") with self.assertRaises(IntegrityError): @@ -813,8 +859,8 @@ def test_delete(self): user = self._create_user("tester@tester.com") user.delete() - # Sets deleted_at? - self.assertIsInstance(user.deleted_at, datetime.datetime) + # Sets deleted? + self.assertEqual(user.deleted, True) # Sets is_active to False? self.assertEqual(user.is_active, False) # Creates user history? @@ -826,14 +872,42 @@ def test_recover(self): user.delete() user.recover() - # Sets deleted_at to None? 
-        self.assertEqual(user.deleted_at, None)
+        # Sets deleted to False?
+        self.assertEqual(user.deleted, False)
         # Keeps is_active to False?
         self.assertEqual(user.is_active, False)
         # Creates user history?
         user_recover_history = UserHistory.objects.filter(user_id=user.id, action=user_history.RECOVERY).first()
         self.assertIsNotNone(user_recover_history)
+
+    def test_hard_delete_user_related_data(self):
+        user = self._setup_user_related_data()
+        user.hard_delete_user_related_data()
+
+        # Deletes sole-editor channels.
+        self.assertFalse(Channel.objects.filter(name="sole-editor").exists())
+        # Preserves shared channels.
+        self.assertTrue(Channel.objects.filter(name="shared-channel").exists())
+        # Preserves public channels.
+        self.assertTrue(Channel.objects.filter(name="public").exists())
+
+        # Deletes all user-related invitations.
+        self.assertFalse(Invitation.objects.filter(Q(sender_id=user.id) | Q(invited_id=user.id)).exists())
+
+        # Deletes sole-editor channelsets.
+        self.assertFalse(ChannelSet.objects.filter(name="sole-editor").exists())
+        # Preserves shared channelsets.
+        self.assertTrue(ChannelSet.objects.filter(name="shared-channelset").exists())
+        # Preserves public channelsets.
+        self.assertTrue(ChannelSet.objects.filter(name="public").exists())
+
+        # Do all contentnodes of the sole-editor channel point to the ORPHANAGE ROOT NODE?
+        self.assertFalse(ContentNode.objects.filter(~Q(parent_id=settings.ORPHANAGE_ROOT_ID)
+                                                    & Q(title="sole-editor-channel-node")).exists())
+
+        # Creates user history?
+        user_hard_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).first()
+        self.assertIsNotNone(user_hard_delete_history)


 class ChannelHistoryTestCase(StudioTestCase):
     def setUp(self):
diff --git a/contentcuration/contentcuration/tests/test_user.py b/contentcuration/contentcuration/tests/test_user.py
index 36ae42f1c7..9fda1ceefe 100644
--- a/contentcuration/contentcuration/tests/test_user.py
+++ b/contentcuration/contentcuration/tests/test_user.py
@@ -15,7 +15,6 @@
 from .base import BaseAPITestCase
 from .testdata import fileobj_video
-from contentcuration.models import Channel
 from contentcuration.models import DEFAULT_CONTENT_DEFAULTS
 from contentcuration.models import Invitation
 from contentcuration.models import User
@@ -161,17 +160,3 @@ def test_user_csv_export(self):
             self.assertIn(videos[index - 1].original_filename, row)
             self.assertIn(_format_size(videos[index - 1].file_size), row)
         self.assertEqual(index, len(videos))
-    """
-    Write and refactor for related data hard delete test cases below.
- """ - - def test_account_deletion(self): - pass - - def test_account_deletion_shared_channels_preserved(self): - # Deleting a user account shouldn't delete shared channels - newuser = self.create_user() - self.channel.editors.add(newuser) - self.channel.save() - self.user.delete() - self.assertTrue(Channel.objects.filter(pk=self.channel.pk).exists()) diff --git a/contentcuration/contentcuration/tests/utils/test_garbage_collect.py b/contentcuration/contentcuration/tests/utils/test_garbage_collect.py index 6746fa43a6..178bc25656 100644 --- a/contentcuration/contentcuration/tests/utils/test_garbage_collect.py +++ b/contentcuration/contentcuration/tests/utils/test_garbage_collect.py @@ -17,15 +17,19 @@ from contentcuration import models as cc from contentcuration.api import activate_channel +from contentcuration.constants import user_history from contentcuration.models import ContentNode from contentcuration.models import File from contentcuration.models import TaskResult +from contentcuration.models import UserHistory from contentcuration.tests.base import BaseAPITestCase from contentcuration.tests.base import StudioTestCase from contentcuration.tests.testdata import tree +from contentcuration.utils.db_tools import create_user from contentcuration.utils.garbage_collect import clean_up_contentnodes from contentcuration.utils.garbage_collect import clean_up_deleted_chefs from contentcuration.utils.garbage_collect import clean_up_feature_flags +from contentcuration.utils.garbage_collect import clean_up_soft_deleted_users from contentcuration.utils.garbage_collect import clean_up_stale_files from contentcuration.utils.garbage_collect import clean_up_tasks from contentcuration.utils.garbage_collect import get_deleted_chefs_root @@ -192,6 +196,36 @@ def _create_expired_contentnode(creation_date=THREE_MONTHS_AGO): return c +def _create_expired_deleted_user(email="test@test.com", deletion_date=THREE_MONTHS_AGO): + user = create_user(email, "password", "test", "test") + user.delete() + + user_latest_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).order_by("-performed_at").first() + user_latest_delete_history.performed_at = deletion_date + user_latest_delete_history.save() + return user + + +class CleanUpSoftDeletedExpiredUsersTestCase(StudioTestCase): + def test_cleanup__all_expired_soft_deleted_users(self): + expired_users = [] + for i in range(0, 5): + expired_users.append(_create_expired_deleted_user(email=f"test-{i}@test.com")) + + clean_up_soft_deleted_users() + + for user in expired_users: + assert UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).exists() is True + + def test_no_cleanup__unexpired_soft_deleted_users(self): + # TO DO + pass + + def test_no_cleanup__undeleted_users(self): + # TO DO + pass + + class CleanUpContentNodesTestCase(StudioTestCase): def test_delete_all_contentnodes_in_orphanage_tree(self): diff --git a/contentcuration/contentcuration/utils/garbage_collect.py b/contentcuration/contentcuration/utils/garbage_collect.py index 206ad74865..f7444f5f2e 100755 --- a/contentcuration/contentcuration/utils/garbage_collect.py +++ b/contentcuration/contentcuration/utils/garbage_collect.py @@ -17,11 +17,13 @@ from le_utils.constants import content_kinds from contentcuration.constants import feature_flags +from contentcuration.constants import user_history from contentcuration.db.models.functions import JSONObjectKeys from contentcuration.models import ContentNode from contentcuration.models import 
File from contentcuration.models import TaskResult from contentcuration.models import User +from contentcuration.models import UserHistory class DisablePostDeleteSignal(object): @@ -76,11 +78,13 @@ def clean_up_soft_deleted_users(): - all user invitations. """ account_deletion_buffer_delta = now() - datetime.timedelta(days=settings.ACCOUNT_DELETION_BUFFER) - users_to_delete = User.objects.filter(deleted_at__lt=account_deletion_buffer_delta) + deleted_users = User.objects.filter(deleted=True) - for user in users_to_delete: - user.hard_delete_user_related_data() - logging.info("Hard deleted user related data for user {}".format(user.email)) + for user in deleted_users: + latest_deletion_time = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).order_by("-performed_at").first() + if latest_deletion_time and latest_deletion_time.performed_at < account_deletion_buffer_delta: + user.hard_delete_user_related_data() + logging.info("Hard deleted user related data for user {}".format(user.email)) def clean_up_deleted_chefs(): From ca666f35c76178b646a81894af9ada9904a3cfe4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Oct 2022 16:09:17 +0000 Subject: [PATCH 046/313] Bump psycopg2-binary from 2.9.4 to 2.9.5 Bumps [psycopg2-binary](https://github.com/psycopg/psycopg2) from 2.9.4 to 2.9.5. - [Release notes](https://github.com/psycopg/psycopg2/releases) - [Changelog](https://github.com/psycopg/psycopg2/blob/master/NEWS) - [Commits](https://github.com/psycopg/psycopg2/commits) --- updated-dependencies: - dependency-name: psycopg2-binary dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index d725a40821..8d4b5eb708 100644 --- a/requirements.in +++ b/requirements.in @@ -3,7 +3,7 @@ django-cte==1.2.1 django-mptt==0.13.4 django-filter==22.1 djangorestframework==3.12.4 -psycopg2-binary==2.9.4 +psycopg2-binary==2.9.5 django-js-reverse==0.9.1 django-registration==3.3 le-utils==0.1.41 diff --git a/requirements.txt b/requirements.txt index 54648ffa08..8b478194a8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -196,7 +196,7 @@ protobuf==3.18.3 # google-api-core # googleapis-common-protos # proto-plus -psycopg2-binary==2.9.4 +psycopg2-binary==2.9.5 # via -r requirements.in pyasn1==0.4.8 # via From 9ab534b2e87ca2590d9fca0847eaa03dce797d0e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 27 Oct 2022 16:58:24 +0000 Subject: [PATCH 047/313] Bump django-mptt from 0.13.4 to 0.14.0 Bumps [django-mptt](https://github.com/django-mptt/django-mptt) from 0.13.4 to 0.14.0. - [Release notes](https://github.com/django-mptt/django-mptt/releases) - [Changelog](https://github.com/django-mptt/django-mptt/blob/main/CHANGELOG.rst) - [Commits](https://github.com/django-mptt/django-mptt/compare/0.13.4...0.14) --- updated-dependencies: - dependency-name: django-mptt dependency-type: direct:production update-type: version-update:semver-minor ... 
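A user only becomes eligible for this hard cleanup once their most recent soft-deletion record is older than the ACCOUNT_DELETION_BUFFER window introduced earlier. A small sketch of that cutoff arithmetic (the function name is illustrative, not part of the patches):

    import datetime

    from django.conf import settings
    from django.utils.timezone import now


    def is_past_deletion_buffer(latest_deletion_performed_at):
        # With ACCOUNT_DELETION_BUFFER = 90, a deletion recorded 91 days ago
        # passes the check, while one recorded 89 days ago does not.
        cutoff = now() - datetime.timedelta(days=settings.ACCOUNT_DELETION_BUFFER)
        return latest_deletion_performed_at < cutoff
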
Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 8d4b5eb708..233e2f08ae 100644 --- a/requirements.in +++ b/requirements.in @@ -1,6 +1,6 @@ attrs==19.3.0 django-cte==1.2.1 -django-mptt==0.13.4 +django-mptt==0.14.0 django-filter==22.1 djangorestframework==3.12.4 psycopg2-binary==2.9.5 diff --git a/requirements.txt b/requirements.txt index 8b478194a8..15a1d19bfd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -87,7 +87,7 @@ django-mathfilters==1.0.0 # via -r requirements.in django-model-utils==4.2.0 # via -r requirements.in -django-mptt==0.13.4 +django-mptt==0.14.0 # via -r requirements.in django-postmark==0.1.6 # via -r requirements.in From 17a7ac1ec63ab197697eb1a89155cb5c588ee837 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Mon, 31 Oct 2022 14:11:05 +0530 Subject: [PATCH 048/313] User cleanup garbage collection tests --- contentcuration/contentcuration/models.py | 1 + .../tests/utils/test_garbage_collect.py | 18 +++++++++++------- .../contentcuration/utils/garbage_collect.py | 17 ++++++++++------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 07aa39f2b1..f6eb908a61 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -200,6 +200,7 @@ class User(AbstractBaseUser, PermissionsMixin): content_defaults = JSONField(default=dict) policies = JSONField(default=dict, null=True) feature_flags = JSONField(default=dict, null=True) + deleted = models.BooleanField(default=False, db_index=True) _field_updates = FieldTracker(fields=[ diff --git a/contentcuration/contentcuration/tests/utils/test_garbage_collect.py b/contentcuration/contentcuration/tests/utils/test_garbage_collect.py index 178bc25656..e4c9941df4 100644 --- a/contentcuration/contentcuration/tests/utils/test_garbage_collect.py +++ b/contentcuration/contentcuration/tests/utils/test_garbage_collect.py @@ -196,12 +196,12 @@ def _create_expired_contentnode(creation_date=THREE_MONTHS_AGO): return c -def _create_expired_deleted_user(email="test@test.com", deletion_date=THREE_MONTHS_AGO): +def _create_deleted_user_in_past(deletion_datetime, email="test@test.com"): user = create_user(email, "password", "test", "test") user.delete() user_latest_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).order_by("-performed_at").first() - user_latest_delete_history.performed_at = deletion_date + user_latest_delete_history.performed_at = deletion_datetime user_latest_delete_history.save() return user @@ -210,7 +210,7 @@ class CleanUpSoftDeletedExpiredUsersTestCase(StudioTestCase): def test_cleanup__all_expired_soft_deleted_users(self): expired_users = [] for i in range(0, 5): - expired_users.append(_create_expired_deleted_user(email=f"test-{i}@test.com")) + expired_users.append(_create_deleted_user_in_past(deletion_datetime=THREE_MONTHS_AGO, email=f"test-{i}@test.com")) clean_up_soft_deleted_users() @@ -218,12 +218,16 @@ def test_cleanup__all_expired_soft_deleted_users(self): assert UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).exists() is True def test_no_cleanup__unexpired_soft_deleted_users(self): - # TO DO - pass + two_months_ago = datetime.now() - timedelta(days=63) + user = _create_deleted_user_in_past(deletion_datetime=two_months_ago) + clean_up_soft_deleted_users() 
+ assert UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).exists() is False def test_no_cleanup__undeleted_users(self): - # TO DO - pass + user = create_user("test@test.com", "password", "test", "test") + clean_up_soft_deleted_users() + assert user.deleted is False + assert UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).exists() is False class CleanUpContentNodesTestCase(StudioTestCase): diff --git a/contentcuration/contentcuration/utils/garbage_collect.py b/contentcuration/contentcuration/utils/garbage_collect.py index f7444f5f2e..44e09e4dab 100755 --- a/contentcuration/contentcuration/utils/garbage_collect.py +++ b/contentcuration/contentcuration/utils/garbage_collect.py @@ -8,9 +8,11 @@ from celery import states from django.conf import settings from django.core.files.storage import default_storage +from django.db.models import Subquery from django.db.models.expressions import CombinedExpression from django.db.models.expressions import Exists from django.db.models.expressions import F +from django.db.models.expressions import OuterRef from django.db.models.expressions import Value from django.db.models.signals import post_delete from django.utils.timezone import now @@ -78,13 +80,14 @@ def clean_up_soft_deleted_users(): - all user invitations. """ account_deletion_buffer_delta = now() - datetime.timedelta(days=settings.ACCOUNT_DELETION_BUFFER) - deleted_users = User.objects.filter(deleted=True) - - for user in deleted_users: - latest_deletion_time = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).order_by("-performed_at").first() - if latest_deletion_time and latest_deletion_time.performed_at < account_deletion_buffer_delta: - user.hard_delete_user_related_data() - logging.info("Hard deleted user related data for user {}".format(user.email)) + user_latest_deletion_time_subquery = Subquery(UserHistory.objects.filter(user_id=OuterRef( + "id"), action=user_history.DELETION).values("performed_at").order_by("-performed_at")[:1]) + users_to_delete = User.objects.annotate(latest_deletion_time=user_latest_deletion_time_subquery).filter( + deleted=True, latest_deletion_time__lt=account_deletion_buffer_delta) + + for user in users_to_delete: + user.hard_delete_user_related_data() + logging.info("Hard deleted user related data for user {}.".format(user.email)) def clean_up_deleted_chefs(): From c1fd3f299432fd36e6250aafdb0823f7a3977f80 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Mon, 31 Oct 2022 17:55:00 +0530 Subject: [PATCH 049/313] Wrap channel name in tag & hyperlink between brackets --- .../registration/channel_published_email.html | 31 +++++++++++++++++++ .../registration/channel_published_email.txt | 27 ---------------- .../contentcuration/utils/publish.py | 8 ++--- 3 files changed, 35 insertions(+), 31 deletions(-) create mode 100644 contentcuration/contentcuration/templates/registration/channel_published_email.html delete mode 100644 contentcuration/contentcuration/templates/registration/channel_published_email.txt diff --git a/contentcuration/contentcuration/templates/registration/channel_published_email.html b/contentcuration/contentcuration/templates/registration/channel_published_email.html new file mode 100644 index 0000000000..476abbc1bd --- /dev/null +++ b/contentcuration/contentcuration/templates/registration/channel_published_email.html @@ -0,0 +1,31 @@ + +{% load i18n %} + + + + + + + {% autoescape off %} +
+  <p>
+    {% blocktrans with name=user.first_name %}Hello {{ name }},{% endblocktrans %}
+  </p>
+
+  <p>
+    <b>{% blocktrans with channel_name=channel.name %}{{ channel_name }}{% endblocktrans %}</b> (<a href="{{ domain }}{% url 'channel' channel_id=channel.pk %}">{{ domain }}{% url 'channel' channel_id=channel.pk %}</a>) {% translate "has finished publishing! Here is the channel token (for importing it into Kolibri):" %}
+  </p>
+
+  <p>
+    {% blocktrans with channel_token=token %}Token: {{ channel_token }}{% endblocktrans %}
+    <br>
+    {% blocktrans with channel_id=channel.pk %}ID (for Kolibri version 0.6.0 and below): {{ channel_id }}{% endblocktrans %}
+  </p>
+
+  <p>
+    {% blocktrans with notes=notes %}Version notes: {{ notes }}{% endblocktrans %}
+  </p>
+
+  <p>
+    {% translate "Thanks for using Kolibri Studio!" %}
+    <br>
+    {% translate "The Learning Equality Team" %}
+  </p>
+
+  <p>
+    {% translate "You are receiving this email because you are subscribed to this channel." %}
+  </p>
+  {% endautoescape %}
+</body>
+</html>
diff --git a/contentcuration/contentcuration/templates/registration/channel_published_email.txt b/contentcuration/contentcuration/templates/registration/channel_published_email.txt
deleted file mode 100644
index ddabab61b1..0000000000
--- a/contentcuration/contentcuration/templates/registration/channel_published_email.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-{% load i18n %}
-
-{% autoescape off %}
-{% blocktrans with name=user.first_name %}Hello {{ name }},{% endblocktrans %}
-
-{% blocktrans with channel_name=channel.name %}{{ channel_name }} has finished publishing!{% endblocktrans %}
-
-{% translate "Link to the channel: " %}{{ domain }}{% url 'channel' channel_id=channel.pk %}
-
-{% translate "Here is the channel token (for importing it into Kolibri):" %}
-
-{% blocktrans with channel_token=token %}Token: {{ channel_token }}{% endblocktrans %}
-
-{% blocktrans with channel_id=channel.pk %}ID (for Kolibri version 0.6.0 and below): {{ channel_id }}{% endblocktrans %}
-
-{% blocktrans with notes=notes %}Version notes: {{ notes }}{% endblocktrans %}
-
-
-{% translate "Thanks for using Kolibri Studio!" %}
-
-{% translate "The Learning Equality Team" %}
-
-
-
-{% translate "You are receiving this email because you are subscribed to this channel." %}
-
-{% endautoescape %}
diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py
index 0455083597..b3bbad1f37 100644
--- a/contentcuration/contentcuration/utils/publish.py
+++ b/contentcuration/contentcuration/utils/publish.py
@@ -99,15 +99,15 @@ def send_emails(channel, user_id, version_notes=''):

     if user_id:
         user = ccmodels.User.objects.get(pk=user_id)
-        message = render_to_string('registration/channel_published_email.txt',
+        message = render_to_string('registration/channel_published_email.html',
                                    {'channel': channel, 'user': user, 'token': token, 'notes': version_notes, 'domain': domain})
-        user.email_user(subject, message, settings.DEFAULT_FROM_EMAIL, )
+        user.email_user(subject, message, settings.DEFAULT_FROM_EMAIL, html_message=message)
     else:
         # Email all users about updates to channel
         for user in itertools.chain(channel.editors.all(), channel.viewers.all()):
-            message = render_to_string('registration/channel_published_email.txt',
+            message = render_to_string('registration/channel_published_email.html',
                                        {'channel': channel, 'user': user, 'token': token, 'notes': version_notes, 'domain': domain})
-            user.email_user(subject, message, settings.DEFAULT_FROM_EMAIL, )
+            user.email_user(subject, message, settings.DEFAULT_FROM_EMAIL, html_message=message)


From a974c514cdde4d2a427aba8033825091985a3fcb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 31 Oct 2022 16:07:15 +0000
Subject: [PATCH 050/313] Bump pillow from 9.2.0 to 9.3.0

Bumps [pillow](https://github.com/python-pillow/Pillow) from 9.2.0 to 9.3.0.
- [Release notes](https://github.com/python-pillow/Pillow/releases)
- [Changelog](https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst)
- [Commits](https://github.com/python-pillow/Pillow/compare/9.2.0...9.3.0)

---
updated-dependencies:
- dependency-name: pillow
  dependency-type: direct:production
  update-type: version-update:semver-minor
...
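Stepping back to the publish change above: send_emails() now renders the HTML template once and reuses the same string for both the plain body and the html_message alternative. A hedged sketch of that call pattern (the subject text and function name are illustrative, not from the patch):

    from django.conf import settings
    from django.template.loader import render_to_string


    def send_published_notification(user, channel, token, notes, domain):
        # One render; email_user() forwards html_message through send_mail(),
        # so mail clients get an HTML part alongside the text body.
        message = render_to_string(
            "registration/channel_published_email.html",
            {"channel": channel, "user": user, "token": token, "notes": notes, "domain": domain},
        )
        user.email_user("Your channel has been published!", message,
                        settings.DEFAULT_FROM_EMAIL, html_message=message)
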
Signed-off-by: dependabot[bot]
---
 requirements.in  | 2 +-
 requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.in b/requirements.in
index 8d4b5eb708..15d1491831 100644
--- a/requirements.in
+++ b/requirements.in
@@ -36,7 +36,7 @@ future
 sentry-sdk
 django-bulk-update
 html5lib==1.1
-pillow==9.2.0
+pillow==9.3.0
 python-dateutil>=2.8.1
 jsonschema>=3.2.0
 importlib-metadata==1.7.0
diff --git a/requirements.txt b/requirements.txt
index 8b478194a8..484592d630 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -179,7 +179,7 @@ packaging==20.9
     # redis
 pathlib==1.0.1
     # via -r requirements.in
-pillow==9.2.0
+pillow==9.3.0
     # via -r requirements.in
 progressbar2==3.55.0
     # via -r requirements.in

From 8d9d5036d113479e39a5388a7f56b40bff9a4088 Mon Sep 17 00:00:00 2001
From: ozer550
Date: Wed, 2 Nov 2022 23:52:16 +0530
Subject: [PATCH 051/313] update docstring

---
 contentcuration/contentcuration/views/base.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/contentcuration/contentcuration/views/base.py b/contentcuration/contentcuration/views/base.py
index d415778aab..9a7339481c 100644
--- a/contentcuration/contentcuration/views/base.py
+++ b/contentcuration/contentcuration/views/base.py
@@ -346,6 +346,12 @@ def activate_channel_endpoint(request):
 # flake8: noqa: C901
 def set_language(request):
     """
+    We are using set_language from the official Django set_language redirect view
+    https://docs.djangoproject.com/en/3.2/_modules/django/views/i18n/#set_language
+    and slightly modifying it for our use case: as we do not use AJAX, we do not
+    redirect, but rather just respond with the required URL!
+
+
     Since this view changes how the user will see the rest of the site, it must
     only be accessed as a POST request. If called as a GET request, it will
     error.

From a13cd878af3349b74e45150e822a1d3414db7a83 Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Tue, 8 Nov 2022 14:28:15 +0530
Subject: [PATCH 052/313] Fixes content_id bug whenever underlying content is
 changed :tada:

---
 contentcuration/contentcuration/models.py    | 39 +++++++++++++++++
 .../contentcuration/viewsets/file.py         | 13 +++++--
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py
index 2b9aea4224..c8190e5601 100644
--- a/contentcuration/contentcuration/models.py
+++ b/contentcuration/contentcuration/models.py
@@ -1777,6 +1777,14 @@ def mark_complete(self):  # noqa C901
         self.complete = not errors
         return errors

+    def make_content_id_unique(self):
+        """
+        If there exists a contentnode with the same content_id as the self,
+        then this updates content_id to a random uuid4.
+        """
+        if ContentNode.objects.exclude(pk=self.pk).filter(content_id=self.content_id).exists():
+            ContentNode.objects.filter(pk=self.pk).update(content_id=uuid.uuid4().hex)
+
     def on_create(self):
         self.changed = True
         self.recalculate_editors_storage()
@@ -2051,6 +2059,28 @@ def filter_view_queryset(cls, queryset, user):

         return queryset.filter(Q(view=True) | Q(edit=True) | Q(public=True))

+    def on_create(self):
+        """
+        When an exercise is added to a contentnode, update its content_id
+        if it's a copied contentnode.
+        """
+        self.contentnode.make_content_id_unique()
+
+    def on_update(self):
+        """
+        When an exercise of a contentnode is updated, update its content_id
+        if it's a copied contentnode.
+ """ + self.contentnode.make_content_id_unique() + + def delete(self, *args, **kwargs): + """ + When an exercise is deleted from a contentnode, update its content_id + if it's a copied contentnode. + """ + self.contentnode.make_content_id_unique() + return super(AssessmentItem, self).delete(*args, **kwargs) + class SlideshowSlide(models.Model): contentnode = models.ForeignKey('ContentNode', related_name="slideshow_slides", blank=True, null=True, @@ -2168,10 +2198,19 @@ def filename(self): return os.path.basename(self.file_on_disk.name) + def update_contentnode_content_id(self): + """ + If the file is attached to a contentnode then update that contentnode's content_id + if it's a copied contentnode. + """ + if self.contentnode: + self.contentnode.make_content_id_unique() + def on_update(self): # since modified was added later as a nullable field to File, we don't use a default but # instead we'll just make sure it's always updated through our serializers self.modified = timezone.now() + self.update_contentnode_content_id() def save(self, set_by_file_on_disk=True, *args, **kwargs): """ diff --git a/contentcuration/contentcuration/viewsets/file.py b/contentcuration/contentcuration/viewsets/file.py index 860645a146..89bab016a9 100644 --- a/contentcuration/contentcuration/viewsets/file.py +++ b/contentcuration/contentcuration/viewsets/file.py @@ -114,18 +114,23 @@ class FileViewSet(BulkDeleteMixin, BulkUpdateMixin, ReadOnlyValuesViewset): def delete_from_changes(self, changes): try: - # reset channel resource size cache + # Reset channel resource size cache. keys = [change["key"] for change in changes] - queryset = self.filter_queryset_from_keys( + files_qs = self.filter_queryset_from_keys( self.get_edit_queryset(), keys ).order_by() - # find all root nodes for files, and reset the cache modified date + # Find all root nodes for files, and reset the cache modified date. root_nodes = ContentNode.objects.filter( parent__isnull=True, - tree_id__in=queryset.values_list('contentnode__tree_id', flat=True).distinct(), + tree_id__in=files_qs.values_list('contentnode__tree_id', flat=True).distinct(), ) for root_node in root_nodes: ResourceSizeCache(root_node).reset_modified(None) + + # Update file's contentnode content_id. + for file in files_qs: + file.update_contentnode_content_id() + except Exception as e: report_exception(e) From 98a50cac6cbe270611b40e2177f15d4da29fedda Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 9 Nov 2022 12:58:00 +0000 Subject: [PATCH 053/313] Bump loader-utils from 1.4.0 to 1.4.1 Bumps [loader-utils](https://github.com/webpack/loader-utils) from 1.4.0 to 1.4.1. - [Release notes](https://github.com/webpack/loader-utils/releases) - [Changelog](https://github.com/webpack/loader-utils/blob/v1.4.1/CHANGELOG.md) - [Commits](https://github.com/webpack/loader-utils/compare/v1.4.0...v1.4.1) --- updated-dependencies: - dependency-name: loader-utils dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- yarn.lock | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/yarn.lock b/yarn.lock index 13ae47f92c..a99d51e19f 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8211,9 +8211,9 @@ loader-runner@^4.2.0: integrity sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg== loader-utils@^1.0.2, loader-utils@^1.1.0: - version "1.4.0" - resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.4.0.tgz#c579b5e34cb34b1a74edc6c1fb36bfa371d5a613" - integrity sha512-qH0WSMBtn/oHuwjy/NucEgbx5dbxxnxup9s4PVXJUDHZBQY+s0NWA9rJf53RBnQZxfch7euUui7hpoAPvALZdA== + version "1.4.1" + resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.4.1.tgz#278ad7006660bccc4d2c0c1578e17c5c78d5c0e0" + integrity sha512-1Qo97Y2oKaU+Ro2xnDMR26g1BwMT29jNbem1EvcujW2jqt+j5COXyscjM7bLQkM9HaxI7pkWeW7gnI072yMI9Q== dependencies: big.js "^5.2.2" emojis-list "^3.0.0" @@ -8644,11 +8644,16 @@ minimist-options@4.1.0: is-plain-obj "^1.1.0" kind-of "^6.0.3" -minimist@^1.1.1, minimist@^1.2.0, minimist@^1.2.5, minimist@^1.2.6: +minimist@^1.1.1, minimist@^1.2.5, minimist@^1.2.6: version "1.2.6" resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.6.tgz#8637a5b759ea0d6e98702cfb3a9283323c93af44" integrity sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q== +minimist@^1.2.0: + version "1.2.7" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.7.tgz#daa1c4d91f507390437c6a8bc01078e7000c4d18" + integrity sha512-bzfL1YUZsP41gmu/qjrEk0Q6i2ix/cVeAhbCbqH9u3zYutS1cLg00qhrD0M2MVdCcx4Sc0UpP2eBWo9rotpq6g== + minipass-collect@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/minipass-collect/-/minipass-collect-1.0.2.tgz#22b813bf745dc6edba2576b940022ad6edc8c617" From eb2c8aad36197146d476e9de51b968105a5ac7e0 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Fri, 11 Nov 2022 07:38:12 -0800 Subject: [PATCH 054/313] Remove logging config moved to global settings --- contentcuration/contentcuration/not_production_settings.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/contentcuration/contentcuration/not_production_settings.py b/contentcuration/contentcuration/not_production_settings.py index 44c5da8bc5..e98410433d 100644 --- a/contentcuration/contentcuration/not_production_settings.py +++ b/contentcuration/contentcuration/not_production_settings.py @@ -1,5 +1,3 @@ -import logging - from .settings import * # noqa ALLOWED_HOSTS = ["studio.local", "192.168.31.9", "127.0.0.1", "*"] @@ -10,7 +8,6 @@ POSTMARK_TEST_MODE = True SITE_ID = 2 -logging.basicConfig(level="INFO") # Allow the debug() context processor to add variables to template context. # Include here the IPs from which a local dev server might be accessed. 
See From 46695996fd6daeb3be0eff1c8790b4aabf9bca9a Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Fri, 11 Nov 2022 07:52:06 -0800 Subject: [PATCH 055/313] Cleanup default details and enhance list format validation --- contentcuration/contentcuration/models.py | 14 +++++++------- .../contentcuration/tests/test_contentnodes.py | 13 +++++++++---- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 8c772e46af..7fe911f14f 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -1490,14 +1490,14 @@ def get_details(self, channel_id=None): "resource_size": 0, "includes": {"coach_content": 0, "exercises": 0}, "kind_count": [], - "languages": "", - "accessible_languages": "", - "licenses": "", + "languages": [], + "accessible_languages": [], + "licenses": [], "tags": [], - "copyright_holders": "", - "authors": "", - "aggregators": "", - "providers": "", + "copyright_holders": [], + "authors": [], + "aggregators": [], + "providers": [], "sample_pathway": [], "original_channels": [], "sample_nodes": [], diff --git a/contentcuration/contentcuration/tests/test_contentnodes.py b/contentcuration/contentcuration/tests/test_contentnodes.py index 25569f8988..dd12dcba45 100644 --- a/contentcuration/contentcuration/tests/test_contentnodes.py +++ b/contentcuration/contentcuration/tests/test_contentnodes.py @@ -184,10 +184,15 @@ def test_get_node_details(self): assert details["resource_count"] > 0 assert details["resource_size"] > 0 assert len(details["kind_count"]) > 0 - assert len(details["authors"]) == len([author for author in details["authors"] if author]) - assert len(details["aggregators"]) == len([aggregator for aggregator in details["aggregators"] if aggregator]) - assert len(details["providers"]) == len([provider for provider in details["providers"] if provider]) - assert len(details["copyright_holders"]) == len([holder for holder in details["copyright_holders"] if holder]) + + # assert format of list fields, including that they do not contain invalid data + list_fields = [ + "kind_count", "languages", "accessible_languages", "licenses", "tags", "original_channels", + "authors", "aggregators", "providers", "copyright_holders" + ] + for field in list_fields: + self.assertIsInstance(details.get(field), list, f"Field '{field}' isn't a list") + self.assertEqual(len(details[field]), len([value for value in details[field] if value]), f"List field '{field}' has falsy values") class NodeOperationsTestCase(StudioTestCase): From 3d52560c506c3a2bc3d26721cee89cc3b883587d Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 16 Nov 2022 01:21:09 +0530 Subject: [PATCH 056/313] New content_id update mechanism - update content_id only of copied nodes - update content_id of nodes on channel syncing - fix sync of assessment items bug wherein assessment items were getting synced based on tags attribute --- contentcuration/contentcuration/models.py | 14 ++++++++------ contentcuration/contentcuration/utils/sync.py | 11 ++++++++++- .../contentcuration/viewsets/channel.py | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 7944c0513f..67fc6730b4 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -1780,10 +1780,12 @@ def mark_complete(self): # noqa C901 def make_content_id_unique(self): """ - If there exists 
a contentnode with the same content_id as the self, - then this updates content_id to a random uuid4. + If self is NOT an original contentnode (in other words, a copied contentnode) + and a contentnode with same content_id exists then we update self's content_id. """ - if ContentNode.objects.exclude(pk=self.pk).filter(content_id=self.content_id).exists(): + is_node_original = self.original_source_node_id is None or self.original_source_node_id == self.node_id + does_same_content_exists = ContentNode.objects.exclude(pk=self.pk).filter(content_id=self.content_id).exists() + if (not is_node_original) and does_same_content_exists: ContentNode.objects.filter(pk=self.pk).update(content_id=uuid.uuid4().hex) def on_create(self): @@ -2201,10 +2203,10 @@ def filename(self): def update_contentnode_content_id(self): """ - If the file is attached to a contentnode then update that contentnode's content_id - if it's a copied contentnode. + If the file is attached to a contentnode and is not a thumbnail + then update that contentnode's content_id if it's a copied contentnode. """ - if self.contentnode: + if self.contentnode and self.preset.thumbnail is False: self.contentnode.make_content_id_unique() def on_update(self): diff --git a/contentcuration/contentcuration/utils/sync.py b/contentcuration/contentcuration/utils/sync.py index c42be1d99d..47c334a9dc 100644 --- a/contentcuration/contentcuration/utils/sync.py +++ b/contentcuration/contentcuration/utils/sync.py @@ -106,7 +106,7 @@ def sync_node_tags(node, original): node.changed = True -def sync_node_files(node, original): +def sync_node_files(node, original): # noqa C901 """ Sync all files in ``node`` from the files in ``original`` node. """ @@ -118,6 +118,11 @@ def sync_node_files(node, original): else: file_key = file.preset_id node_files[file_key] = file + # If node has any non-thumbnail file then it means the node + # is an uploaded file. So, we equalize the content_id with the original. + if file.preset.thumbnail is False: + node.content_id = original.content_id + node.changed = True source_files = {} @@ -218,3 +223,7 @@ def sync_node_assessment_items(node, original): # noqa C901 if files_to_create: File.objects.bulk_create(files_to_create) node.changed = True + # Now, node and its original have same content so + # let us equalize its content_id. 
+ node.content_id = original.content_id + node.changed = True diff --git a/contentcuration/contentcuration/viewsets/channel.py b/contentcuration/contentcuration/viewsets/channel.py index c028405130..8a11dea614 100644 --- a/contentcuration/contentcuration/viewsets/channel.py +++ b/contentcuration/contentcuration/viewsets/channel.py @@ -545,7 +545,7 @@ def sync(self, pk, attributes=False, tags=False, files=False, assessment_items=F attributes, tags, files, - tags, + assessment_items, progress_tracker=progress_tracker, ) From f57811736f63ac5030a52b834d91402f57c2528d Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 16 Nov 2022 15:04:30 +0530 Subject: [PATCH 057/313] Fixes content_id behaviour on syncing --- contentcuration/contentcuration/utils/sync.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/contentcuration/contentcuration/utils/sync.py b/contentcuration/contentcuration/utils/sync.py index 47c334a9dc..02f44ad3d1 100644 --- a/contentcuration/contentcuration/utils/sync.py +++ b/contentcuration/contentcuration/utils/sync.py @@ -106,11 +106,12 @@ def sync_node_tags(node, original): node.changed = True -def sync_node_files(node, original): # noqa C901 +def sync_node_files(node, original): # noqa C901 """ Sync all files in ``node`` from the files in ``original`` node. """ node_files = {} + is_node_uploaded_file = False for file in node.files.all(): if file.preset_id == format_presets.VIDEO_SUBTITLE: @@ -119,10 +120,9 @@ def sync_node_files(node, original): # noqa C901 file_key = file.preset_id node_files[file_key] = file # If node has any non-thumbnail file then it means the node - # is an uploaded file. So, we equalize the content_id with the original. + # is an uploaded file. if file.preset.thumbnail is False: - node.content_id = original.content_id - node.changed = True + is_node_uploaded_file = True source_files = {} @@ -132,6 +132,10 @@ def sync_node_files(node, original): # noqa C901 else: file_key = file.preset_id source_files[file_key] = file + # If node has any non-thumbnail file then it means the node + # is an uploaded file. + if file.preset.thumbnail is False: + is_node_uploaded_file = True files_to_delete = [] files_to_create = [] @@ -153,6 +157,9 @@ def sync_node_files(node, original): # noqa C901 if files_to_create: File.objects.bulk_create(files_to_create) + if node.changed and is_node_uploaded_file: + node.content_id = original.content_id + assessment_item_fields = ( "type", @@ -223,7 +230,8 @@ def sync_node_assessment_items(node, original): # noqa C901 if files_to_create: File.objects.bulk_create(files_to_create) node.changed = True + # Now, node and its original have same content so # let us equalize its content_id. - node.content_id = original.content_id - node.changed = True + if node.changed: + node.content_id = original.content_id From 4c9bcd423c533c96e5599f135aa98638c2671b1a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 17 Nov 2022 12:56:14 +0000 Subject: [PATCH 058/313] Bump loader-utils from 1.4.1 to 1.4.2 Bumps [loader-utils](https://github.com/webpack/loader-utils) from 1.4.1 to 1.4.2. - [Release notes](https://github.com/webpack/loader-utils/releases) - [Changelog](https://github.com/webpack/loader-utils/blob/v1.4.2/CHANGELOG.md) - [Commits](https://github.com/webpack/loader-utils/compare/v1.4.1...v1.4.2) --- updated-dependencies: - dependency-name: loader-utils dependency-type: indirect ... 
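The one-line viewset fix above is easy to miss but important: `tags` was being passed where `assessment_items` belongs, so assessment-item syncing silently never ran. A sketch of the corrected call shape, assuming the underlying helper (called sync_channel here) takes the flags positionally as in the hunk:

    sync_channel(
        channel,
        attributes,        # sync node attribute changes
        tags,              # sync tag changes
        files,             # sync file changes
        assessment_items,  # `tags` was mistakenly passed here before the fix
        progress_tracker=progress_tracker,
    )
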
Signed-off-by: dependabot[bot] --- yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yarn.lock b/yarn.lock index f42a49c523..0c074104d7 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8201,9 +8201,9 @@ loader-runner@^4.2.0: integrity sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg== loader-utils@^1.0.2, loader-utils@^1.1.0: - version "1.4.1" - resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.4.1.tgz#278ad7006660bccc4d2c0c1578e17c5c78d5c0e0" - integrity sha512-1Qo97Y2oKaU+Ro2xnDMR26g1BwMT29jNbem1EvcujW2jqt+j5COXyscjM7bLQkM9HaxI7pkWeW7gnI072yMI9Q== + version "1.4.2" + resolved "https://registry.yarnpkg.com/loader-utils/-/loader-utils-1.4.2.tgz#29a957f3a63973883eb684f10ffd3d151fec01a3" + integrity sha512-I5d00Pd/jwMD2QCduo657+YM/6L3KZu++pmX9VFncxaxvHcru9jx1lBaFft+r4Mt2jK0Yhp41XlRAihzPxHNCg== dependencies: big.js "^5.2.2" emojis-list "^3.0.0" From 211ec2a413e6ae1cbc5b734f7d069405f2b1948f Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Sat, 19 Nov 2022 17:38:19 +0530 Subject: [PATCH 059/313] Content ID testcases for exercises and files --- .../tests/viewsets/test_assessmentitem.py | 158 +++++++++++++++++- .../tests/viewsets/test_file.py | 142 ++++++++++++++++ 2 files changed, 297 insertions(+), 3 deletions(-) diff --git a/contentcuration/contentcuration/tests/viewsets/test_assessmentitem.py b/contentcuration/contentcuration/tests/viewsets/test_assessmentitem.py index e6370e7a49..e1ffa0f9d3 100644 --- a/contentcuration/contentcuration/tests/viewsets/test_assessmentitem.py +++ b/contentcuration/contentcuration/tests/viewsets/test_assessmentitem.py @@ -262,12 +262,12 @@ def test_attempt_update_missing_assessmentitem(self): response = self.sync_changes( [ generate_update_event([ - self.channel.main_tree.get_descendants() + self.channel.main_tree.get_descendants() .filter(kind_id=content_kinds.EXERCISE) .first() .id, - uuid.uuid4().hex - ], + uuid.uuid4().hex + ], ASSESSMENTITEM, {"question": "but why is it missing in the first place?"}, channel_id=self.channel.id, @@ -736,3 +736,155 @@ def test_delete_assessmentitem(self): self.fail("AssessmentItem was not deleted") except models.AssessmentItem.DoesNotExist: pass + + +class ContentIDTestCase(SyncTestMixin, StudioAPITestCase): + def setUp(self): + super(ContentIDTestCase, self).setUp() + self.channel = testdata.channel() + self.user = testdata.user() + self.channel.editors.add(self.user) + self.client.force_authenticate(user=self.user) + + def _get_assessmentitem_metadata(self, assessment_id=None, contentnode_id=None): + return { + "assessment_id": assessment_id or uuid.uuid4().hex, + "contentnode_id": contentnode_id or self.channel.main_tree.get_descendants() + .filter(kind_id=content_kinds.EXERCISE) + .first() + .id, + } + + def _create_assessmentitem(self, assessmentitem): + self.sync_changes( + [ + generate_create_event( + [assessmentitem["contentnode_id"], assessmentitem["assessment_id"]], + ASSESSMENTITEM, + assessmentitem, + channel_id=self.channel.id, + ) + ], + ) + + def _update_assessmentitem(self, assessmentitem, update_dict): + self.sync_changes( + [ + generate_update_event( + [assessmentitem["contentnode_id"], assessmentitem["assessment_id"]], + ASSESSMENTITEM, + update_dict, + channel_id=self.channel.id, + ) + ], + ) + + def _delete_assessmentitem(self, assessmentitem): + self.sync_changes( + [ + generate_delete_event( + [assessmentitem["contentnode_id"], assessmentitem["assessment_id"]], + ASSESSMENTITEM, + channel_id=self.channel.id, + ) + 
+            ],
+        )
+
+    def test_content_id__same_on_copy(self):
+        # Make a copy of an existing assessmentitem contentnode.
+        assessmentitem_node = self.channel.main_tree.get_descendants().filter(kind_id=content_kinds.EXERCISE).first()
+        assessmentitem_node_copy = assessmentitem_node.copy_to(target=self.channel.main_tree)
+
+        # Assert that after copying, content_id is the same.
+        assessmentitem_node.refresh_from_db()
+        assessmentitem_node_copy.refresh_from_db()
+        self.assertEqual(assessmentitem_node.content_id, assessmentitem_node_copy.content_id)
+
+    def test_content_id__changes_on_new_assessmentitem(self):
+        # Make a copy of an existing assessmentitem contentnode.
+        assessmentitem_node = self.channel.main_tree.get_descendants().filter(kind_id=content_kinds.EXERCISE).first()
+        assessmentitem_node_copy = assessmentitem_node.copy_to(target=self.channel.main_tree)
+
+        # Create a new assessmentitem.
+        self._create_assessmentitem(self._get_assessmentitem_metadata(contentnode_id=assessmentitem_node_copy.id))
+
+        # Assert after creating a new assessmentitem on copied node, its content_id should change.
+        assessmentitem_node.refresh_from_db()
+        assessmentitem_node_copy.refresh_from_db()
+        self.assertNotEqual(assessmentitem_node.content_id, assessmentitem_node_copy.content_id)
+
+    def test_content_id__changes_on_deleting_assessmentitem(self):
+        # Make a copy of an existing assessmentitem contentnode.
+        assessmentitem_node = self.channel.main_tree.get_descendants().filter(kind_id=content_kinds.EXERCISE).first()
+        assessmentitem_node_copy = assessmentitem_node.copy_to(target=self.channel.main_tree)
+
+        # Delete an already present assessmentitem from copied contentnode.
+        assessmentitem_from_db = models.AssessmentItem.objects.filter(contentnode=assessmentitem_node_copy.id).first()
+        self._delete_assessmentitem(self._get_assessmentitem_metadata(assessmentitem_from_db.assessment_id, assessmentitem_node_copy.id))
+
+        # Assert after deleting assessmentitem on copied node, its content_id should change.
+        assessmentitem_node.refresh_from_db()
+        assessmentitem_node_copy.refresh_from_db()
+        self.assertNotEqual(assessmentitem_node.content_id, assessmentitem_node_copy.content_id)
+
+    def test_content_id__changes_on_updating_assessmentitem(self):
+        # Make a copy of an existing assessmentitem contentnode.
+        assessmentitem_node = self.channel.main_tree.get_descendants().filter(kind_id=content_kinds.EXERCISE).first()
+        assessmentitem_node_copy = assessmentitem_node.copy_to(target=self.channel.main_tree)
+
+        # Update an already present assessmentitem from copied contentnode.
+        assessmentitem_from_db = models.AssessmentItem.objects.filter(contentnode=assessmentitem_node_copy.id).first()
+        self._update_assessmentitem(self._get_assessmentitem_metadata(assessmentitem_from_db.assessment_id, assessmentitem_node_copy.id),
+                                    {"question": "New Question!"})
+
+        # Assert after updating assessmentitem on copied node, its content_id should change.
+        assessmentitem_node.refresh_from_db()
+        assessmentitem_node_copy.refresh_from_db()
+        self.assertNotEqual(assessmentitem_node.content_id, assessmentitem_node_copy.content_id)
+
+    def test_content_id__doesnot_changes_of_original_node(self):
+        # Make a copy of an existing assessmentitem contentnode.
+        assessmentitem_node = self.channel.main_tree.get_descendants().filter(kind_id=content_kinds.EXERCISE).first()
+        assessmentitem_node.copy_to(target=self.channel.main_tree)
+
+        content_id_before_updates = assessmentitem_node.content_id
+
+        # Create, update and delete assessmentitems from original contentnode.
+        assessmentitem_from_db = models.AssessmentItem.objects.filter(contentnode=assessmentitem_node.id).first()
+        self._update_assessmentitem(self._get_assessmentitem_metadata(assessmentitem_from_db.assessment_id, assessmentitem_node.id),
+                                    {"question": "New Question!"})
+        self._delete_assessmentitem(self._get_assessmentitem_metadata(assessmentitem_from_db.assessment_id, assessmentitem_node.id))
+        self._create_assessmentitem(self._get_assessmentitem_metadata(contentnode_id=assessmentitem_node.id))
+
+        # Assert content_id before and after updates remains the same.
+        assessmentitem_node.refresh_from_db()
+        content_id_after_updates = assessmentitem_node.content_id
+        self.assertEqual(content_id_before_updates, content_id_after_updates)
+
+    def test_content_id__doesnot_changes_if_already_unique(self):
+        # Make a copy of an existing assessmentitem contentnode.
+        assessmentitem_node = self.channel.main_tree.get_descendants().filter(kind_id=content_kinds.EXERCISE).first()
+        assessmentitem_node_copy = assessmentitem_node.copy_to(target=self.channel.main_tree)
+
+        # Create, update and delete assessmentitems of copied contentnode.
+        assessmentitem_from_db = models.AssessmentItem.objects.filter(contentnode=assessmentitem_node_copy.id).first()
+        self._update_assessmentitem(self._get_assessmentitem_metadata(assessmentitem_from_db.assessment_id, assessmentitem_node_copy.id),
+                                    {"question": "New Question!"})
+        self._delete_assessmentitem(self._get_assessmentitem_metadata(assessmentitem_from_db.assessment_id, assessmentitem_node_copy.id))
+        self._create_assessmentitem(self._get_assessmentitem_metadata(contentnode_id=assessmentitem_node_copy.id))
+
+        assessmentitem_node_copy.refresh_from_db()
+        content_id_after_first_update = assessmentitem_node_copy.content_id
+
+        # Once again, let us create, update and delete assessmentitems of copied contentnode.
+        assessmentitem_from_db = models.AssessmentItem.objects.filter(contentnode=assessmentitem_node_copy.id).first()
+        self._update_assessmentitem(self._get_assessmentitem_metadata(assessmentitem_from_db.assessment_id, assessmentitem_node_copy.id),
+                                    {"question": "New Question!"})
+        self._delete_assessmentitem(self._get_assessmentitem_metadata(assessmentitem_from_db.assessment_id, assessmentitem_node_copy.id))
+        self._create_assessmentitem(self._get_assessmentitem_metadata(contentnode_id=assessmentitem_node_copy.id))
+
+        assessmentitem_node_copy.refresh_from_db()
+        content_id_after_second_update = assessmentitem_node_copy.content_id
+
+        # Assert that after the first and second updates of the assessmentitem, content_id remains the same.
+ self.assertEqual(content_id_after_first_update, content_id_after_second_update) diff --git a/contentcuration/contentcuration/tests/viewsets/test_file.py b/contentcuration/contentcuration/tests/viewsets/test_file.py index 0fa5f146a6..23aa17adc2 100644 --- a/contentcuration/contentcuration/tests/viewsets/test_file.py +++ b/contentcuration/contentcuration/tests/viewsets/test_file.py @@ -443,3 +443,145 @@ def test_upload_url(self): self.assertEqual(response.status_code, 200) file = models.File.objects.get(checksum=self.file["checksum"]) self.assertEqual(10, file.duration) + + def test_upload_url_doesnot_sets_contentnode(self): + self.client.force_authenticate(user=self.user) + response = self.client.post(reverse("file-upload-url"), self.file, format="json",) + file = models.File.objects.get(checksum=self.file["checksum"]) + self.assertEqual(response.status_code, 200) + self.assertEqual(file.contentnode, None) + + +class ContentIDTestCase(SyncTestMixin, StudioAPITestCase): + def setUp(self): + super(ContentIDTestCase, self).setUp() + self.channel = testdata.channel() + self.user = testdata.user() + self.channel.editors.add(self.user) + self.client.force_authenticate(user=self.user) + + def _get_file_metadata(self): + return { + "size": 2500, + "checksum": uuid.uuid4().hex, + "name": "le_studio_file", + "file_format": file_formats.MP3, + "preset": format_presets.AUDIO, + } + + def _upload_file_to_contentnode(self, file_metadata=None, contentnode_id=None): + """ + This method mimics the frontend file upload process which is a two-step + process for the backend. + First, file's upload URL is fetched and then that file's ORM object is updated + to point to the contentnode. + """ + file = file_metadata or self._get_file_metadata() + self.client.post(reverse("file-upload-url"), file, format="json",) + file_from_db = models.File.objects.get(checksum=file["checksum"]) + self.sync_changes( + [generate_update_event( + file_from_db.id, + FILE, + { + "contentnode": contentnode_id or self.channel.main_tree.get_descendants().first().id + }, + channel_id=self.channel.id)],) + file_from_db.refresh_from_db() + return file_from_db + + def _delete_file_from_contentnode(self, file_from_db): + self.sync_changes( + [ + generate_delete_event(file_from_db.id, FILE, channel_id=self.channel.id), + ], + ) + + def test_content_id__same_on_copy_file_node(self): + file = self._upload_file_to_contentnode() + file_contentnode_copy = file.contentnode.copy_to(target=self.channel.main_tree) + + # Assert content_id same after copying. + file.contentnode.refresh_from_db() + file_contentnode_copy.refresh_from_db() + self.assertEqual(file.contentnode.content_id, file_contentnode_copy.content_id) + + def test_content_id__changes_on_upload_file_to_node(self): + file = self._upload_file_to_contentnode() + file_contentnode_copy = file.contentnode.copy_to(target=self.channel.main_tree) + + # Upload a new file to the copied contentnode. + self._upload_file_to_contentnode(contentnode_id=file_contentnode_copy.id) + + # Assert after new file upload, content_id changes. + file.contentnode.refresh_from_db() + file_contentnode_copy.refresh_from_db() + self.assertNotEqual(file.contentnode.content_id, file_contentnode_copy.content_id) + + def test_content_id__changes_on_delete_file_from_node(self): + file = self._upload_file_to_contentnode() + file_contentnode_copy = file.contentnode.copy_to(target=self.channel.main_tree) + + # Delete file from the copied contentnode. 
+ self._delete_file_from_contentnode(file_from_db=file_contentnode_copy.files.first()) + + # Assert after deleting file, content_id changes. + file.contentnode.refresh_from_db() + file_contentnode_copy.refresh_from_db() + self.assertNotEqual(file.contentnode.content_id, file_contentnode_copy.content_id) + + def test_content_id__doesnot_changes_on_update_original_file_node(self): + file = self._upload_file_to_contentnode() + file.contentnode.copy_to(target=self.channel.main_tree) + + # Upload and delete file from the original contentnode. + content_id_before_updates = file.contentnode.content_id + self._upload_file_to_contentnode(contentnode_id=file.contentnode.id) + self._delete_file_from_contentnode(file_from_db=file) + + # Assert after changes to original contentnode, content_id remains same. + file.contentnode.refresh_from_db() + content_id_after_updates = file.contentnode.content_id + self.assertEqual(content_id_before_updates, content_id_after_updates) + + def test_content_id__doesnot_update_if_unique(self): + file = self._upload_file_to_contentnode() + file_contentnode_copy = file.contentnode.copy_to(target=self.channel.main_tree) + + # Upload a new file to the copied contentnode. + self._upload_file_to_contentnode(contentnode_id=file_contentnode_copy.id) + file_contentnode_copy.refresh_from_db() + content_id_after_first_update = file_contentnode_copy.content_id + + # Upload another new file to the copied contentnode. At this point, + # the content_id of copied node is already unique so it should not be updated. + self._upload_file_to_contentnode(contentnode_id=file_contentnode_copy.id) + file_contentnode_copy.refresh_from_db() + content_id_after_second_update = file_contentnode_copy.content_id + + self.assertEqual(content_id_after_first_update, content_id_after_second_update) + + def test_content_id__thumbnails_dont_update_content_id(self): + file = self._upload_file_to_contentnode() + file_contentnode_copy = file.contentnode.copy_to(target=self.channel.main_tree) + + thumbnail_file_meta_1 = self._get_file_metadata() + thumbnail_file_meta_2 = self._get_file_metadata() + thumbnail_file_meta_1.update({"preset": format_presets.AUDIO_THUMBNAIL, "file_format": file_formats.JPEG, }) + thumbnail_file_meta_2.update({"preset": format_presets.AUDIO_THUMBNAIL, "file_format": file_formats.JPEG, }) + + # Upload thumbnail to original contentnode and copied contentnode. + # content_id should remain same for both these nodes. + original_node_content_id_before_upload = file.contentnode.content_id + copied_node_content_id_before_upload = file_contentnode_copy.content_id + self._upload_file_to_contentnode(file_metadata=thumbnail_file_meta_1, contentnode_id=file.contentnode.id) + self._upload_file_to_contentnode(file_metadata=thumbnail_file_meta_2, contentnode_id=file_contentnode_copy.id) + + # Assert content_id is same after uploading thumbnails to nodes. + file.contentnode.refresh_from_db() + file_contentnode_copy.refresh_from_db() + original_node_content_id_after_upload = file.contentnode.content_id + copied_node_content_id_after_upload = file_contentnode_copy.content_id + + self.assertEqual(original_node_content_id_before_upload, original_node_content_id_after_upload) + self.assertEqual(copied_node_content_id_before_upload, copied_node_content_id_after_upload) From 783b9c8e2803bf45f42b0e929d4eeb4d5171807b Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Mon, 21 Nov 2022 17:57:52 +0530 Subject: [PATCH 060/313] Sync channel and contentnode model testcases. Ready to merge with confidence! 
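Before the diff, a note for reviewers: the test helpers above exercise Studio's two-step upload, and the sketch below gathers the shape of that flow in one place. The endpoint name, event helper, and models are the ones used in the diffs; the wrapper function itself is illustrative.

from django.urls import reverse

from contentcuration import models
from contentcuration.tests.viewsets.base import generate_update_event
from contentcuration.viewsets.sync.constants import FILE


def upload_and_attach(client, sync_changes, channel, metadata, contentnode_id):
    # Step 1: request an upload URL, which creates the File row with no node.
    client.post(reverse("file-upload-url"), metadata, format="json")
    file = models.File.objects.get(checksum=metadata["checksum"])
    # Step 2: attach the File to a contentnode via a sync UPDATE event; this
    # write is what triggers update_contentnode_content_id() and, on a copied
    # node, make_content_id_unique().
    sync_changes([
        generate_update_event(
            file.id, FILE, {"contentnode": contentnode_id}, channel_id=channel.id
        ),
    ])
    file.refresh_from_db()
    return file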
--- .../contentcuration/tests/test_models.py | 25 ++++ .../contentcuration/tests/test_sync.py | 127 ++++++++++++++++++ .../contentcuration/tests/viewsets/base.py | 13 ++ .../tests/viewsets/test_channel.py | 26 ++++ 4 files changed, 191 insertions(+) diff --git a/contentcuration/contentcuration/tests/test_models.py b/contentcuration/contentcuration/tests/test_models.py index 3c0caa9967..69322abc92 100644 --- a/contentcuration/contentcuration/tests/test_models.py +++ b/contentcuration/contentcuration/tests/test_models.py @@ -439,6 +439,31 @@ def test_filter_by_pk__tree_id_updated_on_move(self): self.assertEqual(after_move_sourcenode.tree_id, testchannel.trash_tree.tree_id) self.assertEqual(tree_id_from_cache, testchannel.trash_tree.tree_id) + def test_make_content_id_unique(self): + channel_original = testdata.channel() + channel_importer = testdata.channel() + + # Import a node from a channel. + original_node = channel_original.main_tree.get_descendants().first() + copied_node = original_node.copy_to(target=channel_importer.main_tree) + + original_node.refresh_from_db() + copied_node.refresh_from_db() + + original_node_old_content_id = original_node.content_id + copied_node_old_content_id = copied_node.content_id + + original_node.make_content_id_unique() + copied_node.make_content_id_unique() + + original_node.refresh_from_db() + copied_node.refresh_from_db() + + # Assert that original node's content_id doesn't change. + self.assertEqual(original_node_old_content_id, original_node.content_id) + # Assert copied node's content_id changes. + self.assertNotEqual(copied_node_old_content_id, copied_node.content_id) + class AssessmentItemTestCase(PermissionQuerysetTestCase): @property diff --git a/contentcuration/contentcuration/tests/test_sync.py b/contentcuration/contentcuration/tests/test_sync.py index 5948ea0d06..0f2459126b 100644 --- a/contentcuration/contentcuration/tests/test_sync.py +++ b/contentcuration/contentcuration/tests/test_sync.py @@ -1,14 +1,27 @@ from __future__ import absolute_import +import uuid + +from django.urls import reverse from le_utils.constants import content_kinds +from le_utils.constants import file_formats +from le_utils.constants import format_presets from .base import StudioTestCase from .testdata import create_temp_file from contentcuration.models import AssessmentItem from contentcuration.models import Channel from contentcuration.models import ContentTag +from contentcuration.models import File +from contentcuration.tests import testdata +from contentcuration.tests.base import StudioAPITestCase +from contentcuration.tests.viewsets.base import generate_create_event +from contentcuration.tests.viewsets.base import generate_update_event +from contentcuration.tests.viewsets.base import SyncTestMixin from contentcuration.utils.publish import mark_all_nodes_as_published from contentcuration.utils.sync import sync_channel +from contentcuration.viewsets.sync.constants import ASSESSMENTITEM +from contentcuration.viewsets.sync.constants import FILE class SyncTestCase(StudioTestCase): @@ -256,3 +269,117 @@ def test_sync_tags_add_multiple_tags(self): ) self.assertTrue(self.derivative_channel.has_changes()) + + +class ContentIDTestCase(SyncTestMixin, StudioAPITestCase): + def setUp(self): + super(ContentIDTestCase, self).setUp() + self.channel = testdata.channel() + self.user = testdata.user() + self.channel.editors.add(self.user) + self.client.force_authenticate(user=self.user) + + def _get_assessmentitem_metadata(self, assessment_id=None, contentnode_id=None): + return { 
+ "assessment_id": assessment_id or uuid.uuid4().hex, + "contentnode_id": contentnode_id or self.channel.main_tree.get_descendants() + .filter(kind_id=content_kinds.EXERCISE) + .first() + .id, + } + + def _get_file_metadata(self): + return { + "size": 2500, + "checksum": uuid.uuid4().hex, + "name": "le_studio_file", + "file_format": file_formats.MP3, + "preset": format_presets.AUDIO, + } + + def _upload_file_to_contentnode(self, file_metadata=None, contentnode_id=None): + """ + This method mimics the frontend file upload process which is a two-step + process for the backend. + First, file's upload URL is fetched and then that file's ORM object is updated + to point to the contentnode. + """ + file = file_metadata or self._get_file_metadata() + self.client.post(reverse("file-upload-url"), file, format="json",) + file_from_db = File.objects.get(checksum=file["checksum"]) + self.sync_changes( + [generate_update_event( + file_from_db.id, + FILE, + { + "contentnode": contentnode_id or self.channel.main_tree.get_descendants().first().id + }, + channel_id=self.channel.id)],) + file_from_db.refresh_from_db() + return file_from_db + + def _create_assessmentitem(self, assessmentitem, channel_id): + self.sync_changes( + [ + generate_create_event( + [assessmentitem["contentnode_id"], assessmentitem["assessment_id"]], + ASSESSMENTITEM, + assessmentitem, + channel_id=channel_id, + ) + ], + ) + + def test_content_id__becomes_equal_on_channel_sync_assessment_item(self): + # Make a copy of an existing assessmentitem contentnode. + assessmentitem_node = self.channel.main_tree.get_descendants().filter(kind_id=content_kinds.EXERCISE).first() + assessmentitem_node_copy = assessmentitem_node.copy_to(target=self.channel.main_tree) + + # Create a new assessmentitem. + self._create_assessmentitem( + assessmentitem=self._get_assessmentitem_metadata(contentnode_id=assessmentitem_node_copy.id), + channel_id=self.channel.id + ) + + # Assert after creating a new assessmentitem on copied node, it's content_id is changed. + assessmentitem_node.refresh_from_db() + assessmentitem_node_copy.refresh_from_db() + self.assertNotEqual(assessmentitem_node.content_id, assessmentitem_node_copy.content_id) + + # Syncs channel. + self.channel.main_tree.refresh_from_db() + self.channel.save() + sync_channel( + self.channel, + sync_assessment_items=True, + ) + + # Now after syncing the original and copied node should have same content_id. + assessmentitem_node.refresh_from_db() + assessmentitem_node_copy.refresh_from_db() + self.assertEqual(assessmentitem_node.content_id, assessmentitem_node_copy.content_id) + + def test_content_id__becomes_equal_on_channel_sync_file(self): + file = self._upload_file_to_contentnode() + file_contentnode_copy = file.contentnode.copy_to(target=self.channel.main_tree) + + # Upload a new file to the copied contentnode. + self._upload_file_to_contentnode(contentnode_id=file_contentnode_copy.id) + + # Assert after new file upload, content_id changes. + file.contentnode.refresh_from_db() + file_contentnode_copy.refresh_from_db() + self.assertNotEqual(file.contentnode.content_id, file_contentnode_copy.content_id) + + # Syncs channel. + self.channel.main_tree.refresh_from_db() + self.channel.save() + sync_channel( + self.channel, + sync_files=True, + ) + + # Assert that after channel syncing, content_id becomes equal. 
+ file.contentnode.refresh_from_db() + file_contentnode_copy.refresh_from_db() + self.assertEqual(file.contentnode.content_id, file_contentnode_copy.content_id) diff --git a/contentcuration/contentcuration/tests/viewsets/base.py b/contentcuration/contentcuration/tests/viewsets/base.py index 5eca0415fe..48ddd1c995 100644 --- a/contentcuration/contentcuration/tests/viewsets/base.py +++ b/contentcuration/contentcuration/tests/viewsets/base.py @@ -5,6 +5,9 @@ from contentcuration.celery import app from contentcuration.models import Change from contentcuration.tests.helpers import clear_tasks +from contentcuration.viewsets.sync.constants import CHANNEL +from contentcuration.viewsets.sync.constants import SYNCED +from contentcuration.viewsets.sync.utils import _generate_event as base_generate_event from contentcuration.viewsets.sync.utils import generate_copy_event as base_generate_copy_event from contentcuration.viewsets.sync.utils import generate_create_event as base_generate_create_event from contentcuration.viewsets.sync.utils import generate_delete_event as base_generate_delete_event @@ -35,6 +38,16 @@ def generate_update_event(*args, **kwargs): return event +def generate_sync_channel_event(channel_id, attributes, tags, files, assessment_items): + event = base_generate_event(key=channel_id, table=CHANNEL, event_type=SYNCED, channel_id=channel_id, user_id=None) + event["rev"] = random.randint(1, 10000000) + event["attributes"] = attributes + event["tags"] = tags + event["files"] = files + event["assessment_items"] = assessment_items + return event + + class SyncTestMixin(object): celery_task_always_eager = None diff --git a/contentcuration/contentcuration/tests/viewsets/test_channel.py b/contentcuration/contentcuration/tests/viewsets/test_channel.py index 63b0940ae4..b88f54ad98 100644 --- a/contentcuration/contentcuration/tests/viewsets/test_channel.py +++ b/contentcuration/contentcuration/tests/viewsets/test_channel.py @@ -2,6 +2,7 @@ import uuid +import mock from django.urls import reverse from contentcuration import models @@ -9,6 +10,7 @@ from contentcuration.tests.base import StudioAPITestCase from contentcuration.tests.viewsets.base import generate_create_event from contentcuration.tests.viewsets.base import generate_delete_event +from contentcuration.tests.viewsets.base import generate_sync_channel_event from contentcuration.tests.viewsets.base import generate_update_event from contentcuration.tests.viewsets.base import SyncTestMixin from contentcuration.viewsets.sync.constants import CHANNEL @@ -273,6 +275,30 @@ def test_cannot_delete_some_channels(self): self.assertTrue(models.Channel.objects.get(id=channel1.id).deleted) self.assertFalse(models.Channel.objects.get(id=channel2.id).deleted) + @mock.patch("contentcuration.viewsets.channel.sync_channel") + def test_sync_channel_called_correctly(self, sync_channel_mock): + user = testdata.user() + channel = testdata.channel() + channel.editors.add(user) + channel_node = channel.main_tree.get_descendants().first() + channel_node.copy_to(target=channel.main_tree) + + self.client.force_authenticate(user=user) + for i in range(1, 5): + sync_channel_mock.reset_mock() + args = [channel.id, False, False, False, False] + args[i] = True + + response = self.sync_changes( + [ + generate_sync_channel_event(*args) + ] + ) + + self.assertEqual(response.status_code, 200) + self.assertEqual(sync_channel_mock.call_args.args[i], True) + sync_channel_mock.assert_called_once() + class CRUDTestCase(StudioAPITestCase): @property From 
347901cedf10c7aca107d3c3a6ad9eb6e1e094e4 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Fri, 16 Sep 2022 15:57:20 -0700 Subject: [PATCH 061/313] Initialize postgres docker container with developer defined SQL --- .docker/README.md | 17 +++++++++++++++++ .gitignore | 7 +++++-- Makefile | 25 +++++++++++++++++++++---- docker-compose.yml | 3 ++- 4 files changed, 45 insertions(+), 7 deletions(-) create mode 100644 .docker/README.md diff --git a/.docker/README.md b/.docker/README.md new file mode 100644 index 0000000000..66b76b0f2d --- /dev/null +++ b/.docker/README.md @@ -0,0 +1,17 @@ +## What is this directory? +This directory is a space for mounting directories to docker containers, allowing the mounts to be specified in committed code, but the contents of the mounts to remain ignored by git. + +### postgres +The `postgres` directory is mounted to `/docker-entrypoint-initdb.d`. Any `.sh` or `.sql` files will be executed when the container is first started with a new data volume. You may read more regarding this functionality on the [Docker Hub page](https://hub.docker.com/_/postgres), under _Initialization scripts_. + +When running docker services through the Makefile commands, it specifies a docker-compose project name that depends on the name of the current git branch. This causes the volumes to change when the branch changes, which is helpful when switching between many branches that might have incompatible database schema changes. The downside is that whenever you start a new branch, you'll have to re-initialize the database again, like with `yarn run devsetup`. Creating a SQL dump from an existing, initialized database and placing it in this directory will allow you to skip this step. + +To create a SQL dump of your preferred database data useful for local testing, run `make .docker/postgres/init.sql` while the docker postgres container is running. + +> Note: you will likely need to run `make migrate` to ensure your database schema is up-to-date when using this technique. + +#### pgpass +Stores the postgres authentication for the docker service for scripting access without manually providing a password, created by `make .docker/pgpass` + +### minio +The `minio` directory is mounted to `/data`, since it isn't necessarily useful to have this data isolated based off the current git branch. 
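As a supplement to the README above, the dump behind `make .docker/postgres/init.sql` can also be produced from a small script. A rough Python equivalent, assuming the service credentials defined in docker-compose.yml and a postgres container already running:

import os
import pathlib
import subprocess

docker_dir = pathlib.Path(".docker")
docker_dir.joinpath("postgres").mkdir(parents=True, exist_ok=True)

# pgpass format is host:port:database:user:password; libpq rejects the file
# unless it is readable only by the owner.
pgpass = docker_dir / "pgpass"
pgpass.write_text("localhost:5432:kolibri-studio:learningequality:kolibri\n")
pgpass.chmod(0o600)

subprocess.run(
    [
        "pg_dump", "--host", "localhost", "--port", "5432",
        "--username", "learningequality", "--dbname", "kolibri-studio",
        "--file", str(docker_dir / "postgres" / "init.sql"),
    ],
    env={**os.environ, "PGPASSFILE": str(pgpass)},
    check=True,
)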
diff --git a/.gitignore b/.gitignore index b5e0261f09..695051b1ba 100644 --- a/.gitignore +++ b/.gitignore @@ -95,8 +95,11 @@ contentcuration/csvs/ # Ignore the TAGS file generated by some editors TAGS -# Ignore Vagrant-created files -/.vagrant/ +# Services +.vagrant/ +.docker/minio/* +.docker/postgres/* +.docker/pgpass # Ignore test files /contentcuration/contentcuration/proxy_settings.py diff --git a/Makefile b/Makefile index 99b55d3762..5a017618cb 100644 --- a/Makefile +++ b/Makefile @@ -138,15 +138,29 @@ devceleryworkers: run-services: $(MAKE) -j 2 dcservicesup devceleryworkers +.docker/minio: + mkdir -p $@ + +.docker/postgres: + mkdir -p $@ + +.docker/pgpass: + echo "localhost:5432:kolibri-studio:learningequality:kolibri" > .docker/pgpass + chmod 600 .docker/pgpass + +.docker/postgres/init.sql: .docker/pgpass + # assumes postgres is running in a docker container + PGPASSFILE=.docker/pgpass pg_dump --host localhost --port 5432 --username learningequality --dbname "kolibri-studio" --file $@ + dcbuild: # build all studio docker image and all dependent services using docker-compose docker-compose build -dcup: +dcup: .docker/minio .docker/postgres # run all services except for cloudprober docker-compose up studio-app celery-worker -dcup-cloudprober: +dcup-cloudprober: .docker/minio .docker/postgres # run all services including cloudprober docker-compose up @@ -163,11 +177,14 @@ dcshell: # bash shell inside the (running!) studio-app container docker-compose exec studio-app /usr/bin/fish -dctest: +dcpsql: .docker/pgpass + PGPASSFILE=.docker/pgpass psql --host localhost --port 5432 --username learningequality --dbname "kolibri-studio" + +dctest: .docker/minio .docker/postgres # run backend tests inside docker, in new instances docker-compose run studio-app make test -dcservicesup: +dcservicesup: .docker/minio .docker/postgres # launch all studio's dependent services using docker-compose docker-compose -f docker-compose.yml -f docker-compose.alt.yml up minio postgres redis diff --git a/docker-compose.yml b/docker-compose.yml index 7f21cc2844..f0b2cb7b86 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -50,7 +50,7 @@ services: MINIO_SECRET_KEY: development MINIO_API_CORS_ALLOW_ORIGIN: 'http://localhost:8080,http://127.0.0.1:8080' volumes: - - minio_data:/data + - .docker/minio:/data postgres: image: postgres:12 @@ -61,6 +61,7 @@ services: POSTGRES_DB: kolibri-studio volumes: - pgdata:/var/lib/postgresql/data/pgdata + - .docker/postgres:/docker-entrypoint-initdb.d redis: image: redis:6.0.9 From 2a0aac1c4a0e357a58ab1dbfe836d5f0cba20127 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Fri, 16 Sep 2022 16:13:40 -0700 Subject: [PATCH 062/313] Replace target name to avoid duplicating --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5a017618cb..398f68e878 100644 --- a/Makefile +++ b/Makefile @@ -145,8 +145,8 @@ run-services: mkdir -p $@ .docker/pgpass: - echo "localhost:5432:kolibri-studio:learningequality:kolibri" > .docker/pgpass - chmod 600 .docker/pgpass + echo "localhost:5432:kolibri-studio:learningequality:kolibri" > $@ + chmod 600 $@ .docker/postgres/init.sql: .docker/pgpass # assumes postgres is running in a docker container From 9d1b82f73aea30b72624e4f5a6d61ccaaf3a9e35 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Mon, 19 Sep 2022 08:54:04 -0700 Subject: [PATCH 063/313] Update to use pgpass file since password env var doesn't work anymore --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff 
--git a/Makefile b/Makefile index 398f68e878..0cadc47528 100644 --- a/Makefile +++ b/Makefile @@ -126,9 +126,9 @@ hascaptions: export COMPOSE_PROJECT_NAME=studio_$(shell git rev-parse --abbrev-ref HEAD) -purge-postgres: - -PGPASSWORD=kolibri dropdb -U learningequality "kolibri-studio" --port 5432 -h localhost - PGPASSWORD=kolibri createdb -U learningequality "kolibri-studio" --port 5432 -h localhost +purge-postgres: .docker/pgpass + -PGPASSFILE=.docker/pgpass dropdb -U learningequality "kolibri-studio" --port 5432 -h localhost + PGPASSFILE=.docker/pgpass createdb -U learningequality "kolibri-studio" --port 5432 -h localhost destroy-and-recreate-database: purge-postgres setup From c09f109af86da7a4c074fc16c2a941aa77fa155c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 23 Nov 2022 16:01:36 +0000 Subject: [PATCH 064/313] Bump jsonschema from 4.16.0 to 4.17.1 Bumps [jsonschema](https://github.com/python-jsonschema/jsonschema) from 4.16.0 to 4.17.1. - [Release notes](https://github.com/python-jsonschema/jsonschema/releases) - [Changelog](https://github.com/python-jsonschema/jsonschema/blob/main/CHANGELOG.rst) - [Commits](https://github.com/python-jsonschema/jsonschema/compare/v4.16.0...v4.17.1) --- updated-dependencies: - dependency-name: jsonschema dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e6068899d1..2bc759f7b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -162,7 +162,7 @@ jmespath==0.10.0 # botocore jsonfield==3.1.0 # via -r requirements.in -jsonschema==4.16.0 +jsonschema==4.17.1 # via -r requirements.in kombu==5.2.4 # via celery From d79cc65103389f6c3ecf194ff5f4e59aef5efd5f Mon Sep 17 00:00:00 2001 From: AllanOXDi Date: Fri, 25 Nov 2022 19:08:24 +0300 Subject: [PATCH 065/313] disabled double submit while creating a channel --- .../frontend/shared/views/channel/ChannelModal.vue | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/contentcuration/contentcuration/frontend/shared/views/channel/ChannelModal.vue b/contentcuration/contentcuration/frontend/shared/views/channel/ChannelModal.vue index b76c6782b2..d2eb9623f6 100644 --- a/contentcuration/contentcuration/frontend/shared/views/channel/ChannelModal.vue +++ b/contentcuration/contentcuration/frontend/shared/views/channel/ChannelModal.vue @@ -72,7 +72,7 @@ v-model="contentDefaults" /> - + {{ isNew ? 
$tr('createButton') : $tr('saveChangesButton' ) }} @@ -153,6 +153,7 @@ showUnsavedDialog: false, diffTracker: {}, dialog: true, + isDisable: false, }; }, computed: { @@ -287,21 +288,25 @@ ...mapActions('channel', ['updateChannel', 'loadChannel', 'commitChannel']), ...mapMutations('channel', ['REMOVE_CHANNEL']), saveChannel() { + this.isDisable = true; if (this.$refs.detailsform.validate()) { this.changed = false; if (this.isNew) { return this.commitChannel({ id: this.channelId, ...this.diffTracker }).then(() => { // TODO: Make sure channel gets created before navigating to channel window.location = window.Urls.channel(this.channelId); + this.isDisable = false; }); } else { return this.updateChannel({ id: this.channelId, ...this.diffTracker }).then(() => { this.$store.dispatch('showSnackbarSimple', this.$tr('changesSaved')); this.header = this.channel.name; + this.isDisable = false; }); } } else if (this.$refs.detailsform.$el.scrollIntoView) { this.$refs.detailsform.$el.scrollIntoView({ behavior: 'smooth' }); + this.isDisable = false; } }, updateTitleForPage() { From c3d58f77efaa4ce2065c18fe98af2706f374cbcb Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Sat, 26 Nov 2022 13:05:50 +0530 Subject: [PATCH 066/313] Short circuit query evaluation --- contentcuration/contentcuration/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 67fc6730b4..3ce8f0bf5c 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -1784,8 +1784,8 @@ def make_content_id_unique(self): and a contentnode with same content_id exists then we update self's content_id. """ is_node_original = self.original_source_node_id is None or self.original_source_node_id == self.node_id - does_same_content_exists = ContentNode.objects.exclude(pk=self.pk).filter(content_id=self.content_id).exists() - if (not is_node_original) and does_same_content_exists: + node_same_content_id = ContentNode.objects.exclude(pk=self.pk).filter(content_id=self.content_id) + if (not is_node_original) and node_same_content_id.exists(): ContentNode.objects.filter(pk=self.pk).update(content_id=uuid.uuid4().hex) def on_create(self): From b74a5c739cce0b3ab7f6f82683fb26fc6adce057 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Nov 2022 16:04:10 +0000 Subject: [PATCH 067/313] Bump django-s3-storage from 0.13.9 to 0.13.11 Bumps [django-s3-storage](https://github.com/etianen/django-s3-storage) from 0.13.9 to 0.13.11. - [Release notes](https://github.com/etianen/django-s3-storage/releases) - [Changelog](https://github.com/etianen/django-s3-storage/blob/master/CHANGELOG.rst) - [Commits](https://github.com/etianen/django-s3-storage/compare/0.13.9...0.13.11) --- updated-dependencies: - dependency-name: django-s3-storage dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 2704539269..7f1bc54d5f 100644 --- a/requirements.in +++ b/requirements.in @@ -20,7 +20,7 @@ Django==3.2.14 django-webpack-loader==0.7.0 google-cloud-error-reporting google-cloud-storage -django-s3-storage==0.13.9 +django-s3-storage==0.13.11 requests>=2.20.0 google-cloud-core django-db-readonly==0.7.0 diff --git a/requirements.txt b/requirements.txt index e6068899d1..ee2a46cd00 100644 --- a/requirements.txt +++ b/requirements.txt @@ -97,7 +97,7 @@ django-redis==5.2.0 # via -r requirements.in django-registration==3.3 # via -r requirements.in -django-s3-storage==0.13.9 +django-s3-storage==0.13.11 # via -r requirements.in django-webpack-loader==0.7.0 # via -r requirements.in From c849a27cc6f2b0f542af675cff647b1d3b16b83c Mon Sep 17 00:00:00 2001 From: AllanOXDi Date: Mon, 28 Nov 2022 19:40:18 +0300 Subject: [PATCH 068/313] handling the double create in the backend --- contentcuration/contentcuration/viewsets/base.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/contentcuration/contentcuration/viewsets/base.py b/contentcuration/contentcuration/viewsets/base.py index f2e786ab07..e159880d67 100644 --- a/contentcuration/contentcuration/viewsets/base.py +++ b/contentcuration/contentcuration/viewsets/base.py @@ -6,6 +6,7 @@ from celery import states from django.core.exceptions import ObjectDoesNotExist from django.db.models import Q +from django.db.utils import IntegrityError from django.http import Http404 from django.http.request import HttpRequest from django_bulk_update.helper import bulk_update @@ -659,7 +660,12 @@ def create_from_changes(self, changes): def create(self, request, *args, **kwargs): serializer = self.get_serializer(data=request.data) serializer.is_valid(raise_exception=True) - self.perform_create(serializer) + + try: + self.perform_create(serializer) + + except IntegrityError as e: + return Response({"error": str(e)}, status=409) instance = serializer.instance return Response(self.serialize_object(pk=instance.pk), status=HTTP_201_CREATED) From 6129de679c27f05a3957dbf3e87595f3574c6856 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Nov 2022 16:01:28 +0000 Subject: [PATCH 069/313] Bump redis from 4.3.4 to 4.3.5 Bumps [redis](https://github.com/redis/redis-py) from 4.3.4 to 4.3.5. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.3.4...v4.3.5) --- updated-dependencies: - dependency-name: redis dependency-type: direct:production update-type: version-update:semver-patch ... 
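Returning to PATCH 068 above ("handling the double create in the backend"): the guard generalizes to any DRF create endpoint backed by a uniqueness constraint. A minimal sketch; the mixin is illustrative and the real override (which returns a serialized object rather than serializer.data) lives in viewsets/base.py:

from django.db.utils import IntegrityError
from rest_framework import status
from rest_framework.response import Response


class ConflictSafeCreateMixin:
    """Illustrative mixin, not Studio's actual class hierarchy."""

    def create(self, request, *args, **kwargs):
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        try:
            self.perform_create(serializer)
        except IntegrityError as e:
            # The second of two duplicate submits violates the unique
            # constraint; surface it as a conflict rather than a 500.
            return Response({"error": str(e)}, status=status.HTTP_409_CONFLICT)
        return Response(serializer.data, status=status.HTTP_201_CREATED)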
Signed-off-by: dependabot[bot] --- requirements.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index f927214388..b02eeefecc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -54,8 +54,6 @@ click-repl==0.2.0 # via celery confusable-homoglyphs==3.2.0 # via django-registration -deprecated==1.2.13 - # via redis django==3.2.14 # via # -r requirements.in @@ -229,7 +227,7 @@ pytz==2022.1 # django # django-postmark # google-api-core -redis==4.3.4 +redis==4.3.5 # via # -r requirements.in # django-redis @@ -275,8 +273,6 @@ wcwidth==0.2.5 # via prompt-toolkit webencodings==0.5.1 # via html5lib -wrapt==1.14.1 - # via deprecated zipp==3.4.1 # via importlib-metadata From 4ffa3546863fd6edc7f51296751478b76b275309 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 29 Nov 2022 16:03:46 +0000 Subject: [PATCH 070/313] Bump django-model-utils from 4.2.0 to 4.3.1 Bumps [django-model-utils](https://github.com/jazzband/django-model-utils) from 4.2.0 to 4.3.1. - [Release notes](https://github.com/jazzband/django-model-utils/releases) - [Changelog](https://github.com/jazzband/django-model-utils/blob/master/CHANGES.rst) - [Commits](https://github.com/jazzband/django-model-utils/compare/4.2.0...4.3.1) --- updated-dependencies: - dependency-name: django-model-utils dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements.in | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.in b/requirements.in index 293cf6383b..6de2c0090c 100644 --- a/requirements.in +++ b/requirements.in @@ -29,7 +29,7 @@ django-mathfilters google-cloud-kms==1.1.0 backoff backports-abc==0.5 -django-model-utils==4.2.0 +django-model-utils==4.3.1 django-redis django-prometheus future diff --git a/requirements.txt b/requirements.txt index f927214388..b578dbc81a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -85,7 +85,7 @@ django-js-reverse==0.9.1 # via -r requirements.in django-mathfilters==1.0.0 # via -r requirements.in -django-model-utils==4.2.0 +django-model-utils==4.3.1 # via -r requirements.in django-mptt==0.14.0 # via -r requirements.in From cd004ae1146eeedbf94a1a7d50511151a8982c76 Mon Sep 17 00:00:00 2001 From: Samson Akol Date: Tue, 29 Nov 2022 21:59:54 +0300 Subject: [PATCH 071/313] Updates jsPDF dependency url --- package.json | 2 +- yarn.lock | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/package.json b/package.json index 6457bfe0ed..e27b8b1e67 100644 --- a/package.json +++ b/package.json @@ -69,7 +69,7 @@ "i18n-iso-countries": "^7.5.0", "intl": "1.2.5", "jquery": "^2.2.4", - "jspdf": "https://github.com/MrRio/jsPDF.git#b7a1d8239c596292ce86dafa77f05987bcfa2e6e", + "jspdf": "https://github.com/parallax/jsPDF.git#b7a1d8239c596292ce86dafa77f05987bcfa2e6e", "kolibri-constants": "^0.1.41", "kolibri-design-system": "https://github.com/learningequality/kolibri-design-system#e9a2ff34716bb6412fe99f835ded5b17345bab94", "lodash": "^4.17.21", diff --git a/yarn.lock b/yarn.lock index 0c074104d7..b535884415 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7906,9 +7906,9 @@ jsonpointer@^5.0.0: resolved "https://registry.yarnpkg.com/jsonpointer/-/jsonpointer-5.0.0.tgz#f802669a524ec4805fa7389eadbc9921d5dc8072" integrity sha512-PNYZIdMjVIvVgDSYKTT63Y+KZ6IZvGRNNWcxwD+GNnUz1MKPfv30J8ueCjdwcN0nDx2SlshgyB7Oy0epAzVRRg== 
-"jspdf@https://github.com/MrRio/jsPDF.git#b7a1d8239c596292ce86dafa77f05987bcfa2e6e": +"jspdf@https://github.com/parallax/jsPDF.git#b7a1d8239c596292ce86dafa77f05987bcfa2e6e": version "2.1.1" - resolved "https://github.com/MrRio/jsPDF.git#b7a1d8239c596292ce86dafa77f05987bcfa2e6e" + resolved "https://github.com/parallax/jsPDF.git#b7a1d8239c596292ce86dafa77f05987bcfa2e6e" dependencies: atob "^2.1.2" btoa "^1.2.1" @@ -11112,7 +11112,7 @@ stackblur-canvas@2.2.0: stackblur-canvas@^1.4.1: version "1.4.1" resolved "https://registry.yarnpkg.com/stackblur-canvas/-/stackblur-canvas-1.4.1.tgz#849aa6f94b272ff26f6471fa4130ed1f7e47955b" - integrity sha1-hJqm+UsnL/JvZHH6QTDtH35HlVs= + integrity sha512-TfbTympL5C1K+F/RizDkMBqH18EkUKU8V+4PphIXR+fWhZwwRi3bekP04gy2TOwOT3R6rJQJXAXFrbcZde7wow== static-extend@^0.1.1: version "0.1.2" @@ -11858,7 +11858,7 @@ tr46@^2.1.0: tr46@~0.0.3: version "0.0.3" resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" - integrity sha1-gYT9NH2snNwYWZLzpmIuFLnZq2o= + integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw== trim-newlines@^3.0.0: version "3.0.1" @@ -12373,7 +12373,7 @@ web-streams-polyfill@^3.2.1: webidl-conversions@^3.0.0, webidl-conversions@^3.0.1: version "3.0.1" resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871" - integrity sha1-JFNCdeKnvGvnvIZhHMFq4KVlSHE= + integrity sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ== webidl-conversions@^4.0.2: version "4.0.2" @@ -12533,7 +12533,7 @@ whatwg-mimetype@^2.1.0, whatwg-mimetype@^2.2.0, whatwg-mimetype@^2.3.0: whatwg-url@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-2.0.1.tgz#5396b2043f020ee6f704d9c45ea8519e724de659" - integrity sha1-U5ayBD8CDub3BNnEXqhRnnJN5lk= + integrity sha512-sX+FT4N6iR0ZiqGqyDEKklyfMGR99zvxZD+LQ8IGae5uVGswQ7DOeLPB5KgJY8FzkwSzwqOXLQeVQvtOTSQU9Q== dependencies: tr46 "~0.0.3" webidl-conversions "^3.0.0" @@ -12874,7 +12874,7 @@ xhr@^2.0.1: "xml-name-validator@>= 2.0.1 < 3.0.0": version "2.0.1" resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-2.0.1.tgz#4d8b8f1eccd3419aa362061becef515e1e559635" - integrity sha1-TYuPHszTQZqjYgYb7O9RXh5VljU= + integrity sha512-jRKe/iQYMyVJpzPH+3HL97Lgu5HrCfii+qSo+TfjKHtOnvbnvdVfMYrn9Q34YV81M2e5sviJlI6Ko9y+nByzvA== xml-name-validator@^3.0.0: version "3.0.0" From 1a1cf8e6c1a39aaf8bae96f4addc2b681cd21137 Mon Sep 17 00:00:00 2001 From: Samson Akol Date: Tue, 29 Nov 2022 22:11:04 +0300 Subject: [PATCH 072/313] Removes misleading offline indicator on sign-in page --- .../frontend/accounts/pages/Main.vue | 71 +++++++++++++------ 1 file changed, 50 insertions(+), 21 deletions(-) diff --git a/contentcuration/contentcuration/frontend/accounts/pages/Main.vue b/contentcuration/contentcuration/frontend/accounts/pages/Main.vue index c15a96fca1..dc05d5204c 100644 --- a/contentcuration/contentcuration/frontend/accounts/pages/Main.vue +++ b/contentcuration/contentcuration/frontend/accounts/pages/Main.vue @@ -6,12 +6,12 @@ justify-center class="main pt-5" > -

    [Vue template hunk unrecoverable: the markup was stripped during extraction. Surviving
    strings: {{ $tr('kolibriStudio') }}, {{ $tr('signInButton') }}, {{ $tr('createAccountButton') }}.
    Per the commit subject and the script/style hunks below, the change restructures the
    sign-in layout and removes the OfflineText corner indicator.]
@@ -90,7 +127,6 @@ import PolicyModals from 'shared/views/policies/PolicyModals'; import { policies } from 'shared/constants'; import LanguageSwitcherList from 'shared/languageSwitcher/LanguageSwitcherList'; - import OfflineText from 'shared/views/OfflineText'; export default { name: 'Main', @@ -100,7 +136,6 @@ LanguageSwitcherList, PasswordField, PolicyModals, - OfflineText, }, data() { return { @@ -191,10 +226,4 @@ content: '•'; } - .corner { - position: absolute; - top: 1em; - left: 1em; - } - From 43378a58283dea4e18a969c5e80d88dd2d60b79e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 30 Nov 2022 16:01:54 +0000 Subject: [PATCH 073/313] Bump jsonschema from 4.17.1 to 4.17.3 Bumps [jsonschema](https://github.com/python-jsonschema/jsonschema) from 4.17.1 to 4.17.3. - [Release notes](https://github.com/python-jsonschema/jsonschema/releases) - [Changelog](https://github.com/python-jsonschema/jsonschema/blob/main/CHANGELOG.rst) - [Commits](https://github.com/python-jsonschema/jsonschema/compare/v4.17.1...v4.17.3) --- updated-dependencies: - dependency-name: jsonschema dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f927214388..3cf6c8c5cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -162,7 +162,7 @@ jmespath==0.10.0 # botocore jsonfield==3.1.0 # via -r requirements.in -jsonschema==4.17.1 +jsonschema==4.17.3 # via -r requirements.in kombu==5.2.4 # via celery From 85f86306e3216b97ed9e4da8ac76e5c6df2f50ec Mon Sep 17 00:00:00 2001 From: Liana Harris <46411498+LianaHarris360@users.noreply.github.com> Date: Thu, 1 Dec 2022 08:57:38 -0600 Subject: [PATCH 074/313] Added bullet points between languages on sign-in page --- .../languageSwitcher/LanguageSwitcherList.vue | 33 ++----------------- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/contentcuration/contentcuration/frontend/shared/languageSwitcher/LanguageSwitcherList.vue b/contentcuration/contentcuration/frontend/shared/languageSwitcher/LanguageSwitcherList.vue index 30cc36563b..e95f955e6e 100644 --- a/contentcuration/contentcuration/frontend/shared/languageSwitcher/LanguageSwitcherList.vue +++ b/contentcuration/contentcuration/frontend/shared/languageSwitcher/LanguageSwitcherList.vue @@ -1,7 +1,7 @@ + + + + + + diff --git a/contentcuration/contentcuration/frontend/accounts/pages/resetPassword/ResetPassword.vue b/contentcuration/contentcuration/frontend/accounts/pages/resetPassword/ResetPassword.vue index b3866f5ccd..ae43d92fbc 100644 --- a/contentcuration/contentcuration/frontend/accounts/pages/resetPassword/ResetPassword.vue +++ b/contentcuration/contentcuration/frontend/accounts/pages/resetPassword/ResetPassword.vue @@ -16,9 +16,12 @@ :label="$tr('passwordConfirmLabel')" :additionalRules="passwordConfirmRules" /> - - {{ $tr('submitButton') }} - + @@ -84,3 +87,11 @@ }; + + \ No newline at end of file From 295bde72882a5ea02062d0c528224556c4a770af Mon Sep 17 00:00:00 2001 From: Alex Velez Date: Tue, 10 Jan 2023 07:37:16 -0500 Subject: [PATCH 151/313] Bring back raised buttons for account created/deleted message --- .../accounts/pages/accountDeleted/AccountDeleted.vue | 9 ++++++++- .../accounts/pages/activateAccount/AccountCreated.vue | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git 
a/contentcuration/contentcuration/frontend/accounts/pages/accountDeleted/AccountDeleted.vue b/contentcuration/contentcuration/frontend/accounts/pages/accountDeleted/AccountDeleted.vue index 325482ddfd..032cfc506a 100644 --- a/contentcuration/contentcuration/frontend/accounts/pages/accountDeleted/AccountDeleted.vue +++ b/contentcuration/contentcuration/frontend/accounts/pages/accountDeleted/AccountDeleted.vue @@ -2,7 +2,13 @@ + > + + @@ -18,6 +24,7 @@ }, $trs: { accountDeletedTitle: 'Account successfully deleted', + backToLogin: 'Continue to sign-in page', }, }; diff --git a/contentcuration/contentcuration/frontend/accounts/pages/activateAccount/AccountCreated.vue b/contentcuration/contentcuration/frontend/accounts/pages/activateAccount/AccountCreated.vue index b8edb22ee2..14e106c232 100644 --- a/contentcuration/contentcuration/frontend/accounts/pages/activateAccount/AccountCreated.vue +++ b/contentcuration/contentcuration/frontend/accounts/pages/activateAccount/AccountCreated.vue @@ -2,7 +2,13 @@ + > + + @@ -18,6 +24,7 @@ }, $trs: { accountCreatedTitle: 'Account successfully created', + backToLogin: 'Continue to sign-in page', }, }; From fafba94dfbbd74d47d1cd92df99d44e643ff9a09 Mon Sep 17 00:00:00 2001 From: Richard Tibbles Date: Mon, 20 Mar 2023 17:48:36 -0700 Subject: [PATCH 152/313] Migrate channel databases before import to guarantee compatible schema. --- .../management/commands/export_channels_to_kolibri_public.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py b/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py index 7f64f11b88..f816c2cfc9 100644 --- a/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py +++ b/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py @@ -5,7 +5,9 @@ from django.conf import settings from django.core.files.storage import default_storage as storage +from django.core.management import call_command from django.core.management.base import BaseCommand +from kolibri_content.apps import KolibriContentConfig from kolibri_content.models import ChannelMetadata as ExportedChannelMetadata from kolibri_content.router import using_content_database from kolibri_public.models import ChannelMetadata @@ -26,6 +28,8 @@ def _export_channel(self, channel_id): shutil.copyfileobj(storage_file, db_file) db_file.seek(0) with using_content_database(db_file.name): + # Run migration to handle old content databases published prior to current fields being added. 
+ call_command("migrate", app_label=KolibriContentConfig.label, database=db_file.name) channel = ExportedChannelMetadata.objects.get(id=channel_id) logger.info("Found channel {} for id: {} mapping now".format(channel.name, channel_id)) mapper = ChannelMapper(channel) From 2f0f13d63ed74cefdde33460f8564e89a2db2720 Mon Sep 17 00:00:00 2001 From: ozer550 Date: Tue, 21 Mar 2023 22:01:45 +0530 Subject: [PATCH 153/313] add tests for deploy change event --- .../contentcuration/tests/viewsets/base.py | 7 ++ .../tests/viewsets/test_channel.py | 72 +++++++++++++++++++ .../contentcuration/viewsets/channel.py | 48 +++++++++++++ .../contentcuration/viewsets/sync/base.py | 2 + .../viewsets/sync/constants.py | 2 + .../contentcuration/viewsets/sync/utils.py | 6 ++ 6 files changed, 137 insertions(+) diff --git a/contentcuration/contentcuration/tests/viewsets/base.py b/contentcuration/contentcuration/tests/viewsets/base.py index 48ddd1c995..7dd55c9622 100644 --- a/contentcuration/contentcuration/tests/viewsets/base.py +++ b/contentcuration/contentcuration/tests/viewsets/base.py @@ -11,6 +11,7 @@ from contentcuration.viewsets.sync.utils import generate_copy_event as base_generate_copy_event from contentcuration.viewsets.sync.utils import generate_create_event as base_generate_create_event from contentcuration.viewsets.sync.utils import generate_delete_event as base_generate_delete_event +from contentcuration.viewsets.sync.utils import generate_deploy_event as base_generate_deploy_event from contentcuration.viewsets.sync.utils import generate_update_event as base_generate_update_event @@ -48,6 +49,12 @@ def generate_sync_channel_event(channel_id, attributes, tags, files, assessment_ return event +def generate_deploy_channel_event(channel_id, user_id): + event = base_generate_deploy_event(channel_id, user_id=user_id) + event["rev"] = random.randint(1, 10000000) + return event + + class SyncTestMixin(object): celery_task_always_eager = None diff --git a/contentcuration/contentcuration/tests/viewsets/test_channel.py b/contentcuration/contentcuration/tests/viewsets/test_channel.py index b88f54ad98..02e8b6571f 100644 --- a/contentcuration/contentcuration/tests/viewsets/test_channel.py +++ b/contentcuration/contentcuration/tests/viewsets/test_channel.py @@ -4,12 +4,15 @@ import mock from django.urls import reverse +from le_utils.constants import content_kinds from contentcuration import models +from contentcuration import models as cc from contentcuration.tests import testdata from contentcuration.tests.base import StudioAPITestCase from contentcuration.tests.viewsets.base import generate_create_event from contentcuration.tests.viewsets.base import generate_delete_event +from contentcuration.tests.viewsets.base import generate_deploy_channel_event from contentcuration.tests.viewsets.base import generate_sync_channel_event from contentcuration.tests.viewsets.base import generate_update_event from contentcuration.tests.viewsets.base import SyncTestMixin @@ -299,6 +302,75 @@ def test_sync_channel_called_correctly(self, sync_channel_mock): self.assertEqual(sync_channel_mock.call_args.args[i], True) sync_channel_mock.assert_called_once() + def test_deploy_channel_event(self): + channel = testdata.channel() + user = testdata.user() + channel.editors.add(user) + self.client.force_authenticate( + user + ) # This will skip all authentication checks + channel.main_tree.refresh_from_db() + + channel.staging_tree = cc.ContentNode( + kind_id=content_kinds.TOPIC, title="test", node_id="aaa" + ) + channel.staging_tree.save() + 
+        channel.previous_tree = cc.ContentNode(
+            kind_id=content_kinds.TOPIC, title="test", node_id="bbb"
+        )
+        channel.previous_tree.save()
+        channel.chef_tree = cc.ContentNode(
+            kind_id=content_kinds.TOPIC, title="test", node_id="ccc"
+        )
+        channel.chef_tree.save()
+        channel.save()
+
+        self.contentnode = cc.ContentNode.objects.create(kind_id="video")
+
+        response = self.sync_changes(
+            [
+                generate_deploy_channel_event(channel.id, user.id)
+            ]
+        )
+
+        self.assertEqual(response.status_code, 200)
+        modified_channel = models.Channel.objects.get(id=channel.id)
+        self.assertEqual(modified_channel.main_tree, channel.staging_tree)
+        self.assertEqual(modified_channel.staging_tree, None)
+        self.assertEqual(modified_channel.previous_tree, channel.main_tree)
+
+    def test_deploy_with_staging_tree_None(self):
+        channel = testdata.channel()
+        user = testdata.user()
+        channel.editors.add(user)
+        self.client.force_authenticate(
+            user
+        )  # This will skip all authentication checks
+        channel.main_tree.refresh_from_db()
+
+        channel.staging_tree = None
+        channel.previous_tree = cc.ContentNode(
+            kind_id=content_kinds.TOPIC, title="test", node_id="bbb"
+        )
+        channel.previous_tree.save()
+        channel.chef_tree = cc.ContentNode(
+            kind_id=content_kinds.TOPIC, title="test", node_id="ccc"
+        )
+        channel.chef_tree.save()
+        channel.save()
+
+        self.contentnode = cc.ContentNode.objects.create(kind_id="video")
+        response = self.sync_changes(
+            [
+                generate_deploy_channel_event(channel.id, user.id)
+            ]
+        )
+        # Should raise a validation error as staging_tree was set to None
+        self.assertEqual(len(response.json()["errors"]), 1, response.content)
+        modified_channel = models.Channel.objects.get(id=channel.id)
+        self.assertNotEqual(modified_channel.main_tree, channel.staging_tree)
+        self.assertNotEqual(modified_channel.previous_tree, channel.main_tree)
+
 
 class CRUDTestCase(StudioAPITestCase):
     @property
diff --git a/contentcuration/contentcuration/viewsets/channel.py b/contentcuration/contentcuration/viewsets/channel.py
index 773bc0b740..bd416acafa 100644
--- a/contentcuration/contentcuration/viewsets/channel.py
+++ b/contentcuration/contentcuration/viewsets/channel.py
@@ -28,6 +28,7 @@
 from search.models import ContentNodeFullTextSearch
 from search.utils import get_fts_search_query
 
+import contentcuration.models as models
 from contentcuration.decorators import cache_no_user_data
 from contentcuration.models import Change
 from contentcuration.models import Channel
@@ -36,6 +37,7 @@
 from contentcuration.models import generate_storage_url
 from contentcuration.models import SecretToken
 from contentcuration.models import User
+from contentcuration.utils.garbage_collect import get_deleted_chefs_root
 from contentcuration.utils.pagination import CachedListPagination
 from contentcuration.utils.pagination import ValuesViewsetPageNumberPagination
 from contentcuration.utils.publish import publish_channel
@@ -558,6 +560,52 @@ def sync(self, pk, attributes=False, tags=False, files=False, assessment_items=F
             progress_tracker=progress_tracker,
         )
 
+    def deploy_from_changes(self, changes):
+        errors = []
+        for deploy in changes:
+            try:
+                self.deploy(self.request.user, deploy["key"])
+            except Exception as e:
+                log_sync_exception(e, user=self.request.user, change=deploy)
+                deploy["errors"] = [str(e)]
+                errors.append(deploy)
+        return errors
+
+    def deploy(self, user, pk):
+
+        channel = self.get_edit_queryset().get(pk=pk)
+
+        if channel.staging_tree is None:
+            raise ValidationError("Cannot deploy a channel without staging tree")
+
+        user.check_channel_space(channel)
+
+        if channel.previous_tree and channel.previous_tree != channel.main_tree:
+            # IMPORTANT: Do not remove this block, MPTT updating the deleted chefs block could hang the server
+            with models.ContentNode.objects.disable_mptt_updates():
+                garbage_node = get_deleted_chefs_root()
+                channel.previous_tree.parent = garbage_node
+                channel.previous_tree.title = "Previous tree for channel {}".format(channel.pk)
+                channel.previous_tree.save()
+
+        channel.previous_tree = channel.main_tree
+        channel.main_tree = channel.staging_tree
+        channel.staging_tree = None
+        channel.save()
+
+        user.staged_files.all().delete()
+        user.set_space_used()
+
+        models.Change.create_change(generate_update_event(
+            channel.id,
+            CHANNEL,
+            {
+                "root_id": channel.main_tree.id,
+                "staging_root_id": None
+            },
+            channel_id=channel.id,
+        ), applied=True, created_by_id=user.id)
+
 
 @method_decorator(
     cache_page(
diff --git a/contentcuration/contentcuration/viewsets/sync/base.py b/contentcuration/contentcuration/viewsets/sync/base.py
index 455a97e4aa..f11a8f4729 100644
--- a/contentcuration/contentcuration/viewsets/sync/base.py
+++ b/contentcuration/contentcuration/viewsets/sync/base.py
@@ -22,6 +22,7 @@
 from contentcuration.viewsets.sync.constants import COPIED
 from contentcuration.viewsets.sync.constants import CREATED
 from contentcuration.viewsets.sync.constants import DELETED
+from contentcuration.viewsets.sync.constants import DEPLOYED
 from contentcuration.viewsets.sync.constants import EDITOR_M2M
 from contentcuration.viewsets.sync.constants import FILE
 from contentcuration.viewsets.sync.constants import INVITATION
@@ -92,6 +93,7 @@ def get_change_type(obj):
     COPIED: "copy_from_changes",
     PUBLISHED: "publish_from_changes",
     SYNCED: "sync_from_changes",
+    DEPLOYED: "deploy_from_changes",
 }
 
diff --git a/contentcuration/contentcuration/viewsets/sync/constants.py b/contentcuration/contentcuration/viewsets/sync/constants.py
index 6e553b6ccd..84c2b5aad7 100644
--- a/contentcuration/contentcuration/viewsets/sync/constants.py
+++ b/contentcuration/contentcuration/viewsets/sync/constants.py
@@ -6,6 +6,7 @@
 COPIED = 5
 PUBLISHED = 6
 SYNCED = 7
+DEPLOYED = 8
 
 
 ALL_CHANGES = set([
@@ -16,6 +17,7 @@
     COPIED,
     PUBLISHED,
     SYNCED,
+    DEPLOYED,
 ])
 
 # Client-side table constants
diff --git a/contentcuration/contentcuration/viewsets/sync/utils.py b/contentcuration/contentcuration/viewsets/sync/utils.py
index 1d3718b5e2..47b5a17e54 100644
--- a/contentcuration/contentcuration/viewsets/sync/utils.py
+++ b/contentcuration/contentcuration/viewsets/sync/utils.py
@@ -6,6 +6,7 @@
 from contentcuration.viewsets.sync.constants import COPIED
 from contentcuration.viewsets.sync.constants import CREATED
 from contentcuration.viewsets.sync.constants import DELETED
+from contentcuration.viewsets.sync.constants import DEPLOYED
 from contentcuration.viewsets.sync.constants import MOVED
 from contentcuration.viewsets.sync.constants import PUBLISHED
 from contentcuration.viewsets.sync.constants import UPDATED
@@ -74,6 +75,11 @@
     return event
 
 
+def generate_deploy_event(key, user_id):
+    event = _generate_event(key, CHANNEL, DEPLOYED, channel_id=key, user_id=user_id)
+    return event
+
+
 def log_sync_exception(e, user=None, change=None, changes=None):
     # Capture exception and report, but allow sync
     # to complete properly.
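
Note on the change plumbing above: PATCH 153 models "deploy" as just another change record. generate_deploy_event builds a payload keyed by the channel id, and the change-type map extended in sync/base.py routes a DEPLOYED change to the viewset's deploy_from_changes handler. The following stand-alone sketch is illustrative only (a stub viewset and hand-rolled constants, not the real Studio modules) and shows that dispatch shape end to end:

    # Minimal sketch of the DEPLOYED change-event dispatch, using hypothetical
    # stand-ins for the real Studio helpers and viewset.
    CHANNEL = "channel"  # stand-in for the client-side table constant
    DEPLOYED = 8         # change-type constant introduced by this patch

    def generate_deploy_event(key, user_id):
        # Mirrors the payload shape built via _generate_event in sync/utils.py:
        # the channel id doubles as the change key and the channel scope.
        return {"key": key, "table": CHANNEL, "type": DEPLOYED,
                "channel_id": key, "user_id": user_id}

    # Mirrors the change-type -> handler-name map extended in sync/base.py.
    HANDLERS = {DEPLOYED: "deploy_from_changes"}

    class StubChannelViewSet(object):
        def deploy_from_changes(self, changes):
            errors = []
            for change in changes:
                print("would deploy channel", change["key"])
            return errors

    event = generate_deploy_event("abc123", user_id=42)
    handler = getattr(StubChannelViewSet(), HANDLERS[event["type"]])
    assert handler([event]) == []

In Studio itself the handler goes on to swap main_tree, staging_tree, and previous_tree as in deploy() above; the stub only demonstrates how the constants, the event payload, and the handler lookup fit together.
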
From 33e9c5e1aad16d219f8fa68d2ab2f8b99dda8fd3 Mon Sep 17 00:00:00 2001
From: Liana Harris <46411498+LianaHarris360@users.noreply.github.com>
Date: Tue, 21 Mar 2023 14:01:32 -0500
Subject: [PATCH 154/313] Updates mp3 resource previewer metadata to include has captions and subtitles

---
 .../frontend/channelEdit/components/ResourcePanel.vue |  6 +++++-
 .../channelEdit/components/edit/DetailsTabView.vue    |  7 +------
 .../edit/__tests__/accessibilityOptions.spec.js       | 11 +++++++++++
 .../contentcuration/frontend/shared/constants.js      |  2 +-
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/contentcuration/contentcuration/frontend/channelEdit/components/ResourcePanel.vue b/contentcuration/contentcuration/frontend/channelEdit/components/ResourcePanel.vue
index e651764293..0c505ddbfa 100644
--- a/contentcuration/contentcuration/frontend/channelEdit/components/ResourcePanel.vue
+++ b/contentcuration/contentcuration/frontend/channelEdit/components/ResourcePanel.vue
@@ -310,7 +310,11 @@
             inline
           />
-
+
 
diff --git a/contentcuration/contentcuration/frontend/channelEdit/components/edit/DetailsTabView.vue b/contentcuration/contentcuration/frontend/channelEdit/components/edit/DetailsTabView.vue
index e319fc452a..3c899602a7 100644
--- a/contentcuration/contentcuration/frontend/channelEdit/components/edit/DetailsTabView.vue
+++ b/contentcuration/contentcuration/frontend/channelEdit/components/edit/DetailsTabView.vue
@@ -554,12 +554,7 @@
         return this.firstNode.original_channel_name;
       },
       requiresAccessibility() {
-        return (
-          this.oneSelected &&
-          this.nodes.every(
-            node => node.kind !== ContentKindsNames.AUDIO && node.kind !== ContentKindsNames.TOPIC
-          )
-        );
+        return this.oneSelected && this.nodes.every(node => node.kind !== ContentKindsNames.TOPIC);
       },
       audioAccessibility() {
         return this.oneSelected && this.firstNode.kind === 'audio';
 
diff --git a/contentcuration/contentcuration/frontend/channelEdit/components/edit/__tests__/accessibilityOptions.spec.js b/contentcuration/contentcuration/frontend/channelEdit/components/edit/__tests__/accessibilityOptions.spec.js
index 050f389c6a..9bc5d81c04 100644
--- a/contentcuration/contentcuration/frontend/channelEdit/components/edit/__tests__/accessibilityOptions.spec.js
+++ b/contentcuration/contentcuration/frontend/channelEdit/components/edit/__tests__/accessibilityOptions.spec.js
@@ -73,6 +73,17 @@
     expect(wrapper.find('[data-test="checkbox-audioDescription"]').exists()).toBe(false);
   });
 
+  it('should display the correct list of accessibility options if resource is an audio', () => {
+    const wrapper = mount(AccessibilityOptions, {
+      propsData: {
+        kind: 'audio',
+      },
+    });
+
+    expect(wrapper.find('[data-test="checkbox-captionsSubtitles"]').exists()).toBe(true);
+    expect(wrapper.find('[data-test="tooltip-captionsSubtitles"]').exists()).toBe(false);
+  });
+
   it('should render appropriate tooltips along with the checkbox', () => {
     const wrapper = mount(AccessibilityOptions, {
       propsData: {
 
diff --git a/contentcuration/contentcuration/frontend/shared/constants.js b/contentcuration/contentcuration/frontend/shared/constants.js
index 21a14afc08..005be9a69c 100644
--- a/contentcuration/contentcuration/frontend/shared/constants.js
+++ b/contentcuration/contentcuration/frontend/shared/constants.js
@@ -199,11 +199,11 @@ export const ContentModalities = {
 };
 
 export const AccessibilityCategoriesMap = {
-  // Note: audio is not included, as it is rendered in the UI differently.
   document: ['ALT_TEXT', 'HIGH_CONTRAST', 'TAGGED_PDF'],
   video: ['CAPTIONS_SUBTITLES', 'AUDIO_DESCRIPTION', 'SIGN_LANGUAGE'],
   exercise: ['ALT_TEXT'],
   html5: ['ALT_TEXT', 'HIGH_CONTRAST'],
+  audio: ['CAPTIONS_SUBTITLES'],
 };
 
 export const CompletionDropdownMap = {

From 385cda06f5b167541c65d6829e2c513764529799 Mon Sep 17 00:00:00 2001
From: Richard Tibbles
Date: Tue, 21 Mar 2023 13:31:37 -0700
Subject: [PATCH 155/313] Use get_active_content_database to get alias

---
 .../management/commands/export_channels_to_kolibri_public.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py b/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py
index f816c2cfc9..2f20a0b78c 100644
--- a/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py
+++ b/contentcuration/kolibri_public/management/commands/export_channels_to_kolibri_public.py
@@ -9,6 +9,7 @@
 from django.core.management.base import BaseCommand
 from kolibri_content.apps import KolibriContentConfig
 from kolibri_content.models import ChannelMetadata as ExportedChannelMetadata
+from kolibri_content.router import get_active_content_database
 from kolibri_content.router import using_content_database
 from kolibri_public.models import ChannelMetadata
 from kolibri_public.utils.mapper import ChannelMapper
@@ -29,7 +30,7 @@ def _export_channel(self, channel_id):
             db_file.seek(0)
             with using_content_database(db_file.name):
                 # Run migration to handle old content databases published prior to current fields being added.
-                call_command("migrate", app_label=KolibriContentConfig.label, database=db_file.name)
+                call_command("migrate", app_label=KolibriContentConfig.label, database=get_active_content_database())
                 channel = ExportedChannelMetadata.objects.get(id=channel_id)
                 logger.info("Found channel {} for id: {} mapping now".format(channel.name, channel_id))
                 mapper = ChannelMapper(channel)

From 36c10752a7773b5b321fd6b486a5ccfff0f210eb Mon Sep 17 00:00:00 2001
From: Vivek Agrawal
Date: Wed, 22 Mar 2023 16:16:52 +0530
Subject: [PATCH 156/313] Put deploy event into indexedDB

---
 .../channelEdit/pages/StagingTreePage/index.vue |  2 +-
 .../channelEdit/vuex/currentChannel/actions.js  | 10 +---------
 .../frontend/shared/data/constants.js           |  1 +
 .../frontend/shared/data/resources.js           | 13 +++++++++++++
 4 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/contentcuration/contentcuration/frontend/channelEdit/pages/StagingTreePage/index.vue b/contentcuration/contentcuration/frontend/channelEdit/pages/StagingTreePage/index.vue
index 21f5fd82b0..dcf0cb6e50 100644
--- a/contentcuration/contentcuration/frontend/channelEdit/pages/StagingTreePage/index.vue
+++ b/contentcuration/contentcuration/frontend/channelEdit/pages/StagingTreePage/index.vue
@@ -509,7 +509,7 @@
       async onDeployChannelClick() {
         this.submitDisabled = true;
         try {
-          await this.deployCurrentChannel();
+          this.deployCurrentChannel();
         } catch (e) {
           this.submitDisabled = false;
           throw e;
 
diff --git a/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/actions.js b/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/actions.js
index dd5bb6139a..9dca09a0a2 100644
--- a/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/actions.js
+++ b/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/actions.js
@@ -48,15 +48,7 @@ export function reloadCurrentChannelStagingDiff(context) {
 }
 
 export function deployCurrentChannel(context) {
-  let payload = {
-    channel_id: context.state.currentChannelId,
-  };
-  return client.post(window.Urls.activate_channel(), payload).catch(e => {
-    // If response is 'Bad request', channel must already be activated
-    if (e.response && e.response.status === 400) {
-      return Promise.resolve();
-    }
-  });
+  return Channel.deploy(context.state.currentChannelId);
 }
 
 export function publishChannel(context, version_notes) {
 
diff --git a/contentcuration/contentcuration/frontend/shared/data/constants.js b/contentcuration/contentcuration/frontend/shared/data/constants.js
index 8e658c0ecb..bbc35582a3 100644
--- a/contentcuration/contentcuration/frontend/shared/data/constants.js
+++ b/contentcuration/contentcuration/frontend/shared/data/constants.js
@@ -6,6 +6,7 @@ export const CHANGE_TYPES = {
   COPIED: 5,
   PUBLISHED: 6,
   SYNCED: 7,
+  DEPLOYED: 8,
 };
 
 /**
  * An array of change types that directly result in the creation of nodes
 
diff --git a/contentcuration/contentcuration/frontend/shared/data/resources.js b/contentcuration/contentcuration/frontend/shared/data/resources.js
index ac51f15330..2fb4405443 100644
--- a/contentcuration/contentcuration/frontend/shared/data/resources.js
+++ b/contentcuration/contentcuration/frontend/shared/data/resources.js
@@ -1085,6 +1085,19 @@
     });
   },
 
+  deploy(id) {
+    const change = {
+      key: id,
+      source: CLIENTID,
+      table: this.tableName,
+      type: CHANGE_TYPES.DEPLOYED,
+      channel_id: id,
+    };
+    return this.transaction({ mode: 'rw', source: IGNORED_SOURCE }, CHANGES_TABLE, () => {
+      return db[CHANGES_TABLE].put(change);
+    });
+  },
+
   sync(id, { attributes = false, tags = false, files = false, assessment_items = false } = {}) {
     const change = {
       key: id,

From b87156022ec081a74cd598e45471d18ed1304737 Mon Sep 17 00:00:00 2001
From: Liana Harris <46411498+LianaHarris360@users.noreply.github.com>
Date: Wed, 22 Mar 2023 14:21:55 -0500
Subject: [PATCH 157/313] Updates command to automatically set accessibility metadata for audio nodes with subtitles

---
 .../management/commands/set_orm_based_has_captions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/contentcuration/contentcuration/management/commands/set_orm_based_has_captions.py b/contentcuration/contentcuration/management/commands/set_orm_based_has_captions.py
index 29769e4389..38865f6b89 100644
--- a/contentcuration/contentcuration/management/commands/set_orm_based_has_captions.py
+++ b/contentcuration/contentcuration/management/commands/set_orm_based_has_captions.py
@@ -22,13 +22,13 @@ class Command(BaseCommand):
 
     def handle(self, *args, **options):
         start = time.time()
-        logging.info("Setting 'has captions' for video kinds")
+        logging.info("Setting 'has captions' for audio kinds")
 
         has_captions_subquery = Exists(File.objects.filter(contentnode=OuterRef("id"), language=OuterRef("language"), preset_id=format_presets.VIDEO_SUBTITLE))
-        # Only try to update video nodes which have not had any accessibility labels set on them
+        # Only try to update audio nodes which have not had any accessibility labels set on them
         # this will allow this management command to be rerun and resume from where it left off
         # and also prevent stomping previous edits to the accessibility_labels field.
 
-        updateable_nodes = ContentNode.objects.filter(has_captions_subquery, kind=content_kinds.VIDEO, accessibility_labels__isnull=True)
+        updateable_nodes = ContentNode.objects.filter(has_captions_subquery, kind=content_kinds.AUDIO, accessibility_labels__isnull=True)
 
         updateable_node_slice = updateable_nodes.values_list("id", flat=True)[0:CHUNKSIZE]

From c46f34b5d58f4410a6a70156fbe884a9d7ef0c68 Mon Sep 17 00:00:00 2001
From: Liana Harris <46411498+LianaHarris360@users.noreply.github.com>
Date: Fri, 24 Mar 2023 10:18:22 -0500
Subject: [PATCH 158/313] Updates nested folder order in breadcrumb

---
 .../frontend/channelEdit/views/CurrentTopicView.vue | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/contentcuration/contentcuration/frontend/channelEdit/views/CurrentTopicView.vue b/contentcuration/contentcuration/frontend/channelEdit/views/CurrentTopicView.vue
index 644ccd629d..19223ffcab 100644
--- a/contentcuration/contentcuration/frontend/channelEdit/views/CurrentTopicView.vue
+++ b/contentcuration/contentcuration/frontend/channelEdit/views/CurrentTopicView.vue
@@ -5,11 +5,11 @@
-
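
Note on the backfill in PATCH 157: the command selects only audio nodes that both match the subtitle-file Exists subquery and still have accessibility_labels set to null, then processes ids in CHUNKSIZE slices. Because every pass re-queries for still-unlabelled nodes, the null check doubles as a resume marker: the command can be interrupted and rerun without repeating work. A stand-alone sketch of that resume-safe loop, using plain-Python stand-ins for the ORM and hypothetical data:

    # Resume-safe chunked backfill: each iteration re-selects rows that still
    # lack the label, so a rerun picks up exactly where the last run stopped.
    CHUNKSIZE = 3

    nodes = [{"id": i, "accessibility_labels": None} for i in range(8)]

    def next_updateable_ids():
        # Stand-in for: updateable_nodes.values_list("id", flat=True)[0:CHUNKSIZE]
        return [n["id"] for n in nodes if n["accessibility_labels"] is None][:CHUNKSIZE]

    batch = next_updateable_ids()
    while batch:
        for n in nodes:
            if n["id"] in batch:
                n["accessibility_labels"] = {"CAPTIONS_SUBTITLES": True}
        batch = next_updateable_ids()

    assert all(n["accessibility_labels"] is not None for n in nodes)

The same slice-and-requery shape is what makes accessibility_labels__isnull=True in the queryset serve as both the selection filter and the progress marker.
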