From 357204c936e9d905c567465e2423ef8df86296a1 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 2 Jun 2022 16:42:36 +0530 Subject: [PATCH 01/25] Optimized import from other channels search --- .../contentcuration/dev_settings.py | 11 ++++ .../contentcuration/tests/testdata.py | 18 +++--- .../contentcuration/viewsets/contentnode.py | 3 +- contentcuration/search/tests.py | 55 +++++++++++++++++++ .../search/viewsets/contentnode.py | 2 +- 5 files changed, 79 insertions(+), 10 deletions(-) diff --git a/contentcuration/contentcuration/dev_settings.py b/contentcuration/contentcuration/dev_settings.py index d81d23a993..3dcbd5bf34 100644 --- a/contentcuration/contentcuration/dev_settings.py +++ b/contentcuration/contentcuration/dev_settings.py @@ -6,3 +6,14 @@ ROOT_URLCONF = "contentcuration.dev_urls" INSTALLED_APPS += ("drf_yasg",) + +REST_FRAMEWORK = { + 'DEFAULT_PERMISSION_CLASSES': ( + 'rest_framework.permissions.IsAuthenticated', + ), + 'DEFAULT_AUTHENTICATION_CLASSES': ( + 'rest_framework.authentication.SessionAuthentication', + 'rest_framework.authentication.BasicAuthentication', + 'rest_framework.authentication.TokenAuthentication', + ) +} diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py index 50895337b1..5560e853a5 100644 --- a/contentcuration/contentcuration/tests/testdata.py +++ b/contentcuration/contentcuration/tests/testdata.py @@ -195,21 +195,23 @@ def node(data, parent=None): return new_node -def tree(parent=None): +def tree(parent=None, tree_data=None): # Read from json fixture - filepath = os.path.sep.join([os.path.dirname(__file__), "fixtures", "tree.json"]) - with open(filepath, "rb") as jsonfile: - data = json.load(jsonfile) + if tree_data is None: + filepath = os.path.sep.join([os.path.dirname(__file__), "fixtures", "tree.json"]) + with open(filepath, "rb") as jsonfile: + tree_data = json.load(jsonfile) - return node(data, parent) + return node(tree_data, parent) -def channel(name="testchannel"): +def channel(name="testchannel", create_main_tree=True, main_tree_data=None): channel = cc.Channel.objects.create(name=name) channel.save() - channel.main_tree = tree() - channel.save() + if create_main_tree: + channel.main_tree = tree(tree_data=main_tree_data) + channel.save() return channel diff --git a/contentcuration/contentcuration/viewsets/contentnode.py b/contentcuration/contentcuration/viewsets/contentnode.py index c7bb2e222a..44169a7532 100644 --- a/contentcuration/contentcuration/viewsets/contentnode.py +++ b/contentcuration/contentcuration/viewsets/contentnode.py @@ -610,7 +610,8 @@ def delete_from_changes(self, changes): def dict_if_none(obj, field_name=None): - return obj[field_name] if obj[field_name] else {} + value = obj.get(field_name) + return value if value else {} # Apply mixin first to override ValuesViewset diff --git a/contentcuration/search/tests.py b/contentcuration/search/tests.py index 2341fc9d1b..9d0aff4f4d 100644 --- a/contentcuration/search/tests.py +++ b/contentcuration/search/tests.py @@ -29,3 +29,58 @@ def test_filter_channels_by_edit(self): ) self.assertEqual(response.status_code, 200, response.content) self.assertNotEqual(response.data["results"], []) + + def test_search_result(self): + # Create two users + user_a = testdata.user(email="a@a.com") + user_b = testdata.user(email="b@b.com") + + # Create two channels with two editors + test_tree_data = { + "node_id": "00000000000000000000000000000000", + "title": "Root topic node", + "kind_id": "topic", + "children": [ + { + 
"node_id": "00000000000000000000000000000001", + "title": "Kolibri video", + "kind_id": "video", + }, + ] + } + + channel_a = testdata.channel(name="user_a_channel", main_tree_data=test_tree_data) + channel_a.editors.add(user_a) + + channel_b = testdata.channel(name="user_b_channel", create_main_tree=False) + channel_b.editors.add(user_b) + + # Publish channel_a + channel_a.main_tree.publishing = False + channel_a.main_tree.changed = False + channel_a.main_tree.published = True + channel_a.main_tree.save() + channel_a.public = True + channel_a.save() + + # Import resources from channel_a to channel_b + channel_a.main_tree.copy_to(channel_b.main_tree, batch_size=1) + channel_b.main_tree.refresh_from_db() + + # Send request from user_b to the search endpoint + self.client.force_authenticate(user=user_b) + response = self.client.get( + reverse("search-list"), + data={ + "channel_list": "public", + "keywords": "video" + }, + format="json", + ) + + # Assert whether the location_ids are of accessible nodes or not + kolibri_video_node = channel_b.main_tree.get_descendants().filter(title="Kolibri video").first() + + # The ids in location_ids should be of channel_b's ContentNode only + self.assertEqual(len(response.data["results"][0]["location_ids"]), 1) + self.assertEqual(response.data["results"][0]["location_ids"][0], kolibri_video_node.cloned_source_id) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 54b5f437aa..7736b1f98a 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -162,7 +162,7 @@ def annotate_queryset(self, queryset): 2. Annotate lists of content node and channel pks """ # Get accessible content nodes that match the content id - content_id_query = ContentNode.filter_view_queryset(ContentNode.objects.all(), self.request.user).filter( + content_id_query = queryset.filter( content_id=OuterRef("content_id") ) From d24416cfbfd176d9dd0d3172b646a18629e8e449 Mon Sep 17 00:00:00 2001 From: Blaine Jester Date: Wed, 15 Jun 2022 13:47:24 -0700 Subject: [PATCH 02/25] Add search test for channel filtering and location_ids handling --- .../contentcuration/tests/testdata.py | 2 + contentcuration/search/tests.py | 75 +++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py index 5560e853a5..b857dfb513 100644 --- a/contentcuration/contentcuration/tests/testdata.py +++ b/contentcuration/contentcuration/tests/testdata.py @@ -145,6 +145,7 @@ def node(data, parent=None): sort_order=data.get('sort_order', 1), complete=True, extra_fields=data.get('extra_fields'), + grade_levels="{}", ) new_node.save() video_file = fileobj_video(contents=b"Video File") @@ -171,6 +172,7 @@ def node(data, parent=None): content_id=data.get('content_id') or data['node_id'], sort_order=data.get('sort_order', 1), complete=True, + grade_levels="{}", ) new_node.save() diff --git a/contentcuration/search/tests.py b/contentcuration/search/tests.py index 9d0aff4f4d..bcb3b2aa99 100644 --- a/contentcuration/search/tests.py +++ b/contentcuration/search/tests.py @@ -2,6 +2,7 @@ from django.urls import reverse +from contentcuration.models import Channel from contentcuration.tests import testdata from contentcuration.tests.base import StudioAPITestCase @@ -84,3 +85,77 @@ def test_search_result(self): # The ids in location_ids should be of channel_b's ContentNode only 
self.assertEqual(len(response.data["results"][0]["location_ids"]), 1) self.assertEqual(response.data["results"][0]["location_ids"][0], kolibri_video_node.cloned_source_id) + + def test_channel_list_filter_and_location_ids(self): + users = [] + channels = [] + for i in range(4): + user = testdata.user(email="a{}@a.com".format(i)) + users.append(user) + channel = Channel.objects.create(name="user_a{}_channel".format(i)) + channel.save() + channels.append(channel) + channel.editors.add(user) + + public_channel, editable_channel, viewable_channel, inaccessible_channel = channels + + public_video_node = testdata.node({ + "title": "Kolibri video", + "kind_id": "video", + }, parent=public_channel.main_tree) + public_video_node.complete = True + public_video_node.published = True + public_video_node.changed = False + public_video_node.save() + + public_channel.main_tree.published = True + public_channel.main_tree.changed = False + public_channel.main_tree.save() + + public_channel.public = True + public_channel.save() + + user_b = users[1] + viewable_channel.viewers.add(user_b) + + public_video_node.refresh_from_db() + public_video_node.copy_to(target=editable_channel.main_tree) + public_video_node.copy_to(target=viewable_channel.main_tree) + public_video_node.copy_to(target=inaccessible_channel.main_tree) + + editable_channel.main_tree.refresh_from_db() + editable_video_node = editable_channel.main_tree.get_descendants().first() + viewable_channel.main_tree.refresh_from_db() + viewable_video_node = viewable_channel.main_tree.get_descendants().first() + inaccessible_channel.main_tree.refresh_from_db() + inaccessible_video_node = inaccessible_channel.main_tree.get_descendants().first() + + # Send request from user_b to the search endpoint + self.client.force_authenticate(user=user_b) + + for channel_list in ("public", "edit", "view"): + response = self.client.get( + reverse("search-list"), + data={ + "channel_list": channel_list, + "keywords": "video" + }, + format="json", + ) + + for result in response.data["results"]: + self.assertNotEqual(result["id"], inaccessible_video_node.id) + + if channel_list == "public": + self.assertEqual(result["id"], public_video_node.id) + elif channel_list == "edit": + self.assertEqual(result["id"], editable_video_node.id) + elif channel_list == "view": + self.assertEqual(result["id"], viewable_video_node.id) + + location_ids = result["location_ids"] + self.assertEqual(len(location_ids), 3) + self.assertIn(editable_video_node.id, location_ids) + self.assertIn(viewable_video_node.id, location_ids) + self.assertIn(public_video_node.id, location_ids) + self.assertNotIn(inaccessible_video_node.id, location_ids) From 676526e4f7de518066f3af8acc4aa9f539a66ba2 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 30 Jun 2022 13:18:26 +0530 Subject: [PATCH 03/25] Fix autodiscovery of search tests --- contentcuration/search/tests/__init__.py | 0 .../search/{tests.py => tests/test_search.py} | 0 contentcuration/search/viewsets/contentnode.py | 17 ++++++++++------- 3 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 contentcuration/search/tests/__init__.py rename contentcuration/search/{tests.py => tests/test_search.py} (100%) diff --git a/contentcuration/search/tests/__init__.py b/contentcuration/search/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contentcuration/search/tests.py b/contentcuration/search/tests/test_search.py similarity index 100% rename from contentcuration/search/tests.py rename to 
contentcuration/search/tests/test_search.py diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 7736b1f98a..9c61c6092a 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -13,18 +13,19 @@ from django_filters.rest_framework import CharFilter from le_utils.constants import content_kinds from le_utils.constants import roles +from rest_framework.permissions import IsAuthenticated from contentcuration.models import Channel from contentcuration.models import ContentNode from contentcuration.models import File from contentcuration.utils.pagination import CachedListPagination from contentcuration.viewsets.base import RequiredFilterSet +from contentcuration.viewsets.base import ValuesViewset from contentcuration.viewsets.common import NotNullMapArrayAgg from contentcuration.viewsets.common import SQArrayAgg from contentcuration.viewsets.common import SQCount from contentcuration.viewsets.common import UUIDFilter from contentcuration.viewsets.common import UUIDInFilter -from contentcuration.viewsets.contentnode import ContentNodeViewSet class ListPagination(CachedListPagination): @@ -65,9 +66,11 @@ def filter_channel_list(self, queryset, name, value): def filter_keywords(self, queryset, name, value): filter_query = Q(title__icontains=value) | Q(description__icontains=value) + tags_node_ids = ContentNode.tags.through.objects.filter( contenttag__tag_name__icontains=value - ).values_list("contentnode_id", flat=True)[:250] + ).values_list("contentnode_id", flat=True) + # Check if we have a Kolibri node id or ids and add them to the search if so. # Add to, rather than replace, the filters so that we never misinterpret a search term as a UUID. # node_ids = uuid_re.findall(value) + list(tags_node_ids) @@ -77,10 +80,8 @@ def filter_keywords(self, queryset, name, value): filter_query |= Q(node_id=node_id) filter_query |= Q(content_id=node_id) filter_query |= Q(id=node_id) - for node_id in tags_node_ids: - filter_query |= Q(id=node_id) - return queryset.filter(filter_query) + return queryset.filter(Q(id__in=list(tags_node_ids)) | filter_query) def filter_author(self, queryset, name, value): return queryset.filter( @@ -130,9 +131,11 @@ class Meta: ) -class SearchContentNodeViewSet(ContentNodeViewSet): +class SearchContentNodeViewSet(ValuesViewset): + queryset = ContentNode.objects.all() filterset_class = ContentNodeFilter pagination_class = ListPagination + permission_classes = [IsAuthenticated] values = ( "id", "content_id", @@ -162,7 +165,7 @@ def annotate_queryset(self, queryset): 2. 
Annotate lists of content node and channel pks """ # Get accessible content nodes that match the content id - content_id_query = queryset.filter( + content_id_query = ContentNode.filter_view_queryset(ContentNode.objects.all(), self.request.user).filter( content_id=OuterRef("content_id") ) From 6d40c5559556adb9894740270bc7e6bc3e67208d Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Sat, 9 Jul 2022 19:47:40 +0530 Subject: [PATCH 04/25] Remove location_ids, zero db queries on descendant resource count --- .../contentcuration/dev_settings.py | 4 ++ .../views/ImportFromChannels/BrowsingCard.vue | 9 +-- .../search/viewsets/contentnode.py | 72 ++++++------------- requirements-dev.in | 1 + requirements-dev.txt | 6 +- 5 files changed, 32 insertions(+), 60 deletions(-) diff --git a/contentcuration/contentcuration/dev_settings.py b/contentcuration/contentcuration/dev_settings.py index 3dcbd5bf34..9df785afd1 100644 --- a/contentcuration/contentcuration/dev_settings.py +++ b/contentcuration/contentcuration/dev_settings.py @@ -1,4 +1,6 @@ # flake8: noqa +from dotenv import load_dotenv + from .not_production_settings import * # noqa DEBUG = True @@ -17,3 +19,5 @@ 'rest_framework.authentication.TokenAuthentication', ) } + +load_dotenv(override=True) # take environment variables from .env. diff --git a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue index d8e3a30a30..f3451c96b8 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue +++ b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue @@ -155,13 +155,8 @@ } return this.$tr('resourcesCount', { count }); }, - numLocations() { - return this.node.location_ids.length; - }, goToLocationLabel() { - return this.numLocations > 1 - ? 
this.$tr('goToPluralLocationsAction', { count: this.numLocations }) - : this.$tr('goToSingleLocationAction'); + return this.$tr('goToSingleLocationAction'); }, isTopic() { return this.node.kind === ContentKindsNames.TOPIC; @@ -184,8 +179,6 @@ $trs: { tagsList: 'Tags: {tags}', goToSingleLocationAction: 'Go to location', - goToPluralLocationsAction: - 'In {count, number} {count, plural, one {location} other {locations}}', addToClipboardAction: 'Copy to clipboard', resourcesCount: '{count, number} {count, plural, one {resource} other {resources}}', coach: 'Resource for coaches', diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 9c61c6092a..8fab7f4bbe 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -1,14 +1,12 @@ import re -from django.db.models import Case +from django.db.models import ExpressionWrapper from django.db.models import F from django.db.models import IntegerField from django.db.models import OuterRef from django.db.models import Q from django.db.models import Subquery from django.db.models import Value -from django.db.models import When -from django.db.models.functions import Coalesce from django_filters.rest_framework import BooleanFilter from django_filters.rest_framework import CharFilter from le_utils.constants import content_kinds @@ -22,10 +20,9 @@ from contentcuration.viewsets.base import RequiredFilterSet from contentcuration.viewsets.base import ValuesViewset from contentcuration.viewsets.common import NotNullMapArrayAgg -from contentcuration.viewsets.common import SQArrayAgg -from contentcuration.viewsets.common import SQCount from contentcuration.viewsets.common import UUIDFilter from contentcuration.viewsets.common import UUIDInFilter +from contentcuration.viewsets.contentnode import get_title class ListPagination(CachedListPagination): @@ -136,6 +133,7 @@ class SearchContentNodeViewSet(ValuesViewset): filterset_class = ContentNodeFilter pagination_class = ListPagination permission_classes = [IsAuthenticated] + values = ( "id", "content_id", @@ -154,71 +152,43 @@ class SearchContentNodeViewSet(ValuesViewset): "modified", "parent_id", "changed", - "location_ids", "content_tags", "original_channel_name", ) + field_map = { + "title": get_title, + } + + def get_queryset(self): + queryset = super(SearchContentNodeViewSet, self).get_queryset() + return ContentNode._annotate_channel_id(queryset) + def annotate_queryset(self, queryset): """ 1. Do a distinct by 'content_id,' using the original node if possible 2. 
Annotate lists of content node and channel pks """ - # Get accessible content nodes that match the content id - content_id_query = ContentNode.filter_view_queryset(ContentNode.objects.all(), self.request.user).filter( - content_id=OuterRef("content_id") - ) - - # Combine by unique content id - deduped_content_query = ( - content_id_query.filter(content_id=OuterRef("content_id")) - .annotate( - is_original=Case( - When(original_source_node_id=F("node_id"), then=Value(1)), - default=Value(2), - output_field=IntegerField(), - ), - ) - .order_by("is_original", "created") - ) - queryset = queryset.filter( - pk__in=Subquery(deduped_content_query.values_list("id", flat=True)[:1]) - ).order_by() - thumbnails = File.objects.filter( contentnode=OuterRef("id"), preset__thumbnail=True ) - descendant_resources = ( - ContentNode.objects.filter( - tree_id=OuterRef("tree_id"), - lft__gt=OuterRef("lft"), - rght__lt=OuterRef("rght"), - ) - .exclude(kind_id=content_kinds.TOPIC) - .values("id", "role_visibility", "changed") - .order_by() - ) - original_channel_name = Coalesce( - Subquery( - Channel.objects.filter(pk=OuterRef("original_channel_id")).values( - "name" - )[:1] - ), - Subquery( - Channel.objects.filter(main_tree__tree_id=OuterRef("tree_id")).values( - "name" - )[:1] - ), + descendant_resources_count = ExpressionWrapper(((F("rght") - F("lft") - Value(1)) / Value(2)), output_field=IntegerField()) + + channel_name = Subquery( + Channel.objects.filter(pk=OuterRef("channel_id")).values( + "name" + )[:1] ) + queryset = queryset.annotate( - location_ids=SQArrayAgg(content_id_query, field="id"), - resource_count=SQCount(descendant_resources, field="id"), + resource_count=descendant_resources_count, thumbnail_checksum=Subquery(thumbnails.values("checksum")[:1]), thumbnail_extension=Subquery( thumbnails.values("file_format__extension")[:1] ), content_tags=NotNullMapArrayAgg("tags__tag_name"), - original_channel_name=original_channel_name, + original_channel_name=channel_name, ) + return queryset diff --git a/requirements-dev.in b/requirements-dev.in index 6d4a04ff49..ea6f900871 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -41,3 +41,4 @@ tabulate==0.8.2 fonttools flower==0.9.4 minio==7.1.1 +python-dotenv diff --git a/requirements-dev.txt b/requirements-dev.txt index 2961f78cec..0c21ce1c18 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile +# This file is autogenerated by pip-compile with python 3.8 # To update, run: # # pip-compile requirements-dev.in @@ -256,6 +256,8 @@ python-dateutil==2.8.1 # via # -c requirements.txt # faker +python-dotenv==0.20.0 + # via -r requirements-dev.in python-jsonrpc-server==0.4.0 # via python-language-server python-language-server==0.36.2 @@ -286,6 +288,8 @@ rope==0.19.0 # via -r requirements-dev.in ruamel-yaml==0.17.4 # via drf-yasg +ruamel-yaml-clib==0.2.6 + # via ruamel-yaml service-factory==0.1.6 # via -r requirements-dev.in six==1.16.0 From 094f05f7ef0e995ebccf2ba5da9aa41b8267a152 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Tue, 12 Jul 2022 14:19:28 +0530 Subject: [PATCH 05/25] Upgrade django debug toolbar and fix its settings --- .../contentcuration/debug/__init__.py | 0 .../contentcuration/debug/middleware.py | 47 ------------------- .../contentcuration/debug_panel_settings.py | 2 +- requirements-dev.in | 2 +- requirements-dev.txt | 2 +- 5 files changed, 3 insertions(+), 50 deletions(-) delete mode 100644 contentcuration/contentcuration/debug/__init__.py delete mode 100644 
contentcuration/contentcuration/debug/middleware.py diff --git a/contentcuration/contentcuration/debug/__init__.py b/contentcuration/contentcuration/debug/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/contentcuration/contentcuration/debug/middleware.py b/contentcuration/contentcuration/debug/middleware.py deleted file mode 100644 index 803a94f89b..0000000000 --- a/contentcuration/contentcuration/debug/middleware.py +++ /dev/null @@ -1,47 +0,0 @@ -import threading -import time - -import debug_panel.urls -from debug_panel.cache import cache -from debug_panel.middleware import DebugPanelMiddleware -from django.urls import reverse - - -class CustomDebugPanelMiddleware(DebugPanelMiddleware): - """ - Custom version to fix SQL escaping: - https://github.com/recamshak/django-debug-panel/issues/17#issuecomment-366268893 - """ - - def process_response(self, request, response): - """ - Store the DebugToolbarMiddleware rendered toolbar into a cache store. - The data stored in the cache are then reachable from an URL that is appened - to the HTTP response header under the 'X-debug-data-url' key. - """ - toolbar = self.__class__.debug_toolbars.get( - threading.current_thread().ident, None - ) - - response = super(DebugPanelMiddleware, self).process_response(request, response) - - if toolbar: - # for django-debug-toolbar >= 1.4 - for panel in reversed(toolbar.enabled_panels): - if ( - hasattr(panel, "generate_stats") and not panel.get_stats() - ): # PATCH HERE - panel.generate_stats(request, response) - - cache_key = "%f" % time.time() - cache.set(cache_key, toolbar.render_toolbar()) - - response["X-debug-data-url"] = request.build_absolute_uri( - reverse( - "debug_data", - urlconf=debug_panel.urls, - kwargs={"cache_key": cache_key}, - ) - ) - - return response diff --git a/contentcuration/contentcuration/debug_panel_settings.py b/contentcuration/contentcuration/debug_panel_settings.py index 79f9ddac6e..a61b573aab 100644 --- a/contentcuration/contentcuration/debug_panel_settings.py +++ b/contentcuration/contentcuration/debug_panel_settings.py @@ -17,7 +17,7 @@ def custom_show_toolbar(request): # if debug_panel exists, add it to our INSTALLED_APPS INSTALLED_APPS += ("debug_panel", "debug_toolbar", "pympler") # noqa F405 MIDDLEWARE += ( # noqa F405 - "contentcuration.debug.middleware.CustomDebugPanelMiddleware", + "debug_toolbar.middleware.DebugToolbarMiddleware", ) DEBUG_TOOLBAR_CONFIG = { "SHOW_TOOLBAR_CALLBACK": custom_show_toolbar, diff --git a/requirements-dev.in b/requirements-dev.in index ea6f900871..a398514373 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -2,7 +2,7 @@ python-language-server django-concurrent-test-helper==0.7.0 django-debug-panel==0.8.3 -django-debug-toolbar==1.9.1 +django-debug-toolbar==3.5.0 flake8==3.4.1 whitenoise Pympler diff --git a/requirements-dev.txt b/requirements-dev.txt index 0c21ce1c18..1b98ce6bc3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -81,7 +81,7 @@ django-concurrent-test-helper==0.7.0 # via -r requirements-dev.in django-debug-panel==0.8.3 # via -r requirements-dev.in -django-debug-toolbar==1.9.1 +django-debug-toolbar==3.5.0 # via # -r requirements-dev.in # django-debug-panel From 701ddc9a0176882c077839479e0bc0990367ddf1 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 22 Jul 2022 15:02:25 +0530 Subject: [PATCH 06/25] Remove unnecessary dev settings --- .../contentcuration/debug_panel_settings.py | 8 ++++++-- contentcuration/contentcuration/dev_settings.py | 15 --------------- 
requirements-dev.in | 1 - requirements-dev.txt | 2 -- 4 files changed, 6 insertions(+), 20 deletions(-) diff --git a/contentcuration/contentcuration/debug_panel_settings.py b/contentcuration/contentcuration/debug_panel_settings.py index a61b573aab..5dc3c6783f 100644 --- a/contentcuration/contentcuration/debug_panel_settings.py +++ b/contentcuration/contentcuration/debug_panel_settings.py @@ -1,8 +1,12 @@ from .dev_settings import * # noqa -# These endpoints will throw an error on the django debug panel +# These endpoints will throw an error on the django debug panel. EXCLUDED_DEBUG_URLS = [ "/content/storage", + + # Disabling task API because as soon as the task API gets polled + # the current request data gets overwritten. + "/api/task", ] DEBUG_PANEL_ACTIVE = True @@ -14,7 +18,7 @@ def custom_show_toolbar(request): ) # noqa F405 -# if debug_panel exists, add it to our INSTALLED_APPS +# if debug_panel exists, add it to our INSTALLED_APPS. INSTALLED_APPS += ("debug_panel", "debug_toolbar", "pympler") # noqa F405 MIDDLEWARE += ( # noqa F405 "debug_toolbar.middleware.DebugToolbarMiddleware", diff --git a/contentcuration/contentcuration/dev_settings.py b/contentcuration/contentcuration/dev_settings.py index 9df785afd1..d81d23a993 100644 --- a/contentcuration/contentcuration/dev_settings.py +++ b/contentcuration/contentcuration/dev_settings.py @@ -1,6 +1,4 @@ # flake8: noqa -from dotenv import load_dotenv - from .not_production_settings import * # noqa DEBUG = True @@ -8,16 +6,3 @@ ROOT_URLCONF = "contentcuration.dev_urls" INSTALLED_APPS += ("drf_yasg",) - -REST_FRAMEWORK = { - 'DEFAULT_PERMISSION_CLASSES': ( - 'rest_framework.permissions.IsAuthenticated', - ), - 'DEFAULT_AUTHENTICATION_CLASSES': ( - 'rest_framework.authentication.SessionAuthentication', - 'rest_framework.authentication.BasicAuthentication', - 'rest_framework.authentication.TokenAuthentication', - ) -} - -load_dotenv(override=True) # take environment variables from .env. 
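
As an aside, the configuration that PATCH 05 and PATCH 06 converge on is essentially the stock django-debug-toolbar setup. A minimal sketch of that settings wiring follows, assuming django-debug-toolbar 3.x; the custom_show_toolbar body is collapsed in the diff above, so the version here is one plausible implementation keyed off EXCLUDED_DEBUG_URLS, and the project's real settings module additionally keeps the debug_panel and pympler apps.

```python
# Sketch of the settings wiring after PATCH 05/06, assuming django-debug-toolbar 3.x.
# The real debug_panel_settings.py also keeps the debug_panel and pympler apps, and
# its custom_show_toolbar body is collapsed in the diff above; this version simply
# skips the toolbar for the excluded URLs.
from .dev_settings import *  # noqa

EXCLUDED_DEBUG_URLS = [
    "/content/storage",
    "/api/task",
]


def custom_show_toolbar(request):
    return not any(request.path.startswith(url) for url in EXCLUDED_DEBUG_URLS)


INSTALLED_APPS += ("debug_toolbar",)  # noqa: F405
MIDDLEWARE += ("debug_toolbar.middleware.DebugToolbarMiddleware",)  # noqa: F405
DEBUG_TOOLBAR_CONFIG = {"SHOW_TOOLBAR_CALLBACK": custom_show_toolbar}
```

Switching to the stock DebugToolbarMiddleware is what lets PATCH 05 delete the custom CustomDebugPanelMiddleware in contentcuration/debug/middleware.py entirely.
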
diff --git a/requirements-dev.in b/requirements-dev.in index a398514373..8a83c768c5 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -41,4 +41,3 @@ tabulate==0.8.2 fonttools flower==0.9.4 minio==7.1.1 -python-dotenv diff --git a/requirements-dev.txt b/requirements-dev.txt index 1b98ce6bc3..cf06183f09 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -256,8 +256,6 @@ python-dateutil==2.8.1 # via # -c requirements.txt # faker -python-dotenv==0.20.0 - # via -r requirements-dev.in python-jsonrpc-server==0.4.0 # via python-language-server python-language-server==0.36.2 From 1a14749e79047fcbe52670419650fd59c2cc3e40 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 22 Jul 2022 15:33:52 +0530 Subject: [PATCH 07/25] Add .envrc to .gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 19bfec72a5..63b5c6d221 100644 --- a/.gitignore +++ b/.gitignore @@ -65,7 +65,8 @@ docs/_build/ # PyBuilder target/ -# virtualenv, pipenv +# virtualenv, pipenv, direnv +.envrc .env venv .venv From bff595cdb78b0c7089bd931b82461e831077df26 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 12 Aug 2022 21:57:44 +0530 Subject: [PATCH 08/25] Add vector search column & indexes, also GiST trigram index --- .../0138_contentnode_search_vector.py | 34 +++++++++++++++++++ contentcuration/contentcuration/models.py | 19 +++++++++++ contentcuration/contentcuration/settings.py | 1 + .../contentcuration/utils/publish.py | 7 +++- .../search/viewsets/contentnode.py | 21 +++--------- 5 files changed, 64 insertions(+), 18 deletions(-) create mode 100644 contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py diff --git a/contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py b/contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py new file mode 100644 index 0000000000..e3b24b8905 --- /dev/null +++ b/contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py @@ -0,0 +1,34 @@ +# Generated by Django 3.2.13 on 2022-08-10 19:20 +import django.contrib.postgres.indexes +import django.contrib.postgres.search +from django.contrib.postgres.operations import AddIndexConcurrently +from django.contrib.postgres.operations import TrigramExtension +from django.db import migrations + + +class Migration(migrations.Migration): + + atomic = False + + dependencies = [ + ('contentcuration', '0137_channelhistory'), + ] + + operations = [ + # Installs the pg_trgm module that comes pre-bundled with PostgreSQL 9.6. 
+ TrigramExtension(), + + migrations.AddField( + model_name='contentnode', + name='title_description_search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, null=True), + ), + AddIndexConcurrently( + model_name='contentnode', + index=django.contrib.postgres.indexes.GinIndex(fields=['title_description_search_vector'], name='node_search_vector_gin_idx'), + ), + AddIndexConcurrently( + model_name='contenttag', + index=django.contrib.postgres.indexes.GistIndex(fields=['tag_name'], name='contenttag_tag_name_gist_idx', opclasses=['gist_trgm_ops']), + ), + ] diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index f7fea1aee2..e72e05b012 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -13,6 +13,10 @@ from django.contrib.auth.base_user import AbstractBaseUser from django.contrib.auth.base_user import BaseUserManager from django.contrib.auth.models import PermissionsMixin +from django.contrib.postgres.indexes import GinIndex +from django.contrib.postgres.indexes import GistIndex +from django.contrib.postgres.search import SearchVector +from django.contrib.postgres.search import SearchVectorField from django.core.cache import cache from django.core.exceptions import MultipleObjectsReturned from django.core.exceptions import ObjectDoesNotExist @@ -1082,6 +1086,9 @@ def delete(self, *args, **kwargs): self.secret_token.delete() +CONTENT_TAG_NAME__INDEX_NAME = "contenttag_tag_name_gist_idx" + + class ContentTag(models.Model): id = UUIDField(primary_key=True, default=uuid.uuid4) tag_name = models.CharField(max_length=50) @@ -1093,6 +1100,7 @@ def __str__(self): class Meta: unique_together = ['tag_name', 'channel'] + indexes = [GistIndex(fields=["tag_name"], name=CONTENT_TAG_NAME__INDEX_NAME, opclasses=["gist_trgm_ops"])] def delegate_manager(method): @@ -1136,6 +1144,12 @@ def __str__(self): NODE_ID_INDEX_NAME = "node_id_idx" NODE_MODIFIED_INDEX_NAME = "node_modified_idx" NODE_MODIFIED_DESC_INDEX_NAME = "node_modified_desc_idx" +NODE_SEARCH_VECTOR_GIN_INDEX_NAME = "node_search_vector_gin_idx" + +# Ours postgres full text search configuration. +POSTGRES_FTS_CONFIG = "simple" +# Search vector to create tsvector of title and description concatenated. +POSTGRES_SEARCH_VECTOR = SearchVector("title", "description", config=POSTGRES_FTS_CONFIG) class ContentNode(MPTTModel, models.Model): @@ -1231,6 +1245,10 @@ class ContentNode(MPTTModel, models.Model): # this duration should be in seconds. suggested_duration = models.IntegerField(blank=True, null=True, help_text="Suggested duration for the content node (in seconds)") + # A field to store the ts_vector form of (title + ' ' + description). + # This significantly increases the search performance. 
+ title_description_search_vector = SearchVectorField(blank=True, null=True) + objects = CustomContentNodeTreeManager() # Track all updates and ignore a blacklist of attributes @@ -1830,6 +1848,7 @@ class Meta: indexes = [ models.Index(fields=["node_id"], name=NODE_ID_INDEX_NAME), models.Index(fields=["-modified"], name=NODE_MODIFIED_DESC_INDEX_NAME), + GinIndex(fields=["title_description_search_vector"], name=NODE_SEARCH_VECTOR_GIN_INDEX_NAME), ] diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py index ae637542ba..b79c3b641e 100644 --- a/contentcuration/contentcuration/settings.py +++ b/contentcuration/contentcuration/settings.py @@ -86,6 +86,7 @@ 'webpack_loader', 'django_filters', 'mathfilters', + 'django.contrib.postgres', ) SESSION_ENGINE = "django.contrib.sessions.backends.cached_db" diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index 8dfd5ef428..88b48be574 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -187,6 +187,10 @@ def queue_get_return_none_when_empty(): logging.debug("Mapping node with id {id}".format( id=node.pk)) + # Update tsvector for this node. + node.title_description_search_vector = ccmodels.POSTGRES_SEARCH_VECTOR + node.save(update_fields=["title_description_search_vector"]) + if node.get_descendants(include_self=True).exclude(kind_id=content_kinds.TOPIC).exists() and node.complete: children = (node.children.all()) node_queue.extend(children) @@ -428,7 +432,8 @@ def process_assessment_metadata(ccnode, kolibrinode): exercise_data_type = "" if exercise_data.get('mastery_model'): exercise_data_type = exercise_data.get('mastery_model') - if exercise_data.get('option') and exercise_data.get('option').get('completion_criteria') and exercise_data.get('option').get('completion_criteria').get('mastery_model'): + if exercise_data.get('option') and exercise_data.get('option').get('completion_criteria') \ + and exercise_data.get('option').get('completion_criteria').get('mastery_model'): exercise_data_type = exercise_data.get('option').get('completion_criteria').get('mastery_model') mastery_model = {'type': exercise_data_type or exercises.M_OF_N} diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 8fab7f4bbe..864897db25 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -1,5 +1,6 @@ import re +from django.contrib.postgres.search import SearchQuery from django.db.models import ExpressionWrapper from django.db.models import F from django.db.models import IntegerField @@ -16,6 +17,7 @@ from contentcuration.models import Channel from contentcuration.models import ContentNode from contentcuration.models import File +from contentcuration.models import POSTGRES_FTS_CONFIG from contentcuration.utils.pagination import CachedListPagination from contentcuration.viewsets.base import RequiredFilterSet from contentcuration.viewsets.base import ValuesViewset @@ -62,23 +64,8 @@ def filter_channel_list(self, queryset, name, value): return queryset.filter(channel_id__in=list(channel_ids)) def filter_keywords(self, queryset, name, value): - filter_query = Q(title__icontains=value) | Q(description__icontains=value) - - tags_node_ids = ContentNode.tags.through.objects.filter( - contenttag__tag_name__icontains=value - ).values_list("contentnode_id", flat=True) - - # Check if we have a 
Kolibri node id or ids and add them to the search if so. - # Add to, rather than replace, the filters so that we never misinterpret a search term as a UUID. - # node_ids = uuid_re.findall(value) + list(tags_node_ids) - node_ids = uuid_re.findall(value) - for node_id in node_ids: - # check for the major ID types - filter_query |= Q(node_id=node_id) - filter_query |= Q(content_id=node_id) - filter_query |= Q(id=node_id) - - return queryset.filter(Q(id__in=list(tags_node_ids)) | filter_query) + search_tsquery = SearchQuery(value=value, config=POSTGRES_FTS_CONFIG, search_type="plain") + return queryset.filter(title_description_search_vector=search_tsquery) def filter_author(self, queryset, name, value): return queryset.filter( From c0e55ee704ecf7d7ba997c2b6108233097489653 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 12 Aug 2022 22:21:46 +0530 Subject: [PATCH 09/25] Remove cyclic migration conflicts --- ...de_search_vector.py => 0141_contentnode_search_vector.py} | 2 +- contentcuration/search/viewsets/contentnode.py | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) rename contentcuration/contentcuration/migrations/{0138_contentnode_search_vector.py => 0141_contentnode_search_vector.py} (95%) diff --git a/contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py b/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py similarity index 95% rename from contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py rename to contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py index e3b24b8905..551f42d5b7 100644 --- a/contentcuration/contentcuration/migrations/0138_contentnode_search_vector.py +++ b/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py @@ -11,7 +11,7 @@ class Migration(migrations.Migration): atomic = False dependencies = [ - ('contentcuration', '0137_channelhistory'), + ('contentcuration', '0140_delete_task'), ] operations = [ diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 864897db25..e192a3ccb7 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -24,7 +24,6 @@ from contentcuration.viewsets.common import NotNullMapArrayAgg from contentcuration.viewsets.common import UUIDFilter from contentcuration.viewsets.common import UUIDInFilter -from contentcuration.viewsets.contentnode import get_title class ListPagination(CachedListPagination): @@ -143,10 +142,6 @@ class SearchContentNodeViewSet(ValuesViewset): "original_channel_name", ) - field_map = { - "title": get_title, - } - def get_queryset(self): queryset = super(SearchContentNodeViewSet, self).get_queryset() return ContentNode._annotate_channel_id(queryset) From 34e8436886516e92478dce5f364a2774ce63f113 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 12 Aug 2022 22:25:57 +0530 Subject: [PATCH 10/25] Fix wrong indentation happened due to merge conflict --- .../contentcuration/utils/publish.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index 213682b539..732d92d125 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -198,17 +198,17 @@ def queue_get_return_none_when_empty(): children = (node.children.all()) node_queue.extend(children) - kolibrinode = 
create_bare_contentnode(node, default_language, channel_id, channel_name) - - if node.kind.kind == content_kinds.EXERCISE: - exercise_data = process_assessment_metadata(node, kolibrinode) - if force_exercises or node.changed or not \ - node.files.filter(preset_id=format_presets.EXERCISE).exists(): - create_perseus_exercise(node, kolibrinode, exercise_data, user_id=user_id) - elif node.kind.kind == content_kinds.SLIDESHOW: - create_slideshow_manifest(node, kolibrinode, user_id=user_id) - create_associated_file_objects(kolibrinode, node) - map_tags_to_node(kolibrinode, node) + kolibrinode = create_bare_contentnode(node, default_language, channel_id, channel_name) + + if node.kind.kind == content_kinds.EXERCISE: + exercise_data = process_assessment_metadata(node, kolibrinode) + if force_exercises or node.changed or not \ + node.files.filter(preset_id=format_presets.EXERCISE).exists(): + create_perseus_exercise(node, kolibrinode, exercise_data, user_id=user_id) + elif node.kind.kind == content_kinds.SLIDESHOW: + create_slideshow_manifest(node, kolibrinode, user_id=user_id) + create_associated_file_objects(kolibrinode, node) + map_tags_to_node(kolibrinode, node) if progress_tracker: progress_tracker.increment(increment=percent_per_node) From e00c512f40718858c154e6e3b0584287a836cef8 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Sun, 14 Aug 2022 19:07:20 +0530 Subject: [PATCH 11/25] Add a command for setting tsvectors and fix tests --- .../management/commands/set_tsvectors.py | 71 ++++++++++++++++++ contentcuration/contentcuration/models.py | 12 +++ contentcuration/contentcuration/settings.py | 3 +- contentcuration/search/tests/test_search.py | 73 ++----------------- .../search/viewsets/contentnode.py | 15 +--- 5 files changed, 96 insertions(+), 78 deletions(-) create mode 100644 contentcuration/contentcuration/management/commands/set_tsvectors.py diff --git a/contentcuration/contentcuration/management/commands/set_tsvectors.py b/contentcuration/contentcuration/management/commands/set_tsvectors.py new file mode 100644 index 0000000000..6ed9819ff5 --- /dev/null +++ b/contentcuration/contentcuration/management/commands/set_tsvectors.py @@ -0,0 +1,71 @@ +""" +This command sets tsvector in title_description_search_vector field in batches. +The batches are created on the basis of channel_id. This enables resumption. Also helps +in cases of failure or memory overflow. +""" +import logging as logmodule + +from django.core.cache import cache +from django.core.management.base import BaseCommand + +from contentcuration.models import Channel +from contentcuration.models import ContentNode +from contentcuration.models import POSTGRES_SEARCH_VECTOR + + +logmodule.basicConfig(level=logmodule.INFO) +logging = logmodule.getLogger(__name__) + + +UPDATED_TS_VECTORS_CACHE_KEY = "tsvectors_updated_for_channel_ids" +UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY = "tsvectors_updated_for_null_channels" + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "--public", + action="store_true", + help="Set tsvector for only the public channel nodes instead of all nodes.", + ) + parser.add_argument( + "--no-cache", + action="store_true", + help="Disables the cache. 
This updates all previously updated nodes.", + ) + + def handle(self, *args, **options): + if options["no_cache"]: + updated_channel_ids = [] + do_update_nodes_with_null_channel_id = True + else: + updated_channel_ids = [] if cache.get(UPDATED_TS_VECTORS_CACHE_KEY) is None else cache.get(UPDATED_TS_VECTORS_CACHE_KEY) + do_update_nodes_with_null_channel_id = not cache.get(UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY) + + if options["public"]: + to_update_channel_ids = list(Channel.get_public_channels().exclude(id__in=updated_channel_ids).values_list("id", flat=True)) + do_update_nodes_with_null_channel_id = False + logging.info("Started setting tsvector for public channel nodes.") + else: + to_update_channel_ids = list(Channel.objects.exclude(id__in=updated_channel_ids).values_list("id", flat=True)) + logging.info("Started setting tsvector for all nodes.") + + annotated_contentnode_qs = ContentNode._annotate_channel_id(ContentNode.objects) + + for channel_id in to_update_channel_ids: + logging.info("Setting tsvector for nodes of channel {}.".format(channel_id)) + annotated_contentnode_qs.filter(channel_id=channel_id).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR) + updated_channel_ids.append(channel_id) + cache.set(UPDATED_TS_VECTORS_CACHE_KEY, updated_channel_ids, None) + logging.info("Finished setting tsvector for nodes of channel {}.".format(channel_id)) + + if do_update_nodes_with_null_channel_id: + logging.info("Setting tsvector for nodes with NULL channel_id.") + annotated_contentnode_qs.filter(channel_id__isnull=True).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR) + cache.set(UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY, True, None) + logging.info("Finished setting tsvector for nodes with NULL channel_id.") + + if options["public"]: + logging.info("Finished setting tsvector for public channel nodes.") + else: + logging.info("Finished setting tsvector for all nodes.") diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index cebbfea959..8b1c331097 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -14,6 +14,7 @@ from django.contrib.auth.models import PermissionsMixin from django.contrib.postgres.indexes import GinIndex from django.contrib.postgres.indexes import GistIndex +from django.contrib.postgres.search import SearchQuery from django.contrib.postgres.search import SearchVector from django.contrib.postgres.search import SearchVectorField from django.contrib.sessions.models import Session @@ -1356,6 +1357,12 @@ def filter_view_queryset(cls, queryset, user): | Q(public=True) ) + @classmethod + def search(self, queryset, search_term): + search_query = Q(title_description_search_vector=SearchQuery(value=search_term, config=POSTGRES_FTS_CONFIG, search_type="plain")) + tags_query = Q(tags__tag_name__icontains=search_term) + return queryset.filter(search_query | tags_query) + @raise_if_unsaved def get_root(self): # Only topics can be root nodes @@ -1839,8 +1846,10 @@ def set_default_learning_activity(self): def save(self, skip_lock=False, *args, **kwargs): if self._state.adding: + is_create = True self.on_create() else: + is_create = False self.on_update() # Logic borrowed from mptt - do a simple check to see if we have changed @@ -1884,6 +1893,9 @@ def save(self, skip_lock=False, *args, **kwargs): if changed_ids: ContentNode.objects.filter(id__in=changed_ids).update(changed=True) + if is_create: + 
ContentNode.filter_by_pk(pk=self.id).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR) + # Copied from MPTT save.alters_data = True diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py index 92ed582a57..1e3cdb1e2c 100644 --- a/contentcuration/contentcuration/settings.py +++ b/contentcuration/contentcuration/settings.py @@ -221,7 +221,8 @@ } IS_CONTENTNODE_TABLE_PARTITIONED = os.getenv("IS_CONTENTNODE_TABLE_PARTITIONED") or False - +TSVECTOR_SET_FOR_ALL_PUBLIC_CHANNEL_NODES = os.getenv("TSVECTOR_SET_FOR_ALL_PUBLIC_CHANNEL_NODES") or False +TSVECTOR_SET_FOR_ALL_NODES = os.getenv("TSVECTOR_SET_FOR_ALL_NODES") or False DATABASE_ROUTERS = [ "kolibri_content.router.ContentDBRouter", diff --git a/contentcuration/search/tests/test_search.py b/contentcuration/search/tests/test_search.py index bcb3b2aa99..6b3d34cc41 100644 --- a/contentcuration/search/tests/test_search.py +++ b/contentcuration/search/tests/test_search.py @@ -31,67 +31,14 @@ def test_filter_channels_by_edit(self): self.assertEqual(response.status_code, 200, response.content) self.assertNotEqual(response.data["results"], []) - def test_search_result(self): - # Create two users - user_a = testdata.user(email="a@a.com") - user_b = testdata.user(email="b@b.com") - - # Create two channels with two editors - test_tree_data = { - "node_id": "00000000000000000000000000000000", - "title": "Root topic node", - "kind_id": "topic", - "children": [ - { - "node_id": "00000000000000000000000000000001", - "title": "Kolibri video", - "kind_id": "video", - }, - ] - } - - channel_a = testdata.channel(name="user_a_channel", main_tree_data=test_tree_data) - channel_a.editors.add(user_a) - - channel_b = testdata.channel(name="user_b_channel", create_main_tree=False) - channel_b.editors.add(user_b) - - # Publish channel_a - channel_a.main_tree.publishing = False - channel_a.main_tree.changed = False - channel_a.main_tree.published = True - channel_a.main_tree.save() - channel_a.public = True - channel_a.save() - - # Import resources from channel_a to channel_b - channel_a.main_tree.copy_to(channel_b.main_tree, batch_size=1) - channel_b.main_tree.refresh_from_db() - - # Send request from user_b to the search endpoint - self.client.force_authenticate(user=user_b) - response = self.client.get( - reverse("search-list"), - data={ - "channel_list": "public", - "keywords": "video" - }, - format="json", - ) - - # Assert whether the location_ids are of accessible nodes or not - kolibri_video_node = channel_b.main_tree.get_descendants().filter(title="Kolibri video").first() - - # The ids in location_ids should be of channel_b's ContentNode only - self.assertEqual(len(response.data["results"][0]["location_ids"]), 1) - self.assertEqual(response.data["results"][0]["location_ids"][0], kolibri_video_node.cloned_source_id) - - def test_channel_list_filter_and_location_ids(self): + def test_search(self): users = [] channels = [] + for i in range(4): user = testdata.user(email="a{}@a.com".format(i)) users.append(user) + channel = Channel.objects.create(name="user_a{}_channel".format(i)) channel.save() channels.append(channel) @@ -99,6 +46,7 @@ def test_channel_list_filter_and_location_ids(self): public_channel, editable_channel, viewable_channel, inaccessible_channel = channels + # Create public video node and publish it. 
public_video_node = testdata.node({ "title": "Kolibri video", "kind_id": "video", @@ -108,13 +56,14 @@ def test_channel_list_filter_and_location_ids(self): public_video_node.changed = False public_video_node.save() + # Publish the public_channel. public_channel.main_tree.published = True public_channel.main_tree.changed = False public_channel.main_tree.save() - public_channel.public = True public_channel.save() + # Set user_b viewable channel. user_b = users[1] viewable_channel.viewers.add(user_b) @@ -123,6 +72,7 @@ def test_channel_list_filter_and_location_ids(self): public_video_node.copy_to(target=viewable_channel.main_tree) public_video_node.copy_to(target=inaccessible_channel.main_tree) + # Get different nodes based on access. editable_channel.main_tree.refresh_from_db() editable_video_node = editable_channel.main_tree.get_descendants().first() viewable_channel.main_tree.refresh_from_db() @@ -130,7 +80,7 @@ def test_channel_list_filter_and_location_ids(self): inaccessible_channel.main_tree.refresh_from_db() inaccessible_video_node = inaccessible_channel.main_tree.get_descendants().first() - # Send request from user_b to the search endpoint + # Send request from user_b to the search endpoint. self.client.force_authenticate(user=user_b) for channel_list in ("public", "edit", "view"): @@ -152,10 +102,3 @@ def test_channel_list_filter_and_location_ids(self): self.assertEqual(result["id"], editable_video_node.id) elif channel_list == "view": self.assertEqual(result["id"], viewable_video_node.id) - - location_ids = result["location_ids"] - self.assertEqual(len(location_ids), 3) - self.assertIn(editable_video_node.id, location_ids) - self.assertIn(viewable_video_node.id, location_ids) - self.assertIn(public_video_node.id, location_ids) - self.assertNotIn(inaccessible_video_node.id, location_ids) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index e192a3ccb7..df5cf831cb 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -1,6 +1,5 @@ import re -from django.contrib.postgres.search import SearchQuery from django.db.models import ExpressionWrapper from django.db.models import F from django.db.models import IntegerField @@ -17,7 +16,6 @@ from contentcuration.models import Channel from contentcuration.models import ContentNode from contentcuration.models import File -from contentcuration.models import POSTGRES_FTS_CONFIG from contentcuration.utils.pagination import CachedListPagination from contentcuration.viewsets.base import RequiredFilterSet from contentcuration.viewsets.base import ValuesViewset @@ -32,9 +30,6 @@ class ListPagination(CachedListPagination): max_page_size = 100 -uuid_re = re.compile("([a-f0-9]{32})") - - class ContentNodeFilter(RequiredFilterSet): keywords = CharFilter(method="filter_keywords") languages = CharFilter(method="filter_languages") @@ -63,8 +58,7 @@ def filter_channel_list(self, queryset, name, value): return queryset.filter(channel_id__in=list(channel_ids)) def filter_keywords(self, queryset, name, value): - search_tsquery = SearchQuery(value=value, config=POSTGRES_FTS_CONFIG, search_type="plain") - return queryset.filter(title_description_search_vector=search_tsquery) + return ContentNode.search(queryset=queryset, search_term=value) def filter_author(self, queryset, name, value): return queryset.filter( @@ -115,7 +109,6 @@ class Meta: class SearchContentNodeViewSet(ValuesViewset): - queryset = ContentNode.objects.all() filterset_class = 
ContentNodeFilter pagination_class = ListPagination permission_classes = [IsAuthenticated] @@ -143,13 +136,11 @@ class SearchContentNodeViewSet(ValuesViewset): ) def get_queryset(self): - queryset = super(SearchContentNodeViewSet, self).get_queryset() - return ContentNode._annotate_channel_id(queryset) + return ContentNode._annotate_channel_id(ContentNode.objects) def annotate_queryset(self, queryset): """ - 1. Do a distinct by 'content_id,' using the original node if possible - 2. Annotate lists of content node and channel pks + Annotates thumbnails, resources count and channel name. """ thumbnails = File.objects.filter( contentnode=OuterRef("id"), preset__thumbnail=True From f3280d94432d8d54fdb1070b286b6b35ad849cf5 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Sun, 14 Aug 2022 20:43:13 +0530 Subject: [PATCH 12/25] Remove grade_level default to pass failing tests --- contentcuration/contentcuration/settings.py | 2 -- contentcuration/contentcuration/tests/testdata.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py index 1e3cdb1e2c..a8e5e8678b 100644 --- a/contentcuration/contentcuration/settings.py +++ b/contentcuration/contentcuration/settings.py @@ -221,8 +221,6 @@ } IS_CONTENTNODE_TABLE_PARTITIONED = os.getenv("IS_CONTENTNODE_TABLE_PARTITIONED") or False -TSVECTOR_SET_FOR_ALL_PUBLIC_CHANNEL_NODES = os.getenv("TSVECTOR_SET_FOR_ALL_PUBLIC_CHANNEL_NODES") or False -TSVECTOR_SET_FOR_ALL_NODES = os.getenv("TSVECTOR_SET_FOR_ALL_NODES") or False DATABASE_ROUTERS = [ "kolibri_content.router.ContentDBRouter", diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py index b857dfb513..5560e853a5 100644 --- a/contentcuration/contentcuration/tests/testdata.py +++ b/contentcuration/contentcuration/tests/testdata.py @@ -145,7 +145,6 @@ def node(data, parent=None): sort_order=data.get('sort_order', 1), complete=True, extra_fields=data.get('extra_fields'), - grade_levels="{}", ) new_node.save() video_file = fileobj_video(contents=b"Video File") @@ -172,7 +171,6 @@ def node(data, parent=None): content_id=data.get('content_id') or data['node_id'], sort_order=data.get('sort_order', 1), complete=True, - grade_levels="{}", ) new_node.save() From 3ff0edd040d1ba5a2ab0f958b4e173152821c85a Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 8 Sep 2022 13:02:32 +0530 Subject: [PATCH 13/25] Full text search models and data migrations --- .../contentcuration/debug_panel_settings.py | 5 +- .../management/commands/set_tsvectors.py | 71 ------------------- .../0141_contentnode_search_vector.py | 34 --------- contentcuration/contentcuration/models.py | 27 ------- .../contentcuration/viewsets/contentnode.py | 3 +- contentcuration/search/admin.py | 1 - contentcuration/search/management/__init__.py | 0 .../search/management/commands/__init__.py | 0 .../commands/set_channel_tsvectors.py | 57 +++++++++++++++ .../commands/set_contentnode_tsvectors.py | 60 ++++++++++++++++ .../search/migrations/0003_fulltextsearch.py | 54 ++++++++++++++ contentcuration/search/models.py | 53 ++++++++++++++ 12 files changed, 228 insertions(+), 137 deletions(-) delete mode 100644 contentcuration/contentcuration/management/commands/set_tsvectors.py delete mode 100644 contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py delete mode 100644 contentcuration/search/admin.py create mode 100644 contentcuration/search/management/__init__.py create mode 100644 
contentcuration/search/management/commands/__init__.py create mode 100644 contentcuration/search/management/commands/set_channel_tsvectors.py create mode 100644 contentcuration/search/management/commands/set_contentnode_tsvectors.py create mode 100644 contentcuration/search/migrations/0003_fulltextsearch.py diff --git a/contentcuration/contentcuration/debug_panel_settings.py b/contentcuration/contentcuration/debug_panel_settings.py index 5dc3c6783f..c097acbbc6 100644 --- a/contentcuration/contentcuration/debug_panel_settings.py +++ b/contentcuration/contentcuration/debug_panel_settings.py @@ -4,9 +4,10 @@ EXCLUDED_DEBUG_URLS = [ "/content/storage", - # Disabling task API because as soon as the task API gets polled + # Disabling sync API because as soon as the sync API gets polled # the current request data gets overwritten. - "/api/task", + # Can be removed after websockets deployment. + "/api/sync", ] DEBUG_PANEL_ACTIVE = True diff --git a/contentcuration/contentcuration/management/commands/set_tsvectors.py b/contentcuration/contentcuration/management/commands/set_tsvectors.py deleted file mode 100644 index 6ed9819ff5..0000000000 --- a/contentcuration/contentcuration/management/commands/set_tsvectors.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -This command sets tsvector in title_description_search_vector field in batches. -The batches are created on the basis of channel_id. This enables resumption. Also helps -in cases of failure or memory overflow. -""" -import logging as logmodule - -from django.core.cache import cache -from django.core.management.base import BaseCommand - -from contentcuration.models import Channel -from contentcuration.models import ContentNode -from contentcuration.models import POSTGRES_SEARCH_VECTOR - - -logmodule.basicConfig(level=logmodule.INFO) -logging = logmodule.getLogger(__name__) - - -UPDATED_TS_VECTORS_CACHE_KEY = "tsvectors_updated_for_channel_ids" -UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY = "tsvectors_updated_for_null_channels" - - -class Command(BaseCommand): - def add_arguments(self, parser): - parser.add_argument( - "--public", - action="store_true", - help="Set tsvector for only the public channel nodes instead of all nodes.", - ) - parser.add_argument( - "--no-cache", - action="store_true", - help="Disables the cache. 
This updates all previously updated nodes.", - ) - - def handle(self, *args, **options): - if options["no_cache"]: - updated_channel_ids = [] - do_update_nodes_with_null_channel_id = True - else: - updated_channel_ids = [] if cache.get(UPDATED_TS_VECTORS_CACHE_KEY) is None else cache.get(UPDATED_TS_VECTORS_CACHE_KEY) - do_update_nodes_with_null_channel_id = not cache.get(UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY) - - if options["public"]: - to_update_channel_ids = list(Channel.get_public_channels().exclude(id__in=updated_channel_ids).values_list("id", flat=True)) - do_update_nodes_with_null_channel_id = False - logging.info("Started setting tsvector for public channel nodes.") - else: - to_update_channel_ids = list(Channel.objects.exclude(id__in=updated_channel_ids).values_list("id", flat=True)) - logging.info("Started setting tsvector for all nodes.") - - annotated_contentnode_qs = ContentNode._annotate_channel_id(ContentNode.objects) - - for channel_id in to_update_channel_ids: - logging.info("Setting tsvector for nodes of channel {}.".format(channel_id)) - annotated_contentnode_qs.filter(channel_id=channel_id).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR) - updated_channel_ids.append(channel_id) - cache.set(UPDATED_TS_VECTORS_CACHE_KEY, updated_channel_ids, None) - logging.info("Finished setting tsvector for nodes of channel {}.".format(channel_id)) - - if do_update_nodes_with_null_channel_id: - logging.info("Setting tsvector for nodes with NULL channel_id.") - annotated_contentnode_qs.filter(channel_id__isnull=True).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR) - cache.set(UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY, True, None) - logging.info("Finished setting tsvector for nodes with NULL channel_id.") - - if options["public"]: - logging.info("Finished setting tsvector for public channel nodes.") - else: - logging.info("Finished setting tsvector for all nodes.") diff --git a/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py b/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py deleted file mode 100644 index 551f42d5b7..0000000000 --- a/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py +++ /dev/null @@ -1,34 +0,0 @@ -# Generated by Django 3.2.13 on 2022-08-10 19:20 -import django.contrib.postgres.indexes -import django.contrib.postgres.search -from django.contrib.postgres.operations import AddIndexConcurrently -from django.contrib.postgres.operations import TrigramExtension -from django.db import migrations - - -class Migration(migrations.Migration): - - atomic = False - - dependencies = [ - ('contentcuration', '0140_delete_task'), - ] - - operations = [ - # Installs the pg_trgm module that comes pre-bundled with PostgreSQL 9.6. 
- TrigramExtension(), - - migrations.AddField( - model_name='contentnode', - name='title_description_search_vector', - field=django.contrib.postgres.search.SearchVectorField(blank=True, null=True), - ), - AddIndexConcurrently( - model_name='contentnode', - index=django.contrib.postgres.indexes.GinIndex(fields=['title_description_search_vector'], name='node_search_vector_gin_idx'), - ), - AddIndexConcurrently( - model_name='contenttag', - index=django.contrib.postgres.indexes.GistIndex(fields=['tag_name'], name='contenttag_tag_name_gist_idx', opclasses=['gist_trgm_ops']), - ), - ] diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 8b1c331097..a2a4af786d 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -12,11 +12,6 @@ from django.contrib.auth.base_user import AbstractBaseUser from django.contrib.auth.base_user import BaseUserManager from django.contrib.auth.models import PermissionsMixin -from django.contrib.postgres.indexes import GinIndex -from django.contrib.postgres.indexes import GistIndex -from django.contrib.postgres.search import SearchQuery -from django.contrib.postgres.search import SearchVector -from django.contrib.postgres.search import SearchVectorField from django.contrib.sessions.models import Session from django.core.cache import cache from django.core.exceptions import MultipleObjectsReturned @@ -1111,7 +1106,6 @@ def __str__(self): class Meta: unique_together = ['tag_name', 'channel'] - indexes = [GistIndex(fields=["tag_name"], name=CONTENT_TAG_NAME__INDEX_NAME, opclasses=["gist_trgm_ops"])] def delegate_manager(method): @@ -1156,11 +1150,6 @@ def __str__(self): NODE_MODIFIED_INDEX_NAME = "node_modified_idx" NODE_MODIFIED_DESC_INDEX_NAME = "node_modified_desc_idx" NODE_SEARCH_VECTOR_GIN_INDEX_NAME = "node_search_vector_gin_idx" - -# Ours postgres full text search configuration. -POSTGRES_FTS_CONFIG = "simple" -# Search vector to create tsvector of title and description concatenated. -POSTGRES_SEARCH_VECTOR = SearchVector("title", "description", config=POSTGRES_FTS_CONFIG) CONTENTNODE_TREE_ID_CACHE_KEY = "contentnode_{pk}__tree_id" @@ -1257,10 +1246,6 @@ class ContentNode(MPTTModel, models.Model): # this duration should be in seconds. suggested_duration = models.IntegerField(blank=True, null=True, help_text="Suggested duration for the content node (in seconds)") - # A field to store the ts_vector form of (title + ' ' + description). - # This significantly increases the search performance. 
- title_description_search_vector = SearchVectorField(blank=True, null=True) - objects = CustomContentNodeTreeManager() # Track all updates and ignore a blacklist of attributes @@ -1357,12 +1342,6 @@ def filter_view_queryset(cls, queryset, user): | Q(public=True) ) - @classmethod - def search(self, queryset, search_term): - search_query = Q(title_description_search_vector=SearchQuery(value=search_term, config=POSTGRES_FTS_CONFIG, search_type="plain")) - tags_query = Q(tags__tag_name__icontains=search_term) - return queryset.filter(search_query | tags_query) - @raise_if_unsaved def get_root(self): # Only topics can be root nodes @@ -1846,10 +1825,8 @@ def set_default_learning_activity(self): def save(self, skip_lock=False, *args, **kwargs): if self._state.adding: - is_create = True self.on_create() else: - is_create = False self.on_update() # Logic borrowed from mptt - do a simple check to see if we have changed @@ -1893,9 +1870,6 @@ def save(self, skip_lock=False, *args, **kwargs): if changed_ids: ContentNode.objects.filter(id__in=changed_ids).update(changed=True) - if is_create: - ContentNode.filter_by_pk(pk=self.id).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR) - # Copied from MPTT save.alters_data = True @@ -1938,7 +1912,6 @@ class Meta: indexes = [ models.Index(fields=["node_id"], name=NODE_ID_INDEX_NAME), models.Index(fields=["-modified"], name=NODE_MODIFIED_DESC_INDEX_NAME), - GinIndex(fields=["title_description_search_vector"], name=NODE_SEARCH_VECTOR_GIN_INDEX_NAME), ] diff --git a/contentcuration/contentcuration/viewsets/contentnode.py b/contentcuration/contentcuration/viewsets/contentnode.py index dd942b161e..cc6ad0727f 100644 --- a/contentcuration/contentcuration/viewsets/contentnode.py +++ b/contentcuration/contentcuration/viewsets/contentnode.py @@ -633,8 +633,7 @@ def delete_from_changes(self, changes): def dict_if_none(obj, field_name=None): - value = obj.get(field_name) - return value if value else {} + return obj[field_name] if obj[field_name] else {} # Apply mixin first to override ValuesViewset diff --git a/contentcuration/search/admin.py b/contentcuration/search/admin.py deleted file mode 100644 index 846f6b4061..0000000000 --- a/contentcuration/search/admin.py +++ /dev/null @@ -1 +0,0 @@ -# Register your models here. diff --git a/contentcuration/search/management/__init__.py b/contentcuration/search/management/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contentcuration/search/management/commands/__init__.py b/contentcuration/search/management/commands/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contentcuration/search/management/commands/set_channel_tsvectors.py b/contentcuration/search/management/commands/set_channel_tsvectors.py new file mode 100644 index 0000000000..4d27927f53 --- /dev/null +++ b/contentcuration/search/management/commands/set_channel_tsvectors.py @@ -0,0 +1,57 @@ +""" +This command inserts in bulk channel tsvectors to the ChannelFullTextSearch table. 
+""" +import logging as logmodule +import time + +from django.core.management.base import BaseCommand +from django.db.models import Exists +from django.db.models import OuterRef +from search.models import CHANNEL_KEYWORDS_TSVECTOR +from search.models import ChannelFullTextSearch + +from contentcuration.models import Channel +from contentcuration.viewsets.channel import primary_token_subquery + + +logmodule.basicConfig(level=logmodule.INFO) +logging = logmodule.getLogger("command") + +CHUNKSIZE = 5000 + + +class Command(BaseCommand): + + def handle(self, *args, **options): + start = time.time() + + channel_not_already_inserted_query = ~Exists(ChannelFullTextSearch.objects.filter(channel_id=OuterRef("id"))) + + channel_query = (Channel.objects.select_related("main_tree") + .filter(channel_not_already_inserted_query, deleted=False, main_tree__published=True) + .annotate(primary_channel_token=primary_token_subquery, + keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) + .values("id", "keywords_tsvector")) + + insertable_channels = list(channel_query[:CHUNKSIZE]) + total_channel_tsvectors_inserted = 0 + + while insertable_channels: + logging.info("Inserting channel tsvectors.") + + insert_objs = list() + for channel in insertable_channels: + obj = ChannelFullTextSearch(channel_id=channel["id"], keywords_tsvector=channel["keywords_tsvector"]) + insert_objs.append(obj) + + inserted_objs_list = ChannelFullTextSearch.objects.bulk_create(insert_objs) + + current_inserts_count = len(inserted_objs_list) + total_channel_tsvectors_inserted = total_channel_tsvectors_inserted + current_inserts_count + + logging.info("Inserted {} channel tsvectors.".format(current_inserts_count)) + + insertable_channels = list(channel_query[:CHUNKSIZE]) + + logging.info("Completed! successfully inserted total of {} channel tsvectors in {} seconds.".format( + total_channel_tsvectors_inserted, time.time() - start)) diff --git a/contentcuration/search/management/commands/set_contentnode_tsvectors.py b/contentcuration/search/management/commands/set_contentnode_tsvectors.py new file mode 100644 index 0000000000..58cf9350e8 --- /dev/null +++ b/contentcuration/search/management/commands/set_contentnode_tsvectors.py @@ -0,0 +1,60 @@ +""" +This command inserts in bulk contentnode tsvectors to the ContentNodeFullTextSearch table. 
+""" +import logging as logmodule +import time + +from django.contrib.postgres.aggregates import StringAgg +from django.core.management.base import BaseCommand +from django.db.models import Exists +from django.db.models import OuterRef +from search.models import CONTENTNODE_AUTHOR_TSVECTOR +from search.models import CONTENTNODE_KEYWORDS_TSVECTOR +from search.models import ContentNodeFullTextSearch + +from contentcuration.models import ContentNode + + +logmodule.basicConfig(level=logmodule.INFO) +logging = logmodule.getLogger("command") + +CHUNKSIZE = 10000 + + +class Command(BaseCommand): + + def handle(self, *args, **options): + start = time.time() + + tsvector_not_already_inserted_query = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"))) + + tsvector_node_query = (ContentNode._annotate_channel_id(ContentNode.objects) + .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), + keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, + author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) + .filter(tsvector_not_already_inserted_query, published=True) + .values("id", "channel_id", "keywords_tsvector", "author_tsvector").order_by()) + + insertable_nodes_tsvector = list(tsvector_node_query[:CHUNKSIZE]) + total_tsvectors_inserted = 0 + + while insertable_nodes_tsvector: + logging.info("Inserting contentnode tsvectors.") + + insert_objs = list() + for node in insertable_nodes_tsvector: + if node["channel_id"]: + obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], + keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) + insert_objs.append(obj) + + inserted_objs_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs) + + current_inserts_count = len(inserted_objs_list) + total_tsvectors_inserted = total_tsvectors_inserted + current_inserts_count + + logging.info("Inserted {} contentnode tsvectors.".format(current_inserts_count)) + + insertable_nodes_tsvector = list(tsvector_node_query[:CHUNKSIZE]) + + logging.info("Completed! 
Successfully inserted total of {} contentnode tsvectors in {} seconds.".format(total_tsvectors_inserted, time.time() - start)) diff --git a/contentcuration/search/migrations/0003_fulltextsearch.py b/contentcuration/search/migrations/0003_fulltextsearch.py new file mode 100644 index 0000000000..2885a4655f --- /dev/null +++ b/contentcuration/search/migrations/0003_fulltextsearch.py @@ -0,0 +1,54 @@ +# Generated by Django 3.2.14 on 2022-09-08 07:19 +import uuid + +import django.contrib.postgres.indexes +import django.contrib.postgres.search +import django.db.models.deletion +from django.contrib.postgres.operations import AddIndexConcurrently +from django.db import migrations +from django.db import models + +import contentcuration.models + + +class Migration(migrations.Migration): + + atomic = False + + dependencies = [ + ('contentcuration', '0140_delete_task'), + ('search', '0002_auto_20201215_2110'), + ] + + operations = [ + migrations.CreateModel( + name='ContentNodeFullTextSearch', + fields=[ + ('id', contentcuration.models.UUIDField(default=uuid.uuid4, max_length=32, primary_key=True, serialize=False)), + ('keywords_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), + ('author_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), + ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='channel_nodes_fts', to='contentcuration.channel')), + ('contentnode', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='node_fts', to='contentcuration.contentnode')), + ], + ), + migrations.CreateModel( + name='ChannelFullTextSearch', + fields=[ + ('id', contentcuration.models.UUIDField(default=uuid.uuid4, max_length=32, primary_key=True, serialize=False)), + ('keywords_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), + ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='channel_fts', to='contentcuration.channel')), + ], + ), + AddIndexConcurrently( + model_name='contentnodefulltextsearch', + index=django.contrib.postgres.indexes.GinIndex(fields=['keywords_tsvector'], name='node_keywords_tsv__gin_idx'), + ), + AddIndexConcurrently( + model_name='contentnodefulltextsearch', + index=django.contrib.postgres.indexes.GinIndex(fields=['author_tsvector'], name='node_author_tsv__gin_idx'), + ), + AddIndexConcurrently( + model_name='channelfulltextsearch', + index=django.contrib.postgres.indexes.GinIndex(fields=['keywords_tsvector'], name='channel_keywords_tsv__gin_idx'), + ), + ] diff --git a/contentcuration/search/models.py b/contentcuration/search/models.py index e1e550576b..64b0472c8a 100644 --- a/contentcuration/search/models.py +++ b/contentcuration/search/models.py @@ -1,8 +1,15 @@ import uuid from django.conf import settings +from django.contrib.postgres.indexes import GinIndex +from django.contrib.postgres.search import SearchVector +from django.contrib.postgres.search import SearchVectorField from django.db import models +from contentcuration.models import Channel +from contentcuration.models import ContentNode +from contentcuration.models import UUIDField as StudioUUIDField + class SavedSearch(models.Model): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) @@ -13,3 +20,49 @@ class SavedSearch(models.Model): saved_by = models.ForeignKey( settings.AUTH_USER_MODEL, related_name="searches", on_delete=models.CASCADE ) + + +POSTGRES_FTS_CONFIG = "simple" + 
+CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS = ("id", "channel_id", "node_id", "content_id", "tree_id", "title", "description", "contentnode_tags") +CONTENTNODE_KEYWORDS_TSVECTOR = SearchVector(*CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +CONTENTNODE_AUTHOR_TSVECTOR_FIELDS = ("author", "aggregator", "provider") +CONTENTNODE_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +CHANNEL_KEYWORDS_TSVECTOR_FIELDS = ("id", "main_tree__tree_id", "name", "description", "tagline", "primary_channel_token") +CHANNEL_KEYWORDS_TSVECTOR = SearchVector(*CHANNEL_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + + +class ContentNodeFullTextSearch(models.Model): + id = StudioUUIDField(primary_key=True, default=uuid.uuid4) + + # The contentnode that this record points to. + contentnode = models.ForeignKey(ContentNode, on_delete=models.CASCADE, related_name="node_fts") + + # The channel to which the contentnode belongs. Channel cannot be NULL because we only allow + # searches to be made inside channels. + channel = models.ForeignKey(Channel, on_delete=models.CASCADE, related_name="channel_nodes_fts") + + # This stores the keywords as tsvector. + keywords_tsvector = SearchVectorField(null=True, blank=True) + + # This stores the author as tsvector. + author_tsvector = SearchVectorField(null=True, blank=True) + + class Meta: + indexes = [GinIndex(fields=["keywords_tsvector"], name="node_keywords_tsv__gin_idx"), + GinIndex(fields=["author_tsvector"], name="node_author_tsv__gin_idx")] + + +class ChannelFullTextSearch(models.Model): + id = StudioUUIDField(primary_key=True, default=uuid.uuid4) + + # The channel to which this record points. + channel = models.ForeignKey(Channel, on_delete=models.CASCADE, related_name="channel_fts") + + # This stores the channel keywords as tsvector for super fast searches. + keywords_tsvector = SearchVectorField(null=True, blank=True) + + class Meta: + indexes = [GinIndex(fields=["keywords_tsvector"], name="channel_keywords_tsv__gin_idx")] From 974b69e8edae9d039edce8b603ea8f5001ce70e0 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 8 Sep 2022 13:18:32 +0530 Subject: [PATCH 14/25] Resolve conflicts and remove old index refs --- contentcuration/contentcuration/models.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index a2a4af786d..33ef23355a 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -1092,9 +1092,6 @@ def delete(self, *args, **kwargs): self.secret_token.delete() -CONTENT_TAG_NAME__INDEX_NAME = "contenttag_tag_name_gist_idx" - - class ContentTag(models.Model): id = UUIDField(primary_key=True, default=uuid.uuid4) tag_name = models.CharField(max_length=50) @@ -1149,7 +1146,6 @@ def __str__(self): NODE_ID_INDEX_NAME = "node_id_idx" NODE_MODIFIED_INDEX_NAME = "node_modified_idx" NODE_MODIFIED_DESC_INDEX_NAME = "node_modified_desc_idx" -NODE_SEARCH_VECTOR_GIN_INDEX_NAME = "node_search_vector_gin_idx" CONTENTNODE_TREE_ID_CACHE_KEY = "contentnode_{pk}__tree_id" From e15b015f17f11b703f98fcf667b2464e1aaf0576 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Sat, 10 Sep 2022 15:25:19 +0530 Subject: [PATCH 15/25] feat: full text search! 
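
Full text search is backed by two new tables, ChannelFullTextSearch and
ContentNodeFullTextSearch, whose tsvector columns are matched against a
SearchQuery built with the language-agnostic "simple" config. Roughly, a node
keyword lookup is expected to resolve as in the sketch below (the
search_nodes() helper is illustrative only and is not part of this patch):

    from django.contrib.postgres.search import SearchQuery

    from search.models import ContentNodeFullTextSearch

    POSTGRES_FTS_CONFIG = "simple"  # language-agnostic FTS config used by this patch

    def search_nodes(keywords, channel_ids):
        # Comparing a SearchVectorField to a SearchQuery compiles to tsvector @@ tsquery.
        query = SearchQuery(value=keywords, config=POSTGRES_FTS_CONFIG)
        return (ContentNodeFullTextSearch.objects
                .filter(channel_id__in=channel_ids)
                .filter(keywords_tsvector=query)
                .select_related("contentnode"))

Channel keyword filtering follows the same pattern against
ChannelFullTextSearch.keywords_tsvector.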
--- .../contentcuration/utils/publish.py | 75 +++++++++++ .../contentcuration/viewsets/channel.py | 26 ++-- contentcuration/search/constants.py | 23 ++++ .../commands/set_channel_tsvectors.py | 2 +- .../commands/set_contentnode_tsvectors.py | 4 +- contentcuration/search/models.py | 13 -- contentcuration/search/serializers.py | 18 --- contentcuration/search/utils.py | 9 ++ .../search/viewsets/contentnode.py | 123 ++++++++++-------- 9 files changed, 185 insertions(+), 108 deletions(-) create mode 100644 contentcuration/search/constants.py delete mode 100644 contentcuration/search/serializers.py create mode 100644 contentcuration/search/utils.py diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index 66e8b37e49..afe4abdef4 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -14,13 +14,17 @@ from itertools import chain from django.conf import settings +from django.contrib.postgres.aggregates import StringAgg from django.core.files import File from django.core.files.storage import default_storage as storage from django.core.management import call_command from django.db.models import Count +from django.db.models import Exists from django.db.models import Max +from django.db.models import OuterRef from django.db.models import Q from django.db.models import Sum +from django.db.models import Value from django.db.utils import IntegrityError from django.template.loader import render_to_string from django.utils import timezone @@ -37,6 +41,13 @@ from le_utils.constants import roles from past.builtins import basestring from past.utils import old_div +from search.constants import CHANNEL_KEYWORDS_TSVECTOR +from search.constants import CONTENTNODE_AUTHOR_TSVECTOR +from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR +from search.constants import CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR +from search.constants import CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR +from search.models import ChannelFullTextSearch +from search.models import ContentNodeFullTextSearch from contentcuration import models as ccmodels from contentcuration.decorators import delay_user_storage_calculation @@ -808,6 +819,64 @@ def fill_published_fields(channel, version_notes): channel.save() +def create_or_update_tsvectors(channel_id): + """ + Create or update tsvectors for the channel and all its content nodes. + """ + # Update or create channel tsvector entry. + logging.info("Starting to set tsvectors for channel with id {}.".format(channel_id)) + + from contentcuration.viewsets.channel import primary_token_subquery + + channel = (ccmodels.Channel.objects + .annotate(primary_channel_token=primary_token_subquery, + keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) + .get(pk=channel_id)) + + if ChannelFullTextSearch.objects.filter(channel_id=channel_id).exists(): + update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel.keywords_tsvector) + logging.info("Updated {} channel tsvector.".format(update_count)) + else: + obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel.keywords_tsvector) + obj.save() + logging.info("Created 1 channel tsvector.") + + # Update or create contentnodes tsvector entry for channel_id. + logging.info("Setting tsvectors for all contentnodes in channel {}.".format(channel_id)) + + if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists(): + + # First, delete nodes that are no longer in main_tree. 
+ nodes_no_longer_in_main_tree = ~Exists(channel.main_tree.get_family().filter(id=OuterRef("contentnode_id"))) + ContentNodeFullTextSearch.objects.filter(nodes_no_longer_in_main_tree, channel_id=channel_id).delete() + + # Now, all remaining nodes are in main_tree, so let's update them. + update_count = (ContentNodeFullTextSearch.objects.filter(channel_id=channel_id) + .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" ")) + .update(keywords_tsvector=CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR, author_tsvector=CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR)) + + # Insert newly created nodes. + nodes_not_having_tsvector_record = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"))) + nodes_to_insert = (channel.main_tree.get_family() + .filter(nodes_not_having_tsvector_record) + .annotate(channel_id=Value(channel_id), + contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), + keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, + author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) + .values("id", "channel_id", "keywords_tsvector", "author_tsvector")) + + insert_objs = list() + + for node in nodes_to_insert: + obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], + keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) + insert_objs.append(obj) + + inserted_nodes_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs) + + logging.info("Successfully inserted {} and updated {} contentnode tsvectors.".format(len(inserted_nodes_list), update_count)) + + @delay_user_storage_calculation def publish_channel( user_id, @@ -818,6 +887,8 @@ def publish_channel( send_email=False, progress_tracker=None, language=settings.LANGUAGE_CODE, + + ): """ :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None @@ -843,6 +914,10 @@ def publish_channel( if channel.public: delete_public_channel_cache_keys() + # Enqueue tsvector task to update or create channel tsvectors and all its + # contentnodes tsvector entries. 
+ create_or_update_tsvectors(channel_id=channel_id) + if send_email: with override(language): send_emails(channel, user_id, version_notes=version_notes) diff --git a/contentcuration/contentcuration/viewsets/channel.py b/contentcuration/contentcuration/viewsets/channel.py index 517e66f0fe..ca5c487e0a 100644 --- a/contentcuration/contentcuration/viewsets/channel.py +++ b/contentcuration/contentcuration/viewsets/channel.py @@ -24,6 +24,9 @@ from rest_framework.serializers import CharField from rest_framework.serializers import FloatField from rest_framework.serializers import IntegerField +from search.models import ChannelFullTextSearch +from search.models import ContentNodeFullTextSearch +from search.utils import get_fts_search_query from contentcuration.decorators import cache_no_user_data from contentcuration.models import Change @@ -119,23 +122,12 @@ def filter_deleted(self, queryset, name, value): return queryset.filter(deleted=value) def filter_keywords(self, queryset, name, value): - # TODO: Wait until we show more metadata on cards to add this back in - # keywords_query = self.main_tree_query.filter( - # Q(tags__tag_name__icontains=value) - # | Q(author__icontains=value) - # | Q(aggregator__icontains=value) - # | Q(provider__icontains=value) - # ) - return queryset.annotate( - # keyword_match_count=SQCount(keywords_query, field="content_id"), - primary_token=primary_token_subquery, - ).filter( - Q(name__icontains=value) - | Q(description__icontains=value) - | Q(pk__istartswith=value) - | Q(primary_token=value.replace("-", "")) - # | Q(keyword_match_count__gt=0) - ) + channel_keywords_query = (Exists(ChannelFullTextSearch.objects.filter( + keywords_tsvector=get_fts_search_query(value.replace("-", "")), channel_id=OuterRef("id")))) + contentnode_search_query = (Exists(ContentNodeFullTextSearch.objects.filter( + Q(keywords_tsvector=get_fts_search_query(value)) | Q(author_tsvector=get_fts_search_query(value)), channel_id=OuterRef("id")))) + + return queryset.filter(Q(channel_keywords_query) | Q(contentnode_search_query)) def filter_languages(self, queryset, name, value): languages = value.split(",") diff --git a/contentcuration/search/constants.py b/contentcuration/search/constants.py new file mode 100644 index 0000000000..9b4a29d246 --- /dev/null +++ b/contentcuration/search/constants.py @@ -0,0 +1,23 @@ +from django.contrib.postgres.search import SearchVector + +# Postgres full text search configuration. We use "simple" to make search +# language agnostic. +POSTGRES_FTS_CONFIG = "simple" + +# ContentNode vectors and search fields. 
+CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS = ("id", "channel_id", "node_id", "content_id", "tree_id", "title", "description", "contentnode_tags") +CONTENTNODE_KEYWORDS_TSVECTOR = SearchVector(*CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +CONTENTNODE_AUTHOR_TSVECTOR_FIELDS = ("author", "aggregator", "provider") +CONTENTNODE_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR_FIELDS = ("contentnode__id", "channel_id", "contentnode__node_id", "contentnode__content_id", + "contentnode__tree_id", "contentnode__title", "contentnode__description", "contentnode_tags") +CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR = SearchVector(*CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR_FIELDS = ("contentnode__author", "contentnode__aggregator", "contentnode__provider") +CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) + +# Channel vector and search fields. +CHANNEL_KEYWORDS_TSVECTOR_FIELDS = ("id", "main_tree__tree_id", "name", "description", "tagline", "primary_channel_token") +CHANNEL_KEYWORDS_TSVECTOR = SearchVector(*CHANNEL_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) diff --git a/contentcuration/search/management/commands/set_channel_tsvectors.py b/contentcuration/search/management/commands/set_channel_tsvectors.py index 4d27927f53..305e9a8adb 100644 --- a/contentcuration/search/management/commands/set_channel_tsvectors.py +++ b/contentcuration/search/management/commands/set_channel_tsvectors.py @@ -7,7 +7,7 @@ from django.core.management.base import BaseCommand from django.db.models import Exists from django.db.models import OuterRef -from search.models import CHANNEL_KEYWORDS_TSVECTOR +from search.constants import CHANNEL_KEYWORDS_TSVECTOR from search.models import ChannelFullTextSearch from contentcuration.models import Channel diff --git a/contentcuration/search/management/commands/set_contentnode_tsvectors.py b/contentcuration/search/management/commands/set_contentnode_tsvectors.py index 58cf9350e8..fbc862fae6 100644 --- a/contentcuration/search/management/commands/set_contentnode_tsvectors.py +++ b/contentcuration/search/management/commands/set_contentnode_tsvectors.py @@ -8,8 +8,8 @@ from django.core.management.base import BaseCommand from django.db.models import Exists from django.db.models import OuterRef -from search.models import CONTENTNODE_AUTHOR_TSVECTOR -from search.models import CONTENTNODE_KEYWORDS_TSVECTOR +from search.constants import CONTENTNODE_AUTHOR_TSVECTOR +from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR from search.models import ContentNodeFullTextSearch from contentcuration.models import ContentNode diff --git a/contentcuration/search/models.py b/contentcuration/search/models.py index 64b0472c8a..31793d4de7 100644 --- a/contentcuration/search/models.py +++ b/contentcuration/search/models.py @@ -2,7 +2,6 @@ from django.conf import settings from django.contrib.postgres.indexes import GinIndex -from django.contrib.postgres.search import SearchVector from django.contrib.postgres.search import SearchVectorField from django.db import models @@ -22,18 +21,6 @@ class SavedSearch(models.Model): ) -POSTGRES_FTS_CONFIG = "simple" - -CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS = ("id", "channel_id", "node_id", "content_id", "tree_id", "title", "description", "contentnode_tags") -CONTENTNODE_KEYWORDS_TSVECTOR = 
SearchVector(*CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) - -CONTENTNODE_AUTHOR_TSVECTOR_FIELDS = ("author", "aggregator", "provider") -CONTENTNODE_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) - -CHANNEL_KEYWORDS_TSVECTOR_FIELDS = ("id", "main_tree__tree_id", "name", "description", "tagline", "primary_channel_token") -CHANNEL_KEYWORDS_TSVECTOR = SearchVector(*CHANNEL_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) - - class ContentNodeFullTextSearch(models.Model): id = StudioUUIDField(primary_key=True, default=uuid.uuid4) diff --git a/contentcuration/search/serializers.py b/contentcuration/search/serializers.py deleted file mode 100644 index 4137c0e47f..0000000000 --- a/contentcuration/search/serializers.py +++ /dev/null @@ -1,18 +0,0 @@ -from contentcuration import models as cc_models -from rest_framework import serializers - - -class ContentSearchResultSerializer(serializers.ModelSerializer): - - class Meta: - model = cc_models.ContentNode - fields = ( - 'id', - 'original_channel_id', - 'source_channel_id', - 'title', - 'kind', - 'tags', - 'children', - 'tree_id' - ) diff --git a/contentcuration/search/utils.py b/contentcuration/search/utils.py new file mode 100644 index 0000000000..9cb205bb47 --- /dev/null +++ b/contentcuration/search/utils.py @@ -0,0 +1,9 @@ +from django.contrib.postgres.search import SearchQuery +from search.constants import POSTGRES_FTS_CONFIG + + +def get_fts_search_query(value): + """ + Returns a `SearchQuery` with our postgres full text search config set on it. + """ + return SearchQuery(value=value, config=POSTGRES_FTS_CONFIG) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index df5cf831cb..72c50aea3c 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -7,18 +7,20 @@ from django.db.models import Q from django.db.models import Subquery from django.db.models import Value +from django.db.models.functions import Coalesce from django_filters.rest_framework import BooleanFilter from django_filters.rest_framework import CharFilter from le_utils.constants import content_kinds from le_utils.constants import roles from rest_framework.permissions import IsAuthenticated +from search.models import ContentNodeFullTextSearch +from search.utils import get_fts_search_query from contentcuration.models import Channel -from contentcuration.models import ContentNode from contentcuration.models import File from contentcuration.utils.pagination import CachedListPagination +from contentcuration.viewsets.base import ReadOnlyValuesViewset from contentcuration.viewsets.base import RequiredFilterSet -from contentcuration.viewsets.base import ValuesViewset from contentcuration.viewsets.common import NotNullMapArrayAgg from contentcuration.viewsets.common import UUIDFilter from contentcuration.viewsets.common import UUIDInFilter @@ -48,110 +50,117 @@ def filter_channel_list(self, queryset, name, value): user = not self.request.user.is_anonymous and self.request.user channel_ids = [] if value == "public": - channel_ids = Channel.objects.filter(public=True, deleted=False).values_list("id", flat=True) + channel_ids = Channel.objects.filter(public=True, deleted=False, main_tree__published=True).values_list("id", flat=True) elif value == "edit" and user: - channel_ids = user.editable_channels.values_list("id", flat=True) + channel_ids = 
user.editable_channels.filter(main_tree__published=True).values_list("id", flat=True) elif value == "bookmark" and user: - channel_ids = user.bookmarked_channels.values_list("id", flat=True) + channel_ids = user.bookmarked_channels.filter(main_tree__published=True).values_list("id", flat=True) elif value == "view" and user: - channel_ids = user.view_only_channels.values_list("id", flat=True) + channel_ids = user.view_only_channels.filter(main_tree__published=True).values_list("id", flat=True) return queryset.filter(channel_id__in=list(channel_ids)) def filter_keywords(self, queryset, name, value): - return ContentNode.search(queryset=queryset, search_term=value) + return queryset.filter(Q(keywords_tsvector=get_fts_search_query(value)) + | Q(author_tsvector=get_fts_search_query(value))) def filter_author(self, queryset, name, value): - return queryset.filter( - Q(author__icontains=value) - | Q(aggregator__icontains=value) - | Q(provider__icontains=value) - ) + return queryset.filter(author_tsvector=get_fts_search_query(value)) def filter_languages(self, queryset, name, value): - return queryset.filter(language__lang_code__in=value.split(",")) + return queryset.filter(contentnode__language__lang_code__in=value.split(",")) def filter_licenses(self, queryset, name, value): licenses = [int(li) for li in value.split(",")] - return queryset.filter(license__in=licenses) + return queryset.filter(contentnode__license__in=licenses) def filter_kinds(self, queryset, name, value): - return queryset.filter(kind_id__in=value.split(",")) + return queryset.filter(contentnode__kind_id__in=value.split(",")) def filter_coach(self, queryset, name, value): - return queryset.filter(role_visibility=roles.COACH) + return queryset.filter(contentnode__role_visibility=roles.COACH) def filter_resources(self, queryset, name, value): - return queryset.exclude(kind_id=content_kinds.TOPIC) + return queryset.exclude(contentnode__kind_id=content_kinds.TOPIC) def filter_assessments(self, queryset, name, value): - return queryset.filter(kind_id=content_kinds.EXERCISE) + return queryset.filter(contentnode__kind_id=content_kinds.EXERCISE) def filter_created_after(self, queryset, name, value): date = re.search(r"(\d{4})-0?(\d+)-(\d+)", value) return queryset.filter( - created__year__gte=date.group(1), - created__month__gte=date.group(2), - created__day__gte=date.group(3), - ) - - class Meta: - model = ContentNode - fields = ( - "keywords", - "languages", - "licenses", - "kinds", - "coach", - "author", - "resources", - "assessments", + contentnode__created__year__gte=date.group(1), + contentnode__created__month__gte=date.group(2), + contentnode__created__day__gte=date.group(3), ) -class SearchContentNodeViewSet(ValuesViewset): +class SearchContentNodeViewSet(ReadOnlyValuesViewset): filterset_class = ContentNodeFilter pagination_class = ListPagination permission_classes = [IsAuthenticated] + field_map = { + "id": "contentnode__id", + "content_id": "contentnode__content_id", + "node_id": "contentnode__node_id", + "title": "contentnode__title", + "description": "contentnode__description", + "author": "contentnode__author", + "provider": "contentnode__provider", + "kind__kind": "contentnode__kind__kind", + "thumbnail_encoding": "contentnode__thumbnail_encoding", + "published": "contentnode__published", + "modified": "contentnode__modified", + "parent_id": "contentnode__parent_id", + "changed": "contentnode__changed", + } + values = ( - "id", - "content_id", - "node_id", - "title", - "description", - "author", - "provider", - 
"kind__kind", + "contentnode__id", + "contentnode__content_id", + "contentnode__node_id", + "contentnode__title", + "contentnode__description", + "contentnode__author", + "contentnode__provider", + "contentnode__kind__kind", + "contentnode__thumbnail_encoding", + "contentnode__published", + "contentnode__modified", + "contentnode__parent_id", + "contentnode__changed", "channel_id", "resource_count", "thumbnail_checksum", "thumbnail_extension", - "thumbnail_encoding", - "published", - "modified", - "parent_id", - "changed", "content_tags", "original_channel_name", ) def get_queryset(self): - return ContentNode._annotate_channel_id(ContentNode.objects) + return ContentNodeFullTextSearch.objects.select_related("contentnode") def annotate_queryset(self, queryset): """ Annotates thumbnails, resources count and channel name. """ thumbnails = File.objects.filter( - contentnode=OuterRef("id"), preset__thumbnail=True + contentnode=OuterRef("contentnode__id"), preset__thumbnail=True ) - descendant_resources_count = ExpressionWrapper(((F("rght") - F("lft") - Value(1)) / Value(2)), output_field=IntegerField()) + descendant_resources_count = ExpressionWrapper(((F("contentnode__rght") - F("contentnode__lft") - Value(1)) / Value(2)), output_field=IntegerField()) - channel_name = Subquery( - Channel.objects.filter(pk=OuterRef("channel_id")).values( - "name" - )[:1] + original_channel_name = Coalesce( + Subquery( + Channel.objects.filter(pk=OuterRef("contentnode__original_channel_id")).values( + "name" + )[:1] + ), + Subquery( + Channel.objects.filter(main_tree__tree_id=OuterRef("contentnode__tree_id")).values( + "name" + )[:1] + ), ) queryset = queryset.annotate( @@ -160,8 +169,8 @@ def annotate_queryset(self, queryset): thumbnail_extension=Subquery( thumbnails.values("file_format__extension")[:1] ), - content_tags=NotNullMapArrayAgg("tags__tag_name"), - original_channel_name=channel_name, + content_tags=NotNullMapArrayAgg("contentnode__tags__tag_name"), + original_channel_name=original_channel_name, ) return queryset From aa62dc2017a334d7c32710e99834f57cf1754fff Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 14 Sep 2022 18:07:29 +0530 Subject: [PATCH 16/25] Sync tsvectors on publish! 
--- .../views/ImportFromChannels/ChannelList.vue | 1 + .../contentcuration/utils/publish.py | 65 ++++++++++--------- .../contentcuration/viewsets/channel.py | 7 +- contentcuration/search/constants.py | 7 -- .../search/viewsets/contentnode.py | 21 +++--- 5 files changed, 52 insertions(+), 49 deletions(-) diff --git a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue index 76dc9e1f8c..43de8e8ce4 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue +++ b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue @@ -112,6 +112,7 @@ [this.channelFilter]: true, page: this.$route.query.page || 1, exclude: this.currentChannelId, + published: true, }).then(page => { this.pageCount = page.total_pages; this.channels = page.results; diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index afe4abdef4..2b49e13b24 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -44,8 +44,6 @@ from search.constants import CHANNEL_KEYWORDS_TSVECTOR from search.constants import CONTENTNODE_AUTHOR_TSVECTOR from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR -from search.constants import CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR -from search.constants import CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR from search.models import ChannelFullTextSearch from search.models import ContentNodeFullTextSearch @@ -819,9 +817,10 @@ def fill_published_fields(channel, version_notes): channel.save() -def create_or_update_tsvectors(channel_id): +def sync_contentnode_and_channel_tsvectors(channel_id): """ - Create or update tsvectors for the channel and all its content nodes. + Creates, deletes and updates tsvectors of the channel and all its content nodes + to reflect the current state of channel's main tree. """ # Update or create channel tsvector entry. logging.info("Starting to set tsvectors for channel with id {}.".format(channel_id)) @@ -829,52 +828,63 @@ def create_or_update_tsvectors(channel_id): from contentcuration.viewsets.channel import primary_token_subquery channel = (ccmodels.Channel.objects + .filter(pk=channel_id) .annotate(primary_channel_token=primary_token_subquery, keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) - .get(pk=channel_id)) + .values("keywords_tsvector", "main_tree__tree_id") + .get()) if ChannelFullTextSearch.objects.filter(channel_id=channel_id).exists(): - update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel.keywords_tsvector) + update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel["keywords_tsvector"]) logging.info("Updated {} channel tsvector.".format(update_count)) else: - obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel.keywords_tsvector) + obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel["keywords_tsvector"]) obj.save() logging.info("Created 1 channel tsvector.") # Update or create contentnodes tsvector entry for channel_id. 
- logging.info("Setting tsvectors for all contentnodes in channel {}.".format(channel_id)) + logging.info("Starting to set tsvectors for all contentnodes in channel {}.".format(channel_id)) - if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists(): + nodes_tsvector_query = (ccmodels.ContentNode.objects + .filter(tree_id=channel["main_tree__tree_id"]) + .annotate(channel_id=Value(channel_id), + contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), + keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, + author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) + .order_by()) + if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists(): # First, delete nodes that are no longer in main_tree. - nodes_no_longer_in_main_tree = ~Exists(channel.main_tree.get_family().filter(id=OuterRef("contentnode_id"))) + nodes_no_longer_in_main_tree = ~Exists(ccmodels.ContentNode.objects.filter(id=OuterRef("contentnode_id"), tree_id=channel["main_tree__tree_id"])) ContentNodeFullTextSearch.objects.filter(nodes_no_longer_in_main_tree, channel_id=channel_id).delete() # Now, all remaining nodes are in main_tree, so let's update them. - update_count = (ContentNodeFullTextSearch.objects.filter(channel_id=channel_id) - .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" ")) - .update(keywords_tsvector=CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR, author_tsvector=CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR)) + # Update only changed nodes. + nodes_to_update = ContentNodeFullTextSearch.objects.filter(channel_id=channel_id, contentnode__changed=True) + + update_objs = list() + for node in nodes_to_update: + corresponding_contentnode = nodes_tsvector_query.filter(pk=node.contentnode_id).values("keywords_tsvector", "author_tsvector").first() + if corresponding_contentnode: + node.keywords_tsvector = corresponding_contentnode["keywords_tsvector"] + node.author_tsvector = corresponding_contentnode["author_tsvector"] + update_objs.append(node) + ContentNodeFullTextSearch.objects.bulk_update(update_objs, ["keywords_tsvector", "author_tsvector"]) + del update_objs # Insert newly created nodes. 
- nodes_not_having_tsvector_record = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"))) - nodes_to_insert = (channel.main_tree.get_family() + nodes_not_having_tsvector_record = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"), channel_id=channel_id)) + nodes_to_insert = (nodes_tsvector_query .filter(nodes_not_having_tsvector_record) - .annotate(channel_id=Value(channel_id), - contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), - keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, - author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) .values("id", "channel_id", "keywords_tsvector", "author_tsvector")) insert_objs = list() - for node in nodes_to_insert: obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) insert_objs.append(obj) - inserted_nodes_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs) - - logging.info("Successfully inserted {} and updated {} contentnode tsvectors.".format(len(inserted_nodes_list), update_count)) + logging.info("Successfully inserted {} contentnode tsvectors.".format(len(inserted_nodes_list))) @delay_user_storage_calculation @@ -887,8 +897,6 @@ def publish_channel( send_email=False, progress_tracker=None, language=settings.LANGUAGE_CODE, - - ): """ :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None @@ -900,8 +908,9 @@ def publish_channel( set_channel_icon_encoding(channel) kolibri_temp_db = create_content_database(channel, force, user_id, force_exercises, progress_tracker=progress_tracker) increment_channel_version(channel) - mark_all_nodes_as_published(channel) add_tokens_to_channel(channel) + sync_contentnode_and_channel_tsvectors(channel_id=channel.id) + mark_all_nodes_as_published(channel) fill_published_fields(channel, version_notes) # Attributes not getting set for some reason, so just save it here @@ -914,10 +923,6 @@ def publish_channel( if channel.public: delete_public_channel_cache_keys() - # Enqueue tsvector task to update or create channel tsvectors and all its - # contentnodes tsvector entries. 
- create_or_update_tsvectors(channel_id=channel_id) - if send_email: with override(language): send_emails(channel, user_id, version_notes=version_notes) diff --git a/contentcuration/contentcuration/viewsets/channel.py b/contentcuration/contentcuration/viewsets/channel.py index ca5c487e0a..90b9a8f84d 100644 --- a/contentcuration/contentcuration/viewsets/channel.py +++ b/contentcuration/contentcuration/viewsets/channel.py @@ -122,10 +122,13 @@ def filter_deleted(self, queryset, name, value): return queryset.filter(deleted=value) def filter_keywords(self, queryset, name, value): + search_query = get_fts_search_query(value) + dash_replaced_search_query = get_fts_search_query(value.replace("-", "")) + channel_keywords_query = (Exists(ChannelFullTextSearch.objects.filter( - keywords_tsvector=get_fts_search_query(value.replace("-", "")), channel_id=OuterRef("id")))) + Q(keywords_tsvector=search_query) | Q(keywords_tsvector=dash_replaced_search_query), channel_id=OuterRef("id")))) contentnode_search_query = (Exists(ContentNodeFullTextSearch.objects.filter( - Q(keywords_tsvector=get_fts_search_query(value)) | Q(author_tsvector=get_fts_search_query(value)), channel_id=OuterRef("id")))) + Q(keywords_tsvector=search_query) | Q(author_tsvector=search_query), channel_id=OuterRef("id")))) return queryset.filter(Q(channel_keywords_query) | Q(contentnode_search_query)) diff --git a/contentcuration/search/constants.py b/contentcuration/search/constants.py index 9b4a29d246..1ac316c3ae 100644 --- a/contentcuration/search/constants.py +++ b/contentcuration/search/constants.py @@ -11,13 +11,6 @@ CONTENTNODE_AUTHOR_TSVECTOR_FIELDS = ("author", "aggregator", "provider") CONTENTNODE_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) -CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR_FIELDS = ("contentnode__id", "channel_id", "contentnode__node_id", "contentnode__content_id", - "contentnode__tree_id", "contentnode__title", "contentnode__description", "contentnode_tags") -CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR = SearchVector(*CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) - -CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR_FIELDS = ("contentnode__author", "contentnode__aggregator", "contentnode__provider") -CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) - # Channel vector and search fields. 
CHANNEL_KEYWORDS_TSVECTOR_FIELDS = ("id", "main_tree__tree_id", "name", "description", "tagline", "primary_channel_token") CHANNEL_KEYWORDS_TSVECTOR = SearchVector(*CHANNEL_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 72c50aea3c..691dfbe70e 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -49,19 +49,22 @@ class ContentNodeFilter(RequiredFilterSet): def filter_channel_list(self, queryset, name, value): user = not self.request.user.is_anonymous and self.request.user channel_ids = [] + if value == "public": - channel_ids = Channel.objects.filter(public=True, deleted=False, main_tree__published=True).values_list("id", flat=True) + channel_ids = Channel.get_public_channels().values_list("id", flat=True) elif value == "edit" and user: - channel_ids = user.editable_channels.filter(main_tree__published=True).values_list("id", flat=True) + channel_ids = user.editable_channels.values_list("id", flat=True) elif value == "bookmark" and user: - channel_ids = user.bookmarked_channels.filter(main_tree__published=True).values_list("id", flat=True) + channel_ids = user.bookmarked_channels.values_list("id", flat=True) elif value == "view" and user: - channel_ids = user.view_only_channels.filter(main_tree__published=True).values_list("id", flat=True) + channel_ids = user.view_only_channels.values_list("id", flat=True) + return queryset.filter(channel_id__in=list(channel_ids)) def filter_keywords(self, queryset, name, value): - return queryset.filter(Q(keywords_tsvector=get_fts_search_query(value)) - | Q(author_tsvector=get_fts_search_query(value))) + search_query = get_fts_search_query(value) + return queryset.filter(Q(keywords_tsvector=search_query) + | Q(author_tsvector=search_query)) def filter_author(self, queryset, name, value): return queryset.filter(author_tsvector=get_fts_search_query(value)) @@ -98,6 +101,7 @@ class SearchContentNodeViewSet(ReadOnlyValuesViewset): filterset_class = ContentNodeFilter pagination_class = ListPagination permission_classes = [IsAuthenticated] + queryset = ContentNodeFullTextSearch.objects.all() field_map = { "id": "contentnode__id", @@ -137,12 +141,9 @@ class SearchContentNodeViewSet(ReadOnlyValuesViewset): "original_channel_name", ) - def get_queryset(self): - return ContentNodeFullTextSearch.objects.select_related("contentnode") - def annotate_queryset(self, queryset): """ - Annotates thumbnails, resources count and channel name. + Annotates thumbnails, resources count and original channel name. """ thumbnails = File.objects.filter( contentnode=OuterRef("contentnode__id"), preset__thumbnail=True From 2672512de766ba0970a4be9bdc622b656521961a Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 14 Sep 2022 20:19:40 +0530 Subject: [PATCH 17/25] fix: tests and ready for merge! 
<3 --- .../contentcuration/tests/testdata.py | 18 +++--- contentcuration/search/tests/test_search.py | 62 +++++++++++-------- 2 files changed, 43 insertions(+), 37 deletions(-) diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py index 5560e853a5..50895337b1 100644 --- a/contentcuration/contentcuration/tests/testdata.py +++ b/contentcuration/contentcuration/tests/testdata.py @@ -195,23 +195,21 @@ def node(data, parent=None): return new_node -def tree(parent=None, tree_data=None): +def tree(parent=None): # Read from json fixture - if tree_data is None: - filepath = os.path.sep.join([os.path.dirname(__file__), "fixtures", "tree.json"]) - with open(filepath, "rb") as jsonfile: - tree_data = json.load(jsonfile) + filepath = os.path.sep.join([os.path.dirname(__file__), "fixtures", "tree.json"]) + with open(filepath, "rb") as jsonfile: + data = json.load(jsonfile) - return node(tree_data, parent) + return node(data, parent) -def channel(name="testchannel", create_main_tree=True, main_tree_data=None): +def channel(name="testchannel"): channel = cc.Channel.objects.create(name=name) channel.save() - if create_main_tree: - channel.main_tree = tree(tree_data=main_tree_data) - channel.save() + channel.main_tree = tree() + channel.save() return channel diff --git a/contentcuration/search/tests/test_search.py b/contentcuration/search/tests/test_search.py index 6b3d34cc41..8489ece577 100644 --- a/contentcuration/search/tests/test_search.py +++ b/contentcuration/search/tests/test_search.py @@ -3,28 +3,39 @@ from django.urls import reverse from contentcuration.models import Channel +from contentcuration.models import ContentNode from contentcuration.tests import testdata from contentcuration.tests.base import StudioAPITestCase +from contentcuration.utils.publish import sync_contentnode_and_channel_tsvectors + + +def dummy_publish(channel): + channel_nodes = ContentNode.objects.filter(tree_id=channel.main_tree.tree_id) + for node in channel_nodes: + node.published = True + node.changed = False + node.save() + sync_contentnode_and_channel_tsvectors(channel_id=channel.id) class SearchViewsetTestCase(StudioAPITestCase): + def setUp(self): + super().setUp() + self.channel = testdata.channel() + self.user = testdata.user() + self.channel.editors.add(self.user) + dummy_publish(self.channel) def test_filter_exclude_channels(self): - user = testdata.user() - self.client.force_authenticate(user=user) - channel = testdata.channel() - channel.editors.add(user) + self.client.force_authenticate(user=self.user) response = self.client.get( - reverse("search-list"), data={"exclude_channel": channel.id}, format="json", + reverse("search-list"), data={"exclude_channel": self.channel.id}, format="json", ) self.assertEqual(response.status_code, 200, response.content) self.assertEqual(response.data["results"], []) def test_filter_channels_by_edit(self): - user = testdata.user() - self.client.force_authenticate(user=user) - channel = testdata.channel() - channel.editors.add(user) + self.client.force_authenticate(user=self.user) response = self.client.get( reverse("search-list"), data={"channel_list": "edit"}, format="json", ) @@ -35,6 +46,7 @@ def test_search(self): users = [] channels = [] + # Create channels, users. 
for i in range(4): user = testdata.user(email="a{}@a.com".format(i)) users.append(user) @@ -46,20 +58,11 @@ def test_search(self): public_channel, editable_channel, viewable_channel, inaccessible_channel = channels - # Create public video node and publish it. + # Create public video node. public_video_node = testdata.node({ "title": "Kolibri video", "kind_id": "video", }, parent=public_channel.main_tree) - public_video_node.complete = True - public_video_node.published = True - public_video_node.changed = False - public_video_node.save() - - # Publish the public_channel. - public_channel.main_tree.published = True - public_channel.main_tree.changed = False - public_channel.main_tree.save() public_channel.public = True public_channel.save() @@ -72,6 +75,10 @@ def test_search(self): public_video_node.copy_to(target=viewable_channel.main_tree) public_video_node.copy_to(target=inaccessible_channel.main_tree) + # Publish all channels to make them searchable. + for channel in channels: + dummy_publish(channel) + # Get different nodes based on access. editable_channel.main_tree.refresh_from_db() editable_video_node = editable_channel.main_tree.get_descendants().first() @@ -93,12 +100,13 @@ def test_search(self): format="json", ) - for result in response.data["results"]: - self.assertNotEqual(result["id"], inaccessible_video_node.id) + result = response.data["results"][0] + + self.assertNotEqual(result["id"], inaccessible_video_node.id) - if channel_list == "public": - self.assertEqual(result["id"], public_video_node.id) - elif channel_list == "edit": - self.assertEqual(result["id"], editable_video_node.id) - elif channel_list == "view": - self.assertEqual(result["id"], viewable_video_node.id) + if channel_list == "public": + self.assertEqual(result["id"], public_video_node.id) + elif channel_list == "edit": + self.assertEqual(result["id"], editable_video_node.id) + elif channel_list == "view": + self.assertEqual(result["id"], viewable_video_node.id) From 1ffa30d7348a2bd2cd79961caf5b362cf0e1e39c Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 15 Sep 2022 01:41:51 +0530 Subject: [PATCH 18/25] fix: node command edge case; when published nodes go to trash tree, they remain as published --- .../management/commands/set_contentnode_tsvectors.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/contentcuration/search/management/commands/set_contentnode_tsvectors.py b/contentcuration/search/management/commands/set_contentnode_tsvectors.py index fbc862fae6..4e5673d9ec 100644 --- a/contentcuration/search/management/commands/set_contentnode_tsvectors.py +++ b/contentcuration/search/management/commands/set_contentnode_tsvectors.py @@ -32,7 +32,7 @@ def handle(self, *args, **options): .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) - .filter(tsvector_not_already_inserted_query, published=True) + .filter(tsvector_not_already_inserted_query, published=True, channel_id__isnull=False) .values("id", "channel_id", "keywords_tsvector", "author_tsvector").order_by()) insertable_nodes_tsvector = list(tsvector_node_query[:CHUNKSIZE]) @@ -43,10 +43,9 @@ def handle(self, *args, **options): insert_objs = list() for node in insertable_nodes_tsvector: - if node["channel_id"]: - obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], - keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) - insert_objs.append(obj) 
+ obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], + keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) + insert_objs.append(obj) inserted_objs_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs) From 57724e0787a08a77d9ee28f6be064aa8c3cf1e0d Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 16 Sep 2022 16:01:51 +0530 Subject: [PATCH 19/25] Enforce only-one search entries --- contentcuration/search/migrations/0003_fulltextsearch.py | 6 +++--- contentcuration/search/models.py | 4 ++-- contentcuration/search/viewsets/contentnode.py | 5 +---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/contentcuration/search/migrations/0003_fulltextsearch.py b/contentcuration/search/migrations/0003_fulltextsearch.py index 2885a4655f..632df6a39e 100644 --- a/contentcuration/search/migrations/0003_fulltextsearch.py +++ b/contentcuration/search/migrations/0003_fulltextsearch.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.14 on 2022-09-08 07:19 +# Generated by Django 3.2.14 on 2022-09-16 08:55 import uuid import django.contrib.postgres.indexes @@ -28,7 +28,7 @@ class Migration(migrations.Migration): ('keywords_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), ('author_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='channel_nodes_fts', to='contentcuration.channel')), - ('contentnode', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='node_fts', to='contentcuration.contentnode')), + ('contentnode', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='node_fts', to='contentcuration.contentnode')), ], ), migrations.CreateModel( @@ -36,7 +36,7 @@ class Migration(migrations.Migration): fields=[ ('id', contentcuration.models.UUIDField(default=uuid.uuid4, max_length=32, primary_key=True, serialize=False)), ('keywords_tsvector', django.contrib.postgres.search.SearchVectorField(blank=True, null=True)), - ('channel', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='channel_fts', to='contentcuration.channel')), + ('channel', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, related_name='channel_fts', to='contentcuration.channel')), ], ), AddIndexConcurrently( diff --git a/contentcuration/search/models.py b/contentcuration/search/models.py index 31793d4de7..9e121af509 100644 --- a/contentcuration/search/models.py +++ b/contentcuration/search/models.py @@ -25,7 +25,7 @@ class ContentNodeFullTextSearch(models.Model): id = StudioUUIDField(primary_key=True, default=uuid.uuid4) # The contentnode that this record points to. - contentnode = models.ForeignKey(ContentNode, on_delete=models.CASCADE, related_name="node_fts") + contentnode = models.OneToOneField(ContentNode, on_delete=models.CASCADE, related_name="node_fts") # The channel to which the contentnode belongs. Channel cannot be NULL because we only allow # searches to be made inside channels. @@ -46,7 +46,7 @@ class ChannelFullTextSearch(models.Model): id = StudioUUIDField(primary_key=True, default=uuid.uuid4) # The channel to which this record points. - channel = models.ForeignKey(Channel, on_delete=models.CASCADE, related_name="channel_fts") + channel = models.OneToOneField(Channel, on_delete=models.CASCADE, related_name="channel_fts") # This stores the channel keywords as tsvector for super fast searches. 
keywords_tsvector = SearchVectorField(null=True, blank=True) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 691dfbe70e..aa6c7fe9b7 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -4,7 +4,6 @@ from django.db.models import F from django.db.models import IntegerField from django.db.models import OuterRef -from django.db.models import Q from django.db.models import Subquery from django.db.models import Value from django.db.models.functions import Coalesce @@ -62,9 +61,7 @@ def filter_channel_list(self, queryset, name, value): return queryset.filter(channel_id__in=list(channel_ids)) def filter_keywords(self, queryset, name, value): - search_query = get_fts_search_query(value) - return queryset.filter(Q(keywords_tsvector=search_query) - | Q(author_tsvector=search_query)) + return queryset.filter(keywords_tsvector=get_fts_search_query(value)) def filter_author(self, queryset, name, value): return queryset.filter(author_tsvector=get_fts_search_query(value)) From e53e56a2e300ea31c10f994dfe3cf5ff0ec25186 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 16 Sep 2022 16:41:27 +0530 Subject: [PATCH 20/25] Remove unnecessary select_related --- .../search/management/commands/set_channel_tsvectors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contentcuration/search/management/commands/set_channel_tsvectors.py b/contentcuration/search/management/commands/set_channel_tsvectors.py index 305e9a8adb..d82f4848f5 100644 --- a/contentcuration/search/management/commands/set_channel_tsvectors.py +++ b/contentcuration/search/management/commands/set_channel_tsvectors.py @@ -27,8 +27,8 @@ def handle(self, *args, **options): channel_not_already_inserted_query = ~Exists(ChannelFullTextSearch.objects.filter(channel_id=OuterRef("id"))) - channel_query = (Channel.objects.select_related("main_tree") - .filter(channel_not_already_inserted_query, deleted=False, main_tree__published=True) + channel_query = (Channel.objects.filter(channel_not_already_inserted_query, + deleted=False, main_tree__published=True) .annotate(primary_channel_token=primary_token_subquery, keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) .values("id", "keywords_tsvector")) From 4b3d4c7d0a586afce5236de13bfb8ce12064e52e Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 16 Sep 2022 17:00:04 +0530 Subject: [PATCH 21/25] fix cache tests mock by setting ContentNodeFullTextSearch --- contentcuration/contentcuration/tests/utils/test_cache.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/contentcuration/contentcuration/tests/utils/test_cache.py b/contentcuration/contentcuration/tests/utils/test_cache.py index 1bbf69d580..22a2a6a62b 100644 --- a/contentcuration/contentcuration/tests/utils/test_cache.py +++ b/contentcuration/contentcuration/tests/utils/test_cache.py @@ -1,5 +1,6 @@ import mock from django.test import SimpleTestCase +from search.models import ContentNodeFullTextSearch from ..helpers import mock_class_instance from contentcuration.models import ContentNode @@ -9,7 +10,9 @@ class ResourceSizeCacheTestCase(SimpleTestCase): def setUp(self): super(ResourceSizeCacheTestCase, self).setUp() - self.node = mock.Mock(spec_set=ContentNode()) + c = ContentNode() + c.node_fts = ContentNodeFullTextSearch() + self.node = mock.Mock(spec_set=c) self.node.pk = "abcdefghijklmnopqrstuvwxyz" self.redis_client = mock_class_instance("redis.client.StrictRedis") self.cache_client = 
mock_class_instance("django_redis.client.DefaultClient") From 44ab74c05b06881dabacb9c7bdd9d6bdbc8f4a65 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 16 Sep 2022 21:33:54 +0530 Subject: [PATCH 22/25] fix cache & nodes tests by using db-friendly TestCase --- .../contentcuration/tests/utils/test_cache.py | 9 +++------ .../contentcuration/tests/utils/test_nodes.py | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/contentcuration/contentcuration/tests/utils/test_cache.py b/contentcuration/contentcuration/tests/utils/test_cache.py index 22a2a6a62b..5327da19ac 100644 --- a/contentcuration/contentcuration/tests/utils/test_cache.py +++ b/contentcuration/contentcuration/tests/utils/test_cache.py @@ -1,18 +1,15 @@ import mock -from django.test import SimpleTestCase -from search.models import ContentNodeFullTextSearch +from django.test import TestCase from ..helpers import mock_class_instance from contentcuration.models import ContentNode from contentcuration.utils.cache import ResourceSizeCache -class ResourceSizeCacheTestCase(SimpleTestCase): +class ResourceSizeCacheTestCase(TestCase): def setUp(self): super(ResourceSizeCacheTestCase, self).setUp() - c = ContentNode() - c.node_fts = ContentNodeFullTextSearch() - self.node = mock.Mock(spec_set=c) + self.node = mock.Mock(spec_set=ContentNode()) self.node.pk = "abcdefghijklmnopqrstuvwxyz" self.redis_client = mock_class_instance("redis.client.StrictRedis") self.cache_client = mock_class_instance("django_redis.client.DefaultClient") diff --git a/contentcuration/contentcuration/tests/utils/test_nodes.py b/contentcuration/contentcuration/tests/utils/test_nodes.py index 3b96c30a3c..83171288d6 100644 --- a/contentcuration/contentcuration/tests/utils/test_nodes.py +++ b/contentcuration/contentcuration/tests/utils/test_nodes.py @@ -6,7 +6,7 @@ from dateutil.parser import isoparse from django.db.models import F from django.db.models import Max -from django.test import SimpleTestCase +from django.test import TestCase from ..base import StudioTestCase from contentcuration.models import ContentNode @@ -42,7 +42,7 @@ def test_modified_since(self): @mock.patch("contentcuration.utils.nodes.ResourceSizeHelper") @mock.patch("contentcuration.utils.nodes.ResourceSizeCache") -class CalculateResourceSizeTestCase(SimpleTestCase): +class CalculateResourceSizeTestCase(TestCase): def setUp(self): super(CalculateResourceSizeTestCase, self).setUp() self.node = mock.Mock(spec_set=ContentNode()) From 001e788f38a486b883009a988faf9082e74eec5d Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 21 Sep 2022 22:53:33 +0530 Subject: [PATCH 23/25] Use command for tsv insertion & simpler tsv update on publish --- Makefile | 2 +- .../views/ImportFromChannels/BrowsingCard.vue | 2 +- .../contentcuration/tests/helpers.py | 8 ++ .../contentcuration/tests/utils/test_cache.py | 7 +- .../contentcuration/tests/utils/test_nodes.py | 8 +- .../contentcuration/utils/publish.py | 78 +++++++------------ .../commands/set_channel_tsvectors.py | 11 +-- .../commands/set_contentnode_tsvectors.py | 45 +++++++---- contentcuration/search/utils.py | 39 ++++++++++ .../search/viewsets/contentnode.py | 4 +- 10 files changed, 118 insertions(+), 86 deletions(-) diff --git a/Makefile b/Makefile index 29fe984285..99b55d3762 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ learningactivities: set-tsvectors: python contentcuration/manage.py set_channel_tsvectors - python contentcuration/manage.py set_contentnode_tsvectors + python contentcuration/manage.py 
set_contentnode_tsvectors --published ############################################################### # END PRODUCTION COMMANDS ##################################### diff --git a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue index 70f1ccafb7..5d0447df50 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue +++ b/contentcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue @@ -149,7 +149,7 @@ if (this.isTopic) { return `${baseUrl}#/${this.node.id}`; } - return `${baseUrl}#/${this.node.parent}/${this.node.id}`; + return `${baseUrl}#/${this.node.parent_id}/${this.node.id}`; }, resourcesMsg() { let count; diff --git a/contentcuration/contentcuration/tests/helpers.py b/contentcuration/contentcuration/tests/helpers.py index 8e3172fcd4..cf1d54130e 100644 --- a/contentcuration/contentcuration/tests/helpers.py +++ b/contentcuration/contentcuration/tests/helpers.py @@ -2,7 +2,9 @@ from importlib import import_module import mock +from search.models import ContentNodeFullTextSearch +from contentcuration.models import ContentNode from contentcuration.models import TaskResult @@ -39,6 +41,12 @@ def mock_class_instance(target): else: target_cls = target + # ContentNode's node_fts field can be handled by Django when tests + # access the database but we mock it so that we don't need to query + # the database. By doing so we get faster test execution. + if type(target_cls) is ContentNode: + target_cls.node_fts = ContentNodeFullTextSearch() + class MockClass(target_cls): def __new__(cls, *args, **kwargs): return mock.Mock(spec_set=cls) diff --git a/contentcuration/contentcuration/tests/utils/test_cache.py b/contentcuration/contentcuration/tests/utils/test_cache.py index 5327da19ac..d16570648a 100644 --- a/contentcuration/contentcuration/tests/utils/test_cache.py +++ b/contentcuration/contentcuration/tests/utils/test_cache.py @@ -1,15 +1,14 @@ import mock -from django.test import TestCase +from django.test import SimpleTestCase from ..helpers import mock_class_instance -from contentcuration.models import ContentNode from contentcuration.utils.cache import ResourceSizeCache -class ResourceSizeCacheTestCase(TestCase): +class ResourceSizeCacheTestCase(SimpleTestCase): def setUp(self): super(ResourceSizeCacheTestCase, self).setUp() - self.node = mock.Mock(spec_set=ContentNode()) + self.node = mock_class_instance("contentcuration.models.ContentNode") self.node.pk = "abcdefghijklmnopqrstuvwxyz" self.redis_client = mock_class_instance("redis.client.StrictRedis") self.cache_client = mock_class_instance("django_redis.client.DefaultClient") diff --git a/contentcuration/contentcuration/tests/utils/test_nodes.py b/contentcuration/contentcuration/tests/utils/test_nodes.py index 83171288d6..be43d295dd 100644 --- a/contentcuration/contentcuration/tests/utils/test_nodes.py +++ b/contentcuration/contentcuration/tests/utils/test_nodes.py @@ -6,10 +6,10 @@ from dateutil.parser import isoparse from django.db.models import F from django.db.models import Max -from django.test import TestCase +from django.test import SimpleTestCase from ..base import StudioTestCase -from contentcuration.models import ContentNode +from contentcuration.tests.helpers import mock_class_instance from contentcuration.utils.nodes import calculate_resource_size from contentcuration.utils.nodes import ResourceSizeHelper from 
contentcuration.utils.nodes import SlowCalculationError @@ -42,10 +42,10 @@ def test_modified_since(self): @mock.patch("contentcuration.utils.nodes.ResourceSizeHelper") @mock.patch("contentcuration.utils.nodes.ResourceSizeCache") -class CalculateResourceSizeTestCase(TestCase): +class CalculateResourceSizeTestCase(SimpleTestCase): def setUp(self): super(CalculateResourceSizeTestCase, self).setUp() - self.node = mock.Mock(spec_set=ContentNode()) + self.node = mock_class_instance("contentcuration.models.ContentNode") def assertCalculation(self, cache, helper, force=False): helper().get_size.return_value = 456 diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index 2b49e13b24..017bf4a561 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -14,7 +14,6 @@ from itertools import chain from django.conf import settings -from django.contrib.postgres.aggregates import StringAgg from django.core.files import File from django.core.files.storage import default_storage as storage from django.core.management import call_command @@ -23,8 +22,8 @@ from django.db.models import Max from django.db.models import OuterRef from django.db.models import Q +from django.db.models import Subquery from django.db.models import Sum -from django.db.models import Value from django.db.utils import IntegrityError from django.template.loader import render_to_string from django.utils import timezone @@ -41,11 +40,10 @@ from le_utils.constants import roles from past.builtins import basestring from past.utils import old_div -from search.constants import CHANNEL_KEYWORDS_TSVECTOR -from search.constants import CONTENTNODE_AUTHOR_TSVECTOR -from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR from search.models import ChannelFullTextSearch from search.models import ContentNodeFullTextSearch +from search.utils import get_fts_annotated_channel_qs +from search.utils import get_fts_annotated_contentnode_qs from contentcuration import models as ccmodels from contentcuration.decorators import delay_user_storage_calculation @@ -823,35 +821,22 @@ def sync_contentnode_and_channel_tsvectors(channel_id): to reflect the current state of channel's main tree. """ # Update or create channel tsvector entry. 
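Because ChannelFullTextSearch.channel is now a one-to-one relation (see the migration earlier in this series), the exists()/update/create branching removed just below collapses into a single upsert. A minimal sketch of the pattern, with channel_id and channel["keywords_tsvector"] taken from the enclosing function (illustrative only):

    from search.models import ChannelFullTextSearch

    # update_or_create either rewrites the one existing row for this channel or
    # inserts it; the one-to-one constraint guarantees there is never more than
    # one tsvector row per channel.
    record, created = ChannelFullTextSearch.objects.update_or_create(
        channel_id=channel_id,
        defaults={"keywords_tsvector": channel["keywords_tsvector"]},
    )
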
- logging.info("Starting to set tsvectors for channel with id {}.".format(channel_id)) + logging.info("Setting tsvector for channel with id {}.".format(channel_id)) - from contentcuration.viewsets.channel import primary_token_subquery - - channel = (ccmodels.Channel.objects - .filter(pk=channel_id) - .annotate(primary_channel_token=primary_token_subquery, - keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) + channel = (get_fts_annotated_channel_qs() .values("keywords_tsvector", "main_tree__tree_id") - .get()) + .get(pk=channel_id)) - if ChannelFullTextSearch.objects.filter(channel_id=channel_id).exists(): - update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel["keywords_tsvector"]) - logging.info("Updated {} channel tsvector.".format(update_count)) - else: - obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel["keywords_tsvector"]) - obj.save() + obj, is_created = ChannelFullTextSearch.objects.update_or_create(channel_id=channel_id, defaults={"keywords_tsvector": channel["keywords_tsvector"]}) + del obj + + if is_created: logging.info("Created 1 channel tsvector.") + else: + logging.info("Updated 1 channel tsvector.") # Update or create contentnodes tsvector entry for channel_id. - logging.info("Starting to set tsvectors for all contentnodes in channel {}.".format(channel_id)) - - nodes_tsvector_query = (ccmodels.ContentNode.objects - .filter(tree_id=channel["main_tree__tree_id"]) - .annotate(channel_id=Value(channel_id), - contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), - keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, - author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) - .order_by()) + logging.info("Setting tsvectors for all main tree contentnodes in channel {}.".format(channel_id)) if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists(): # First, delete nodes that are no longer in main_tree. @@ -860,31 +845,22 @@ def sync_contentnode_and_channel_tsvectors(channel_id): # Now, all remaining nodes are in main_tree, so let's update them. # Update only changed nodes. - nodes_to_update = ContentNodeFullTextSearch.objects.filter(channel_id=channel_id, contentnode__changed=True) - - update_objs = list() - for node in nodes_to_update: - corresponding_contentnode = nodes_tsvector_query.filter(pk=node.contentnode_id).values("keywords_tsvector", "author_tsvector").first() - if corresponding_contentnode: - node.keywords_tsvector = corresponding_contentnode["keywords_tsvector"] - node.author_tsvector = corresponding_contentnode["author_tsvector"] - update_objs.append(node) - ContentNodeFullTextSearch.objects.bulk_update(update_objs, ["keywords_tsvector", "author_tsvector"]) - del update_objs + changed_nodes_subquery = (get_fts_annotated_contentnode_qs(channel_id) + .filter(id=OuterRef("contentnode_id"), + tree_id=channel["main_tree__tree_id"], + complete=True, + changed=True) + .order_by()) + ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).update( + keywords_tsvector=Subquery(changed_nodes_subquery.values_list("keywords_tsvector")[:1]), + author_tsvector=Subquery(changed_nodes_subquery.values_list("author_tsvector")[:1]) + ) # Insert newly created nodes. 
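The "not yet indexed" rows, both in the code removed below and in the two management commands, are selected with an Exists() anti-join rather than a NOT IN list. A small self-contained sketch of that pattern (illustrative only):

    from django.db.models import Exists, OuterRef
    from search.models import ContentNodeFullTextSearch

    from contentcuration.models import ContentNode

    # Keep only content nodes that have no ContentNodeFullTextSearch row yet;
    # the subquery is correlated on the node id and compiles to NOT EXISTS.
    missing_tsvector = ~Exists(
        ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"))
    )
    nodes_to_index = ContentNode.objects.filter(missing_tsvector)
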
- nodes_not_having_tsvector_record = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"), channel_id=channel_id)) - nodes_to_insert = (nodes_tsvector_query - .filter(nodes_not_having_tsvector_record) - .values("id", "channel_id", "keywords_tsvector", "author_tsvector")) - - insert_objs = list() - for node in nodes_to_insert: - obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"], - keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"]) - insert_objs.append(obj) - inserted_nodes_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs) - logging.info("Successfully inserted {} contentnode tsvectors.".format(len(inserted_nodes_list))) + call_command("set_contentnode_tsvectors", + "--channel-id={}".format(channel_id), + "--tree-id={}".format(channel["main_tree__tree_id"]), + "--complete") @delay_user_storage_calculation diff --git a/contentcuration/search/management/commands/set_channel_tsvectors.py b/contentcuration/search/management/commands/set_channel_tsvectors.py index d82f4848f5..68d7e17b51 100644 --- a/contentcuration/search/management/commands/set_channel_tsvectors.py +++ b/contentcuration/search/management/commands/set_channel_tsvectors.py @@ -7,11 +7,8 @@ from django.core.management.base import BaseCommand from django.db.models import Exists from django.db.models import OuterRef -from search.constants import CHANNEL_KEYWORDS_TSVECTOR from search.models import ChannelFullTextSearch - -from contentcuration.models import Channel -from contentcuration.viewsets.channel import primary_token_subquery +from search.utils import get_fts_annotated_channel_qs logmodule.basicConfig(level=logmodule.INFO) @@ -27,10 +24,8 @@ def handle(self, *args, **options): channel_not_already_inserted_query = ~Exists(ChannelFullTextSearch.objects.filter(channel_id=OuterRef("id"))) - channel_query = (Channel.objects.filter(channel_not_already_inserted_query, - deleted=False, main_tree__published=True) - .annotate(primary_channel_token=primary_token_subquery, - keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) + channel_query = (get_fts_annotated_channel_qs().filter(channel_not_already_inserted_query, + deleted=False, main_tree__published=True) .values("id", "keywords_tsvector")) insertable_channels = list(channel_query[:CHUNKSIZE]) diff --git a/contentcuration/search/management/commands/set_contentnode_tsvectors.py b/contentcuration/search/management/commands/set_contentnode_tsvectors.py index 4e5673d9ec..c5e78ca8d1 100644 --- a/contentcuration/search/management/commands/set_contentnode_tsvectors.py +++ b/contentcuration/search/management/commands/set_contentnode_tsvectors.py @@ -4,15 +4,11 @@ import logging as logmodule import time -from django.contrib.postgres.aggregates import StringAgg from django.core.management.base import BaseCommand from django.db.models import Exists from django.db.models import OuterRef -from search.constants import CONTENTNODE_AUTHOR_TSVECTOR -from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR from search.models import ContentNodeFullTextSearch - -from contentcuration.models import ContentNode +from search.utils import get_fts_annotated_contentnode_qs logmodule.basicConfig(level=logmodule.INFO) @@ -22,20 +18,39 @@ class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument("--channel-id", type=str, dest="channel_id", + help="The channel_id to annotate to the nodes. 
If not specified then each node's channel_id is queried and then annotated.") + parser.add_argument("--tree-id", type=int, dest="tree_id", + help="Set tsvectors for a specific tree_id nodes only. If not specified then tsvectors for all nodes of ContentNode table are set.") + parser.add_argument("--published", dest="published", action="store_true", help="Filters on whether node is published or not.") + parser.add_argument("--complete", dest="complete", action="store_true", help="Filters on whether node is complete or not.") - def handle(self, *args, **options): - start = time.time() + def get_tsvector_nodes_queryset(self, *args, **options): + tsvector_nodes_queryset = get_fts_annotated_contentnode_qs(channel_id=options["channel_id"]) + + if options["tree_id"]: + tsvector_nodes_queryset = tsvector_nodes_queryset.filter(tree_id=options["tree_id"]) + + if options["complete"]: + tsvector_nodes_queryset = tsvector_nodes_queryset.filter(complete=True) + + if options["published"]: + tsvector_nodes_queryset = tsvector_nodes_queryset.filter(published=True) tsvector_not_already_inserted_query = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"))) + tsvector_nodes_queryset = (tsvector_nodes_queryset + .filter(tsvector_not_already_inserted_query, channel_id__isnull=False) + .values("id", "channel_id", "keywords_tsvector", "author_tsvector").order_by()) + + return tsvector_nodes_queryset + + def handle(self, *args, **options): + start = time.time() - tsvector_node_query = (ContentNode._annotate_channel_id(ContentNode.objects) - .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), - keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, - author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR) - .filter(tsvector_not_already_inserted_query, published=True, channel_id__isnull=False) - .values("id", "channel_id", "keywords_tsvector", "author_tsvector").order_by()) + tsvector_nodes_queryset = self.get_tsvector_nodes_queryset(*args, **options) - insertable_nodes_tsvector = list(tsvector_node_query[:CHUNKSIZE]) + insertable_nodes_tsvector = list(tsvector_nodes_queryset[:CHUNKSIZE]) total_tsvectors_inserted = 0 while insertable_nodes_tsvector: @@ -54,6 +69,6 @@ def handle(self, *args, **options): logging.info("Inserted {} contentnode tsvectors.".format(current_inserts_count)) - insertable_nodes_tsvector = list(tsvector_node_query[:CHUNKSIZE]) + insertable_nodes_tsvector = list(tsvector_nodes_queryset[:CHUNKSIZE]) logging.info("Completed! Successfully inserted total of {} contentnode tsvectors in {} seconds.".format(total_tsvectors_inserted, time.time() - start)) diff --git a/contentcuration/search/utils.py b/contentcuration/search/utils.py index 9cb205bb47..4f6768f650 100644 --- a/contentcuration/search/utils.py +++ b/contentcuration/search/utils.py @@ -1,4 +1,9 @@ +from django.contrib.postgres.aggregates import StringAgg from django.contrib.postgres.search import SearchQuery +from django.db.models import Value +from search.constants import CHANNEL_KEYWORDS_TSVECTOR +from search.constants import CONTENTNODE_AUTHOR_TSVECTOR +from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR from search.constants import POSTGRES_FTS_CONFIG @@ -7,3 +12,37 @@ def get_fts_search_query(value): Returns a `SearchQuery` with our postgres full text search config set on it. """ return SearchQuery(value=value, config=POSTGRES_FTS_CONFIG) + + +def get_fts_annotated_contentnode_qs(channel_id=None): + """ + Returns a `ContentNode` queryset annotated with fields required for full text search. 
+ + If `channel_id` is provided, annotates that specific `channel_id` else annotates + the `channel_id` to which the contentnode belongs. + """ + from contentcuration.models import ContentNode + + if channel_id: + queryset = ContentNode.objects.annotate(channel_id=Value(channel_id)) + else: + queryset = ContentNode._annotate_channel_id(ContentNode.objects) + + queryset = queryset.annotate( + contentnode_tags=StringAgg("tags__tag_name", delimiter=" "), + keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR, + author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR + ) + + return queryset + + +def get_fts_annotated_channel_qs(): + """ + Returns a `Channel` queryset annotated with fields required for full text search. + """ + from contentcuration.models import Channel + from contentcuration.viewsets.channel import primary_token_subquery + + return Channel.objects.annotate(primary_channel_token=primary_token_subquery, + keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index aa6c7fe9b7..58660fce5b 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -17,7 +17,7 @@ from contentcuration.models import Channel from contentcuration.models import File -from contentcuration.utils.pagination import CachedListPagination +from contentcuration.utils.pagination import ValuesViewsetPageNumberPagination from contentcuration.viewsets.base import ReadOnlyValuesViewset from contentcuration.viewsets.base import RequiredFilterSet from contentcuration.viewsets.common import NotNullMapArrayAgg @@ -25,7 +25,7 @@ from contentcuration.viewsets.common import UUIDInFilter -class ListPagination(CachedListPagination): +class ListPagination(ValuesViewsetPageNumberPagination): page_size = 25 page_size_query_param = "page_size" max_page_size = 100 From 9ec60cf200e8f445556e2d89f6eee1a2d9e1bb52 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 29 Sep 2022 00:09:07 +0530 Subject: [PATCH 24/25] fixes the strict update subquery, lightens it up --- contentcuration/contentcuration/utils/publish.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index 017bf4a561..5b38d85576 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -845,18 +845,14 @@ def sync_contentnode_and_channel_tsvectors(channel_id): # Now, all remaining nodes are in main_tree, so let's update them. # Update only changed nodes. - changed_nodes_subquery = (get_fts_annotated_contentnode_qs(channel_id) - .filter(id=OuterRef("contentnode_id"), - tree_id=channel["main_tree__tree_id"], - complete=True, - changed=True) - .order_by()) - ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).update( - keywords_tsvector=Subquery(changed_nodes_subquery.values_list("keywords_tsvector")[:1]), - author_tsvector=Subquery(changed_nodes_subquery.values_list("author_tsvector")[:1]) + node_tsv_subquery = get_fts_annotated_contentnode_qs(channel_id).filter(id=OuterRef("contentnode_id")).order_by() + ContentNodeFullTextSearch.objects.filter(channel_id=channel_id, contentnode__complete=True, contentnode__changed=True).update( + keywords_tsvector=Subquery(node_tsv_subquery.values("keywords_tsvector")[:1]), + author_tsvector=Subquery(node_tsv_subquery.values("author_tsvector")[:1]) ) # Insert newly created nodes. 
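Context for the subquery change above: a correlated Subquery that matches no row resolves to NULL, and the previous UPDATE touched every row of the channel, so keeping complete/changed inside the subquery blanked the tsvectors of all unchanged nodes. Filtering the outer queryset instead simply skips those rows. A short sketch of the failure mode this commit removes, with channel_id as in the enclosing function (illustrative only, not part of the patch):

    from django.db.models import OuterRef, Subquery
    from search.models import ContentNodeFullTextSearch
    from search.utils import get_fts_annotated_contentnode_qs

    # Too strict: an unchanged node matches nothing in the subquery ...
    too_strict = (get_fts_annotated_contentnode_qs(channel_id)
                  .filter(id=OuterRef("contentnode_id"), complete=True, changed=True)
                  .order_by())
    # ... so this writes NULL into keywords_tsvector for every unchanged node's row.
    ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).update(
        keywords_tsvector=Subquery(too_strict.values("keywords_tsvector")[:1]),
    )
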
+ # "set_contentnode_tsvectors" command is defined in "search/management/commands" directory. call_command("set_contentnode_tsvectors", "--channel-id={}".format(channel_id), "--tree-id={}".format(channel["main_tree__tree_id"]), From aae3be1891e635a8d87ab58814f0b625dbee4d77 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Thu, 29 Sep 2022 01:51:33 +0530 Subject: [PATCH 25/25] Do not output deleted channel nodes on search --- contentcuration/search/viewsets/contentnode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py index 58660fce5b..676698031d 100644 --- a/contentcuration/search/viewsets/contentnode.py +++ b/contentcuration/search/viewsets/contentnode.py @@ -52,11 +52,11 @@ def filter_channel_list(self, queryset, name, value): if value == "public": channel_ids = Channel.get_public_channels().values_list("id", flat=True) elif value == "edit" and user: - channel_ids = user.editable_channels.values_list("id", flat=True) + channel_ids = user.editable_channels.filter(deleted=False).values_list("id", flat=True) elif value == "bookmark" and user: - channel_ids = user.bookmarked_channels.values_list("id", flat=True) + channel_ids = user.bookmarked_channels.filter(deleted=False).values_list("id", flat=True) elif value == "view" and user: - channel_ids = user.view_only_channels.values_list("id", flat=True) + channel_ids = user.view_only_channels.filter(deleted=False).values_list("id", flat=True) return queryset.filter(channel_id__in=list(channel_ids))
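
The deleted=False filters above land without a test; a hedged sketch of a regression test that could be added to the search test module, reusing the helpers seen in the earlier test patches (the method name, the direct deleted=True toggle, and the assumption that dummy_publish indexes the node are all illustrative, not part of the patch):

    def test_deleted_channel_excluded_from_edit_search(self):
        user = testdata.user(email="trash@test.com")
        channel = testdata.channel(name="soon_deleted_channel")
        channel.editors.add(user)
        testdata.node({"title": "Kolibri video", "kind_id": "video"}, parent=channel.main_tree)
        dummy_publish(channel)

        # Soft-delete the channel; the row stays, but search must now ignore it.
        channel.deleted = True
        channel.save()

        self.client.force_authenticate(user=user)
        response = self.client.get(
            reverse("search-list"),
            data={"channel_list": "edit", "keywords": "video"},
            format="json",
        )
        self.assertEqual(response.status_code, 200, response.content)
        self.assertEqual(response.data["results"], [])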