learningequality · bjester · Oct 7, 2022 · Jun 2, 2022 · Jun 2, 2022 · Jun 15, 2022
diff --git a/contentcuration/contentcuration/debug/middleware.py b/contentcuration/contentcuration/debug/middleware.py
diff --git a/contentcuration/contentcuration/debug_panel_settings.py b/contentcuration/contentcuration/debug_panel_settings.py
@@ -1,8 +1,13 @@
 from .dev_settings import *  # noqa
 
-# These endpoints will throw an error on the django debug panel
+# These endpoints will throw an error on the django debug panel.
 EXCLUDED_DEBUG_URLS = [
     "/content/storage",
+
+    # Disabling sync API because as soon as the sync API gets polled
+    # the current request data gets overwritten.
+    # Can be removed after websockets deployment.
+    "/api/sync",
 ]
 
 DEBUG_PANEL_ACTIVE = True
@@ -14,10 +19,10 @@ def custom_show_toolbar(request):
     )  # noqa F405
 
 
-# if debug_panel exists, add it to our INSTALLED_APPS
+# Add debug_panel, debug_toolbar and pympler to our INSTALLED_APPS.
 INSTALLED_APPS += ("debug_panel", "debug_toolbar", "pympler")  # noqa F405
 MIDDLEWARE += (  # noqa F405
-    "contentcuration.debug.middleware.CustomDebugPanelMiddleware",
+    "debug_toolbar.middleware.DebugToolbarMiddleware",
 )
 DEBUG_TOOLBAR_CONFIG = {
     "SHOW_TOOLBAR_CALLBACK": custom_show_toolbar,

diff --git a/...ntcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue b/...ntcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue
@@ -160,13 +160,8 @@
         }
         return this.$tr('resourcesCount', { count });
       },
-      numLocations() {
-        return this.node.location_ids.length;
-      },
       goToLocationLabel() {
-        return this.numLocations > 1
-          ? this.$tr('goToPluralLocationsAction', { count: this.numLocations })
-          : this.$tr('goToSingleLocationAction');
+        // Always the singular label; the count-based plural variant is removed here.
+        return this.$tr('goToSingleLocationAction');
       },
       isTopic() {
         return this.node.kind === ContentKindsNames.TOPIC;
@@ -189,8 +184,6 @@
     $trs: {
       tagsList: 'Tags: {tags}',
       goToSingleLocationAction: 'Go to location',
-      goToPluralLocationsAction:
-        'In {count, number} {count, plural, one {location} other {locations}}',
       addToClipboardAction: 'Copy to clipboard',
       resourcesCount: '{count, number} {count, plural, one {resource} other {resources}}',
       coach: 'Resource for coaches',

diff --git a/...entcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue b/...entcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue
@@ -112,6 +112,7 @@
           [this.channelFilter]: true,
           page: this.$route.query.page || 1,
           exclude: this.currentChannelId,
+          published: true,
         }).then(page => {
           this.pageCount = page.total_pages;
           this.channels = page.results;

diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py
@@ -8,7 +8,6 @@
 from datetime import datetime
 
 import pytz
-from celery import states
 from django.conf import settings
 from django.contrib.auth.base_user import AbstractBaseUser
 from django.contrib.auth.base_user import BaseUserManager

diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py
@@ -85,6 +85,7 @@
     'webpack_loader',
     'django_filters',
     'mathfilters',
+    'django.contrib.postgres',
     'django_celery_results',
 )
 
@@ -220,7 +221,6 @@
 
 IS_CONTENTNODE_TABLE_PARTITIONED = os.getenv("IS_CONTENTNODE_TABLE_PARTITIONED") or False
 
-
 DATABASE_ROUTERS = [
     "kolibri_content.router.ContentDBRouter",
 ]

diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py
@@ -14,13 +14,17 @@
 from itertools import chain
 
 from django.conf import settings
+from django.contrib.postgres.aggregates import StringAgg
 from django.core.files import File
 from django.core.files.storage import default_storage as storage
 from django.core.management import call_command
 from django.db.models import Count
+from django.db.models import Exists
 from django.db.models import Max
+from django.db.models import OuterRef
 from django.db.models import Q
 from django.db.models import Sum
+from django.db.models import Value
 from django.db.utils import IntegrityError
 from django.template.loader import render_to_string
 from django.utils import timezone
@@ -37,6 +41,11 @@
 from le_utils.constants import roles
 from past.builtins import basestring
 from past.utils import old_div
+from search.constants import CHANNEL_KEYWORDS_TSVECTOR
+from search.constants import CONTENTNODE_AUTHOR_TSVECTOR
+from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR
+from search.models import ChannelFullTextSearch
+from search.models import ContentNodeFullTextSearch
 
 from contentcuration import models as ccmodels
 from contentcuration.decorators import delay_user_storage_calculation
@@ -808,6 +817,76 @@ def fill_published_fields(channel, version_notes):
     channel.save()
 
 
+def sync_contentnode_and_channel_tsvectors(channel_id):
+    """
+    Creates, deletes and updates tsvectors of the channel and all its content nodes
+    to reflect the current state of channel's main tree.
+
+    :param channel_id: id of the channel whose ChannelFullTextSearch and
+        ContentNodeFullTextSearch rows are brought up to date.
+    :raises: Channel.DoesNotExist if no channel has the given id (from ``.get()``).
+    """
+    # Update or create channel tsvector entry.
+    logging.info("Starting to set tsvectors for channel with id {}.".format(channel_id))
+
+    # Kept function-local as in the original; presumably this avoids a
+    # circular import with the viewsets module -- TODO confirm.
+    from contentcuration.viewsets.channel import primary_token_subquery
+
+    channel = (ccmodels.Channel.objects
+               .filter(pk=channel_id)
+               .annotate(primary_channel_token=primary_token_subquery,
+                         keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR)
+               .values("keywords_tsvector", "main_tree__tree_id")
+               .get())
+
+    # update() returns the number of matched rows, so a separate exists()
+    # query is not needed to decide between updating and creating.
+    update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel["keywords_tsvector"])
+    if update_count:
+        logging.info("Updated {} channel tsvector.".format(update_count))
+    else:
+        obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel["keywords_tsvector"])
+        obj.save()
+        logging.info("Created 1 channel tsvector.")
+
+    # Update or create contentnodes tsvector entry for channel_id.
+    logging.info("Starting to set tsvectors for all contentnodes in channel {}.".format(channel_id))
+
+    # Every node of the main tree annotated with its freshly computed tsvectors.
+    # The empty order_by() clears any ordering on the queryset.
+    nodes_tsvector_query = (ccmodels.ContentNode.objects
+                            .filter(tree_id=channel["main_tree__tree_id"])
+                            .annotate(channel_id=Value(channel_id),
+                                      contentnode_tags=StringAgg("tags__tag_name", delimiter=" "),
+                                      keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR,
+                                      author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR)
+                            .order_by())
+
+    # True for content nodes that already have a tsvector row in this channel.
+    has_tsvector_record = Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"), channel_id=channel_id))
+
+    if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists():
+        # First, delete nodes that are no longer in main_tree.
+        nodes_no_longer_in_main_tree = ~Exists(ccmodels.ContentNode.objects.filter(id=OuterRef("contentnode_id"), tree_id=channel["main_tree__tree_id"]))
+        ContentNodeFullTextSearch.objects.filter(nodes_no_longer_in_main_tree, channel_id=channel_id).delete()
+
+        # Now, all remaining nodes are in main_tree, so let's update them.
+        # Update only changed nodes.
+        nodes_to_update = ContentNodeFullTextSearch.objects.filter(channel_id=channel_id, contentnode__changed=True)
+
+        # Fetch the fresh tsvectors of all changed nodes in a single query,
+        # keyed by content node id, instead of issuing one query per node.
+        fresh_tsvectors = {
+            row["id"]: row
+            for row in (nodes_tsvector_query
+                        .filter(has_tsvector_record, changed=True)
+                        .values("id", "keywords_tsvector", "author_tsvector"))
+        }
+
+        update_objs = list()
+        for node in nodes_to_update:
+            corresponding_contentnode = fresh_tsvectors.get(node.contentnode_id)
+            if corresponding_contentnode:
+                node.keywords_tsvector = corresponding_contentnode["keywords_tsvector"]
+                node.author_tsvector = corresponding_contentnode["author_tsvector"]
+                update_objs.append(node)
+        ContentNodeFullTextSearch.objects.bulk_update(update_objs, ["keywords_tsvector", "author_tsvector"])
+        logging.info("Updated {} contentnode tsvectors.".format(len(update_objs)))
+        # Release the potentially large intermediates before the insert phase.
+        del update_objs
+        del fresh_tsvectors
+
+    # Insert newly created nodes.
+    nodes_to_insert = (nodes_tsvector_query
+                       .filter(~has_tsvector_record)
+                       .values("id", "channel_id", "keywords_tsvector", "author_tsvector"))
+
+    insert_objs = [
+        ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"],
+                                  keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"])
+        for node in nodes_to_insert
+    ]
+    inserted_nodes_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs)
+    logging.info("Successfully inserted {} contentnode tsvectors.".format(len(inserted_nodes_list)))
+
+
 @delay_user_storage_calculation
 def publish_channel(
     user_id,
@@ -829,8 +908,9 @@ def publish_channel(
         set_channel_icon_encoding(channel)
         kolibri_temp_db = create_content_database(channel, force, user_id, force_exercises, progress_tracker=progress_tracker)
         increment_channel_version(channel)
-        mark_all_nodes_as_published(channel)
         add_tokens_to_channel(channel)
+        sync_contentnode_and_channel_tsvectors(channel_id=channel.id)
+        mark_all_nodes_as_published(channel)
         fill_published_fields(channel, version_notes)
 
         # Attributes not getting set for some reason, so just save it here

diff --git a/contentcuration/contentcuration/viewsets/channel.py b/contentcuration/contentcuration/viewsets/channel.py
@@ -24,6 +24,9 @@
 from rest_framework.serializers import CharField
 from rest_framework.serializers import FloatField
 from rest_framework.serializers import IntegerField
+from search.models import ChannelFullTextSearch
+from search.models import ContentNodeFullTextSearch
+from search.utils import get_fts_search_query
 
 from contentcuration.decorators import cache_no_user_data
 from contentcuration.models import Change
@@ -119,23 +122,15 @@ def filter_deleted(self, queryset, name, value):
         return queryset.filter(deleted=value)
 
     def filter_keywords(self, queryset, name, value):
-        # TODO: Wait until we show more metadata on cards to add this back in
-        # keywords_query = self.main_tree_query.filter(
-        #     Q(tags__tag_name__icontains=value)
-        #     | Q(author__icontains=value)
-        #     | Q(aggregator__icontains=value)
-        #     | Q(provider__icontains=value)
-        # )
-        return queryset.annotate(
-            # keyword_match_count=SQCount(keywords_query, field="content_id"),
-            primary_token=primary_token_subquery,
-        ).filter(
-            Q(name__icontains=value)
-            | Q(description__icontains=value)
-            | Q(pk__istartswith=value)
-            | Q(primary_token=value.replace("-", ""))
-            # | Q(keyword_match_count__gt=0)
-        )
+        """
+        Restrict ``queryset`` to channels matching a full text search for ``value``.
+
+        A channel matches when its ChannelFullTextSearch keywords tsvector
+        matches the query built from ``value`` (or from ``value`` with dashes
+        removed), or when any ContentNodeFullTextSearch row of the channel
+        matches the query on its keywords or author tsvector.
+
+        ``name`` is not used by this method.
+        """
+        search_query = get_fts_search_query(value)
+        # Dash-stripped variant; the removed implementation stripped dashes
+        # only when comparing against the primary token.
+        dash_replaced_search_query = get_fts_search_query(value.replace("-", ""))
+
+        channel_keywords_query = (Exists(ChannelFullTextSearch.objects.filter(
+            Q(keywords_tsvector=search_query) | Q(keywords_tsvector=dash_replaced_search_query), channel_id=OuterRef("id"))))
+        contentnode_search_query = (Exists(ContentNodeFullTextSearch.objects.filter(
+            Q(keywords_tsvector=search_query) | Q(author_tsvector=search_query), channel_id=OuterRef("id"))))
+
+        return queryset.filter(Q(channel_keywords_query) | Q(contentnode_search_query))
 
     def filter_languages(self, queryset, name, value):
         languages = value.split(",")

diff --git a/contentcuration/search/admin.py b/contentcuration/search/admin.py
diff --git a/contentcuration/search/constants.py b/contentcuration/search/constants.py
@@ -0,0 +1,16 @@
+from django.contrib.postgres.search import SearchVector
+
+# Full text search configuration handed to every SearchVector below. The
+# "simple" configuration keeps search language agnostic.
+POSTGRES_FTS_CONFIG = "simple"
+
+# ContentNode: fields feeding the keywords vector, and the vector itself.
+CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS = (
+    "id",
+    "channel_id",
+    "node_id",
+    "content_id",
+    "tree_id",
+    "title",
+    "description",
+    "contentnode_tags",
+)
+CONTENTNODE_KEYWORDS_TSVECTOR = SearchVector(
+    *CONTENTNODE_KEYWORDS_TSVECTOR_FIELDS,
+    config=POSTGRES_FTS_CONFIG
+)
+
+# ContentNode: fields feeding the author vector, and the vector itself.
+CONTENTNODE_AUTHOR_TSVECTOR_FIELDS = (
+    "author",
+    "aggregator",
+    "provider",
+)
+CONTENTNODE_AUTHOR_TSVECTOR = SearchVector(
+    *CONTENTNODE_AUTHOR_TSVECTOR_FIELDS,
+    config=POSTGRES_FTS_CONFIG
+)
+
+# Channel: fields feeding the keywords vector, and the vector itself.
+CHANNEL_KEYWORDS_TSVECTOR_FIELDS = (
+    "id",
+    "main_tree__tree_id",
+    "name",
+    "description",
+    "tagline",
+    "primary_channel_token",
+)
+CHANNEL_KEYWORDS_TSVECTOR = SearchVector(
+    *CHANNEL_KEYWORDS_TSVECTOR_FIELDS,
+    config=POSTGRES_FTS_CONFIG
+)
diff --git a/...uration/contentcuration/debug/__init__.py → ...entcuration/search/management/__init__.py b/...uration/contentcuration/debug/__init__.py → ...entcuration/search/management/__init__.py
diff --git a/contentcuration/search/management/commands/__init__.py b/contentcuration/search/management/commands/__init__.py
diff --git a/contentcuration/search/management/commands/set_channel_tsvectors.py b/contentcuration/search/management/commands/set_channel_tsvectors.py
@@ -0,0 +1,57 @@
+"""
+This command bulk-inserts channel tsvectors into the ChannelFullTextSearch table.
+"""
+import logging as logmodule
+import time
+
+from django.core.management.base import BaseCommand
+from django.db.models import Exists
+from django.db.models import OuterRef
+from search.constants import CHANNEL_KEYWORDS_TSVECTOR
+from search.models import ChannelFullTextSearch
+
+from contentcuration.models import Channel
+from contentcuration.viewsets.channel import primary_token_subquery
+
+
+logmodule.basicConfig(level=logmodule.INFO)
+logging = logmodule.getLogger("command")
+
+CHUNKSIZE = 5000
+
+
+class Command(BaseCommand):
+    """Insert tsvector rows, chunk by chunk, for channels that do not have one yet."""
+
+    def handle(self, *args, **options):
+        """
+        Repeatedly select up to CHUNKSIZE published, non-deleted channels that
+        lack a ChannelFullTextSearch row and bulk-create rows for them, until
+        no such channel is left.
+        """
+        started_at = time.time()
+
+        missing_tsvector = ~Exists(ChannelFullTextSearch.objects.filter(channel_id=OuterRef("id")))
+
+        pending_channels = (Channel.objects.select_related("main_tree")
+                            .filter(missing_tsvector, deleted=False, main_tree__published=True)
+                            .annotate(primary_channel_token=primary_token_subquery,
+                                      keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR)
+                            .values("id", "keywords_tsvector"))
+
+        total_inserted = 0
+
+        while True:
+            # Slicing re-runs the query, so rows inserted by the previous
+            # iteration are excluded by the missing_tsvector condition.
+            chunk = list(pending_channels[:CHUNKSIZE])
+            if not chunk:
+                break
+
+            logging.info("Inserting channel tsvectors.")
+
+            created = ChannelFullTextSearch.objects.bulk_create([
+                ChannelFullTextSearch(channel_id=row["id"], keywords_tsvector=row["keywords_tsvector"])
+                for row in chunk
+            ])
+
+            chunk_count = len(created)
+            total_inserted += chunk_count
+
+            logging.info("Inserted {} channel tsvectors.".format(chunk_count))
+
+        logging.info("Completed! successfully inserted total of {} channel tsvectors in {} seconds.".format(
+            total_inserted, time.time() - started_at))