learningequality · bjester · Oct 7, 2022 · Jun 2, 2022 · Jun 2, 2022 · Jun 15, 2022
diff --git a/...entcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue b/...entcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/ChannelList.vue
@@ -112,6 +112,7 @@
           [this.channelFilter]: true,
           page: this.$route.query.page || 1,
           exclude: this.currentChannelId,
+          published: true,
         }).then(page => {
           this.pageCount = page.total_pages;
           this.channels = page.results;

diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py
@@ -44,8 +44,6 @@
 from search.constants import CHANNEL_KEYWORDS_TSVECTOR
 from search.constants import CONTENTNODE_AUTHOR_TSVECTOR
 from search.constants import CONTENTNODE_KEYWORDS_TSVECTOR
-from search.constants import CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR
-from search.constants import CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR
 from search.models import ChannelFullTextSearch
 from search.models import ContentNodeFullTextSearch
 
@@ -819,62 +817,74 @@ def fill_published_fields(channel, version_notes):
     channel.save()
 
 
-def create_or_update_tsvectors(channel_id):
+def sync_contentnode_and_channel_tsvectors(channel_id):
     """
-    Create or update tsvectors for the channel and all its content nodes.
+    Creates, deletes and updates tsvectors of the channel and all its content nodes
+    to reflect the current state of channel's main tree.
     """
     # Update or create channel tsvector entry.
     logging.info("Starting to set tsvectors for channel with id {}.".format(channel_id))
 
     from contentcuration.viewsets.channel import primary_token_subquery
 
     channel = (ccmodels.Channel.objects
+               .filter(pk=channel_id)
                .annotate(primary_channel_token=primary_token_subquery,
                          keywords_tsvector=CHANNEL_KEYWORDS_TSVECTOR)
-               .get(pk=channel_id))
+               .values("keywords_tsvector", "main_tree__tree_id")
+               .get())
 
     if ChannelFullTextSearch.objects.filter(channel_id=channel_id).exists():
-        update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel.keywords_tsvector)
+        update_count = ChannelFullTextSearch.objects.filter(channel_id=channel_id).update(keywords_tsvector=channel["keywords_tsvector"])
         logging.info("Updated {} channel tsvector.".format(update_count))
     else:
-        obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel.keywords_tsvector)
+        obj = ChannelFullTextSearch(channel_id=channel_id, keywords_tsvector=channel["keywords_tsvector"])
         obj.save()
         logging.info("Created 1 channel tsvector.")
 
     # Update or create contentnodes tsvector entry for channel_id.
-    logging.info("Setting tsvectors for all contentnodes in channel {}.".format(channel_id))
+    logging.info("Starting to set tsvectors for all contentnodes in channel {}.".format(channel_id))
 
-    if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists():
+    nodes_tsvector_query = (ccmodels.ContentNode.objects
+                            .filter(tree_id=channel["main_tree__tree_id"])
+                            .annotate(channel_id=Value(channel_id),
+                                      contentnode_tags=StringAgg("tags__tag_name", delimiter=" "),
+                                      keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR,
+                                      author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR)
+                            .order_by())
 
+    if ContentNodeFullTextSearch.objects.filter(channel_id=channel_id).exists():
         # First, delete nodes that are no longer in main_tree.
-        nodes_no_longer_in_main_tree = ~Exists(channel.main_tree.get_family().filter(id=OuterRef("contentnode_id")))
+        nodes_no_longer_in_main_tree = ~Exists(ccmodels.ContentNode.objects.filter(id=OuterRef("contentnode_id"), tree_id=channel["main_tree__tree_id"]))
         ContentNodeFullTextSearch.objects.filter(nodes_no_longer_in_main_tree, channel_id=channel_id).delete()
 
         # Now, all remaining nodes are in main_tree, so let's update them.
-        update_count = (ContentNodeFullTextSearch.objects.filter(channel_id=channel_id)
-                        .annotate(contentnode_tags=StringAgg("tags__tag_name", delimiter=" "))
-                        .update(keywords_tsvector=CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR, author_tsvector=CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR))
+        # Update only changed nodes.
+        nodes_to_update = ContentNodeFullTextSearch.objects.filter(channel_id=channel_id, contentnode__changed=True)
+
+        update_objs = list()
+        for node in nodes_to_update:
+            corresponding_contentnode = nodes_tsvector_query.filter(pk=node.contentnode_id).values("keywords_tsvector", "author_tsvector").first()
+            if corresponding_contentnode:
+                node.keywords_tsvector = corresponding_contentnode["keywords_tsvector"]
+                node.author_tsvector = corresponding_contentnode["author_tsvector"]
+                update_objs.append(node)
+        ContentNodeFullTextSearch.objects.bulk_update(update_objs, ["keywords_tsvector", "author_tsvector"])
+        del update_objs
 
     # Insert newly created nodes.
-    nodes_not_having_tsvector_record = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id")))
-    nodes_to_insert = (channel.main_tree.get_family()
+    nodes_not_having_tsvector_record = ~Exists(ContentNodeFullTextSearch.objects.filter(contentnode_id=OuterRef("id"), channel_id=channel_id))
+    nodes_to_insert = (nodes_tsvector_query
                        .filter(nodes_not_having_tsvector_record)
-                       .annotate(channel_id=Value(channel_id),
-                                 contentnode_tags=StringAgg("tags__tag_name", delimiter=" "),
-                                 keywords_tsvector=CONTENTNODE_KEYWORDS_TSVECTOR,
-                                 author_tsvector=CONTENTNODE_AUTHOR_TSVECTOR)
                        .values("id", "channel_id", "keywords_tsvector", "author_tsvector"))
 
     insert_objs = list()
-
     for node in nodes_to_insert:
         obj = ContentNodeFullTextSearch(contentnode_id=node["id"], channel_id=node["channel_id"],
                                         keywords_tsvector=node["keywords_tsvector"], author_tsvector=node["author_tsvector"])
         insert_objs.append(obj)
-
     inserted_nodes_list = ContentNodeFullTextSearch.objects.bulk_create(insert_objs)
-
-    logging.info("Successfully inserted {} and updated {} contentnode tsvectors.".format(len(inserted_nodes_list), update_count))
+    logging.info("Successfully inserted {} contentnode tsvectors.".format(len(inserted_nodes_list)))
 
 
 @delay_user_storage_calculation
@@ -887,8 +897,6 @@ def publish_channel(
     send_email=False,
     progress_tracker=None,
     language=settings.LANGUAGE_CODE,
-
-
 ):
     """
     :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None
@@ -900,8 +908,9 @@ def publish_channel(
         set_channel_icon_encoding(channel)
         kolibri_temp_db = create_content_database(channel, force, user_id, force_exercises, progress_tracker=progress_tracker)
         increment_channel_version(channel)
-        mark_all_nodes_as_published(channel)
         add_tokens_to_channel(channel)
+        sync_contentnode_and_channel_tsvectors(channel_id=channel.id)
+        mark_all_nodes_as_published(channel)
         fill_published_fields(channel, version_notes)
 
         # Attributes not getting set for some reason, so just save it here
@@ -914,10 +923,6 @@ def publish_channel(
         if channel.public:
             delete_public_channel_cache_keys()
 
-        # Enqueue tsvector task to update or create channel tsvectors and all its
-        # contentnodes tsvector entries.
-        create_or_update_tsvectors(channel_id=channel_id)
-
         if send_email:
             with override(language):
                 send_emails(channel, user_id, version_notes=version_notes)

diff --git a/contentcuration/contentcuration/viewsets/channel.py b/contentcuration/contentcuration/viewsets/channel.py
@@ -122,10 +122,13 @@ def filter_deleted(self, queryset, name, value):
         return queryset.filter(deleted=value)
 
     def filter_keywords(self, queryset, name, value):
+        search_query = get_fts_search_query(value)
+        dash_replaced_search_query = get_fts_search_query(value.replace("-", ""))
+
         channel_keywords_query = (Exists(ChannelFullTextSearch.objects.filter(
-            keywords_tsvector=get_fts_search_query(value.replace("-", "")), channel_id=OuterRef("id"))))
+            Q(keywords_tsvector=search_query) | Q(keywords_tsvector=dash_replaced_search_query), channel_id=OuterRef("id"))))
         contentnode_search_query = (Exists(ContentNodeFullTextSearch.objects.filter(
-            Q(keywords_tsvector=get_fts_search_query(value)) | Q(author_tsvector=get_fts_search_query(value)), channel_id=OuterRef("id"))))
+            Q(keywords_tsvector=search_query) | Q(author_tsvector=search_query), channel_id=OuterRef("id"))))
 
         return queryset.filter(Q(channel_keywords_query) | Q(contentnode_search_query))
 

diff --git a/contentcuration/search/constants.py b/contentcuration/search/constants.py
@@ -11,13 +11,6 @@
 CONTENTNODE_AUTHOR_TSVECTOR_FIELDS = ("author", "aggregator", "provider")
 CONTENTNODE_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG)
 
-CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR_FIELDS = ("contentnode__id", "channel_id", "contentnode__node_id", "contentnode__content_id",
-                                                 "contentnode__tree_id", "contentnode__title", "contentnode__description", "contentnode_tags")
-CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR = SearchVector(*CONTENTNODE_PREFIXED_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG)
-
-CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR_FIELDS = ("contentnode__author", "contentnode__aggregator", "contentnode__provider")
-CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR = SearchVector(*CONTENTNODE_PREFIXED_AUTHOR_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG)
-
 # Channel vector and search fields.
 CHANNEL_KEYWORDS_TSVECTOR_FIELDS = ("id", "main_tree__tree_id", "name", "description", "tagline", "primary_channel_token")
 CHANNEL_KEYWORDS_TSVECTOR = SearchVector(*CHANNEL_KEYWORDS_TSVECTOR_FIELDS, config=POSTGRES_FTS_CONFIG)
diff --git a/contentcuration/search/viewsets/contentnode.py b/contentcuration/search/viewsets/contentnode.py
@@ -49,19 +49,22 @@ class ContentNodeFilter(RequiredFilterSet):
     def filter_channel_list(self, queryset, name, value):
         user = not self.request.user.is_anonymous and self.request.user
         channel_ids = []
+
         if value == "public":
-            channel_ids = Channel.objects.filter(public=True, deleted=False, main_tree__published=True).values_list("id", flat=True)
+            channel_ids = Channel.get_public_channels().values_list("id", flat=True)
         elif value == "edit" and user:
-            channel_ids = user.editable_channels.filter(main_tree__published=True).values_list("id", flat=True)
+            channel_ids = user.editable_channels.values_list("id", flat=True)
         elif value == "bookmark" and user:
-            channel_ids = user.bookmarked_channels.filter(main_tree__published=True).values_list("id", flat=True)
+            channel_ids = user.bookmarked_channels.values_list("id", flat=True)
         elif value == "view" and user:
-            channel_ids = user.view_only_channels.filter(main_tree__published=True).values_list("id", flat=True)
+            channel_ids = user.view_only_channels.values_list("id", flat=True)
+
         return queryset.filter(channel_id__in=list(channel_ids))
 
     def filter_keywords(self, queryset, name, value):
-        return queryset.filter(Q(keywords_tsvector=get_fts_search_query(value))
-                               | Q(author_tsvector=get_fts_search_query(value)))
+        search_query = get_fts_search_query(value)
+        return queryset.filter(Q(keywords_tsvector=search_query)
+                               | Q(author_tsvector=search_query))
 
     def filter_author(self, queryset, name, value):
         return queryset.filter(author_tsvector=get_fts_search_query(value))
@@ -98,6 +101,7 @@ class SearchContentNodeViewSet(ReadOnlyValuesViewset):
     filterset_class = ContentNodeFilter
     pagination_class = ListPagination
     permission_classes = [IsAuthenticated]
+    queryset = ContentNodeFullTextSearch.objects.all()
 
     field_map = {
         "id": "contentnode__id",
@@ -137,12 +141,9 @@ class SearchContentNodeViewSet(ReadOnlyValuesViewset):
         "original_channel_name",
     )
 
-    def get_queryset(self):
-        return ContentNodeFullTextSearch.objects.select_related("contentnode")
-
     def annotate_queryset(self, queryset):
         """
-        Annotates thumbnails, resources count and channel name.
+        Annotates thumbnails, resources count and original channel name.
         """
         thumbnails = File.objects.filter(
             contentnode=OuterRef("contentnode__id"), preset__thumbnail=True