learningequality · bjester · Oct 7, 2022 · Jun 2, 2022 · Jun 2, 2022 · Jun 15, 2022
diff --git a/contentcuration/contentcuration/debug/middleware.py b/contentcuration/contentcuration/debug/middleware.py
diff --git a/contentcuration/contentcuration/debug_panel_settings.py b/contentcuration/contentcuration/debug_panel_settings.py
@@ -1,8 +1,12 @@
 from .dev_settings import *  # noqa
 
-# These endpoints will throw an error on the django debug panel
+# These endpoints will throw an error on the django debug panel.
 EXCLUDED_DEBUG_URLS = [
     "/content/storage",
+
+    # Disabling task API because as soon as the task API gets polled
+    # the current request data gets overwritten.
+    "/api/task",
 ]
 
 DEBUG_PANEL_ACTIVE = True
@@ -14,10 +18,10 @@ def custom_show_toolbar(request):
     )  # noqa F405
 
 
-# if debug_panel exists, add it to our INSTALLED_APPS
+# if debug_panel exists, add it to our INSTALLED_APPS.
 INSTALLED_APPS += ("debug_panel", "debug_toolbar", "pympler")  # noqa F405
 MIDDLEWARE += (  # noqa F405
-    "contentcuration.debug.middleware.CustomDebugPanelMiddleware",
+    "debug_toolbar.middleware.DebugToolbarMiddleware",
 )
 DEBUG_TOOLBAR_CONFIG = {
     "SHOW_TOOLBAR_CALLBACK": custom_show_toolbar,

diff --git a/...ntcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue b/...ntcuration/contentcuration/frontend/channelEdit/views/ImportFromChannels/BrowsingCard.vue
@@ -160,13 +160,8 @@
         }
         return this.$tr('resourcesCount', { count });
       },
-      numLocations() {
-        return this.node.location_ids.length;
-      },
       goToLocationLabel() {
-        return this.numLocations > 1
-          ? this.$tr('goToPluralLocationsAction', { count: this.numLocations })
-          : this.$tr('goToSingleLocationAction');
+        return this.$tr('goToSingleLocationAction');
       },
       isTopic() {
         return this.node.kind === ContentKindsNames.TOPIC;
@@ -189,8 +184,6 @@
     $trs: {
       tagsList: 'Tags: {tags}',
       goToSingleLocationAction: 'Go to location',
-      goToPluralLocationsAction:
-        'In {count, number} {count, plural, one {location} other {locations}}',
       addToClipboardAction: 'Copy to clipboard',
       resourcesCount: '{count, number} {count, plural, one {resource} other {resources}}',
       coach: 'Resource for coaches',

diff --git a/contentcuration/contentcuration/management/commands/set_tsvectors.py b/contentcuration/contentcuration/management/commands/set_tsvectors.py
@@ -0,0 +1,94 @@
+"""
+Management command that sets the tsvector in the title_description_search_vector field in batches.
+
+Nodes are updated one channel at a time (batched by channel_id). The ids of finished channels are
+kept in the cache, which lets a run that stopped early (e.g. on failure or memory overflow)
+resume without redoing completed channels.
+"""
+import logging as logmodule
+
+from django.core.cache import cache
+from django.core.management.base import BaseCommand
+
+from contentcuration.models import Channel
+from contentcuration.models import ContentNode
+from contentcuration.models import POSTGRES_SEARCH_VECTOR
+
+
+logmodule.basicConfig(level=logmodule.INFO)
+logging = logmodule.getLogger(__name__)
+
+
+UPDATED_TS_VECTORS_CACHE_KEY = "tsvectors_updated_for_channel_ids"
+UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY = "tsvectors_updated_for_null_channels"
+
+
+class Command(BaseCommand):
+    help = "Sets title_description_search_vector on content nodes, one channel at a time."
+
+    def add_arguments(self, parser):
+        """Register the --public and --no-cache flags."""
+        parser.add_argument(
+            "--public",
+            action="store_true",
+            help="Set tsvector for only the public channel nodes instead of all nodes.",
+        )
+        parser.add_argument(
+            "--no-cache",
+            action="store_true",
+            help="Disables the cache. This updates all previously updated nodes.",
+        )
+
+    def handle(self, *args, **options):
+        """
+        Update the search vector channel by channel, then for nodes with a NULL channel_id.
+
+        Channels already recorded in the cache are skipped unless --no-cache is given.
+        With --public only public channels are processed and NULL-channel nodes are skipped.
+        """
+        if options["no_cache"]:
+            # Ignore recorded progress. Progress of this run is still written to the cache below.
+            updated_channel_ids = []
+            do_update_nodes_with_null_channel_id = True
+        else:
+            # Read the cache once; a missing key gives None, which is treated as "nothing done yet".
+            updated_channel_ids = cache.get(UPDATED_TS_VECTORS_CACHE_KEY) or []
+            do_update_nodes_with_null_channel_id = not cache.get(UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY)
+
+        if options["public"]:
+            to_update_channel_ids = list(
+                Channel.get_public_channels().exclude(id__in=updated_channel_ids).values_list("id", flat=True)
+            )
+            do_update_nodes_with_null_channel_id = False
+            logging.info("Started setting tsvector for public channel nodes.")
+        else:
+            to_update_channel_ids = list(
+                Channel.objects.exclude(id__in=updated_channel_ids).values_list("id", flat=True)
+            )
+            logging.info("Started setting tsvector for all nodes.")
+
+        annotated_contentnode_qs = ContentNode._annotate_channel_id(ContentNode.objects)
+
+        for channel_id in to_update_channel_ids:
+            logging.info("Setting tsvector for nodes of channel %s.", channel_id)
+            annotated_contentnode_qs.filter(channel_id=channel_id).update(
+                title_description_search_vector=POSTGRES_SEARCH_VECTOR
+            )
+            # Record progress after every channel so that an interrupted run can resume.
+            # A timeout of None means the cache entry never expires.
+            updated_channel_ids.append(channel_id)
+            cache.set(UPDATED_TS_VECTORS_CACHE_KEY, updated_channel_ids, None)
+            logging.info("Finished setting tsvector for nodes of channel %s.", channel_id)
+
+        if do_update_nodes_with_null_channel_id:
+            logging.info("Setting tsvector for nodes with NULL channel_id.")
+            annotated_contentnode_qs.filter(channel_id__isnull=True).update(
+                title_description_search_vector=POSTGRES_SEARCH_VECTOR
+            )
+            cache.set(UPDATED_TS_VECTORS_FOR_NULL_CHANNEL_CACHE_KEY, True, None)
+            logging.info("Finished setting tsvector for nodes with NULL channel_id.")
+
+        if options["public"]:
+            logging.info("Finished setting tsvector for public channel nodes.")
+        else:
+            logging.info("Finished setting tsvector for all nodes.")
diff --git a/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py b/contentcuration/contentcuration/migrations/0141_contentnode_search_vector.py
@@ -0,0 +1,37 @@
+# Generated by Django 3.2.13 on 2022-08-10 19:20
+import django.contrib.postgres.indexes
+import django.contrib.postgres.search
+from django.contrib.postgres.operations import AddIndexConcurrently
+from django.contrib.postgres.operations import TrigramExtension
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    # AddIndexConcurrently cannot run inside a transaction, so this
+    # migration has to be non-atomic.
+    atomic = False
+
+    dependencies = [
+        ('contentcuration', '0140_delete_task'),
+    ]
+
+    operations = [
+        # Installs the pg_trgm module that comes pre-bundled with PostgreSQL 9.6.
+        TrigramExtension(),
+
+        migrations.AddField(
+            model_name='contentnode',
+            name='title_description_search_vector',
+            field=django.contrib.postgres.search.SearchVectorField(blank=True, null=True),
+        ),
+        AddIndexConcurrently(
+            model_name='contentnode',
+            index=django.contrib.postgres.indexes.GinIndex(fields=['title_description_search_vector'], name='node_search_vector_gin_idx'),
+        ),
+        # Trigram GiST index on tag names; the gist_trgm_ops operator class is provided by pg_trgm.
+        AddIndexConcurrently(
+            model_name='contenttag',
+            index=django.contrib.postgres.indexes.GistIndex(fields=['tag_name'], name='contenttag_tag_name_gist_idx', opclasses=['gist_trgm_ops']),
+        ),
+    ]
diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py
@@ -8,11 +8,15 @@
 from datetime import datetime
 
 import pytz
-from celery import states
 from django.conf import settings
 from django.contrib.auth.base_user import AbstractBaseUser
 from django.contrib.auth.base_user import BaseUserManager
 from django.contrib.auth.models import PermissionsMixin
+from django.contrib.postgres.indexes import GinIndex
+from django.contrib.postgres.indexes import GistIndex
+from django.contrib.postgres.search import SearchQuery
+from django.contrib.postgres.search import SearchVector
+from django.contrib.postgres.search import SearchVectorField
 from django.contrib.sessions.models import Session
 from django.core.cache import cache
 from django.core.exceptions import MultipleObjectsReturned
@@ -1093,6 +1097,9 @@ def delete(self, *args, **kwargs):
             self.secret_token.delete()
 
 
+CONTENT_TAG_NAME__INDEX_NAME = "contenttag_tag_name_gist_idx"
+
+
 class ContentTag(models.Model):
     id = UUIDField(primary_key=True, default=uuid.uuid4)
     tag_name = models.CharField(max_length=50)
@@ -1104,6 +1111,7 @@ def __str__(self):
 
     class Meta:
         unique_together = ['tag_name', 'channel']
+        indexes = [GistIndex(fields=["tag_name"], name=CONTENT_TAG_NAME__INDEX_NAME, opclasses=["gist_trgm_ops"])]
 
 
 def delegate_manager(method):
@@ -1147,6 +1155,12 @@ def __str__(self):
 NODE_ID_INDEX_NAME = "node_id_idx"
 NODE_MODIFIED_INDEX_NAME = "node_modified_idx"
 NODE_MODIFIED_DESC_INDEX_NAME = "node_modified_desc_idx"
+NODE_SEARCH_VECTOR_GIN_INDEX_NAME = "node_search_vector_gin_idx"
+
+# Our Postgres full-text search configuration.
+POSTGRES_FTS_CONFIG = "simple"
+# Search vector to create tsvector of title and description concatenated.
+POSTGRES_SEARCH_VECTOR = SearchVector("title", "description", config=POSTGRES_FTS_CONFIG)
 CONTENTNODE_TREE_ID_CACHE_KEY = "contentnode_{pk}__tree_id"
 
 
@@ -1243,6 +1257,10 @@ class ContentNode(MPTTModel, models.Model):
     # this duration should be in seconds.
     suggested_duration = models.IntegerField(blank=True, null=True, help_text="Suggested duration for the content node (in seconds)")
 
+    # A field to store the ts_vector form of (title + ' ' + description).
+    # This significantly increases the search performance.
+    title_description_search_vector = SearchVectorField(blank=True, null=True)
+
     objects = CustomContentNodeTreeManager()
 
     # Track all updates and ignore a blacklist of attributes
@@ -1339,6 +1357,12 @@ def filter_view_queryset(cls, queryset, user):
             | Q(public=True)
         )
 
+    @classmethod
+    def search(cls, queryset, search_term):
+        """Filter `queryset` to nodes whose title/description tsvector matches `search_term` or with a tag containing it (case-insensitive)."""
+        search_query = Q(title_description_search_vector=SearchQuery(value=search_term, config=POSTGRES_FTS_CONFIG, search_type="plain"))
+        return queryset.filter(search_query | Q(tags__tag_name__icontains=search_term))
+
     @raise_if_unsaved
     def get_root(self):
         # Only topics can be root nodes
@@ -1822,8 +1846,10 @@ def set_default_learning_activity(self):
 
     def save(self, skip_lock=False, *args, **kwargs):
         if self._state.adding:
+            is_create = True
             self.on_create()
         else:
+            is_create = False
             self.on_update()
 
         # Logic borrowed from mptt - do a simple check to see if we have changed
@@ -1867,6 +1893,9 @@ def save(self, skip_lock=False, *args, **kwargs):
             if changed_ids:
                 ContentNode.objects.filter(id__in=changed_ids).update(changed=True)
 
+        if is_create:
+            ContentNode.filter_by_pk(pk=self.id).update(title_description_search_vector=POSTGRES_SEARCH_VECTOR)
+
     # Copied from MPTT
     save.alters_data = True
 
@@ -1909,6 +1938,7 @@ class Meta:
         indexes = [
             models.Index(fields=["node_id"], name=NODE_ID_INDEX_NAME),
             models.Index(fields=["-modified"], name=NODE_MODIFIED_DESC_INDEX_NAME),
+            GinIndex(fields=["title_description_search_vector"], name=NODE_SEARCH_VECTOR_GIN_INDEX_NAME),
         ]
 
 

diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py
@@ -86,6 +86,7 @@
     'webpack_loader',
     'django_filters',
     'mathfilters',
+    'django.contrib.postgres',
     'django_celery_results',
 )
 
@@ -221,7 +222,6 @@
 
 IS_CONTENTNODE_TABLE_PARTITIONED = os.getenv("IS_CONTENTNODE_TABLE_PARTITIONED") or False
 
-
 DATABASE_ROUTERS = [
     "kolibri_content.router.ContentDBRouter",
 ]

diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py
@@ -195,21 +195,31 @@ def node(data, parent=None):
     return new_node
 
 
-def tree(parent=None):
+def tree(parent=None, tree_data=None):
+    """
+    Return ``node(tree_data, parent)``. When ``tree_data`` is None it is
+    loaded from the ``fixtures/tree.json`` file next to this module.
+    """
     # Read from json fixture
-    filepath = os.path.sep.join([os.path.dirname(__file__), "fixtures", "tree.json"])
-    with open(filepath, "rb") as jsonfile:
-        data = json.load(jsonfile)
+    if tree_data is None:
+        filepath = os.path.sep.join([os.path.dirname(__file__), "fixtures", "tree.json"])
+        with open(filepath, "rb") as jsonfile:
+            tree_data = json.load(jsonfile)
 
-    return node(data, parent)
+    return node(tree_data, parent)
 
 
-def channel(name="testchannel"):
+def channel(name="testchannel", create_main_tree=True, main_tree_data=None):
+    """
+    Create and return a channel named ``name``. Unless ``create_main_tree`` is
+    false, its ``main_tree`` is built with ``tree(tree_data=main_tree_data)``.
+    """
     channel = cc.Channel.objects.create(name=name)
     channel.save()
 
-    channel.main_tree = tree()
-    channel.save()
+    if create_main_tree:
+        channel.main_tree = tree(tree_data=main_tree_data)
+        channel.save()
 
     return channel
 

diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py
@@ -190,6 +190,10 @@ def queue_get_return_none_when_empty():
             logging.debug("Mapping node with id {id}".format(
                 id=node.pk))
 
+            # Update tsvector for this node.
+            node.title_description_search_vector = ccmodels.POSTGRES_SEARCH_VECTOR
+            node.save(update_fields=["title_description_search_vector"])
+
             if node.get_descendants(include_self=True).exclude(kind_id=content_kinds.TOPIC).exists() and node.complete:
                 children = (node.children.all())
                 node_queue.extend(children)

diff --git a/contentcuration/contentcuration/viewsets/contentnode.py b/contentcuration/contentcuration/viewsets/contentnode.py
@@ -633,7 +633,8 @@ def delete_from_changes(self, changes):
 
 
 def dict_if_none(obj, field_name=None):
-    return obj[field_name] if obj[field_name] else {}
+    """Return ``obj.get(field_name)`` when it is truthy, otherwise an empty dict."""
+    return obj.get(field_name) or {}
 
 
 # Apply mixin first to override ValuesViewset