diff --git a/kolibri/core/content/contentschema/versions/content_schema_current.py b/kolibri/core/content/contentschema/versions/content_schema_current.py index f3db9a66f42..939bb253929 100644 --- a/kolibri/core/content/contentschema/versions/content_schema_current.py +++ b/kolibri/core/content/contentschema/versions/content_schema_current.py @@ -2,9 +2,11 @@ from sqlalchemy import BigInteger from sqlalchemy import Boolean from sqlalchemy import CHAR +from sqlalchemy import CheckConstraint from sqlalchemy import Column from sqlalchemy import Float from sqlalchemy import ForeignKey +from sqlalchemy import ForeignKeyConstraint from sqlalchemy import Index from sqlalchemy import Integer from sqlalchemy import String @@ -45,6 +47,23 @@ class ContentLocalfile(Base): class ContentContentnode(Base): __tablename__ = "content_contentnode" __table_args__ = ( + CheckConstraint("lft >= 0"), + CheckConstraint("tree_id >= 0"), + CheckConstraint("level >= 0"), + CheckConstraint("duration >= 0"), + CheckConstraint("rght >= 0"), + ForeignKeyConstraint( + ["lang_id"], + ["content_language.id"], + deferrable=True, + initially="DEFERRED", + ), + ForeignKeyConstraint( + ["parent_id"], + ["content_contentnode.id"], + deferrable=True, + initially="DEFERRED", + ), Index( "content_contentnode_level_channel_id_available_29f0bb18_idx", "level", @@ -69,11 +88,10 @@ class ContentContentnode(Base): author = Column(String(200), nullable=False) kind = Column(String(200), nullable=False) available = Column(Boolean, nullable=False) - lft = Column(Integer, nullable=False, index=True) - rght = Column(Integer, nullable=False, index=True) + lft = Column(Integer, nullable=False) tree_id = Column(Integer, nullable=False, index=True) - level = Column(Integer, nullable=False, index=True) - lang_id = Column(ForeignKey("content_language.id"), index=True) + level = Column(Integer, nullable=False) + lang_id = Column(String(14), index=True) license_description = Column(Text) license_name = Column(String(50)) 
coach_content = Column(Boolean, nullable=False) @@ -94,7 +112,8 @@ class ContentContentnode(Base): learning_activities_bitmask_0 = Column(BigInteger) ancestors = Column(Text) admin_imported = Column(Boolean) - parent_id = Column(ForeignKey("content_contentnode.id"), index=True) + rght = Column(Integer, nullable=False) + parent_id = Column(CHAR(32), index=True) lang = relationship("ContentLanguage") parent = relationship("ContentContentnode", remote_side=[id]) @@ -118,6 +137,13 @@ class ContentAssessmentmetadata(Base): class ContentChannelmetadata(Base): __tablename__ = "content_channelmetadata" + __table_args__ = ( + CheckConstraint('"order" >= 0'), + ForeignKeyConstraint( + ["root_id"], + ["content_contentnode.id"], + ), + ) id = Column(CHAR(32), primary_key=True) name = Column(String(200), nullable=False) @@ -127,13 +153,15 @@ class ContentChannelmetadata(Base): thumbnail = Column(Text, nullable=False) last_updated = Column(String) min_schema_version = Column(String(50), nullable=False) - root_id = Column(ForeignKey("content_contentnode.id"), nullable=False, index=True) + root_id = Column(CHAR(32), nullable=False, index=True) published_size = Column(BigInteger) total_resource_count = Column(Integer) order = Column(Integer) public = Column(Boolean) tagline = Column(String(150)) partial = Column(Boolean) + included_categories = Column(Text) + included_grade_levels = Column(Text) root = relationship("ContentContentnode") @@ -242,12 +270,21 @@ class ContentFile(Base): class ContentChannelmetadataIncludedLanguages(Base): __tablename__ = "content_channelmetadata_included_languages" + __table_args__ = ( + Index( + "content_channelmetadata_included_languages_channelmetadata_id_language_id_51f20415_uniq", + "channelmetadata_id", + "language_id", + unique=True, + ), + ) id = Column(Integer, primary_key=True) channelmetadata_id = Column( - ForeignKey("content_channelmetadata.id"), nullable=False + ForeignKey("content_channelmetadata.id"), nullable=False, index=True ) - 
# Compatibility layer for Python 3.11+, where inspect.ArgSpec and
# inspect.getargspec were removed from the standard library.
# NOTE(review): presumably required by the sqlacodegen import that follows
# this shim in the module -- confirm against the pinned sqlacodegen version.
if not hasattr(inspect, "ArgSpec"):
    from collections import namedtuple

    # Mirror the removed stdlib type: a namedtuple, so callers can unpack,
    # index and compare it as well as read its fields by name.
    ArgSpec = namedtuple("ArgSpec", "args varargs keywords defaults")

    def getargspec(func):
        """
        Return an ``ArgSpec`` describing the positional signature of ``func``.

        Built on ``inspect.getfullargspec``; its ``varkw`` entry is exposed
        under the legacy ``keywords`` name. Keyword-only arguments and
        annotations reported by ``getfullargspec`` are not carried over.
        """
        spec = inspect.getfullargspec(func)
        return ArgSpec(
            args=spec.args,
            varargs=spec.varargs,
            keywords=spec.varkw,
            defaults=spec.defaults,
        )

    # Install the replacements on the module so later
    # ``from inspect import ArgSpec`` / ``inspect.getargspec`` lookups succeed.
    inspect.ArgSpec = ArgSpec
    inspect.getargspec = getargspec
dependencies = [ + ("content", "0038_alter_localfile_extension"), + ] + + operations = [ + migrations.AddField( + model_name="channelmetadata", + name="included_categories", + field=models.TextField(blank=True, null=True), + ), + migrations.AddField( + model_name="channelmetadata", + name="included_grade_levels", + field=models.TextField(blank=True, null=True), + ), + sortedm2m.operations.AlterSortedManyToManyField( + model_name="channelmetadata", + name="included_languages", + field=sortedm2m.fields.SortedManyToManyField( + blank=True, + related_name="channels", + to="content.Language", + verbose_name="languages", + ), + ), + ] diff --git a/kolibri/core/content/models.py b/kolibri/core/content/models.py index 9cb15586d7d..78e461fb865 100644 --- a/kolibri/core/content/models.py +++ b/kolibri/core/content/models.py @@ -36,6 +36,7 @@ from morango.models.fields import UUIDField from mptt.managers import TreeManager from mptt.querysets import TreeQuerySet +from sortedm2m.fields import SortedManyToManyField from .utils import paths from kolibri.core.auth.models import Facility @@ -376,9 +377,14 @@ class ChannelMetadata(base_models.ChannelMetadata): # precalculated fields during annotation/migration published_size = models.BigIntegerField(default=0, null=True, blank=True) total_resource_count = models.IntegerField(default=0, null=True, blank=True) - included_languages = models.ManyToManyField( - "Language", related_name="channels", verbose_name="languages", blank=True + included_languages = SortedManyToManyField( + Language, + related_name="channels", + verbose_name="languages", + blank=True, ) + included_categories = models.TextField(null=True, blank=True) + included_grade_levels = models.TextField(null=True, blank=True) order = models.PositiveIntegerField(default=0, null=True, blank=True) public = models.BooleanField(null=True) # Has only a subset of this channel's metadata been imported? 
diff --git a/kolibri/core/content/test/test_annotation.py b/kolibri/core/content/test/test_annotation.py index be7af247a4a..2de250711eb 100644 --- a/kolibri/core/content/test/test_annotation.py +++ b/kolibri/core/content/test/test_annotation.py @@ -17,6 +17,8 @@ from kolibri.core.content.models import LocalFile from kolibri.core.content.test.test_channel_upgrade import ChannelBuilder from kolibri.core.content.utils.annotation import calculate_included_languages +from kolibri.core.content.utils.annotation import calculate_ordered_categories +from kolibri.core.content.utils.annotation import calculate_ordered_grade_levels from kolibri.core.content.utils.annotation import calculate_published_size from kolibri.core.content.utils.annotation import calculate_total_resource_count from kolibri.core.content.utils.annotation import mark_local_files_as_available @@ -962,6 +964,119 @@ def test_calculate_included_languages(self): list(self.channel.included_languages.values_list("id", flat=True)), ["en"] ) + def test_calculate_ordered_categories(self): + # Test with no categories + calculate_ordered_categories(self.channel) + self.assertIsNone(self.channel.included_categories) + + # Create nodes with different categories + ContentNode.objects.filter(id=self.node.id).update(categories="math,science") + ContentNode.objects.create( + title="test2", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id=self.node.channel_id, + categories="math,history", + available=True, + ) + node3 = ContentNode.objects.create( + title="test3", + id=uuid.uuid4().hex, + content_id=uuid.uuid4().hex, + channel_id=self.node.channel_id, + categories="math", + available=True, + ) + + # Test ordering by frequency + calculate_ordered_categories(self.channel) + self.assertEqual(self.channel.included_categories, "math,science,history") + + # Test with unavailable node + node3.available = False + node3.save() + calculate_ordered_categories(self.channel) + 
def test_calculate_ordered_grade_levels(self):
    # Checks that included_grade_levels is a comma-joined string of the
    # grade levels found on the channel's nodes, most frequent first.
    channel_id = self.node.channel_id

    def add_available_node(title, grade_levels):
        # Fresh hex ids for both id and content_id on every call.
        return ContentNode.objects.create(
            title=title,
            id=uuid.uuid4().hex,
            content_id=uuid.uuid4().hex,
            channel_id=channel_id,
            grade_levels=grade_levels,
            available=True,
        )

    # Before any node carries grade levels the field is expected to be None.
    calculate_ordered_grade_levels(self.channel)
    self.assertIsNone(self.channel.included_grade_levels)

    # "2" appears on three nodes, "1" and "3" on one node each.
    ContentNode.objects.filter(id=self.node.id).update(grade_levels="1,2")
    add_available_node("test2", "2,3")
    third_node = add_available_node("test3", "2")

    calculate_ordered_grade_levels(self.channel)
    self.assertEqual(self.channel.included_grade_levels, "2,1,3")

    # Making one of the "2" nodes unavailable leaves "2" the most frequent.
    third_node.available = False
    third_node.save()
    calculate_ordered_grade_levels(self.channel)
    self.assertEqual(self.channel.included_grade_levels, "2,1,3")
@version_upgrade(old_version="<0.18.0")
def ordered_metadata_in_channels():
    """
    For every ChannelMetadata row, recompute the stored categories, grade
    levels and included languages so that they are ordered by occurrence
    in the channel resources.
    """
    # Applied to each channel in this order: categories, grade levels, languages.
    recalculations = (
        calculate_ordered_categories,
        calculate_ordered_grade_levels,
        calculate_included_languages,
    )
    for channel_metadata in ChannelMetadata.objects.all():
        for recalculate in recalculations:
            recalculate(channel_metadata)
def _calculate_ordered_field_values(channel, field_name):
    """
    Collect the comma-separated values stored in ``field_name`` on the
    channel's available ContentNodes and return them most frequent first.

    :param channel: object whose ``id`` is matched against ``channel_id``
    :param field_name: name of a ContentNode field holding comma-separated values
    :return: list of distinct values sorted by descending occurrence count;
        values with equal counts keep the order in which they were first seen
    """
    # Function-scope import keeps this helper self-contained.
    from collections import Counter

    field_values = (
        ContentNode.objects.filter(channel_id=channel.id, available=True)
        .exclude(**{field_name: None})
        .values_list(field_name, flat=True)
    )

    value_counts = Counter()
    for raw_value in field_values:
        if raw_value:  # just in case some field is an empty string
            value_counts.update(raw_value.split(","))

    # NOTE(review): the queryset has no explicit ordering, so the relative
    # order of equally frequent values follows the order rows are returned.
    return [value for value, _count in value_counts.most_common()]


def calculate_ordered_categories(channel):
    """
    Store the channel's categories, most frequent first, as a comma-joined
    string in ``included_categories`` (``None`` when there are none) and save.
    """
    ordered_categories = _calculate_ordered_field_values(channel, "categories")
    channel.included_categories = (
        ",".join(ordered_categories) if ordered_categories else None
    )
    channel.save()


def calculate_ordered_grade_levels(channel):
    """
    Store the channel's grade levels, most frequent first, as a comma-joined
    string in ``included_grade_levels`` (``None`` when there are none) and save.
    """
    ordered_grade_levels = _calculate_ordered_field_values(channel, "grade_levels")
    channel.included_grade_levels = (
        ",".join(ordered_grade_levels) if ordered_grade_levels else None
    )
    channel.save()


def calculate_included_languages(channel):
    """
    Replace ``channel.included_languages`` with the languages of the channel's
    available nodes, ordered by how many of those nodes use each language.

    Nodes without a language are ignored.
    """
    content_nodes = ContentNode.objects.filter(
        channel_id=channel.id, available=True
    ).exclude(lang=None)
    languages = (
        content_nodes.values("lang")
        .annotate(count=Count("lang"))
        .order_by("-count")
        .values_list("lang", flat=True)
        .distinct()
    )
    # Drop the existing relations before re-adding.
    # NOTE(review): presumably so the stored sort order matches the new
    # frequency ordering -- confirm against SortedManyToManyField semantics.
    channel.included_languages.clear()
    channel.included_languages.add(*languages)