diff --git a/Makefile b/Makefile index 619fcee41e..051053bab3 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ migrate: # 4) Remove the management command from this `deploy-migrate` recipe # 5) Repeat! deploy-migrate: - python contentcuration/manage.py rectify_incorrect_contentnode_source_fields + echo "Nothing to do here!" contentnodegc: python contentcuration/manage.py garbage_collect diff --git a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py b/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py deleted file mode 100644 index a563fb712a..0000000000 --- a/contentcuration/contentcuration/tests/test_rectify_source_field_migraiton_command.py +++ /dev/null @@ -1,182 +0,0 @@ -# DELETE THIS FILE AFTER RUNNING THE MIGRATIONSSS -import datetime -import uuid - -from django.core.management import call_command -from django.utils import timezone -from le_utils.constants import content_kinds - -from contentcuration.models import Channel -from contentcuration.models import ContentNode -from contentcuration.models import License -from contentcuration.tests import testdata -from contentcuration.tests.base import StudioAPITestCase -from contentcuration.utils.publish import publish_channel - - -class TestRectifyMigrationCommand(StudioAPITestCase): - - @classmethod - def setUpClass(cls): - super(TestRectifyMigrationCommand, cls).setUpClass() - - def tearDown(self): - super(TestRectifyMigrationCommand, self).tearDown() - - def setUp(self): - super(TestRectifyMigrationCommand, self).setUp() - self.original_channel = testdata.channel() - self.license_original = License.objects.all()[0] - self.original_contentnode = ContentNode.objects.create( - id=uuid.uuid4().hex, - title="Original Node", - parent=self.original_channel.main_tree, - license=self.license_original, - original_channel_id=None, - source_channel_id=None, - author="old author" - ) - self.user = testdata.user() - self.original_channel.editors.add(self.user) - self.client.force_authenticate(user=self.user) - - def create_base_channel_and_contentnode(self, source_contentnode, source_channel): - base_channel = testdata.channel() - base_channel.public = True - base_channel.save() - license_changed = License.objects.all()[2] - base_node = ContentNode.objects.create( - id=uuid.uuid4().hex, - title="base contentnode", - parent=base_channel.main_tree, - kind_id=content_kinds.VIDEO, - original_channel_id=self.original_channel.id, - original_source_node_id=self.original_contentnode.node_id, - source_channel_id=source_channel.id, - source_node_id=source_contentnode.node_id, - author="source author", - license=license_changed, - ) - return base_node, base_channel - - def create_source_channel_and_contentnode(self): - source_channel = testdata.channel() - source_channel.public = True - source_channel.save() - license_changed = License.objects.all()[1] - source_node = ContentNode.objects.create( - id=uuid.uuid4().hex, - title="base contentnode", - parent=source_channel.main_tree, - kind_id=content_kinds.VIDEO, - license=license_changed, - original_channel_id=self.original_channel.id, - source_channel_id=self.original_channel.id, - source_node_id=self.original_contentnode.node_id, - original_source_node_id=self.original_contentnode.node_id, - author="source author", - ) - - return source_node, source_channel - - def run_migrations(self): - call_command('rectify_incorrect_contentnode_source_fields', user_id=self.user.id, is_test=True) - - def test_two_node_case(self): - base_node, base_channel = self.create_base_channel_and_contentnode(self.original_contentnode, self.original_channel) - - publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) - - # main_tree node still has changed=true even after the publish - for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): - print(node.id) - print(node.id == base_channel.main_tree.id) - print("checking if the node is complete or not ", node.complete) - node.changed = False - # This should probably again change the changed=true but suprisingly it doesnot - # Meaning the changed boolean doesnot change for the main_tree no matter what we do - # through ContentNode model methods like save. - node.save() - - ContentNode.objects.filter(pk=base_node.pk).update( - modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) - ) - - self.run_migrations() - updated_base_node = ContentNode.objects.get(pk=base_node.pk) - self.assertEqual(updated_base_node.license, self.original_contentnode.license) - self.assertEqual(updated_base_node.author, self.original_contentnode.author) - self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) - - def test_three_node_case_implicit(self): - source_node, source_channel = self.create_source_channel_and_contentnode() - base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) - source_node.aggregator = "Nami" - source_node.save() - # Implicit case - base_node.author = source_node.author - base_node.license = source_node.license - base_node.aggregator = source_node.aggregator - base_node.save() - - publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) - - for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): - print(node.id) - print(node.id == base_channel.main_tree.id) - print("checking if the node is complete or not ", node.complete) - node.changed = False - node.save() - - ContentNode.objects.filter(pk=base_node.pk).update( - modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) - ) - - ContentNode.objects.filter(pk=source_node.pk).update( - modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) - ) - - self.run_migrations() - updated_base_node = ContentNode.objects.get(pk=base_node.pk) - updated_source_node = ContentNode.objects.get(pk=source_node.pk) - self.assertEqual(updated_base_node.license, self.original_contentnode.license) - self.assertEqual(updated_base_node.author, self.original_contentnode.author) - self.assertEqual(updated_source_node.license, self.original_contentnode.license) - self.assertEqual(updated_source_node.author, self.original_contentnode.author) - self.assertEqual(updated_source_node.aggregator, self.original_contentnode.aggregator) - self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) - - def test_three_node_case_explicit(self): - source_node, source_channel = self.create_source_channel_and_contentnode() - base_node, base_channel = self.create_base_channel_and_contentnode(source_node, source_channel) - source_node.provider = "luffy" - base_node.provider = "zoro" - base_node.save() - source_node.save() - publish_channel(self.user.id, Channel.objects.get(pk=base_channel.pk).id) - - for node in Channel.objects.get(pk=base_channel.pk).main_tree.get_family().filter(changed=True): - print(node.id) - print(node.id == base_channel.main_tree.id) - print("checking if the node is complete or not ", node.complete) - node.changed = False - node.save() - - ContentNode.objects.filter(pk=base_node.pk).update( - modified=datetime.datetime(2023, 7, 5, tzinfo=timezone.utc) - ) - - ContentNode.objects.filter(pk=source_node.pk).update( - modified=datetime.datetime(2023, 3, 5, tzinfo=timezone.utc) - ) - - self.run_migrations() - updated_base_node = ContentNode.objects.get(pk=base_node.pk) - updated_source_node = ContentNode.objects.get(pk=source_node.pk) - self.assertEqual(updated_base_node.license, self.original_contentnode.license) - self.assertEqual(updated_base_node.author, self.original_contentnode.author) - self.assertEqual(updated_base_node.provider, self.original_contentnode.provider) - self.assertEqual(updated_source_node.license, self.original_contentnode.license) - self.assertEqual(updated_source_node.author, self.original_contentnode.author) - self.assertEqual(updated_source_node.provider, self.original_contentnode.provider) - self.assertEqual(Channel.objects.get(pk=base_channel.id).main_tree.get_family().filter(changed=True).exists(), False) diff --git a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py b/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py deleted file mode 100644 index 5c9ab9c3aa..0000000000 --- a/contentcuration/kolibri_public/management/commands/rectify_incorrect_contentnode_source_fields.py +++ /dev/null @@ -1,160 +0,0 @@ -import datetime -import logging - -from django.core.management.base import BaseCommand -from django.db.models import Exists -from django.db.models import F -from django.db.models import OuterRef -from django.db.models import Q -from django.db.models import Value -from django.db.models.functions import Coalesce -from django.utils import timezone -from django_cte import With - -from contentcuration.models import Channel -from contentcuration.models import ContentNode -from contentcuration.models import User -from contentcuration.utils.publish import publish_channel - -logger = logging.getLogger(__file__) - - -class Command(BaseCommand): - - def add_arguments(self, parser): - - parser.add_argument( - '--is_test', - action='store_true', - help="Indicate if the command is running in a test environment.", - ) - - parser.add_argument( - '--user_id', - type=int, - help="User ID for the operation", - ) - - def handle(self, *args, **options): - # Filter Date : July 9, 2023 - # Link https://github.com/learningequality/studio/pull/4189 - # The PR date for the frontend change is July 10, 2023 - # we would set the filter day one day back just to be sure - - is_test = options['is_test'] - user_id = options['user_id'] - - if not is_test: - user_id = User.objects.get(email='channeladmin@learningequality.org').pk - - filter_date = datetime.datetime(2023, 7, 9, tzinfo=timezone.utc) - main_trees_cte = With( - ( - Channel.objects.filter( - main_tree__isnull=False - ) - .annotate(channel_id=F("id")) - .values("channel_id", "deleted", tree_id=F("main_tree__tree_id")) - ), - name="main_trees", - ) - - nodes = main_trees_cte.join( - ContentNode.objects.all(), - tree_id=main_trees_cte.col.tree_id, - ).annotate(channel_id=main_trees_cte.col.channel_id, deleted=main_trees_cte.col.deleted) - - original_source_nodes = ( - nodes.with_cte(main_trees_cte) - .filter( - node_id=OuterRef("original_source_node_id"), - ) - .exclude( - tree_id=OuterRef("tree_id"), - ) - .annotate( - coalesced_provider=Coalesce("provider", Value("")), - coalesced_author=Coalesce("author", Value("")), - coalesced_aggregator=Coalesce("aggregator", Value("")), - coalesced_license_id=Coalesce("license_id", -1), - ) - ) - # We filter out according to last_modified date before this PR - # https://github.com/learningequality/studio/pull/4189 - # As we want to lossen up the public=True Filter and open the - # migration for all the nodes even if they are not published - diff = ( - nodes.with_cte(main_trees_cte).filter( - deleted=False, # we dont want the channel to be deleted or else we are fixing ghost nodes - source_node_id__isnull=False, - original_source_node_id__isnull=False, - modified__lt=filter_date - ) - ).annotate( - coalesced_provider=Coalesce("provider", Value("")), - coalesced_author=Coalesce("author", Value("")), - coalesced_aggregator=Coalesce("aggregator", Value("")), - coalesced_license_id=Coalesce("license_id", -1), - ) - diff_combined = diff.annotate( - original_source_node_f_changed=Exists( - original_source_nodes.filter( - ~Q(coalesced_provider=OuterRef("coalesced_provider")) - | ~Q(coalesced_author=OuterRef("coalesced_author")) - | ~Q(coalesced_aggregator=OuterRef("coalesced_aggregator")) - | ~Q(coalesced_license_id=OuterRef("coalesced_license_id")) - ) - ) - ).filter(original_source_node_f_changed=True) - - final_nodes = diff_combined.values( - "id", - "channel_id", - "original_channel_id", - "original_source_node_id", - "coalesced_provider", - "coalesced_author", - "coalesced_aggregator", - "coalesced_license_id", - "original_source_node_f_changed", - ).order_by() - - channel_ids_to_republish = set() - - for item in final_nodes: - base_node = ContentNode.objects.get(pk=item["id"]) - - original_source_channel_id = item["original_channel_id"] - original_source_node_id = item["original_source_node_id"] - tree_id = ( - Channel.objects.filter(pk=original_source_channel_id) - .values_list("main_tree__tree_id", flat=True) - .get() - ) - original_source_node = ContentNode.objects.filter( - tree_id=tree_id, node_id=original_source_node_id - ) - - base_channel = Channel.objects.get(pk=item['channel_id']) - - to_be_republished = not (base_channel.main_tree.get_family().filter(changed=True).exists()) - - if original_source_channel_id is not None and original_source_node.exists(): - # original source node exists and its source fields dont match - # update the base node - if base_node.author != original_source_node[0].author: - base_node.author = original_source_node[0].author - if base_node.provider != original_source_node[0].provider: - base_node.provider = original_source_node[0].provider - if base_node.aggregator != original_source_node[0].aggregator: - base_node.aggregator = original_source_node[0].aggregator - if base_node.license != original_source_node[0].license: - base_node.license = original_source_node[0].license - base_node.save() - - if to_be_republished and base_channel.last_published is not None: - channel_ids_to_republish.add(base_channel.id) - - # we would repbulish the channel - for channel_id in channel_ids_to_republish: - publish_channel(user_id, channel_id)