From aa92a57ecb6bec5956380b920bf60d4f667cba48 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Mon, 10 Oct 2022 19:22:51 +0530 Subject: [PATCH 1/6] Soft delete users --- contentcuration/contentcuration/forms.py | 3 +- .../migrations/0141_user_deleted.py | 18 ++++++++ contentcuration/contentcuration/models.py | 42 +++++++++---------- .../contentcuration/tests/test_models.py | 27 ++++++++++++ .../contentcuration/tests/test_settings.py | 29 ++++++++----- .../contentcuration/tests/views/test_users.py | 14 +++++++ .../contentcuration/views/settings.py | 2 + 7 files changed, 101 insertions(+), 34 deletions(-) create mode 100644 contentcuration/contentcuration/migrations/0141_user_deleted.py diff --git a/contentcuration/contentcuration/forms.py b/contentcuration/contentcuration/forms.py index 973916431e..d9dc781f61 100644 --- a/contentcuration/contentcuration/forms.py +++ b/contentcuration/contentcuration/forms.py @@ -7,6 +7,7 @@ from django.contrib.auth.forms import UserChangeForm from django.contrib.auth.forms import UserCreationForm from django.core import signing +from django.db.models import Q from django.template.loader import render_to_string from contentcuration.models import User @@ -45,7 +46,7 @@ class RegistrationForm(UserCreationForm, ExtraFormMixin): def clean_email(self): email = self.cleaned_data['email'].strip().lower() - if User.objects.filter(email__iexact=email, is_active=True).exists(): + if User.objects.filter(Q(is_active=True) | Q(deleted=True), email__iexact=email).exists(): raise UserWarning return email diff --git a/contentcuration/contentcuration/migrations/0141_user_deleted.py b/contentcuration/contentcuration/migrations/0141_user_deleted.py new file mode 100644 index 0000000000..25444e6577 --- /dev/null +++ b/contentcuration/contentcuration/migrations/0141_user_deleted.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.14 on 2022-10-06 11:18 +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [ + ('contentcuration', '0140_delete_task'), + ] + + operations = [ + migrations.AddField( + model_name='user', + name='deleted', + field=models.BooleanField(db_index=True, default=False), + ), + ] diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 0e26e36c86..c2a23d1554 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -200,6 +200,7 @@ class User(AbstractBaseUser, PermissionsMixin): content_defaults = JSONField(default=dict) policies = JSONField(default=dict, null=True) feature_flags = JSONField(default=dict, null=True) + deleted = models.BooleanField(default=False, db_index=True) _field_updates = FieldTracker(fields=[ # Field to watch for changes @@ -214,27 +215,21 @@ def __unicode__(self): return self.email def delete(self): - from contentcuration.viewsets.common import SQCount - # Remove any invitations associated to this account - self.sent_to.all().delete() - - # Delete channels associated with this user (if user is the only editor) - user_query = ( - User.objects.filter(editable_channels__id=OuterRef('id')) - .values_list('id', flat=True) - .distinct() - ) - self.editable_channels.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1).delete() - - # Delete channel collections associated with this user (if user is the only editor) - user_query = ( - User.objects.filter(channel_sets__id=OuterRef('id')) - .values_list('id', flat=True) - .distinct() - ) - self.channel_sets.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1).delete() + """ + Soft deletes the user. + """ + self.deleted = True + # Deactivate the user to disallow authentication and also + # to let the user verify the email again after recovery. + self.is_active = False + self.save() - super(User, self).delete() + def recover(self): + """ + Use this method when we want to recover a user. + """ + self.deleted = False + self.save() def can_edit(self, channel_id): return Channel.filter_edit_queryset(Channel.objects.all(), self).filter(pk=channel_id).exists() @@ -406,17 +401,20 @@ def filter_edit_queryset(cls, queryset, user): return queryset.filter(pk=user.pk) @classmethod - def get_for_email(cls, email, **filters): + def get_for_email(cls, email, deleted=False, **filters): """ Returns the appropriate User record given an email, ordered by: - those with is_active=True first, which there should only ever be one - otherwise by ID DESC so most recent inactive shoud be returned + Filters out deleted User records by default. Can be overridden with + deleted argument. + :param email: A string of the user's email :param filters: Additional filters to filter the User queryset :return: User or None """ - return User.objects.filter(email__iexact=email.strip(), **filters)\ + return User.objects.filter(email__iexact=email.strip(), deleted=deleted, **filters)\ .order_by("-is_active", "-id").first() diff --git a/contentcuration/contentcuration/tests/test_models.py b/contentcuration/contentcuration/tests/test_models.py index 3c0caa9967..66720a0985 100644 --- a/contentcuration/contentcuration/tests/test_models.py +++ b/contentcuration/contentcuration/tests/test_models.py @@ -787,6 +787,7 @@ def test_get_for_email(self): user1 = self._create_user("tester@tester.com", is_active=False) user2 = self._create_user("tester@Tester.com", is_active=False) user3 = self._create_user("Tester@Tester.com", is_active=True) + user4 = self._create_user("testing@test.com", is_active=True) # active should be returned first self.assertEqual(user3, User.get_for_email("tester@tester.com")) @@ -801,6 +802,32 @@ def test_get_for_email(self): # ensure nothing found doesn't error self.assertIsNone(User.get_for_email("tester@tester.com")) + # ensure we don't return soft-deleted users + user4.delete() + self.assertIsNone(User.get_for_email("testing@test.com")) + + def test_delete__sets_deleted_true(self): + user = self._create_user("tester@tester.com") + user.delete() + self.assertEqual(user.deleted, True) + + def test_delete__sets_is_active_false(self): + user = self._create_user("tester@tester.com") + user.delete() + self.assertEqual(user.is_active, False) + + def test_recover__sets_deleted_false(self): + user = self._create_user("tester@tester.com") + user.delete() + user.recover() + self.assertEqual(user.deleted, False) + + def test_recover__keeps_is_active_false(self): + user = self._create_user("tester@tester.com") + user.delete() + user.recover() + self.assertEqual(user.is_active, False) + class ChannelHistoryTestCase(StudioTestCase): def setUp(self): diff --git a/contentcuration/contentcuration/tests/test_settings.py b/contentcuration/contentcuration/tests/test_settings.py index c216f088a2..25bbc71a54 100644 --- a/contentcuration/contentcuration/tests/test_settings.py +++ b/contentcuration/contentcuration/tests/test_settings.py @@ -1,5 +1,6 @@ import json +import mock from django.urls import reverse_lazy from .base import BaseAPITestCase @@ -10,7 +11,7 @@ class SettingsTestCase(BaseAPITestCase): def test_username_change(self): - data = json.dumps({"first_name": "New firstname", "last_name": "New lastname",}) + data = json.dumps({"first_name": "New firstname", "last_name": "New lastname", }) request = self.create_post_request( reverse_lazy("update_user_full_name"), data=data, @@ -40,13 +41,19 @@ def test_delete_account_invalid(self): self.assertTrue(User.objects.filter(email=self.user.email).exists()) def test_delete_account(self): - # TODO: send_email causes connection errors - data = json.dumps({"email": self.user.email}) - self.create_post_request( - reverse_lazy("delete_user_account"), - data=data, - content_type="application/json", - ) - # response = DeleteAccountView.as_view()(request) - # self.assertEqual(response.status_code, 200) - # self.assertFalse(User.objects.filter(email=self.user.email).exists()) + with mock.patch("contentcuration.views.users.djangologout") as djangologout: + self.user.delete = mock.Mock() + data = json.dumps({"email": self.user.email}) + request = self.create_post_request( + reverse_lazy("delete_user_account"), + data=data, + content_type="application/json", + ) + response = DeleteAccountView.as_view()(request) + + # Ensure successful response. + self.assertEqual(response.status_code, 200) + # Ensure user's delete method is called. + self.user.delete.assert_called_once() + # Ensure we logout the user. + djangologout.assert_called_once_with(request) diff --git a/contentcuration/contentcuration/tests/views/test_users.py b/contentcuration/contentcuration/tests/views/test_users.py index 0c03d5f626..e79e0cdbf9 100644 --- a/contentcuration/contentcuration/tests/views/test_users.py +++ b/contentcuration/contentcuration/tests/views/test_users.py @@ -98,6 +98,14 @@ def test_login__whitespace(self): self.assertIsInstance(redirect, HttpResponseRedirectBase) self.assertIn("channels", redirect['Location']) + def test_after_delete__no_login(self): + with mock.patch("contentcuration.views.users.djangologin") as djangologin: + self.user.delete() + response = login(self.request) + + self.assertIsInstance(response, HttpResponseForbidden) + djangologin.assert_not_called() + class UserRegistrationViewTestCase(StudioAPITestCase): def setUp(self): @@ -121,6 +129,12 @@ def test_post__no_duplicate_registration(self): response = self.view.post(self.request) self.assertIsInstance(response, HttpResponseForbidden) + def test_after_delete__no_registration(self): + user = testdata.user(email="tester@tester.com") + user.delete() + response = self.view.post(self.request) + self.assertIsInstance(response, HttpResponseForbidden) + class UserActivationViewTestCase(StudioAPITestCase): def setUp(self): diff --git a/contentcuration/contentcuration/views/settings.py b/contentcuration/contentcuration/views/settings.py index f41b6ac926..a186665dec 100644 --- a/contentcuration/contentcuration/views/settings.py +++ b/contentcuration/contentcuration/views/settings.py @@ -34,6 +34,7 @@ from contentcuration.utils.csv_writer import generate_user_csv_filename from contentcuration.utils.messages import get_messages from contentcuration.views.base import current_user_for_context +from contentcuration.views.users import logout from contentcuration.viewsets.channel import SettingsChannelSerializer ISSUE_UPDATE_DATE = datetime(2018, 10, 29) @@ -165,6 +166,7 @@ def form_valid(self, form): os.unlink(csv_path) self.request.user.delete() + logout(self.request) class StorageSettingsView(PostFormMixin, FormView): From ef2799114b0610b6b2400dd59259641ad612d4b6 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Mon, 10 Oct 2022 20:27:29 +0530 Subject: [PATCH 2/6] fixes a regression where its expected that user-only-editor channels are hard deleted --- contentcuration/contentcuration/models.py | 26 ++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index c2a23d1554..1b1963e9b4 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -216,12 +216,36 @@ def __unicode__(self): def delete(self): """ - Soft deletes the user. + Hard deletes invitation associated to this account, hard deletes channel and channet sets + only if user is the only editor. Then soft deletes the user account. """ + from contentcuration.viewsets.common import SQCount + + # Hard delete invitations associated to this account. + self.sent_to.all().delete() + + # Hard delete channels associated with this user (if user is the only editor). + user_query = ( + User.objects.filter(editable_channels__id=OuterRef('id')) + .values_list('id', flat=True) + .distinct() + ) + self.editable_channels.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1).delete() + + # Hard delete channel collections associated with this user (if user is the only editor). + user_query = ( + User.objects.filter(channel_sets__id=OuterRef('id')) + .values_list('id', flat=True) + .distinct() + ) + self.channel_sets.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1).delete() + + # Soft delete user. self.deleted = True # Deactivate the user to disallow authentication and also # to let the user verify the email again after recovery. self.is_active = False + self.save() def recover(self): From 2a42f1895cbc1ec4cf3c7ffaeaf0b1d093cb7ffc Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Wed, 19 Oct 2022 16:29:21 +0530 Subject: [PATCH 3/6] User soft delete via garbage collection --- .../contentcuration/constants/user_history.py | 11 +++ contentcuration/contentcuration/forms.py | 2 +- .../management/commands/garbage_collect.py | 13 ++- .../migrations/0141_user_deleted.py | 18 ---- .../migrations/0141_user_soft_delete.py | 31 +++++++ contentcuration/contentcuration/models.py | 83 +++++++++++++------ contentcuration/contentcuration/settings.py | 6 +- .../contentcuration/tests/test_models.py | 28 ++++--- .../contentcuration/utils/garbage_collect.py | 73 +++++++++------- 9 files changed, 177 insertions(+), 88 deletions(-) create mode 100644 contentcuration/contentcuration/constants/user_history.py delete mode 100644 contentcuration/contentcuration/migrations/0141_user_deleted.py create mode 100644 contentcuration/contentcuration/migrations/0141_user_soft_delete.py diff --git a/contentcuration/contentcuration/constants/user_history.py b/contentcuration/contentcuration/constants/user_history.py new file mode 100644 index 0000000000..1eecf79c17 --- /dev/null +++ b/contentcuration/contentcuration/constants/user_history.py @@ -0,0 +1,11 @@ +from django.utils.translation import ugettext_lazy as _ + +DELETION = "deletion" +RECOVERY = "recovery" +RELATED_DATA_HARD_DELETION = "related-data-hard-deletion" + +choices = ( + (DELETION, _("User soft deletion")), + (RECOVERY, _("User soft deletion recovery")), + (RELATED_DATA_HARD_DELETION, _("User related data hard deletion")), +) diff --git a/contentcuration/contentcuration/forms.py b/contentcuration/contentcuration/forms.py index d9dc781f61..d0ae49893f 100644 --- a/contentcuration/contentcuration/forms.py +++ b/contentcuration/contentcuration/forms.py @@ -46,7 +46,7 @@ class RegistrationForm(UserCreationForm, ExtraFormMixin): def clean_email(self): email = self.cleaned_data['email'].strip().lower() - if User.objects.filter(Q(is_active=True) | Q(deleted=True), email__iexact=email).exists(): + if User.objects.filter(Q(is_active=True) | Q(deleted_at__isnull=False), email__iexact=email).exists(): raise UserWarning return email diff --git a/contentcuration/contentcuration/management/commands/garbage_collect.py b/contentcuration/contentcuration/management/commands/garbage_collect.py index f31db7ad5c..f22f70dd4b 100644 --- a/contentcuration/contentcuration/management/commands/garbage_collect.py +++ b/contentcuration/contentcuration/management/commands/garbage_collect.py @@ -11,6 +11,7 @@ from contentcuration.utils.garbage_collect import clean_up_contentnodes from contentcuration.utils.garbage_collect import clean_up_deleted_chefs from contentcuration.utils.garbage_collect import clean_up_feature_flags +from contentcuration.utils.garbage_collect import clean_up_soft_deleted_users from contentcuration.utils.garbage_collect import clean_up_stale_files from contentcuration.utils.garbage_collect import clean_up_tasks @@ -26,15 +27,23 @@ def handle(self, *args, **options): Actual logic for garbage collection. """ - # clean up contentnodes, files and file objects on storage that are associated - # with the orphan tree + # Clean up users that are soft deleted and are older than ACCOUNT_DELETION_BUFFER (90 days). + # Also clean contentnodes, files and file objects on storage that are associated + # with the orphan tree. + logging.info("Cleaning up soft deleted users older than ACCOUNT_DELETION_BUFFER (90 days)") + clean_up_soft_deleted_users() + logging.info("Cleaning up contentnodes from the orphan tree") clean_up_contentnodes() + logging.info("Cleaning up deleted chef nodes") clean_up_deleted_chefs() + logging.info("Cleaning up feature flags") clean_up_feature_flags() + logging.info("Cleaning up stale file objects") clean_up_stale_files() + logging.info("Cleaning up tasks") clean_up_tasks() diff --git a/contentcuration/contentcuration/migrations/0141_user_deleted.py b/contentcuration/contentcuration/migrations/0141_user_deleted.py deleted file mode 100644 index 25444e6577..0000000000 --- a/contentcuration/contentcuration/migrations/0141_user_deleted.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Django 3.2.14 on 2022-10-06 11:18 -from django.db import migrations -from django.db import models - - -class Migration(migrations.Migration): - - dependencies = [ - ('contentcuration', '0140_delete_task'), - ] - - operations = [ - migrations.AddField( - model_name='user', - name='deleted', - field=models.BooleanField(db_index=True, default=False), - ), - ] diff --git a/contentcuration/contentcuration/migrations/0141_user_soft_delete.py b/contentcuration/contentcuration/migrations/0141_user_soft_delete.py new file mode 100644 index 0000000000..1f58e4076a --- /dev/null +++ b/contentcuration/contentcuration/migrations/0141_user_soft_delete.py @@ -0,0 +1,31 @@ +# Generated by Django 3.2.14 on 2022-10-19 09:41 +import django.db.models.deletion +import django.utils.timezone +from django.conf import settings +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [ + ('contentcuration', '0140_delete_task'), + ] + + operations = [ + migrations.AddField( + model_name='user', + name='deleted_at', + field=models.DateTimeField(blank=True, null=True), + ), + migrations.CreateModel( + name='UserHistory', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('action', models.CharField(choices=[('deletion', 'User soft deletion'), ('recovery', 'User soft deletion recovery'), + ('related-data-hard-deletion', 'User related data hard deletion')], max_length=32)), + ('performed_at', models.DateTimeField(default=django.utils.timezone.now)), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='history', to=settings.AUTH_USER_MODEL)), + ], + ), + ] diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 079833c278..537f867978 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -66,6 +66,7 @@ from contentcuration.constants import channel_history from contentcuration.constants import completion_criteria +from contentcuration.constants import user_history from contentcuration.constants.contentnode import kind_activity_map from contentcuration.db.models.expressions import Array from contentcuration.db.models.functions import ArrayRemove @@ -199,7 +200,7 @@ class User(AbstractBaseUser, PermissionsMixin): content_defaults = JSONField(default=dict) policies = JSONField(default=dict, null=True) feature_flags = JSONField(default=dict, null=True) - deleted = models.BooleanField(default=False, db_index=True) + deleted_at = models.DateTimeField(null=True, blank=True) _field_updates = FieldTracker(fields=[ # Field to watch for changes @@ -215,44 +216,66 @@ def __unicode__(self): def delete(self): """ - Hard deletes invitation associated to this account, hard deletes channel and channet sets - only if user is the only editor. Then soft deletes the user account. + Soft deletes the user account. + """ + self.deleted_at = timezone.now() + # Deactivate the user to disallow authentication and also + # to let the user verify the email again after recovery. + self.is_active = False + self.save() + self.history.create(user_id=self.pk, action=user_history.DELETION) + + def recover(self): + """ + Use this method when we want to recover a user. + """ + self.deleted_at = None + self.save() + self.history.create(user_id=self.pk, action=user_history.RECOVERY) + + def hard_delete_user_related_data(self): + """ + Hard delete all user related data. But keeps the user record itself intact. + + User related data that gets hard deleted are: + - sole editor non-public channels. + - sole editor non-public channelsets. + - sole editor non-public channels' content nodes and its underlying files that are not + used by any other channel. + - all user invitations. """ from contentcuration.viewsets.common import SQCount # Hard delete invitations associated to this account. self.sent_to.all().delete() + self.sent_by.all().delete() - # Hard delete channels associated with this user (if user is the only editor). - user_query = ( + editable_channels_user_query = ( User.objects.filter(editable_channels__id=OuterRef('id')) .values_list('id', flat=True) .distinct() ) - self.editable_channels.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1).delete() + non_public_channels_sole_editor = self.editable_channels.annotate(num_editors=SQCount( + editable_channels_user_query, field="id")).filter(num_editors=1, public=False) + + # Point sole editor non-public channels' contentnodes to orphan tree to let + # our garbage collection delete the nodes and underlying files. + ContentNode._annotate_channel_id(ContentNode.objects).filter(channel_id__in=list( + non_public_channels_sole_editor)).update(parent_id=settings.ORPHANAGE_ROOT_ID) - # Hard delete channel collections associated with this user (if user is the only editor). + # Hard delete non-public channels associated with this user (if user is the only editor). + non_public_channels_sole_editor.delete() + + # Hard delete non-public channel collections associated with this user (if user is the only editor). user_query = ( User.objects.filter(channel_sets__id=OuterRef('id')) .values_list('id', flat=True) .distinct() ) - self.channel_sets.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1).delete() + self.channel_sets.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1, public=False).delete() - # Soft delete user. - self.deleted = True - # Deactivate the user to disallow authentication and also - # to let the user verify the email again after recovery. - self.is_active = False - - self.save() - - def recover(self): - """ - Use this method when we want to recover a user. - """ - self.deleted = False - self.save() + # Create history! + self.history.create(user_id=self.pk, action=user_history.RELATED_DATA_HARD_DELETION) def can_edit(self, channel_id): return Channel.filter_edit_queryset(Channel.objects.all(), self).filter(pk=channel_id).exists() @@ -424,20 +447,20 @@ def filter_edit_queryset(cls, queryset, user): return queryset.filter(pk=user.pk) @classmethod - def get_for_email(cls, email, deleted=False, **filters): + def get_for_email(cls, email, deleted_at__isnull=True, **filters): """ Returns the appropriate User record given an email, ordered by: - those with is_active=True first, which there should only ever be one - otherwise by ID DESC so most recent inactive shoud be returned Filters out deleted User records by default. Can be overridden with - deleted argument. + deleted_at__isnull argument. :param email: A string of the user's email :param filters: Additional filters to filter the User queryset :return: User or None """ - return User.objects.filter(email__iexact=email.strip(), deleted=deleted, **filters)\ + return User.objects.filter(email__iexact=email.strip(), deleted_at__isnull=deleted_at__isnull, **filters)\ .order_by("-is_active", "-id").first() @@ -1060,6 +1083,16 @@ class Meta: ] +class UserHistory(models.Model): + """ + Model that stores the user's action history. + """ + user = models.ForeignKey(settings.AUTH_USER_MODEL, null=False, blank=False, related_name="history", on_delete=models.CASCADE) + action = models.CharField(max_length=32, choices=user_history.choices) + + performed_at = models.DateTimeField(default=timezone.now) + + class ChannelSet(models.Model): # NOTE: this is referred to as "channel collections" on the front-end, but we need to call it # something else as there is already a ChannelCollection model on the front-end diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py index b9c3a73ca1..de05fb484a 100644 --- a/contentcuration/contentcuration/settings.py +++ b/contentcuration/contentcuration/settings.py @@ -329,8 +329,10 @@ def gettext(s): HELP_EMAIL = 'content@learningequality.org' DEFAULT_FROM_EMAIL = 'Kolibri Studio ' POLICY_EMAIL = 'legal@learningequality.org' -ACCOUNT_DELETION_BUFFER = 5 # Used to determine how many days a user -# has to undo accidentally deleting account + +# Used to determine how many days a user +# has to undo accidentally deleting account. +ACCOUNT_DELETION_BUFFER = 90 DEFAULT_LICENSE = 1 diff --git a/contentcuration/contentcuration/tests/test_models.py b/contentcuration/contentcuration/tests/test_models.py index 66720a0985..e6fbe38a35 100644 --- a/contentcuration/contentcuration/tests/test_models.py +++ b/contentcuration/contentcuration/tests/test_models.py @@ -1,3 +1,4 @@ +import datetime import uuid import mock @@ -11,6 +12,7 @@ from le_utils.constants import format_presets from contentcuration.constants import channel_history +from contentcuration.constants import user_history from contentcuration.models import AssessmentItem from contentcuration.models import Channel from contentcuration.models import ChannelHistory @@ -22,6 +24,7 @@ from contentcuration.models import Invitation from contentcuration.models import object_storage_name from contentcuration.models import User +from contentcuration.models import UserHistory from contentcuration.tests import testdata from contentcuration.tests.base import StudioTestCase @@ -806,27 +809,30 @@ def test_get_for_email(self): user4.delete() self.assertIsNone(User.get_for_email("testing@test.com")) - def test_delete__sets_deleted_true(self): + def test_delete(self): user = self._create_user("tester@tester.com") user.delete() - self.assertEqual(user.deleted, True) - def test_delete__sets_is_active_false(self): - user = self._create_user("tester@tester.com") - user.delete() + # Sets deleted_at? + self.assertIsInstance(user.deleted_at, datetime.datetime) + # Sets is_active to False? self.assertEqual(user.is_active, False) + # Creates user history? + user_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).first() + self.assertIsNotNone(user_delete_history) - def test_recover__sets_deleted_false(self): + def test_recover(self): user = self._create_user("tester@tester.com") user.delete() user.recover() - self.assertEqual(user.deleted, False) - def test_recover__keeps_is_active_false(self): - user = self._create_user("tester@tester.com") - user.delete() - user.recover() + # Sets deleted_at to None? + self.assertEqual(user.deleted_at, None) + # Keeps is_active to False? self.assertEqual(user.is_active, False) + # Creates user history? + user_recover_history = UserHistory.objects.filter(user_id=user.id, action=user_history.RECOVERY).first() + self.assertIsNotNone(user_recover_history) class ChannelHistoryTestCase(StudioTestCase): diff --git a/contentcuration/contentcuration/utils/garbage_collect.py b/contentcuration/contentcuration/utils/garbage_collect.py index 3343013b7c..de5a29921f 100755 --- a/contentcuration/contentcuration/utils/garbage_collect.py +++ b/contentcuration/contentcuration/utils/garbage_collect.py @@ -7,7 +7,9 @@ from celery import states from django.conf import settings +from django.core.files.storage import default_storage from django.db.models.expressions import CombinedExpression +from django.db.models.expressions import Exists from django.db.models.expressions import F from django.db.models.expressions import Value from django.db.models.signals import post_delete @@ -37,18 +39,57 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.receivers = None -def get_deleted_chefs_root(): +def _get_deleted_chefs_root(): deleted_chefs_node, _new = ContentNode.objects.get_or_create(pk=settings.DELETED_CHEFS_ROOT_ID, kind_id=content_kinds.TOPIC) return deleted_chefs_node +def _clean_up_files(contentnode_ids): + """ + Clean up the files (both in the DB and in object storage) + associated with the `contentnode_ids` iterable that are + not pointed by any other contentnode. + """ + files = File.objects.filter(contentnode__in=contentnode_ids) + files_on_storage = files.values_list("file_on_disk", flat=True) + + for disk_file_path in files_on_storage: + is_other_node_pointing = Exists(File.objects.filter(file_on_disk=disk_file_path).exclude(contentnode__in=contentnode_ids)) + if not is_other_node_pointing: + default_storage.delete(disk_file_path) + + # use _raw_delete for much fast file deletions + files._raw_delete(files.db) + + +def clean_up_soft_deleted_users(): + """ + Hard deletes user related data for soft deleted users that are older than ACCOUNT_DELETION_BUFFER. + + Note: User record itself is not hard deleted. + + User related data that gets hard deleted are: + - sole editor non-public channels. + - sole editor non-public channelsets. + - sole editor non-public channels' content nodes and its underlying files that are not + used by any other channel. + - all user invitations. + """ + account_deletion_buffer_delta = now() - datetime.timedelta(days=settings.ACCOUNT_DELETION_BUFFER) + users_to_delete = User.objects.filter(deleted_at__lt=account_deletion_buffer_delta) + + for user in users_to_delete: + user.hard_delete_user_related_data() + logging.info("Hard deleted user related data for user {}".format(user.email)) + + def clean_up_deleted_chefs(): """ Clean up all deleted chefs attached to the deleted chefs tree, including all child nodes in that tree. """ - deleted_chefs_node = get_deleted_chefs_root() + deleted_chefs_node = _get_deleted_chefs_root() # we cannot use MPTT methods like get_descendants() or use tree_id because for performance reasons # we are avoiding MPTT entirely. nodes_to_clean_up = ContentNode.objects.filter(parent=deleted_chefs_node) @@ -81,7 +122,7 @@ def clean_up_contentnodes(delete_older_than=settings.ORPHAN_DATE_CLEAN_UP_THRESH # delete all files first with DisablePostDeleteSignal(): - clean_up_files(nodes_to_clean_up) + _clean_up_files(nodes_to_clean_up) # Use _raw_delete for fast bulk deletions try: @@ -92,32 +133,6 @@ def clean_up_contentnodes(delete_older_than=settings.ORPHAN_DATE_CLEAN_UP_THRESH pass -def clean_up_files(contentnode_ids): - """ - Clean up the files (both in the DB and in object storage) - associated with the contentnode_ids given in the `contentnode_ids` - iterable. - """ - - # get all file objects associated with these contentnodes - files = File.objects.filter(contentnode__in=contentnode_ids) - # get all their associated real files in object storage - files_on_storage = files.values_list("file_on_disk") - for f in files_on_storage: - # values_list returns each set of items in a tuple, even - # if there's only one item in there. Extract the file_on_disk - # string value from inside that singleton tuple - f[0] - # NOTE (aron):call the storage's delete method on each file, one by one - # disabled for now until we implement logic to not delete files - # that are referenced by non-orphan nodes - # storage.delete(file_path) - - # finally, remove the entries from object storage - # use _raw_delete for much fast file deletions - files._raw_delete(files.db) - - def clean_up_feature_flags(): """ Removes lingering feature flag settings in User records that aren't currently present in the From 4be9df4359a9638a21c11582e613f3d75dc38786 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Fri, 21 Oct 2022 13:49:59 +0530 Subject: [PATCH 4/6] fix deleted chefs root references --- contentcuration/contentcuration/tests/test_user.py | 6 ++++-- contentcuration/contentcuration/utils/garbage_collect.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/contentcuration/contentcuration/tests/test_user.py b/contentcuration/contentcuration/tests/test_user.py index f9995fb48c..36ae42f1c7 100644 --- a/contentcuration/contentcuration/tests/test_user.py +++ b/contentcuration/contentcuration/tests/test_user.py @@ -161,10 +161,12 @@ def test_user_csv_export(self): self.assertIn(videos[index - 1].original_filename, row) self.assertIn(_format_size(videos[index - 1].file_size), row) self.assertEqual(index, len(videos)) + """ + Write and refactor for related data hard delete test cases below. + """ def test_account_deletion(self): - self.user.delete() - self.assertFalse(Channel.objects.filter(pk=self.channel.pk).exists()) + pass def test_account_deletion_shared_channels_preserved(self): # Deleting a user account shouldn't delete shared channels diff --git a/contentcuration/contentcuration/utils/garbage_collect.py b/contentcuration/contentcuration/utils/garbage_collect.py index de5a29921f..206ad74865 100755 --- a/contentcuration/contentcuration/utils/garbage_collect.py +++ b/contentcuration/contentcuration/utils/garbage_collect.py @@ -39,7 +39,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.receivers = None -def _get_deleted_chefs_root(): +def get_deleted_chefs_root(): deleted_chefs_node, _new = ContentNode.objects.get_or_create(pk=settings.DELETED_CHEFS_ROOT_ID, kind_id=content_kinds.TOPIC) return deleted_chefs_node @@ -89,7 +89,7 @@ def clean_up_deleted_chefs(): child nodes in that tree. """ - deleted_chefs_node = _get_deleted_chefs_root() + deleted_chefs_node = get_deleted_chefs_root() # we cannot use MPTT methods like get_descendants() or use tree_id because for performance reasons # we are avoiding MPTT entirely. nodes_to_clean_up = ContentNode.objects.filter(parent=deleted_chefs_node) From d3a4263ee3d0669f2c1c03c50373051dca12821b Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Sun, 23 Oct 2022 01:20:36 +0530 Subject: [PATCH 5/6] User UserHistory as single source of truth for timestamps! --- .../contentcuration/constants/user_history.py | 4 +- contentcuration/contentcuration/forms.py | 2 +- ...oft_delete.py => 0141_soft_delete_user.py} | 10 +-- contentcuration/contentcuration/models.py | 20 +++-- .../contentcuration/tests/test_models.py | 84 +++++++++++++++++-- .../contentcuration/tests/test_user.py | 15 ---- .../tests/utils/test_garbage_collect.py | 34 ++++++++ .../contentcuration/utils/garbage_collect.py | 12 ++- 8 files changed, 140 insertions(+), 41 deletions(-) rename contentcuration/contentcuration/migrations/{0141_user_soft_delete.py => 0141_soft_delete_user.py} (63%) diff --git a/contentcuration/contentcuration/constants/user_history.py b/contentcuration/contentcuration/constants/user_history.py index 1eecf79c17..76655993ef 100644 --- a/contentcuration/contentcuration/constants/user_history.py +++ b/contentcuration/contentcuration/constants/user_history.py @@ -1,7 +1,7 @@ from django.utils.translation import ugettext_lazy as _ -DELETION = "deletion" -RECOVERY = "recovery" +DELETION = "soft-deletion" +RECOVERY = "soft-recovery" RELATED_DATA_HARD_DELETION = "related-data-hard-deletion" choices = ( diff --git a/contentcuration/contentcuration/forms.py b/contentcuration/contentcuration/forms.py index d0ae49893f..d9dc781f61 100644 --- a/contentcuration/contentcuration/forms.py +++ b/contentcuration/contentcuration/forms.py @@ -46,7 +46,7 @@ class RegistrationForm(UserCreationForm, ExtraFormMixin): def clean_email(self): email = self.cleaned_data['email'].strip().lower() - if User.objects.filter(Q(is_active=True) | Q(deleted_at__isnull=False), email__iexact=email).exists(): + if User.objects.filter(Q(is_active=True) | Q(deleted=True), email__iexact=email).exists(): raise UserWarning return email diff --git a/contentcuration/contentcuration/migrations/0141_user_soft_delete.py b/contentcuration/contentcuration/migrations/0141_soft_delete_user.py similarity index 63% rename from contentcuration/contentcuration/migrations/0141_user_soft_delete.py rename to contentcuration/contentcuration/migrations/0141_soft_delete_user.py index 1f58e4076a..df66bafcc0 100644 --- a/contentcuration/contentcuration/migrations/0141_user_soft_delete.py +++ b/contentcuration/contentcuration/migrations/0141_soft_delete_user.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.14 on 2022-10-19 09:41 +# Generated by Django 3.2.14 on 2022-10-22 18:30 import django.db.models.deletion import django.utils.timezone from django.conf import settings @@ -15,15 +15,15 @@ class Migration(migrations.Migration): operations = [ migrations.AddField( model_name='user', - name='deleted_at', - field=models.DateTimeField(blank=True, null=True), + name='deleted', + field=models.BooleanField(db_index=True, default=False), ), migrations.CreateModel( name='UserHistory', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('action', models.CharField(choices=[('deletion', 'User soft deletion'), ('recovery', 'User soft deletion recovery'), - ('related-data-hard-deletion', 'User related data hard deletion')], max_length=32)), + ('action', models.CharField(choices=[('soft-deletion', 'User soft deletion'), ('soft-recovery', + 'User soft deletion recovery'), ('related-data-hard-deletion', 'User related data hard deletion')], max_length=32)), ('performed_at', models.DateTimeField(default=django.utils.timezone.now)), ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='history', to=settings.AUTH_USER_MODEL)), ], diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 537f867978..07aa39f2b1 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -200,7 +200,7 @@ class User(AbstractBaseUser, PermissionsMixin): content_defaults = JSONField(default=dict) policies = JSONField(default=dict, null=True) feature_flags = JSONField(default=dict, null=True) - deleted_at = models.DateTimeField(null=True, blank=True) + deleted = models.BooleanField(default=False, db_index=True) _field_updates = FieldTracker(fields=[ # Field to watch for changes @@ -218,7 +218,7 @@ def delete(self): """ Soft deletes the user account. """ - self.deleted_at = timezone.now() + self.deleted = True # Deactivate the user to disallow authentication and also # to let the user verify the email again after recovery. self.is_active = False @@ -229,7 +229,7 @@ def recover(self): """ Use this method when we want to recover a user. """ - self.deleted_at = None + self.deleted = False self.save() self.history.create(user_id=self.pk, action=user_history.RECOVERY) @@ -261,7 +261,7 @@ def hard_delete_user_related_data(self): # Point sole editor non-public channels' contentnodes to orphan tree to let # our garbage collection delete the nodes and underlying files. ContentNode._annotate_channel_id(ContentNode.objects).filter(channel_id__in=list( - non_public_channels_sole_editor)).update(parent_id=settings.ORPHANAGE_ROOT_ID) + non_public_channels_sole_editor.values_list("id", flat=True))).update(parent_id=settings.ORPHANAGE_ROOT_ID) # Hard delete non-public channels associated with this user (if user is the only editor). non_public_channels_sole_editor.delete() @@ -447,21 +447,23 @@ def filter_edit_queryset(cls, queryset, user): return queryset.filter(pk=user.pk) @classmethod - def get_for_email(cls, email, deleted_at__isnull=True, **filters): + def get_for_email(cls, email, deleted=False, **filters): """ Returns the appropriate User record given an email, ordered by: - those with is_active=True first, which there should only ever be one - otherwise by ID DESC so most recent inactive shoud be returned - Filters out deleted User records by default. Can be overridden with - deleted_at__isnull argument. + Filters out deleted User records by default. To include both deleted and + undeleted user records pass None to the deleted argument. :param email: A string of the user's email :param filters: Additional filters to filter the User queryset :return: User or None """ - return User.objects.filter(email__iexact=email.strip(), deleted_at__isnull=deleted_at__isnull, **filters)\ - .order_by("-is_active", "-id").first() + user_qs = User.objects.filter(email__iexact=email.strip()) + if deleted is not None: + user_qs = user_qs.filter(deleted=deleted) + return user_qs.filter(**filters).order_by("-is_active", "-id").first() class UUIDField(models.CharField): diff --git a/contentcuration/contentcuration/tests/test_models.py b/contentcuration/contentcuration/tests/test_models.py index e6fbe38a35..9734ef1309 100644 --- a/contentcuration/contentcuration/tests/test_models.py +++ b/contentcuration/contentcuration/tests/test_models.py @@ -1,4 +1,3 @@ -import datetime import uuid import mock @@ -6,6 +5,7 @@ from django.conf import settings from django.core.cache import cache from django.core.exceptions import ValidationError +from django.db.models import Q from django.db.utils import IntegrityError from django.utils import timezone from le_utils.constants import content_kinds @@ -16,6 +16,7 @@ from contentcuration.models import AssessmentItem from contentcuration.models import Channel from contentcuration.models import ChannelHistory +from contentcuration.models import ChannelSet from contentcuration.models import ContentNode from contentcuration.models import CONTENTNODE_TREE_ID_CACHE_KEY from contentcuration.models import File @@ -781,6 +782,51 @@ def _create_user(self, email, password='password', is_active=True): user.save() return user + def _setup_user_related_data(self): + user_a = self._create_user("a@tester.com") + user_b = self._create_user("b@tester.com") + + # Create a sole editor non-public channel. + sole_editor_channel = Channel.objects.create(name="sole-editor") + sole_editor_channel.editors.add(user_a) + + # Create sole-editor channel nodes. + for i in range(0, 3): + testdata.node({ + "title": "sole-editor-channel-node", + "kind_id": "video", + }, parent=sole_editor_channel.main_tree) + + # Create a sole editor public channel. + public_channel = testdata.channel("public") + public_channel.editors.add(user_a) + public_channel.public = True + public_channel.save() + + # Create a shared channel. + shared_channel = testdata.channel("shared-channel") + shared_channel.editors.add(user_a) + shared_channel.editors.add(user_b) + + # Invitations. + Invitation.objects.create(sender_id=user_a.id, invited_id=user_b.id) + Invitation.objects.create(sender_id=user_b.id, invited_id=user_a.id) + + # Channel sets. + channel_set = ChannelSet.objects.create(name="sole-editor") + channel_set.editors.add(user_a) + + channel_set = ChannelSet.objects.create(name="public") + channel_set.editors.add(user_a) + channel_set.public = True + channel_set.save() + + channel_set = ChannelSet.objects.create(name="shared-channelset") + channel_set.editors.add(user_a) + channel_set.editors.add(user_b) + + return user_a + def test_unique_lower_email(self): self._create_user("tester@tester.com") with self.assertRaises(IntegrityError): @@ -813,8 +859,8 @@ def test_delete(self): user = self._create_user("tester@tester.com") user.delete() - # Sets deleted_at? - self.assertIsInstance(user.deleted_at, datetime.datetime) + # Sets deleted? + self.assertEqual(user.deleted, True) # Sets is_active to False? self.assertEqual(user.is_active, False) # Creates user history? @@ -826,14 +872,42 @@ def test_recover(self): user.delete() user.recover() - # Sets deleted_at to None? - self.assertEqual(user.deleted_at, None) + # Sets deleted to False? + self.assertEqual(user.deleted, False) # Keeps is_active to False? self.assertEqual(user.is_active, False) # Creates user history? user_recover_history = UserHistory.objects.filter(user_id=user.id, action=user_history.RECOVERY).first() self.assertIsNotNone(user_recover_history) + def test_hard_delete_user_related_data(self): + user = self._setup_user_related_data() + user.hard_delete_user_related_data() + + # Deletes sole-editor channels. + self.assertFalse(Channel.objects.filter(name="sole-editor").exists()) + # Preserves shared channels. + self.assertTrue(Channel.objects.filter(name="shared-channel").exists()) + # Preserves public channels. + self.assertTrue(Channel.objects.filter(name="public").exists()) + + # Deletes all user related invitations. + self.assertFalse(Invitation.objects.filter(Q(sender_id=user.id) | Q(invited_id=user.id)).exists()) + + # Deletes sole-editor channelsets. + self.assertFalse(ChannelSet.objects.filter(name="sole-editor").exists()) + # Preserves shared channelsets. + self.assertTrue(ChannelSet.objects.filter(name="shared-channelset").exists()) + # Preserves public channelsets. + self.assertTrue(ChannelSet.objects.filter(name="public").exists()) + + # All contentnodes of sole-editor channel points to ORPHANGE ROOT NODE? + self.assertFalse(ContentNode.objects.filter(~Q(parent_id=settings.ORPHANAGE_ROOT_ID) + & Q(title="sole-editor-channel-node")).exists()) + # Creates user history? + user_hard_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).first() + self.assertIsNotNone(user_hard_delete_history) + class ChannelHistoryTestCase(StudioTestCase): def setUp(self): diff --git a/contentcuration/contentcuration/tests/test_user.py b/contentcuration/contentcuration/tests/test_user.py index 36ae42f1c7..9fda1ceefe 100644 --- a/contentcuration/contentcuration/tests/test_user.py +++ b/contentcuration/contentcuration/tests/test_user.py @@ -15,7 +15,6 @@ from .base import BaseAPITestCase from .testdata import fileobj_video -from contentcuration.models import Channel from contentcuration.models import DEFAULT_CONTENT_DEFAULTS from contentcuration.models import Invitation from contentcuration.models import User @@ -161,17 +160,3 @@ def test_user_csv_export(self): self.assertIn(videos[index - 1].original_filename, row) self.assertIn(_format_size(videos[index - 1].file_size), row) self.assertEqual(index, len(videos)) - """ - Write and refactor for related data hard delete test cases below. - """ - - def test_account_deletion(self): - pass - - def test_account_deletion_shared_channels_preserved(self): - # Deleting a user account shouldn't delete shared channels - newuser = self.create_user() - self.channel.editors.add(newuser) - self.channel.save() - self.user.delete() - self.assertTrue(Channel.objects.filter(pk=self.channel.pk).exists()) diff --git a/contentcuration/contentcuration/tests/utils/test_garbage_collect.py b/contentcuration/contentcuration/tests/utils/test_garbage_collect.py index 6746fa43a6..178bc25656 100644 --- a/contentcuration/contentcuration/tests/utils/test_garbage_collect.py +++ b/contentcuration/contentcuration/tests/utils/test_garbage_collect.py @@ -17,15 +17,19 @@ from contentcuration import models as cc from contentcuration.api import activate_channel +from contentcuration.constants import user_history from contentcuration.models import ContentNode from contentcuration.models import File from contentcuration.models import TaskResult +from contentcuration.models import UserHistory from contentcuration.tests.base import BaseAPITestCase from contentcuration.tests.base import StudioTestCase from contentcuration.tests.testdata import tree +from contentcuration.utils.db_tools import create_user from contentcuration.utils.garbage_collect import clean_up_contentnodes from contentcuration.utils.garbage_collect import clean_up_deleted_chefs from contentcuration.utils.garbage_collect import clean_up_feature_flags +from contentcuration.utils.garbage_collect import clean_up_soft_deleted_users from contentcuration.utils.garbage_collect import clean_up_stale_files from contentcuration.utils.garbage_collect import clean_up_tasks from contentcuration.utils.garbage_collect import get_deleted_chefs_root @@ -192,6 +196,36 @@ def _create_expired_contentnode(creation_date=THREE_MONTHS_AGO): return c +def _create_expired_deleted_user(email="test@test.com", deletion_date=THREE_MONTHS_AGO): + user = create_user(email, "password", "test", "test") + user.delete() + + user_latest_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).order_by("-performed_at").first() + user_latest_delete_history.performed_at = deletion_date + user_latest_delete_history.save() + return user + + +class CleanUpSoftDeletedExpiredUsersTestCase(StudioTestCase): + def test_cleanup__all_expired_soft_deleted_users(self): + expired_users = [] + for i in range(0, 5): + expired_users.append(_create_expired_deleted_user(email=f"test-{i}@test.com")) + + clean_up_soft_deleted_users() + + for user in expired_users: + assert UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).exists() is True + + def test_no_cleanup__unexpired_soft_deleted_users(self): + # TO DO + pass + + def test_no_cleanup__undeleted_users(self): + # TO DO + pass + + class CleanUpContentNodesTestCase(StudioTestCase): def test_delete_all_contentnodes_in_orphanage_tree(self): diff --git a/contentcuration/contentcuration/utils/garbage_collect.py b/contentcuration/contentcuration/utils/garbage_collect.py index 206ad74865..f7444f5f2e 100755 --- a/contentcuration/contentcuration/utils/garbage_collect.py +++ b/contentcuration/contentcuration/utils/garbage_collect.py @@ -17,11 +17,13 @@ from le_utils.constants import content_kinds from contentcuration.constants import feature_flags +from contentcuration.constants import user_history from contentcuration.db.models.functions import JSONObjectKeys from contentcuration.models import ContentNode from contentcuration.models import File from contentcuration.models import TaskResult from contentcuration.models import User +from contentcuration.models import UserHistory class DisablePostDeleteSignal(object): @@ -76,11 +78,13 @@ def clean_up_soft_deleted_users(): - all user invitations. """ account_deletion_buffer_delta = now() - datetime.timedelta(days=settings.ACCOUNT_DELETION_BUFFER) - users_to_delete = User.objects.filter(deleted_at__lt=account_deletion_buffer_delta) + deleted_users = User.objects.filter(deleted=True) - for user in users_to_delete: - user.hard_delete_user_related_data() - logging.info("Hard deleted user related data for user {}".format(user.email)) + for user in deleted_users: + latest_deletion_time = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).order_by("-performed_at").first() + if latest_deletion_time and latest_deletion_time.performed_at < account_deletion_buffer_delta: + user.hard_delete_user_related_data() + logging.info("Hard deleted user related data for user {}".format(user.email)) def clean_up_deleted_chefs(): From 17a7ac1ec63ab197697eb1a89155cb5c588ee837 Mon Sep 17 00:00:00 2001 From: Vivek Agrawal Date: Mon, 31 Oct 2022 14:11:05 +0530 Subject: [PATCH 6/6] User cleanup garbage collection tests --- contentcuration/contentcuration/models.py | 1 + .../tests/utils/test_garbage_collect.py | 18 +++++++++++------- .../contentcuration/utils/garbage_collect.py | 17 ++++++++++------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 07aa39f2b1..f6eb908a61 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -200,6 +200,7 @@ class User(AbstractBaseUser, PermissionsMixin): content_defaults = JSONField(default=dict) policies = JSONField(default=dict, null=True) feature_flags = JSONField(default=dict, null=True) + deleted = models.BooleanField(default=False, db_index=True) _field_updates = FieldTracker(fields=[ diff --git a/contentcuration/contentcuration/tests/utils/test_garbage_collect.py b/contentcuration/contentcuration/tests/utils/test_garbage_collect.py index 178bc25656..e4c9941df4 100644 --- a/contentcuration/contentcuration/tests/utils/test_garbage_collect.py +++ b/contentcuration/contentcuration/tests/utils/test_garbage_collect.py @@ -196,12 +196,12 @@ def _create_expired_contentnode(creation_date=THREE_MONTHS_AGO): return c -def _create_expired_deleted_user(email="test@test.com", deletion_date=THREE_MONTHS_AGO): +def _create_deleted_user_in_past(deletion_datetime, email="test@test.com"): user = create_user(email, "password", "test", "test") user.delete() user_latest_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).order_by("-performed_at").first() - user_latest_delete_history.performed_at = deletion_date + user_latest_delete_history.performed_at = deletion_datetime user_latest_delete_history.save() return user @@ -210,7 +210,7 @@ class CleanUpSoftDeletedExpiredUsersTestCase(StudioTestCase): def test_cleanup__all_expired_soft_deleted_users(self): expired_users = [] for i in range(0, 5): - expired_users.append(_create_expired_deleted_user(email=f"test-{i}@test.com")) + expired_users.append(_create_deleted_user_in_past(deletion_datetime=THREE_MONTHS_AGO, email=f"test-{i}@test.com")) clean_up_soft_deleted_users() @@ -218,12 +218,16 @@ def test_cleanup__all_expired_soft_deleted_users(self): assert UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).exists() is True def test_no_cleanup__unexpired_soft_deleted_users(self): - # TO DO - pass + two_months_ago = datetime.now() - timedelta(days=63) + user = _create_deleted_user_in_past(deletion_datetime=two_months_ago) + clean_up_soft_deleted_users() + assert UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).exists() is False def test_no_cleanup__undeleted_users(self): - # TO DO - pass + user = create_user("test@test.com", "password", "test", "test") + clean_up_soft_deleted_users() + assert user.deleted is False + assert UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).exists() is False class CleanUpContentNodesTestCase(StudioTestCase): diff --git a/contentcuration/contentcuration/utils/garbage_collect.py b/contentcuration/contentcuration/utils/garbage_collect.py index f7444f5f2e..44e09e4dab 100755 --- a/contentcuration/contentcuration/utils/garbage_collect.py +++ b/contentcuration/contentcuration/utils/garbage_collect.py @@ -8,9 +8,11 @@ from celery import states from django.conf import settings from django.core.files.storage import default_storage +from django.db.models import Subquery from django.db.models.expressions import CombinedExpression from django.db.models.expressions import Exists from django.db.models.expressions import F +from django.db.models.expressions import OuterRef from django.db.models.expressions import Value from django.db.models.signals import post_delete from django.utils.timezone import now @@ -78,13 +80,14 @@ def clean_up_soft_deleted_users(): - all user invitations. """ account_deletion_buffer_delta = now() - datetime.timedelta(days=settings.ACCOUNT_DELETION_BUFFER) - deleted_users = User.objects.filter(deleted=True) - - for user in deleted_users: - latest_deletion_time = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).order_by("-performed_at").first() - if latest_deletion_time and latest_deletion_time.performed_at < account_deletion_buffer_delta: - user.hard_delete_user_related_data() - logging.info("Hard deleted user related data for user {}".format(user.email)) + user_latest_deletion_time_subquery = Subquery(UserHistory.objects.filter(user_id=OuterRef( + "id"), action=user_history.DELETION).values("performed_at").order_by("-performed_at")[:1]) + users_to_delete = User.objects.annotate(latest_deletion_time=user_latest_deletion_time_subquery).filter( + deleted=True, latest_deletion_time__lt=account_deletion_buffer_delta) + + for user in users_to_delete: + user.hard_delete_user_related_data() + logging.info("Hard deleted user related data for user {}.".format(user.email)) def clean_up_deleted_chefs():