diff --git a/contentcuration/contentcuration/constants/user_history.py b/contentcuration/contentcuration/constants/user_history.py new file mode 100644 index 0000000000..76655993ef --- /dev/null +++ b/contentcuration/contentcuration/constants/user_history.py @@ -0,0 +1,11 @@ +from django.utils.translation import ugettext_lazy as _ + +DELETION = "soft-deletion" +RECOVERY = "soft-recovery" +RELATED_DATA_HARD_DELETION = "related-data-hard-deletion" + +choices = ( + (DELETION, _("User soft deletion")), + (RECOVERY, _("User soft deletion recovery")), + (RELATED_DATA_HARD_DELETION, _("User related data hard deletion")), +) diff --git a/contentcuration/contentcuration/forms.py b/contentcuration/contentcuration/forms.py index 973916431e..d9dc781f61 100644 --- a/contentcuration/contentcuration/forms.py +++ b/contentcuration/contentcuration/forms.py @@ -7,6 +7,7 @@ from django.contrib.auth.forms import UserChangeForm from django.contrib.auth.forms import UserCreationForm from django.core import signing +from django.db.models import Q from django.template.loader import render_to_string from contentcuration.models import User @@ -45,7 +46,7 @@ class RegistrationForm(UserCreationForm, ExtraFormMixin): def clean_email(self): email = self.cleaned_data['email'].strip().lower() - if User.objects.filter(email__iexact=email, is_active=True).exists(): + if User.objects.filter(Q(is_active=True) | Q(deleted=True), email__iexact=email).exists(): raise UserWarning return email diff --git a/contentcuration/contentcuration/management/commands/garbage_collect.py b/contentcuration/contentcuration/management/commands/garbage_collect.py index f31db7ad5c..f22f70dd4b 100644 --- a/contentcuration/contentcuration/management/commands/garbage_collect.py +++ b/contentcuration/contentcuration/management/commands/garbage_collect.py @@ -11,6 +11,7 @@ from contentcuration.utils.garbage_collect import clean_up_contentnodes from contentcuration.utils.garbage_collect import clean_up_deleted_chefs from 
contentcuration.utils.garbage_collect import clean_up_feature_flags +from contentcuration.utils.garbage_collect import clean_up_soft_deleted_users from contentcuration.utils.garbage_collect import clean_up_stale_files from contentcuration.utils.garbage_collect import clean_up_tasks @@ -26,15 +27,23 @@ def handle(self, *args, **options): Actual logic for garbage collection. """ - # clean up contentnodes, files and file objects on storage that are associated - # with the orphan tree + # Clean up users that are soft deleted and are older than ACCOUNT_DELETION_BUFFER (90 days). + # Also clean contentnodes, files and file objects on storage that are associated + # with the orphan tree. + logging.info("Cleaning up soft deleted users older than ACCOUNT_DELETION_BUFFER (90 days)") + clean_up_soft_deleted_users() + logging.info("Cleaning up contentnodes from the orphan tree") clean_up_contentnodes() + logging.info("Cleaning up deleted chef nodes") clean_up_deleted_chefs() + logging.info("Cleaning up feature flags") clean_up_feature_flags() + logging.info("Cleaning up stale file objects") clean_up_stale_files() + logging.info("Cleaning up tasks") clean_up_tasks() diff --git a/contentcuration/contentcuration/migrations/0141_soft_delete_user.py b/contentcuration/contentcuration/migrations/0141_soft_delete_user.py new file mode 100644 index 0000000000..df66bafcc0 --- /dev/null +++ b/contentcuration/contentcuration/migrations/0141_soft_delete_user.py @@ -0,0 +1,31 @@ +# Generated by Django 3.2.14 on 2022-10-22 18:30 +import django.db.models.deletion +import django.utils.timezone +from django.conf import settings +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [ + ('contentcuration', '0140_delete_task'), + ] + + operations = [ + migrations.AddField( + model_name='user', + name='deleted', + field=models.BooleanField(db_index=True, default=False), + ), + migrations.CreateModel( + name='UserHistory', + 
fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('action', models.CharField(choices=[('soft-deletion', 'User soft deletion'), ('soft-recovery', + 'User soft deletion recovery'), ('related-data-hard-deletion', 'User related data hard deletion')], max_length=32)), + ('performed_at', models.DateTimeField(default=django.utils.timezone.now)), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='history', to=settings.AUTH_USER_MODEL)), + ], + ), + ] diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 2b9aea4224..f6eb908a61 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -66,6 +66,7 @@ from contentcuration.constants import channel_history from contentcuration.constants import completion_criteria +from contentcuration.constants import user_history from contentcuration.constants.contentnode import kind_activity_map from contentcuration.db.models.expressions import Array from contentcuration.db.models.functions import ArrayRemove @@ -200,6 +201,8 @@ class User(AbstractBaseUser, PermissionsMixin): policies = JSONField(default=dict, null=True) feature_flags = JSONField(default=dict, null=True) + deleted = models.BooleanField(default=False, db_index=True) + _field_updates = FieldTracker(fields=[ # Field to watch for changes "disk_space", @@ -213,27 +216,67 @@ def __unicode__(self): return self.email def delete(self): + """ + Soft deletes the user account. + """ + self.deleted = True + # Deactivate the user to disallow authentication and also + # to let the user verify the email again after recovery. + self.is_active = False + self.save() + self.history.create(user_id=self.pk, action=user_history.DELETION) + + def recover(self): + """ + Use this method when we want to recover a user. 
+ """ + self.deleted = False + self.save() + self.history.create(user_id=self.pk, action=user_history.RECOVERY) + + def hard_delete_user_related_data(self): + """ + Hard delete all user related data. But keeps the user record itself intact. + + User related data that gets hard deleted are: + - sole editor non-public channels. + - sole editor non-public channelsets. + - sole editor non-public channels' content nodes and its underlying files that are not + used by any other channel. + - all user invitations. + """ from contentcuration.viewsets.common import SQCount - # Remove any invitations associated to this account + + # Hard delete invitations associated to this account. self.sent_to.all().delete() + self.sent_by.all().delete() - # Delete channels associated with this user (if user is the only editor) - user_query = ( + editable_channels_user_query = ( User.objects.filter(editable_channels__id=OuterRef('id')) .values_list('id', flat=True) .distinct() ) - self.editable_channels.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1).delete() + non_public_channels_sole_editor = self.editable_channels.annotate(num_editors=SQCount( + editable_channels_user_query, field="id")).filter(num_editors=1, public=False) + + # Point sole editor non-public channels' contentnodes to orphan tree to let + # our garbage collection delete the nodes and underlying files. + ContentNode._annotate_channel_id(ContentNode.objects).filter(channel_id__in=list( + non_public_channels_sole_editor.values_list("id", flat=True))).update(parent_id=settings.ORPHANAGE_ROOT_ID) + + # Hard delete non-public channels associated with this user (if user is the only editor). + non_public_channels_sole_editor.delete() - # Delete channel collections associated with this user (if user is the only editor) + # Hard delete non-public channel collections associated with this user (if user is the only editor). 
user_query = ( User.objects.filter(channel_sets__id=OuterRef('id')) .values_list('id', flat=True) .distinct() ) - self.channel_sets.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1).delete() + self.channel_sets.annotate(num_editors=SQCount(user_query, field="id")).filter(num_editors=1, public=False).delete() - super(User, self).delete() + # Create history! + self.history.create(user_id=self.pk, action=user_history.RELATED_DATA_HARD_DELETION) def can_edit(self, channel_id): return Channel.filter_edit_queryset(Channel.objects.all(), self).filter(pk=channel_id).exists() @@ -405,18 +448,23 @@ def filter_edit_queryset(cls, queryset, user): return queryset.filter(pk=user.pk) @classmethod - def get_for_email(cls, email, **filters): + def get_for_email(cls, email, deleted=False, **filters): """ Returns the appropriate User record given an email, ordered by: - those with is_active=True first, which there should only ever be one - otherwise by ID DESC so most recent inactive shoud be returned + Filters out deleted User records by default. To include both deleted and + undeleted user records pass None to the deleted argument. + :param email: A string of the user's email :param filters: Additional filters to filter the User queryset :return: User or None """ - return User.objects.filter(email__iexact=email.strip(), **filters)\ - .order_by("-is_active", "-id").first() + user_qs = User.objects.filter(email__iexact=email.strip()) + if deleted is not None: + user_qs = user_qs.filter(deleted=deleted) + return user_qs.filter(**filters).order_by("-is_active", "-id").first() class UUIDField(models.CharField): @@ -1038,6 +1086,16 @@ class Meta: ] +class UserHistory(models.Model): + """ + Model that stores the user's action history. 
+ """ + user = models.ForeignKey(settings.AUTH_USER_MODEL, null=False, blank=False, related_name="history", on_delete=models.CASCADE) + action = models.CharField(max_length=32, choices=user_history.choices) + + performed_at = models.DateTimeField(default=timezone.now) + + class ChannelSet(models.Model): # NOTE: this is referred to as "channel collections" on the front-end, but we need to call it # something else as there is already a ChannelCollection model on the front-end diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py index b9c3a73ca1..de05fb484a 100644 --- a/contentcuration/contentcuration/settings.py +++ b/contentcuration/contentcuration/settings.py @@ -329,8 +329,10 @@ def gettext(s): HELP_EMAIL = 'content@learningequality.org' DEFAULT_FROM_EMAIL = 'Kolibri Studio ' POLICY_EMAIL = 'legal@learningequality.org' -ACCOUNT_DELETION_BUFFER = 5 # Used to determine how many days a user -# has to undo accidentally deleting account + +# Used to determine how many days a user +# has to undo accidentally deleting account. 
+ACCOUNT_DELETION_BUFFER = 90 DEFAULT_LICENSE = 1 diff --git a/contentcuration/contentcuration/tests/test_models.py b/contentcuration/contentcuration/tests/test_models.py index 3c0caa9967..9734ef1309 100644 --- a/contentcuration/contentcuration/tests/test_models.py +++ b/contentcuration/contentcuration/tests/test_models.py @@ -5,15 +5,18 @@ from django.conf import settings from django.core.cache import cache from django.core.exceptions import ValidationError +from django.db.models import Q from django.db.utils import IntegrityError from django.utils import timezone from le_utils.constants import content_kinds from le_utils.constants import format_presets from contentcuration.constants import channel_history +from contentcuration.constants import user_history from contentcuration.models import AssessmentItem from contentcuration.models import Channel from contentcuration.models import ChannelHistory +from contentcuration.models import ChannelSet from contentcuration.models import ContentNode from contentcuration.models import CONTENTNODE_TREE_ID_CACHE_KEY from contentcuration.models import File @@ -22,6 +25,7 @@ from contentcuration.models import Invitation from contentcuration.models import object_storage_name from contentcuration.models import User +from contentcuration.models import UserHistory from contentcuration.tests import testdata from contentcuration.tests.base import StudioTestCase @@ -778,6 +782,51 @@ def _create_user(self, email, password='password', is_active=True): user.save() return user + def _setup_user_related_data(self): + user_a = self._create_user("a@tester.com") + user_b = self._create_user("b@tester.com") + + # Create a sole editor non-public channel. + sole_editor_channel = Channel.objects.create(name="sole-editor") + sole_editor_channel.editors.add(user_a) + + # Create sole-editor channel nodes. 
+ for i in range(0, 3): + testdata.node({ + "title": "sole-editor-channel-node", + "kind_id": "video", + }, parent=sole_editor_channel.main_tree) + + # Create a sole editor public channel. + public_channel = testdata.channel("public") + public_channel.editors.add(user_a) + public_channel.public = True + public_channel.save() + + # Create a shared channel. + shared_channel = testdata.channel("shared-channel") + shared_channel.editors.add(user_a) + shared_channel.editors.add(user_b) + + # Invitations. + Invitation.objects.create(sender_id=user_a.id, invited_id=user_b.id) + Invitation.objects.create(sender_id=user_b.id, invited_id=user_a.id) + + # Channel sets. + channel_set = ChannelSet.objects.create(name="sole-editor") + channel_set.editors.add(user_a) + + channel_set = ChannelSet.objects.create(name="public") + channel_set.editors.add(user_a) + channel_set.public = True + channel_set.save() + + channel_set = ChannelSet.objects.create(name="shared-channelset") + channel_set.editors.add(user_a) + channel_set.editors.add(user_b) + + return user_a + def test_unique_lower_email(self): self._create_user("tester@tester.com") with self.assertRaises(IntegrityError): @@ -787,6 +836,7 @@ def test_get_for_email(self): user1 = self._create_user("tester@tester.com", is_active=False) user2 = self._create_user("tester@Tester.com", is_active=False) user3 = self._create_user("Tester@Tester.com", is_active=True) + user4 = self._create_user("testing@test.com", is_active=True) # active should be returned first self.assertEqual(user3, User.get_for_email("tester@tester.com")) @@ -801,6 +851,63 @@ def test_get_for_email(self): # ensure nothing found doesn't error self.assertIsNone(User.get_for_email("tester@tester.com")) + # ensure we don't return soft-deleted users + user4.delete() + self.assertIsNone(User.get_for_email("testing@test.com")) + + def test_delete(self): + user = self._create_user("tester@tester.com") + user.delete() + + # Sets deleted? 
+ self.assertEqual(user.deleted, True) + # Sets is_active to False? + self.assertEqual(user.is_active, False) + # Creates user history? + user_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).first() + self.assertIsNotNone(user_delete_history) + + def test_recover(self): + user = self._create_user("tester@tester.com") + user.delete() + user.recover() + + # Sets deleted to False? + self.assertEqual(user.deleted, False) + # Keeps is_active as False? + self.assertEqual(user.is_active, False) + # Creates user history? + user_recover_history = UserHistory.objects.filter(user_id=user.id, action=user_history.RECOVERY).first() + self.assertIsNotNone(user_recover_history) + + def test_hard_delete_user_related_data(self): + user = self._setup_user_related_data() + user.hard_delete_user_related_data() + + # Deletes sole-editor channels. + self.assertFalse(Channel.objects.filter(name="sole-editor").exists()) + # Preserves shared channels. + self.assertTrue(Channel.objects.filter(name="shared-channel").exists()) + # Preserves public channels. + self.assertTrue(Channel.objects.filter(name="public").exists()) + + # Deletes all user related invitations. + self.assertFalse(Invitation.objects.filter(Q(sender_id=user.id) | Q(invited_id=user.id)).exists()) + + # Deletes sole-editor channelsets. + self.assertFalse(ChannelSet.objects.filter(name="sole-editor").exists()) + # Preserves shared channelsets. + self.assertTrue(ChannelSet.objects.filter(name="shared-channelset").exists()) + # Preserves public channelsets. + self.assertTrue(ChannelSet.objects.filter(name="public").exists()) + + # All contentnodes of the sole-editor channel point to the ORPHANAGE ROOT NODE? + self.assertFalse(ContentNode.objects.filter(~Q(parent_id=settings.ORPHANAGE_ROOT_ID) + & Q(title="sole-editor-channel-node")).exists()) + # Creates user history? 
+ user_hard_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).first() + self.assertIsNotNone(user_hard_delete_history) + class ChannelHistoryTestCase(StudioTestCase): def setUp(self): diff --git a/contentcuration/contentcuration/tests/test_settings.py b/contentcuration/contentcuration/tests/test_settings.py index c216f088a2..25bbc71a54 100644 --- a/contentcuration/contentcuration/tests/test_settings.py +++ b/contentcuration/contentcuration/tests/test_settings.py @@ -1,5 +1,6 @@ import json +import mock from django.urls import reverse_lazy from .base import BaseAPITestCase @@ -10,7 +11,7 @@ class SettingsTestCase(BaseAPITestCase): def test_username_change(self): - data = json.dumps({"first_name": "New firstname", "last_name": "New lastname",}) + data = json.dumps({"first_name": "New firstname", "last_name": "New lastname", }) request = self.create_post_request( reverse_lazy("update_user_full_name"), data=data, @@ -40,13 +41,19 @@ def test_delete_account_invalid(self): self.assertTrue(User.objects.filter(email=self.user.email).exists()) def test_delete_account(self): - # TODO: send_email causes connection errors - data = json.dumps({"email": self.user.email}) - self.create_post_request( - reverse_lazy("delete_user_account"), - data=data, - content_type="application/json", - ) - # response = DeleteAccountView.as_view()(request) - # self.assertEqual(response.status_code, 200) - # self.assertFalse(User.objects.filter(email=self.user.email).exists()) + with mock.patch("contentcuration.views.users.djangologout") as djangologout: + self.user.delete = mock.Mock() + data = json.dumps({"email": self.user.email}) + request = self.create_post_request( + reverse_lazy("delete_user_account"), + data=data, + content_type="application/json", + ) + response = DeleteAccountView.as_view()(request) + + # Ensure successful response. + self.assertEqual(response.status_code, 200) + # Ensure user's delete method is called. 
+ self.user.delete.assert_called_once() + # Ensure we logout the user. + djangologout.assert_called_once_with(request) diff --git a/contentcuration/contentcuration/tests/test_user.py b/contentcuration/contentcuration/tests/test_user.py index f9995fb48c..9fda1ceefe 100644 --- a/contentcuration/contentcuration/tests/test_user.py +++ b/contentcuration/contentcuration/tests/test_user.py @@ -15,7 +15,6 @@ from .base import BaseAPITestCase from .testdata import fileobj_video -from contentcuration.models import Channel from contentcuration.models import DEFAULT_CONTENT_DEFAULTS from contentcuration.models import Invitation from contentcuration.models import User @@ -161,15 +160,3 @@ def test_user_csv_export(self): self.assertIn(videos[index - 1].original_filename, row) self.assertIn(_format_size(videos[index - 1].file_size), row) self.assertEqual(index, len(videos)) - - def test_account_deletion(self): - self.user.delete() - self.assertFalse(Channel.objects.filter(pk=self.channel.pk).exists()) - - def test_account_deletion_shared_channels_preserved(self): - # Deleting a user account shouldn't delete shared channels - newuser = self.create_user() - self.channel.editors.add(newuser) - self.channel.save() - self.user.delete() - self.assertTrue(Channel.objects.filter(pk=self.channel.pk).exists()) diff --git a/contentcuration/contentcuration/tests/utils/test_garbage_collect.py b/contentcuration/contentcuration/tests/utils/test_garbage_collect.py index 6746fa43a6..e4c9941df4 100644 --- a/contentcuration/contentcuration/tests/utils/test_garbage_collect.py +++ b/contentcuration/contentcuration/tests/utils/test_garbage_collect.py @@ -17,15 +17,19 @@ from contentcuration import models as cc from contentcuration.api import activate_channel +from contentcuration.constants import user_history from contentcuration.models import ContentNode from contentcuration.models import File from contentcuration.models import TaskResult +from contentcuration.models import UserHistory from 
contentcuration.tests.base import BaseAPITestCase from contentcuration.tests.base import StudioTestCase from contentcuration.tests.testdata import tree +from contentcuration.utils.db_tools import create_user from contentcuration.utils.garbage_collect import clean_up_contentnodes from contentcuration.utils.garbage_collect import clean_up_deleted_chefs from contentcuration.utils.garbage_collect import clean_up_feature_flags +from contentcuration.utils.garbage_collect import clean_up_soft_deleted_users from contentcuration.utils.garbage_collect import clean_up_stale_files from contentcuration.utils.garbage_collect import clean_up_tasks from contentcuration.utils.garbage_collect import get_deleted_chefs_root @@ -192,6 +196,40 @@ def _create_expired_contentnode(creation_date=THREE_MONTHS_AGO): return c +def _create_deleted_user_in_past(deletion_datetime, email="test@test.com"): + user = create_user(email, "password", "test", "test") + user.delete() + + user_latest_delete_history = UserHistory.objects.filter(user_id=user.id, action=user_history.DELETION).order_by("-performed_at").first() + user_latest_delete_history.performed_at = deletion_datetime + user_latest_delete_history.save() + return user + + +class CleanUpSoftDeletedExpiredUsersTestCase(StudioTestCase): + def test_cleanup__all_expired_soft_deleted_users(self): + expired_users = [] + for i in range(0, 5): + expired_users.append(_create_deleted_user_in_past(deletion_datetime=THREE_MONTHS_AGO, email=f"test-{i}@test.com")) + + clean_up_soft_deleted_users() + + for user in expired_users: + assert UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).exists() is True + + def test_no_cleanup__unexpired_soft_deleted_users(self): + two_months_ago = datetime.now() - timedelta(days=63) + user = _create_deleted_user_in_past(deletion_datetime=two_months_ago) + clean_up_soft_deleted_users() + assert UserHistory.objects.filter(user_id=user.id, 
action=user_history.RELATED_DATA_HARD_DELETION).exists() is False + + def test_no_cleanup__undeleted_users(self): + user = create_user("test@test.com", "password", "test", "test") + clean_up_soft_deleted_users() + assert user.deleted is False + assert UserHistory.objects.filter(user_id=user.id, action=user_history.RELATED_DATA_HARD_DELETION).exists() is False + + class CleanUpContentNodesTestCase(StudioTestCase): def test_delete_all_contentnodes_in_orphanage_tree(self): diff --git a/contentcuration/contentcuration/tests/views/test_users.py b/contentcuration/contentcuration/tests/views/test_users.py index 0c03d5f626..e79e0cdbf9 100644 --- a/contentcuration/contentcuration/tests/views/test_users.py +++ b/contentcuration/contentcuration/tests/views/test_users.py @@ -98,6 +98,14 @@ def test_login__whitespace(self): self.assertIsInstance(redirect, HttpResponseRedirectBase) self.assertIn("channels", redirect['Location']) + def test_after_delete__no_login(self): + with mock.patch("contentcuration.views.users.djangologin") as djangologin: + self.user.delete() + response = login(self.request) + + self.assertIsInstance(response, HttpResponseForbidden) + djangologin.assert_not_called() + class UserRegistrationViewTestCase(StudioAPITestCase): def setUp(self): @@ -121,6 +129,12 @@ def test_post__no_duplicate_registration(self): response = self.view.post(self.request) self.assertIsInstance(response, HttpResponseForbidden) + def test_after_delete__no_registration(self): + user = testdata.user(email="tester@tester.com") + user.delete() + response = self.view.post(self.request) + self.assertIsInstance(response, HttpResponseForbidden) + class UserActivationViewTestCase(StudioAPITestCase): def setUp(self): diff --git a/contentcuration/contentcuration/utils/garbage_collect.py b/contentcuration/contentcuration/utils/garbage_collect.py index 3343013b7c..44e09e4dab 100755 --- a/contentcuration/contentcuration/utils/garbage_collect.py +++ 
b/contentcuration/contentcuration/utils/garbage_collect.py @@ -7,19 +7,25 @@ from celery import states from django.conf import settings +from django.core.files.storage import default_storage +from django.db.models import Subquery from django.db.models.expressions import CombinedExpression +from django.db.models.expressions import Exists from django.db.models.expressions import F +from django.db.models.expressions import OuterRef from django.db.models.expressions import Value from django.db.models.signals import post_delete from django.utils.timezone import now from le_utils.constants import content_kinds from contentcuration.constants import feature_flags +from contentcuration.constants import user_history from contentcuration.db.models.functions import JSONObjectKeys from contentcuration.models import ContentNode from contentcuration.models import File from contentcuration.models import TaskResult from contentcuration.models import User +from contentcuration.models import UserHistory class DisablePostDeleteSignal(object): @@ -42,6 +48,48 @@ def get_deleted_chefs_root(): return deleted_chefs_node +def _clean_up_files(contentnode_ids): + """ + Clean up the files (both in the DB and in object storage) + associated with the `contentnode_ids` iterable. A stored object is only + removed when no File row outside `contentnode_ids` still references it. + """ + files = File.objects.filter(contentnode__in=contentnode_ids) + files_on_storage = files.values_list("file_on_disk", flat=True) + # Ask the DB with .exists(): an Exists() expression object is always truthy in Python. + for disk_file_path in files_on_storage: + is_other_node_pointing = File.objects.filter(file_on_disk=disk_file_path).exclude(contentnode__in=contentnode_ids).exists() + if disk_file_path and not is_other_node_pointing: + default_storage.delete(disk_file_path) + + # use _raw_delete for much faster file deletions + files._raw_delete(files.db) + + +def clean_up_soft_deleted_users(): + """ + Hard deletes user related data for soft deleted users that are older than ACCOUNT_DELETION_BUFFER. + + Note: User record itself is not hard deleted. 
+ + User related data that gets hard deleted are: + - sole editor non-public channels. + - sole editor non-public channelsets. + - sole editor non-public channels' content nodes and its underlying files that are not + used by any other channel. + - all user invitations. + """ + account_deletion_buffer_delta = now() - datetime.timedelta(days=settings.ACCOUNT_DELETION_BUFFER) + user_latest_deletion_time_subquery = Subquery(UserHistory.objects.filter(user_id=OuterRef( + "id"), action=user_history.DELETION).values("performed_at").order_by("-performed_at")[:1]) + users_to_delete = User.objects.annotate(latest_deletion_time=user_latest_deletion_time_subquery).filter( + deleted=True, latest_deletion_time__lt=account_deletion_buffer_delta) + + for user in users_to_delete: + user.hard_delete_user_related_data() + logging.info("Hard deleted user related data for user {}.".format(user.email)) + + def clean_up_deleted_chefs(): """ Clean up all deleted chefs attached to the deleted chefs tree, including all @@ -81,7 +129,7 @@ def clean_up_contentnodes(delete_older_than=settings.ORPHAN_DATE_CLEAN_UP_THRESH # delete all files first with DisablePostDeleteSignal(): - clean_up_files(nodes_to_clean_up) + _clean_up_files(nodes_to_clean_up) # Use _raw_delete for fast bulk deletions try: @@ -92,32 +140,6 @@ def clean_up_contentnodes(delete_older_than=settings.ORPHAN_DATE_CLEAN_UP_THRESH pass -def clean_up_files(contentnode_ids): - """ - Clean up the files (both in the DB and in object storage) - associated with the contentnode_ids given in the `contentnode_ids` - iterable. - """ - - # get all file objects associated with these contentnodes - files = File.objects.filter(contentnode__in=contentnode_ids) - # get all their associated real files in object storage - files_on_storage = files.values_list("file_on_disk") - for f in files_on_storage: - # values_list returns each set of items in a tuple, even - # if there's only one item in there. 
Extract the file_on_disk - # string value from inside that singleton tuple - f[0] - # NOTE (aron):call the storage's delete method on each file, one by one - # disabled for now until we implement logic to not delete files - # that are referenced by non-orphan nodes - # storage.delete(file_path) - - # finally, remove the entries from object storage - # use _raw_delete for much fast file deletions - files._raw_delete(files.db) - - def clean_up_feature_flags(): """ Removes lingering feature flag settings in User records that aren't currently present in the diff --git a/contentcuration/contentcuration/views/settings.py b/contentcuration/contentcuration/views/settings.py index f41b6ac926..a186665dec 100644 --- a/contentcuration/contentcuration/views/settings.py +++ b/contentcuration/contentcuration/views/settings.py @@ -34,6 +34,7 @@ from contentcuration.utils.csv_writer import generate_user_csv_filename from contentcuration.utils.messages import get_messages from contentcuration.views.base import current_user_for_context +from contentcuration.views.users import logout from contentcuration.viewsets.channel import SettingsChannelSerializer ISSUE_UPDATE_DATE = datetime(2018, 10, 29) @@ -165,6 +166,7 @@ def form_valid(self, form): os.unlink(csv_path) self.request.user.delete() + logout(self.request) class StorageSettingsView(PostFormMixin, FormView):