Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make deletion of multiple sources asynchronous #5257

Merged
merged 1 commit into from
May 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ systemd_services:
- securedrop_rqrequeue.service
- securedrop_rqworker.service
- securedrop_shredder.service
- securedrop_source_deleter.service

# SecureDrop rq worker log directory
securedrop_worker_log_dir: /var/log/securedrop_worker
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[Unit]
Description=SecureDrop Source deleter

[Service]
Environment=PYTHONPATH="{{ securedrop_code }}:{{ securedrop_venv_site_packages }}"
ExecStart={{ securedrop_venv_bin }}/python {{ securedrop_code }}/scripts/source_deleter --interval 10
PrivateDevices=yes
PrivateTmp=yes
ProtectSystem=full
ReadOnlyDirectories=/
ReadWriteDirectories={{ securedrop_data }}
Restart=always
RestartSec=10s
UMask=077
User=www-data
WorkingDirectory={{ securedrop_code }}

[Install]
WantedBy=multi-user.target
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ etc/apparmor.d/usr.sbin.tor /etc/apparmor.d
lib/systemd/system/securedrop_rqrequeue.service /lib/systemd/system
lib/systemd/system/securedrop_rqworker.service /lib/systemd/system
lib/systemd/system/securedrop_shredder.service /lib/systemd/system
lib/systemd/system/securedrop_source_deleter.service /lib/systemd/system
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pytest


testinfra_hosts = ["app-staging"]


def test_securedrop_source_deleter_service(host):
"""
Verify configuration of securedrop_source_deleter systemd service.
"""
securedrop_test_vars = pytest.securedrop_test_vars
service_file = "/lib/systemd/system/securedrop_source_deleter.service"
expected_content = "\n".join([
"[Unit]",
"Description=SecureDrop Source deleter",
"",
"[Service]",
'Environment=PYTHONPATH="{}:{}"'.format(
securedrop_test_vars.securedrop_code, securedrop_test_vars.securedrop_venv_site_packages
),
"ExecStart={}/python /var/www/securedrop/scripts/source_deleter --interval 10".format(
securedrop_test_vars.securedrop_venv_bin
),
"PrivateDevices=yes",
"PrivateTmp=yes",
"ProtectSystem=full",
"ReadOnlyDirectories=/",
"ReadWriteDirectories={}".format(securedrop_test_vars.securedrop_data),
"Restart=always",
"RestartSec=10s",
"UMask=077",
"User={}".format(securedrop_test_vars.securedrop_user),
"WorkingDirectory={}".format(securedrop_test_vars.securedrop_code),
"",
"[Install]",
"WantedBy=multi-user.target\n",
])

f = host.file(service_file)
assert f.is_file
assert f.mode == 0o644
assert f.user == "root"
assert f.group == "root"
assert f.content_string == expected_content

s = host.service("securedrop_source_deleter")
assert s.is_enabled
assert s.is_running
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""add Source.deleted_at

Revision ID: 35513370ba0d
Revises: 523fff3f969c
Create Date: 2020-05-06 22:28:01.214359

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '35513370ba0d'
down_revision = '523fff3f969c'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('sources', schema=None) as batch_op:
batch_op.add_column(sa.Column('deleted_at', sa.DateTime(), nullable=True))

# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('sources', schema=None) as batch_op:
batch_op.drop_column('deleted_at')

# ### end Alembic commands ###
1 change: 1 addition & 0 deletions securedrop/bin/run
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ reset_demo
/opt/venvs/securedrop-app-code/bin/rqworker &
PYTHONPATH="${REPOROOT}/securedrop" /opt/venvs/securedrop-app-code/bin/python "${REPOROOT}/securedrop/scripts/rqrequeue" --interval 60 &
/opt/venvs/securedrop-app-code/bin/python "${REPOROOT}/securedrop/scripts/shredder" --interval 60 &
/opt/venvs/securedrop-app-code/bin/python "${REPOROOT}/securedrop/scripts/source_deleter" --interval 10 &

./manage.py run
38 changes: 35 additions & 3 deletions securedrop/crypto_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pretty_bad_protocol as gnupg
import os
import io
import re
import scrypt
from random import SystemRandom

Expand Down Expand Up @@ -76,6 +77,8 @@ class CryptoUtil:
REDIS_FINGERPRINT_HASH = "sd/crypto-util/fingerprints"
REDIS_KEY_HASH = "sd/crypto-util/keys"

SOURCE_KEY_UID_RE = re.compile(r"(Source|Autogenerated) Key <[-A-Za-z0-9+/=_]+>")

def __init__(self,
scrypt_params,
scrypt_id_pepper,
Expand Down Expand Up @@ -213,23 +216,52 @@ def genkeypair(self, name, secret):
key_length=self.__gpg_key_length,
passphrase=secret,
name_email=name,
name_real="Source Key",
creation_date=self.DEFAULT_KEY_CREATION_DATE.isoformat(),
expire_date=self.DEFAULT_KEY_EXPIRATION_DATE
))
return genkey_obj

def find_source_key(self, fingerprint: str) -> typing.Optional[typing.Dict]:
"""
Searches the GPG keyring for a source key.

A source key has the given fingerprint and is labeled either
"Source Key" or "Autogenerated Key".

Returns the key or None.
"""
keys = self.gpg.list_keys()
for key in keys:
if fingerprint != key["fingerprint"]:
continue

for uid in key["uids"]:
if self.SOURCE_KEY_UID_RE.match(uid):
return key
else:
return None
return None

def delete_reply_keypair(self, source_filesystem_id):
key = self.get_fingerprint(source_filesystem_id)
fingerprint = self.get_fingerprint(source_filesystem_id)

# If this source was never flagged for review, they won't have a reply
# keypair
if not key:
if not fingerprint:
return

# verify that the key with the given fingerprint belongs to a source
key = self.find_source_key(fingerprint)
if not key:
raise ValueError("source key not found")

# Always delete keys without invoking pinentry-mode = loopback
# see: https://lists.gnupg.org/pipermail/gnupg-users/2016-May/055965.html
temp_gpg = gnupg.GPG(binary='gpg2', homedir=self.gpg_key_dir)

# The subkeys keyword argument deletes both secret and public keys.
temp_gpg.delete_keys(key, secret=True, subkeys=True)
temp_gpg.delete_keys(fingerprint, secret=True, subkeys=True)
self.redis.hdel(self.REDIS_KEY_HASH, self.get_fingerprint(source_filesystem_id))
self.redis.hdel(self.REDIS_FINGERPRINT_HASH, source_filesystem_id)

Expand Down
2 changes: 1 addition & 1 deletion securedrop/journalist.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def prime_keycache():
Preloads CryptoUtil.keycache.
"""
with app.app_context():
for source in Source.query.filter_by(pending=False).all():
for source in Source.query.filter_by(pending=False, deleted_at=None).all():
app.crypto_util.get_pubkey(source.filesystem_id)


Expand Down
2 changes: 1 addition & 1 deletion securedrop/journalist_app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def get_token():
@api.route('/sources', methods=['GET'])
@token_required
def get_all_sources():
sources = Source.query.filter_by(pending=False).all()
sources = Source.query.filter_by(pending=False, deleted_at=None).all()
return jsonify(
{'sources': [source.to_json() for source in sources]}), 200

Expand Down
7 changes: 6 additions & 1 deletion securedrop/journalist_app/col.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,12 @@ def col(filesystem_id):
def delete_single(filesystem_id):
"""deleting a single collection from its /col page"""
source = get_source(filesystem_id)
delete_collection(filesystem_id)
try:
delete_collection(filesystem_id)
except ValueError as e:
current_app.logger.error("error deleting collection: %s", e)
abort(500)

flash(gettext("{source_name}'s collection deleted")
.format(source_name=source.journalist_designation),
"notification")
Expand Down
10 changes: 5 additions & 5 deletions securedrop/journalist_app/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from flask_babel import lazy_gettext as gettext
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileAllowed, FileRequired
from wtforms import (TextAreaField, TextField, BooleanField, HiddenField,
from wtforms import (TextAreaField, StringField, BooleanField, HiddenField,
ValidationError)
from wtforms.validators import InputRequired, Optional

Expand Down Expand Up @@ -38,16 +38,16 @@ def name_length_validation(form, field):


class NewUserForm(FlaskForm):
username = TextField('username', validators=[
username = StringField('username', validators=[
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(for other reviewers: TextField is deprecated)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I was just trying to reduce the noise.

InputRequired(message=gettext('This field is required.')),
minimum_length_validation
])
first_name = TextField('first_name', validators=[name_length_validation, Optional()])
last_name = TextField('last_name', validators=[name_length_validation, Optional()])
first_name = StringField('first_name', validators=[name_length_validation, Optional()])
last_name = StringField('last_name', validators=[name_length_validation, Optional()])
password = HiddenField('password')
is_admin = BooleanField('is_admin')
is_hotp = BooleanField('is_hotp')
otp_secret = TextField('otp_secret', validators=[
otp_secret = StringField('otp_secret', validators=[
otp_secret_validation,
Optional()
])
Expand Down
4 changes: 2 additions & 2 deletions securedrop/journalist_app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def index():
# Long SQLAlchemy statements look best when formatted according to
# the Pocoo style guide, IMHO:
# http://www.pocoo.org/internal/styleguide/
sources = Source.query.filter_by(pending=False) \
sources = Source.query.filter_by(pending=False, deleted_at=None) \
.filter(Source.last_updated.isnot(None)) \
.order_by(Source.last_updated.desc()) \
.all()
Expand Down Expand Up @@ -171,7 +171,7 @@ def bulk():
@view.route('/download_unread/<filesystem_id>')
def download_unread_filesystem_id(filesystem_id):
id = Source.query.filter(Source.filesystem_id == filesystem_id) \
.one().id
.filter_by(deleted_at=None).one().id
submissions = Submission.query.filter(
Submission.source_id == id,
Submission.downloaded == false()).all()
Expand Down
53 changes: 41 additions & 12 deletions securedrop/journalist_app/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# -*- coding: utf-8 -*-
import binascii
import datetime
import os

from datetime import datetime
from flask import (g, flash, current_app, abort, send_file, redirect, url_for,
render_template, Markup, sessions, request)
from flask_babel import gettext, ngettext
Expand Down Expand Up @@ -53,11 +54,17 @@ def commit_account_changes(user):
flash(gettext("Account updated."), "success")


def get_source(filesystem_id):
"""Return a Source object, representing the database row, for the source
with the `filesystem_id`"""
def get_source(filesystem_id, include_deleted=False):
"""
Return the Source object with `filesystem_id`

If `include_deleted` is False, only sources with a null `deleted_at` will
be returned.
"""
source = None
query = Source.query.filter(Source.filesystem_id == filesystem_id)
if not include_deleted:
query = query.filter_by(deleted_at=None)
source = get_one_or_else(query, current_app.logger, abort)

return source
Expand Down Expand Up @@ -157,7 +164,7 @@ def download(zip_basename, submissions):
zf = current_app.storage.get_bulk_archive(submissions,
zip_directory=zip_basename)
attachment_filename = "{}--{}.zip".format(
zip_basename, datetime.utcnow().strftime("%Y-%m-%d--%H-%M-%S"))
zip_basename, datetime.datetime.utcnow().strftime("%Y-%m-%d--%H-%M-%S"))

# Mark the submissions that have been downloaded as such
for submission in submissions:
Expand Down Expand Up @@ -233,8 +240,11 @@ def col_delete(cols_selected):
if len(cols_selected) < 1:
flash(gettext("No collections selected for deletion."), "error")
else:
for filesystem_id in cols_selected:
delete_collection(filesystem_id)
now = datetime.datetime.utcnow()
sources = Source.query.filter(Source.filesystem_id.in_(cols_selected))
sources.update({Source.deleted_at: now}, synchronize_session="fetch")
db.session.commit()

num = len(cols_selected)
flash(ngettext('{num} collection deleted', '{num} collections deleted',
num).format(num=num),
Expand All @@ -259,17 +269,36 @@ def make_password(config):
def delete_collection(filesystem_id):
# Delete the source's collection of submissions
path = current_app.storage.path(filesystem_id)
current_app.storage.move_to_shredder(path)
if os.path.exists(path):
current_app.storage.move_to_shredder(path)

# Delete the source's reply keypair
current_app.crypto_util.delete_reply_keypair(filesystem_id)
try:
current_app.crypto_util.delete_reply_keypair(filesystem_id)
except ValueError as e:
current_app.logger.error("could not delete reply keypair: %s", e)
raise

# Delete their entry in the db
source = get_source(filesystem_id)
source = get_source(filesystem_id, include_deleted=True)
db.session.delete(source)
db.session.commit()


def purge_deleted_sources():
"""
Deletes all Sources with a non-null `deleted_at` attribute.
"""
sources = Source.query.filter(Source.deleted_at.isnot(None)).order_by(Source.deleted_at).all()
if sources:
current_app.logger.info("Purging deleted sources (%s)", len(sources))
for source in sources:
try:
delete_collection(source.filesystem_id)
except Exception as e:
current_app.logger.error("Error deleting source %s: %s", source.uuid, e)


def set_name(user, first_name, last_name):
try:
user.set_name(first_name, last_name)
Expand Down Expand Up @@ -312,7 +341,7 @@ def col_download_unread(cols_selected):
submissions = []
for filesystem_id in cols_selected:
id = Source.query.filter(Source.filesystem_id == filesystem_id) \
.one().id
.filter_by(deleted_at=None).one().id
submissions += Submission.query.filter(
Submission.downloaded == false(),
Submission.source_id == id).all()
Expand All @@ -328,7 +357,7 @@ def col_download_all(cols_selected):
submissions = []
for filesystem_id in cols_selected:
id = Source.query.filter(Source.filesystem_id == filesystem_id) \
.one().id
.filter_by(deleted_at=None).one().id
submissions += Submission.query.filter(
Submission.source_id == id).all()
return download("all", submissions)
Expand Down
3 changes: 3 additions & 0 deletions securedrop/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ class Source(db.Model):
# keep track of how many interactions have happened, for filenames
interaction_count = Column(Integer, default=0, nullable=False)

# when deletion of the source was requested
deleted_at = Column(DateTime)

# Don't create or bother checking excessively long codenames to prevent DoS
NUM_WORDS = 7
MAX_CODENAME_LEN = 128
Expand Down
Loading