Skip to content

Commit

Permalink
Make deletion of multiple sources asynchronous
Browse files Browse the repository at this point in the history
Add Source.deleted_at timestamp. Start generally filtering sources
that have it set. When deleting multiple sources with
journalist_app.utils.col_delete, just mark them deleted with this and
defer the removal of their submissions, keypair and database record to
asynchronous processing. Add a new systemd service to periodically
purge sources marked for deletion.

Assign explicit names to source GPG keys. Instead of using the
python-gnupg default of "Autogenerated Key", label source keys with
"Source Key". When asked to delete a reply keypair, verify that its
UID contains one of those labels.
  • Loading branch information
rmol committed May 22, 2020
1 parent d0d9afb commit 4fb965f
Show file tree
Hide file tree
Showing 23 changed files with 379 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ systemd_services:
- securedrop_rqrequeue.service
- securedrop_rqworker.service
- securedrop_shredder.service
- securedrop_source_deleter.service

# SecureDrop rq worker log directory
securedrop_worker_log_dir: /var/log/securedrop_worker
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[Unit]
Description=SecureDrop Source deleter

[Service]
Environment=PYTHONPATH="{{ securedrop_code }}:{{ securedrop_venv_site_packages }}"
ExecStart={{ securedrop_venv_bin }}/python {{ securedrop_code }}/scripts/source_deleter --interval 10
PrivateDevices=yes
PrivateTmp=yes
ProtectSystem=full
ReadOnlyDirectories=/
ReadWriteDirectories={{ securedrop_data }}
Restart=always
RestartSec=10s
UMask=077
User=www-data
WorkingDirectory={{ securedrop_code }}

[Install]
WantedBy=multi-user.target
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ etc/apparmor.d/usr.sbin.tor /etc/apparmor.d
lib/systemd/system/securedrop_rqrequeue.service /lib/systemd/system
lib/systemd/system/securedrop_rqworker.service /lib/systemd/system
lib/systemd/system/securedrop_shredder.service /lib/systemd/system
lib/systemd/system/securedrop_source_deleter.service /lib/systemd/system
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pytest


testinfra_hosts = ["app-staging"]


def test_securedrop_source_deleter_service(host):
"""
Verify configuration of securedrop_source_deleter systemd service.
"""
securedrop_test_vars = pytest.securedrop_test_vars
service_file = "/lib/systemd/system/securedrop_source_deleter.service"
expected_content = "\n".join([
"[Unit]",
"Description=SecureDrop Source deleter",
"",
"[Service]",
'Environment=PYTHONPATH="{}:{}"'.format(
securedrop_test_vars.securedrop_code, securedrop_test_vars.securedrop_venv_site_packages
),
"ExecStart={}/python /var/www/securedrop/scripts/source_deleter --interval 10".format(
securedrop_test_vars.securedrop_venv_bin
),
"PrivateDevices=yes",
"PrivateTmp=yes",
"ProtectSystem=full",
"ReadOnlyDirectories=/",
"ReadWriteDirectories={}".format(securedrop_test_vars.securedrop_data),
"Restart=always",
"RestartSec=10s",
"UMask=077",
"User={}".format(securedrop_test_vars.securedrop_user),
"WorkingDirectory={}".format(securedrop_test_vars.securedrop_code),
"",
"[Install]",
"WantedBy=multi-user.target\n",
])

f = host.file(service_file)
assert f.is_file
assert f.mode == 0o644
assert f.user == "root"
assert f.group == "root"
assert f.content_string == expected_content

s = host.service("securedrop_source_deleter")
assert s.is_enabled
assert s.is_running
32 changes: 32 additions & 0 deletions securedrop/alembic/versions/35513370ba0d_add_source_deleted_at.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""add Source.deleted_at
Revision ID: 35513370ba0d
Revises: 523fff3f969c
Create Date: 2020-05-06 22:28:01.214359
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '35513370ba0d'
down_revision = '523fff3f969c'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('sources', schema=None) as batch_op:
batch_op.add_column(sa.Column('deleted_at', sa.DateTime(), nullable=True))

# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('sources', schema=None) as batch_op:
batch_op.drop_column('deleted_at')

# ### end Alembic commands ###
1 change: 1 addition & 0 deletions securedrop/bin/run
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ reset_demo
/opt/venvs/securedrop-app-code/bin/rqworker &
PYTHONPATH="${REPOROOT}/securedrop" /opt/venvs/securedrop-app-code/bin/python "${REPOROOT}/securedrop/scripts/rqrequeue" --interval 60 &
/opt/venvs/securedrop-app-code/bin/python "${REPOROOT}/securedrop/scripts/shredder" --interval 60 &
/opt/venvs/securedrop-app-code/bin/python "${REPOROOT}/securedrop/scripts/source_deleter" --interval 10 &

./manage.py run
38 changes: 35 additions & 3 deletions securedrop/crypto_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pretty_bad_protocol as gnupg
import os
import io
import re
import scrypt
from random import SystemRandom

Expand Down Expand Up @@ -76,6 +77,8 @@ class CryptoUtil:
REDIS_FINGERPRINT_HASH = "sd/crypto-util/fingerprints"
REDIS_KEY_HASH = "sd/crypto-util/keys"

SOURCE_KEY_UID_RE = re.compile(r"(Source|Autogenerated) Key <[-A-Za-z0-9+/=_]+>")

def __init__(self,
scrypt_params,
scrypt_id_pepper,
Expand Down Expand Up @@ -213,23 +216,52 @@ def genkeypair(self, name, secret):
key_length=self.__gpg_key_length,
passphrase=secret,
name_email=name,
name_real="Source Key",
creation_date=self.DEFAULT_KEY_CREATION_DATE.isoformat(),
expire_date=self.DEFAULT_KEY_EXPIRATION_DATE
))
return genkey_obj

def find_source_key(self, fingerprint: str) -> typing.Optional[typing.Dict]:
"""
Searches the GPG keyring for a source key.
A source key has the given fingerprint and is labeled either
"Source Key" or "Autogenerated Key".
Returns the key or None.
"""
keys = self.gpg.list_keys()
for key in keys:
if fingerprint != key["fingerprint"]:
continue

for uid in key["uids"]:
if self.SOURCE_KEY_UID_RE.match(uid):
return key
else:
return None
return None

def delete_reply_keypair(self, source_filesystem_id):
key = self.get_fingerprint(source_filesystem_id)
fingerprint = self.get_fingerprint(source_filesystem_id)

# If this source was never flagged for review, they won't have a reply
# keypair
if not key:
if not fingerprint:
return

# verify that the key with the given fingerprint belongs to a source
key = self.find_source_key(fingerprint)
if not key:
raise ValueError("source key not found")

# Always delete keys without invoking pinentry-mode = loopback
# see: https://lists.gnupg.org/pipermail/gnupg-users/2016-May/055965.html
temp_gpg = gnupg.GPG(binary='gpg2', homedir=self.gpg_key_dir)

# The subkeys keyword argument deletes both secret and public keys.
temp_gpg.delete_keys(key, secret=True, subkeys=True)
temp_gpg.delete_keys(fingerprint, secret=True, subkeys=True)
self.redis.hdel(self.REDIS_KEY_HASH, self.get_fingerprint(source_filesystem_id))
self.redis.hdel(self.REDIS_FINGERPRINT_HASH, source_filesystem_id)

Expand Down
2 changes: 1 addition & 1 deletion securedrop/journalist.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def prime_keycache():
Preloads CryptoUtil.keycache.
"""
with app.app_context():
for source in Source.query.filter_by(pending=False).all():
for source in Source.query.filter_by(pending=False, deleted_at=None).all():
app.crypto_util.get_pubkey(source.filesystem_id)


Expand Down
2 changes: 1 addition & 1 deletion securedrop/journalist_app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def get_token():
@api.route('/sources', methods=['GET'])
@token_required
def get_all_sources():
sources = Source.query.filter_by(pending=False).all()
sources = Source.query.filter_by(pending=False, deleted_at=None).all()
return jsonify(
{'sources': [source.to_json() for source in sources]}), 200

Expand Down
7 changes: 6 additions & 1 deletion securedrop/journalist_app/col.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,12 @@ def col(filesystem_id):
def delete_single(filesystem_id):
"""deleting a single collection from its /col page"""
source = get_source(filesystem_id)
delete_collection(filesystem_id)
try:
delete_collection(filesystem_id)
except ValueError as e:
current_app.logger.error("error deleting collection: %s", e)
abort(500)

flash(gettext("{source_name}'s collection deleted")
.format(source_name=source.journalist_designation),
"notification")
Expand Down
10 changes: 5 additions & 5 deletions securedrop/journalist_app/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from flask_babel import lazy_gettext as gettext
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileAllowed, FileRequired
from wtforms import (TextAreaField, TextField, BooleanField, HiddenField,
from wtforms import (TextAreaField, StringField, BooleanField, HiddenField,
ValidationError)
from wtforms.validators import InputRequired, Optional

Expand Down Expand Up @@ -38,16 +38,16 @@ def name_length_validation(form, field):


class NewUserForm(FlaskForm):
username = TextField('username', validators=[
username = StringField('username', validators=[
InputRequired(message=gettext('This field is required.')),
minimum_length_validation
])
first_name = TextField('first_name', validators=[name_length_validation, Optional()])
last_name = TextField('last_name', validators=[name_length_validation, Optional()])
first_name = StringField('first_name', validators=[name_length_validation, Optional()])
last_name = StringField('last_name', validators=[name_length_validation, Optional()])
password = HiddenField('password')
is_admin = BooleanField('is_admin')
is_hotp = BooleanField('is_hotp')
otp_secret = TextField('otp_secret', validators=[
otp_secret = StringField('otp_secret', validators=[
otp_secret_validation,
Optional()
])
Expand Down
4 changes: 2 additions & 2 deletions securedrop/journalist_app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def index():
# Long SQLAlchemy statements look best when formatted according to
# the Pocoo style guide, IMHO:
# http://www.pocoo.org/internal/styleguide/
sources = Source.query.filter_by(pending=False) \
sources = Source.query.filter_by(pending=False, deleted_at=None) \
.filter(Source.last_updated.isnot(None)) \
.order_by(Source.last_updated.desc()) \
.all()
Expand Down Expand Up @@ -171,7 +171,7 @@ def bulk():
@view.route('/download_unread/<filesystem_id>')
def download_unread_filesystem_id(filesystem_id):
id = Source.query.filter(Source.filesystem_id == filesystem_id) \
.one().id
.filter_by(deleted_at=None).one().id
submissions = Submission.query.filter(
Submission.source_id == id,
Submission.downloaded == false()).all()
Expand Down
53 changes: 41 additions & 12 deletions securedrop/journalist_app/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# -*- coding: utf-8 -*-
import binascii
import datetime
import os

from datetime import datetime
from flask import (g, flash, current_app, abort, send_file, redirect, url_for,
render_template, Markup, sessions, request)
from flask_babel import gettext, ngettext
Expand Down Expand Up @@ -53,11 +54,17 @@ def commit_account_changes(user):
flash(gettext("Account updated."), "success")


def get_source(filesystem_id):
"""Return a Source object, representing the database row, for the source
with the `filesystem_id`"""
def get_source(filesystem_id, include_deleted=False):
"""
Return the Source object with `filesystem_id`
If `include_deleted` is False, only sources with a null `deleted_at` will
be returned.
"""
source = None
query = Source.query.filter(Source.filesystem_id == filesystem_id)
if not include_deleted:
query = query.filter_by(deleted_at=None)
source = get_one_or_else(query, current_app.logger, abort)

return source
Expand Down Expand Up @@ -157,7 +164,7 @@ def download(zip_basename, submissions):
zf = current_app.storage.get_bulk_archive(submissions,
zip_directory=zip_basename)
attachment_filename = "{}--{}.zip".format(
zip_basename, datetime.utcnow().strftime("%Y-%m-%d--%H-%M-%S"))
zip_basename, datetime.datetime.utcnow().strftime("%Y-%m-%d--%H-%M-%S"))

# Mark the submissions that have been downloaded as such
for submission in submissions:
Expand Down Expand Up @@ -233,8 +240,11 @@ def col_delete(cols_selected):
if len(cols_selected) < 1:
flash(gettext("No collections selected for deletion."), "error")
else:
for filesystem_id in cols_selected:
delete_collection(filesystem_id)
now = datetime.datetime.utcnow()
sources = Source.query.filter(Source.filesystem_id.in_(cols_selected))
sources.update({Source.deleted_at: now}, synchronize_session="fetch")
db.session.commit()

num = len(cols_selected)
flash(ngettext('{num} collection deleted', '{num} collections deleted',
num).format(num=num),
Expand All @@ -259,17 +269,36 @@ def make_password(config):
def delete_collection(filesystem_id):
# Delete the source's collection of submissions
path = current_app.storage.path(filesystem_id)
current_app.storage.move_to_shredder(path)
if os.path.exists(path):
current_app.storage.move_to_shredder(path)

# Delete the source's reply keypair
current_app.crypto_util.delete_reply_keypair(filesystem_id)
try:
current_app.crypto_util.delete_reply_keypair(filesystem_id)
except ValueError as e:
current_app.logger.error("could not delete reply keypair: %s", e)
raise

# Delete their entry in the db
source = get_source(filesystem_id)
source = get_source(filesystem_id, include_deleted=True)
db.session.delete(source)
db.session.commit()


def purge_deleted_sources():
"""
Deletes all Sources with a non-null `deleted_at` attribute.
"""
sources = Source.query.filter(Source.deleted_at.isnot(None)).order_by(Source.deleted_at).all()
if sources:
current_app.logger.info("Purging deleted sources (%s)", len(sources))
for source in sources:
try:
delete_collection(source.filesystem_id)
except Exception as e:
current_app.logger.error("Error deleting source %s: %s", source.uuid, e)


def set_name(user, first_name, last_name):
try:
user.set_name(first_name, last_name)
Expand Down Expand Up @@ -312,7 +341,7 @@ def col_download_unread(cols_selected):
submissions = []
for filesystem_id in cols_selected:
id = Source.query.filter(Source.filesystem_id == filesystem_id) \
.one().id
.filter_by(deleted_at=None).one().id
submissions += Submission.query.filter(
Submission.downloaded == false(),
Submission.source_id == id).all()
Expand All @@ -328,7 +357,7 @@ def col_download_all(cols_selected):
submissions = []
for filesystem_id in cols_selected:
id = Source.query.filter(Source.filesystem_id == filesystem_id) \
.one().id
.filter_by(deleted_at=None).one().id
submissions += Submission.query.filter(
Submission.source_id == id).all()
return download("all", submissions)
Expand Down
3 changes: 3 additions & 0 deletions securedrop/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ class Source(db.Model):
# keep track of how many interactions have happened, for filenames
interaction_count = Column(Integer, default=0, nullable=False)

# when deletion of the source was requested
deleted_at = Column(DateTime)

# Don't create or bother checking excessively long codenames to prevent DoS
NUM_WORDS = 7
MAX_CODENAME_LEN = 128
Expand Down
Loading

0 comments on commit 4fb965f

Please sign in to comment.