From b5ee020f6e849bcadbf3ed497ff7144c3b952f2e Mon Sep 17 00:00:00 2001 From: John Hensley Date: Thu, 26 Mar 2020 14:37:41 -0400 Subject: [PATCH 1/3] Make qa_loader.py nicer for load and workstation testing This version: - generates legible names, messages, and replies - generates recent dates - generates source keys - permits specification of exact numbers: - of journalists and sources - number of submissions and replies per source - fraction of sources starred - fraction of sources replied to The resulting database produces sensible API results, so functions better for testing the SecureDrop Workstation. --- securedrop/qa_loader.py | 304 ++++++++++++++++++++++++---------------- 1 file changed, 184 insertions(+), 120 deletions(-) diff --git a/securedrop/qa_loader.py b/securedrop/qa_loader.py index dae78e6536..f966af7a53 100755 --- a/securedrop/qa_loader.py +++ b/securedrop/qa_loader.py @@ -1,37 +1,36 @@ #!/opt/venvs/securedrop-app-code/bin/python # -*- coding: utf-8 -*- -import math import os import random import string import sys - from argparse import ArgumentParser from datetime import datetime -from flask import current_app +from itertools import cycle from os import path -from sqlalchemy import text + +from flask import current_app from crypto_util import DICEWARE_SAFE_CHARS from db import db from journalist_app import create_app -from models import (Journalist, Source, Submission, SourceStar, Reply, - JournalistLoginAttempt) +from models import Journalist, JournalistLoginAttempt, Reply, Source, SourceStar, Submission from sdconfig import config as sdconfig -random.seed('~(=^–^)') # mrow? + +random.seed("~(=^–^)") # mrow? def random_bool(): return bool(random.getrandbits(1)) -def random_chars(len, nullable, chars=string.printable): +def random_chars(len, nullable, chars=string.ascii_letters): if nullable and random_bool(): return None else: - return ''.join([random.choice(chars) for _ in range(len)]) + return "".join([random.choice(chars) for _ in range(len)]) def bool_or_none(): @@ -42,10 +41,11 @@ def random_datetime(nullable): if nullable and random_bool(): return None else: + now = datetime.now() return datetime( - year=random.randint(1, 9999), - month=random.randint(1, 12), - day=random.randint(1, 28), + year=random.randint(2013, now.year), + month=random.randint(1, now.month), + day=random.randint(1, now.day), hour=random.randint(0, 23), minute=random.randint(0, 59), second=random.randint(0, 59), @@ -56,43 +56,83 @@ def random_datetime(nullable): def positive_int(s): i = int(s) if i < 1: - raise ValueError('{} is not >= 1'.format(s)) + raise ValueError("{} is not >= 1".format(s)) return i -class QaLoader(object): +def fraction(s): + f = float(s) + if 0 <= f <= 1: + return f + raise ValueError("{} should be a float between 0 and 1".format(s)) + + +submissions = cycle( + [ + "This is a test submission without markup!", + 'This is a test submission with markup and characters such as \, \\, \', " and ". ' + + "This text should not be bold!", # noqa: W605, E501 + ] +) + + +replies = cycle( + [ + "This is a test reply without markup!", + 'This is a test reply with markup and characters such as \, \\, \', " and ". ' + + "This text should not be bold!", # noqa: W605, E501 + ] +) - JOURNALIST_COUNT = 10 - SOURCE_COUNT = 50 - def __init__(self, config, multiplier): +class QaLoader(object): + def __init__( + self, + config, + journalist_count=10, + source_count=50, + submissions_per_source=1, + replies_per_source=1, + source_star_fraction=0.1, + source_reply_fraction=0.5, + ): + """ + source_star_fraction and source_reply_fraction are simply the + fraction of sources starred or replied to. + """ self.config = config self.app = create_app(config) - self.multiplier = multiplier + + self.journalist_count = journalist_count + self.source_count = source_count + self.submissions_per_source = submissions_per_source + self.replies_per_source = replies_per_source + self.source_star_fraction = source_star_fraction + self.source_reply_fraction = source_reply_fraction self.journalists = [] self.sources = [] - self.submissions = [] def new_journalist(self): # Make a diceware-like password - pw = ' '.join( - [random_chars(3, nullable=False, chars=DICEWARE_SAFE_CHARS) - for _ in range(7)]) - journalist = Journalist(username=random_chars(random.randint(3, 32), nullable=False), - password=pw, - is_admin=random_bool()) + pw = " ".join( + [random_chars(3, nullable=False, chars=DICEWARE_SAFE_CHARS) for _ in range(7)] + ) + journalist = Journalist( + username=random_chars(random.randint(3, 32), nullable=False), + password=pw, + is_admin=random_bool(), + ) if random_bool(): # to add legacy passwords back in journalist.passphrase_hash = None - journalist.pw_salt = random_chars(32, nullable=False).encode('utf-8') - journalist.pw_hash = random_chars(64, nullable=False).encode('utf-8') + journalist.pw_salt = random_chars(32, nullable=False).encode("utf-8") + journalist.pw_hash = random_chars(64, nullable=False).encode("utf-8") journalist.is_admin = bool_or_none() journalist.is_totp = bool_or_none() - journalist.hotp_counter = (random.randint(-1000, 1000) - if random_bool() else None) + journalist.hotp_counter = random.randint(-1000, 1000) if random_bool() else None journalist.created_on = random_datetime(nullable=True) journalist.last_access = random_datetime(nullable=True) @@ -101,57 +141,36 @@ def new_journalist(self): self.journalists.append(journalist.id) def new_source(self): - fid_len = random.randint(4, 32) - designation_len = random.randint(4, 32) - source = Source(random_chars(fid_len, nullable=False, - chars=string.ascii_lowercase), - random_chars(designation_len, nullable=False)) - source.flagged = bool_or_none() - source.last_updated = random_datetime(nullable=False) - source.pending = False - + codename = current_app.crypto_util.genrandomid() + filesystem_id = current_app.crypto_util.hash_codename(codename) + journalist_designation = current_app.crypto_util.display_id() + source = Source(filesystem_id, journalist_designation) db.session.add(source) db.session.flush() + + # Generate submissions directory and generate source key + os.mkdir(current_app.storage.path(source.filesystem_id)) + current_app.crypto_util.genkeypair(source.filesystem_id, codename) + self.sources.append(source.id) def new_submission(self, source_id): source = Source.query.get(source_id) - # A source may have a null fid according to the DB, but this will - # break storage.path. - if source.filesystem_id is None: - return - - filename = self.fake_file(source.filesystem_id) - submission = Submission(source, filename) - - # For issue #1189 - if random_bool(): - submission.source_id = None + source.interaction_count += 1 + fpath = current_app.storage.save_message_submission( + source.filesystem_id, + source.interaction_count, + source.journalist_filename, + next(submissions), + ) + submission = Submission(source, fpath) + db.session.add(submission) - submission.downloaded = bool_or_none() + source.pending = False + source.last_updated = datetime.utcnow() - db.session.add(submission) db.session.flush() - self.submissions.append(submission.id) - - def fake_file(self, source_fid): - source_dir = path.join(self.config.STORE_DIR, source_fid) - if not path.exists(source_dir): - os.mkdir(source_dir) - - filename = random_chars(20, - nullable=False, - chars=string.ascii_lowercase) - num = random.randint(0, 100) - msg_type = 'msg' if random_bool() else 'doc.gz' - filename = '{}-{}-{}.gpg'.format(num, filename, msg_type) - f_len = int(math.floor(random.expovariate(100000) * 1024 * 1024 * 500)) - sub_path = current_app.storage.path(source_fid, filename) - with open(sub_path, 'w') as f: - f.write('x' * f_len) - - return filename def new_source_star(self, source_id): source = Source.query.get(source_id) @@ -161,15 +180,21 @@ def new_source_star(self, source_id): def new_reply(self, journalist_id, source_id): source = Source.query.get(source_id) - # A source may have a null fid according to the DB, but this will - # break storage.path. - if source.filesystem_id is None: - return - journalist = Journalist.query.get(journalist_id) - filename = self.fake_file(source.filesystem_id) - reply = Reply(journalist, source, filename) + + source.interaction_count += 1 + source.last_updated = datetime.utcnow() + + fname = "{}-{}-reply.gpg".format(source.interaction_count, source.journalist_filename) + current_app.crypto_util.encrypt( + next(replies), + [current_app.crypto_util.getkey(source.filesystem_id), sdconfig.JOURNALIST_KEY], + current_app.storage.path(source.filesystem_id, fname), + ) + + reply = Reply(journalist, source, fname) db.session.add(reply) + db.session.flush() def new_journalist_login_attempt(self, journalist_id): journalist = Journalist.query.get(journalist_id) @@ -177,77 +202,116 @@ def new_journalist_login_attempt(self, journalist_id): attempt.timestamp = random_datetime(nullable=True) db.session.add(attempt) - def new_abandoned_submission(self, source_id): - '''For issue #1189''' - - source = Source.query.filter(Source.filesystem_id.isnot(None)).all()[0] - filename = self.fake_file(source.filesystem_id) - - # Use this as hack to create a real submission then swap out the - # source_id - submission = Submission(source, filename) - submission.source_id = source_id - db.session.add(submission) - db.session.commit() - self.delete_source(source_id) - - def delete_source(self, source_id): - '''For issue #1189''' - db.session.execute(text('DELETE FROM sources WHERE id = :source_id'), - {'source_id': source_id}) - def load(self): with self.app.app_context(): - for _ in range(self.JOURNALIST_COUNT * self.multiplier): + print("Creating {:d} journalists...".format(self.journalist_count)) + for i in range(1, self.journalist_count + 1): self.new_journalist() + if i % min(10, max(1, int(self.journalist_count / 10))) == 0: + sys.stdout.write("{}\r{}".format(" " * len(str(self.journalist_count + 1)), i)) + print("\n") db.session.commit() - for _ in range(self.SOURCE_COUNT * self.multiplier): + print("Creating {:d} sources...".format(self.source_count)) + for i in range(1, self.source_count + 1): self.new_source() + if i % min(10, max(1, int(self.source_count / 10))) == 0: + sys.stdout.write("{}\r{}".format(" " * len(str(self.source_count + 1)), i)) + print("\n") db.session.commit() - for sid in self.sources[0::5]: - for _ in range(1, self.multiplier + 1): + print( + "Creating submissions ({:d} each) for each source...".format( + self.submissions_per_source + ) + ) + for sid in self.sources: + for _ in range(1, self.submissions_per_source + 1): self.new_submission(sid) db.session.commit() - for sid in self.sources[0::5]: + print("Starring {:.2f}% of all sources...".format(self.source_star_fraction * 100)) + for sid in random.sample( + self.sources, int(self.source_count * self.source_star_fraction) + ): self.new_source_star(sid) db.session.commit() - for jid in self.journalists[0::10]: - for sid in self.sources[0::10]: - for _ in range(1, 3): - self.new_reply(jid, sid) + print( + "Creating replies ({:d} each) for {:.2f}% of sources...".format( + self.replies_per_source, self.source_reply_fraction * 100 + ) + ) + for sid in random.sample( + self.sources, int(self.source_count * self.source_reply_fraction) + ): + jid = random.choice(self.journalists) + for _ in range(self.replies_per_source): + self.new_reply(jid, sid) db.session.commit() - for jid in self.journalists[0::10]: + for jid in self.journalists: self.new_journalist_login_attempt(jid) db.session.commit() - for sid in random.sample(self.sources, self.multiplier): - self.new_abandoned_submission(sid) - def arg_parser(): parser = ArgumentParser( - path.basename(__file__), - description='Loads data into the database for testing upgrades') - parser.add_argument('-m', '--multiplier', type=positive_int, default=25, - help=('Factor to multiply the loaded data by ' - '(default 25)')) + path.basename(__file__), description="Loads data into the database for testing upgrades" + ) + parser.add_argument( + "--journalist-count", + type=positive_int, + default=10, + help=("Number of journalists to create"), + ) + parser.add_argument( + "--source-count", type=positive_int, default=50, help=("Number of sources to create") + ) + parser.add_argument( + "--submissions-per-source", + type=positive_int, + default=1, + help=("Number of submissions to create for each source"), + ) + parser.add_argument( + "--replies-per-source", + type=positive_int, + default=1, + help=("Number of replies to create for each source"), + ) + parser.add_argument( + "--source-star-fraction", + type=fraction, + default=0.1, + help=("Fraction of sources to star"), + ) + parser.add_argument( + "--source-reply-fraction", + type=fraction, + default=0.5, + help=("Fraction of sources to reply to"), + ) return parser def main(): args = arg_parser().parse_args() - print('Loading data. This make take a while.') - QaLoader(sdconfig, args.multiplier).load() - - -if __name__ == '__main__': + print("Loading data. This may take a while.") + QaLoader( + sdconfig, + args.journalist_count, + args.source_count, + args.submissions_per_source, + args.replies_per_source, + args.source_star_fraction, + args.source_reply_fraction, + ).load() + + +if __name__ == "__main__": try: main() except KeyboardInterrupt: - print('') # for prompt on a newline + print("") # for prompt on a newline sys.exit(1) From 8684a45f94a97e340e18f116494ce153db0c1739 Mon Sep 17 00:00:00 2001 From: Kevin O Gorman Date: Thu, 9 Apr 2020 16:03:11 -0400 Subject: [PATCH 2/3] fixed qa loader script test --- securedrop/tests/test_qa_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/securedrop/tests/test_qa_loader.py b/securedrop/tests/test_qa_loader.py index cb48bd35a8..66f2a50c61 100644 --- a/securedrop/tests/test_qa_loader.py +++ b/securedrop/tests/test_qa_loader.py @@ -5,4 +5,4 @@ def test_load_data(journalist_app, config): # Use the journalist_app fixture to init the DB - QaLoader(config, multiplier=1).load() + QaLoader(config).load() From e3ad82eef1c71a01efbacb3f8654fcf9b0c7cfb9 Mon Sep 17 00:00:00 2001 From: Kevin O Gorman Date: Thu, 9 Apr 2020 16:13:10 -0400 Subject: [PATCH 3/3] updated Tor Browser version --- securedrop/dockerfiles/xenial/python3/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/securedrop/dockerfiles/xenial/python3/Dockerfile b/securedrop/dockerfiles/xenial/python3/Dockerfile index d0b8fd676d..5033a51ad5 100644 --- a/securedrop/dockerfiles/xenial/python3/Dockerfile +++ b/securedrop/dockerfiles/xenial/python3/Dockerfile @@ -29,7 +29,7 @@ RUN curl -LO https://ftp.mozilla.org/pub/firefox/releases/${FF_ESR_VER}/linux-x8 COPY ./tor_project_public.pub /opt/ -ENV TBB_VERSION 9.0.7 +ENV TBB_VERSION 9.0.9 RUN gpg --import /opt/tor_project_public.pub && \ wget https://www.torproject.org/dist/torbrowser/${TBB_VERSION}/tor-browser-linux64-${TBB_VERSION}_en-US.tar.xz && \ wget https://www.torproject.org/dist/torbrowser/${TBB_VERSION}/tor-browser-linux64-${TBB_VERSION}_en-US.tar.xz.asc && \