From b5ee020f6e849bcadbf3ed497ff7144c3b952f2e Mon Sep 17 00:00:00 2001
From: John Hensley <john@freedom.press>
Date: Thu, 26 Mar 2020 14:37:41 -0400
Subject: [PATCH 1/3] Make qa_loader.py nicer for load and workstation testing

This version:
- generates legible names, messages, and replies
- generates recent dates
- generates source keys
- permits specification of exact values for:
  - number of journalists and sources
  - number of submissions and replies per source
  - fraction of sources starred
  - fraction of sources replied to

The resulting database produces sensible API results, so it works
better for testing the SecureDrop Workstation.
---
 securedrop/qa_loader.py | 304 ++++++++++++++++++++++++----------------
 1 file changed, 184 insertions(+), 120 deletions(-)
diff --git a/securedrop/qa_loader.py b/securedrop/qa_loader.py
index dae78e6536..f966af7a53 100755
--- a/securedrop/qa_loader.py
+++ b/securedrop/qa_loader.py
@@ -1,37 +1,36 @@
 #!/opt/venvs/securedrop-app-code/bin/python
 # -*- coding: utf-8 -*-
 
-import math
 import os
 import random
 import string
 import sys
-
 from argparse import ArgumentParser
 from datetime import datetime
-from flask import current_app
+from itertools import cycle
 from os import path
-from sqlalchemy import text
+
+from flask import current_app
 
 from crypto_util import DICEWARE_SAFE_CHARS
 from db import db
 from journalist_app import create_app
-from models import (Journalist, Source, Submission, SourceStar, Reply,
-                    JournalistLoginAttempt)
+from models import Journalist, JournalistLoginAttempt, Reply, Source, SourceStar, Submission
 from sdconfig import config as sdconfig
 
-random.seed('~(=^–^)')  # mrow?
+
+random.seed("~(=^–^)")  # mrow?
 
 
 def random_bool():
     return bool(random.getrandbits(1))
 
 
-def random_chars(len, nullable, chars=string.printable):
+def random_chars(len, nullable, chars=string.ascii_letters):
     if nullable and random_bool():
         return None
     else:
-        return ''.join([random.choice(chars) for _ in range(len)])
+        return "".join([random.choice(chars) for _ in range(len)])
 
 
 def bool_or_none():
@@ -42,10 +41,11 @@ def random_datetime(nullable):
     if nullable and random_bool():
         return None
     else:
+        now = datetime.now()
         return datetime(
-            year=random.randint(1, 9999),
-            month=random.randint(1, 12),
-            day=random.randint(1, 28),
+            year=random.randint(2013, now.year),
+            month=random.randint(1, now.month),
+            day=random.randint(1, min(now.day, 28)),  # cap at 28: valid in every month
             hour=random.randint(0, 23),
             minute=random.randint(0, 59),
             second=random.randint(0, 59),
@@ -56,43 +56,83 @@ def random_datetime(nullable):
 def positive_int(s):
     i = int(s)
     if i < 1:
-        raise ValueError('{} is not >= 1'.format(s))
+        raise ValueError("{} is not >= 1".format(s))
     return i
 
 
-class QaLoader(object):
+def fraction(s):
+    f = float(s)
+    if 0 <= f <= 1:
+        return f
+    raise ValueError("{} should be a float between 0 and 1".format(s))
+
+
+submissions = cycle(
+    [
+        "This is a test submission without markup!",
+        'This is a test submission with markup and characters such as \, \\, \', " and ". '
+        + "<strong>This text should not be bold</strong>!",  # noqa: W605, E501
+    ]
+)
+
+
+replies = cycle(
+    [
+        "This is a test reply without markup!",
+        'This is a test reply with markup and characters such as \, \\, \', " and ". '
+        + "<strong>This text should not be bold</strong>!",  # noqa: W605, E501
+    ]
+)
 
-    JOURNALIST_COUNT = 10
-    SOURCE_COUNT = 50
 
-    def __init__(self, config, multiplier):
+class QaLoader(object):
+    def __init__(
+        self,
+        config,
+        journalist_count=10,
+        source_count=50,
+        submissions_per_source=1,
+        replies_per_source=1,
+        source_star_fraction=0.1,
+        source_reply_fraction=0.5,
+    ):
+        """
+        source_star_fraction and source_reply_fraction are simply the
+        fraction of sources starred or replied to.
+        """
         self.config = config
         self.app = create_app(config)
-        self.multiplier = multiplier
+
+        self.journalist_count = journalist_count
+        self.source_count = source_count
+        self.submissions_per_source = submissions_per_source
+        self.replies_per_source = replies_per_source
+        self.source_star_fraction = source_star_fraction
+        self.source_reply_fraction = source_reply_fraction
 
         self.journalists = []
         self.sources = []
-        self.submissions = []
 
     def new_journalist(self):
         # Make a diceware-like password
-        pw = ' '.join(
-            [random_chars(3, nullable=False, chars=DICEWARE_SAFE_CHARS)
-             for _ in range(7)])
-        journalist = Journalist(username=random_chars(random.randint(3, 32), nullable=False),
-                                password=pw,
-                                is_admin=random_bool())
+        pw = " ".join(
+            [random_chars(3, nullable=False, chars=DICEWARE_SAFE_CHARS) for _ in range(7)]
+        )
+        journalist = Journalist(
+            username=random_chars(random.randint(3, 32), nullable=False),
+            password=pw,
+            is_admin=random_bool(),
+        )
         if random_bool():
             # to add legacy passwords back in
             journalist.passphrase_hash = None
-            journalist.pw_salt = random_chars(32, nullable=False).encode('utf-8')
-            journalist.pw_hash = random_chars(64, nullable=False).encode('utf-8')
+            journalist.pw_salt = random_chars(32, nullable=False).encode("utf-8")
+            journalist.pw_hash = random_chars(64, nullable=False).encode("utf-8")
 
         journalist.is_admin = bool_or_none()
 
         journalist.is_totp = bool_or_none()
-        journalist.hotp_counter = (random.randint(-1000, 1000)
-                                   if random_bool() else None)
+        journalist.hotp_counter = random.randint(-1000, 1000) if random_bool() else None
         journalist.created_on = random_datetime(nullable=True)
         journalist.last_access = random_datetime(nullable=True)
 
@@ -101,57 +141,36 @@ def new_journalist(self):
         self.journalists.append(journalist.id)
 
     def new_source(self):
-        fid_len = random.randint(4, 32)
-        designation_len = random.randint(4, 32)
-        source = Source(random_chars(fid_len, nullable=False,
-                                     chars=string.ascii_lowercase),
-                        random_chars(designation_len, nullable=False))
-        source.flagged = bool_or_none()
-        source.last_updated = random_datetime(nullable=False)
-        source.pending = False
-
+        codename = current_app.crypto_util.genrandomid()
+        filesystem_id = current_app.crypto_util.hash_codename(codename)
+        journalist_designation = current_app.crypto_util.display_id()
+        source = Source(filesystem_id, journalist_designation)
         db.session.add(source)
         db.session.flush()
+
+        # Generate submissions directory and generate source key
+        os.mkdir(current_app.storage.path(source.filesystem_id))
+        current_app.crypto_util.genkeypair(source.filesystem_id, codename)
+
         self.sources.append(source.id)
 
     def new_submission(self, source_id):
         source = Source.query.get(source_id)
 
-        # A source may have a null fid according to the DB, but this will
-        # break storage.path.
-        if source.filesystem_id is None:
-            return
-
-        filename = self.fake_file(source.filesystem_id)
-        submission = Submission(source, filename)
-
-        # For issue #1189
-        if random_bool():
-            submission.source_id = None
+        source.interaction_count += 1
+        fpath = current_app.storage.save_message_submission(
+            source.filesystem_id,
+            source.interaction_count,
+            source.journalist_filename,
+            next(submissions),
+        )
+        submission = Submission(source, fpath)
+        db.session.add(submission)
 
-        submission.downloaded = bool_or_none()
+        source.pending = False
+        source.last_updated = datetime.utcnow()
 
-        db.session.add(submission)
         db.session.flush()
-        self.submissions.append(submission.id)
-
-    def fake_file(self, source_fid):
-        source_dir = path.join(self.config.STORE_DIR, source_fid)
-        if not path.exists(source_dir):
-            os.mkdir(source_dir)
-
-        filename = random_chars(20,
-                                nullable=False,
-                                chars=string.ascii_lowercase)
-        num = random.randint(0, 100)
-        msg_type = 'msg' if random_bool() else 'doc.gz'
-        filename = '{}-{}-{}.gpg'.format(num, filename, msg_type)
-        f_len = int(math.floor(random.expovariate(100000) * 1024 * 1024 * 500))
-        sub_path = current_app.storage.path(source_fid, filename)
-        with open(sub_path, 'w') as f:
-            f.write('x' * f_len)
-
-        return filename
 
     def new_source_star(self, source_id):
         source = Source.query.get(source_id)
@@ -161,15 +180,21 @@ def new_source_star(self, source_id):
     def new_reply(self, journalist_id, source_id):
         source = Source.query.get(source_id)
 
-        # A source may have a null fid according to the DB, but this will
-        # break storage.path.
-        if source.filesystem_id is None:
-            return
-
         journalist = Journalist.query.get(journalist_id)
-        filename = self.fake_file(source.filesystem_id)
-        reply = Reply(journalist, source, filename)
+
+        source.interaction_count += 1
+        source.last_updated = datetime.utcnow()
+
+        fname = "{}-{}-reply.gpg".format(source.interaction_count, source.journalist_filename)
+        current_app.crypto_util.encrypt(
+            next(replies),
+            [current_app.crypto_util.getkey(source.filesystem_id), sdconfig.JOURNALIST_KEY],
+            current_app.storage.path(source.filesystem_id, fname),
+        )
+
+        reply = Reply(journalist, source, fname)
         db.session.add(reply)
+        db.session.flush()
 
     def new_journalist_login_attempt(self, journalist_id):
         journalist = Journalist.query.get(journalist_id)
@@ -177,77 +202,116 @@ def new_journalist_login_attempt(self, journalist_id):
         attempt.timestamp = random_datetime(nullable=True)
         db.session.add(attempt)
 
-    def new_abandoned_submission(self, source_id):
-        '''For issue #1189'''
-
-        source = Source.query.filter(Source.filesystem_id.isnot(None)).all()[0]
-        filename = self.fake_file(source.filesystem_id)
-
-        # Use this as hack to create a real submission then swap out the
-        # source_id
-        submission = Submission(source, filename)
-        submission.source_id = source_id
-        db.session.add(submission)
-        db.session.commit()
-        self.delete_source(source_id)
-
-    def delete_source(self, source_id):
-        '''For issue #1189'''
-        db.session.execute(text('DELETE FROM sources WHERE id = :source_id'),
-                           {'source_id':  source_id})
-
     def load(self):
         with self.app.app_context():
-            for _ in range(self.JOURNALIST_COUNT * self.multiplier):
+            print("Creating {:d} journalists...".format(self.journalist_count))
+            for i in range(1, self.journalist_count + 1):
                 self.new_journalist()
+                if i % min(10, max(1, int(self.journalist_count / 10))) == 0:
+                    sys.stdout.write("{}\r{}".format(" " * len(str(self.journalist_count + 1)), i))
+            print("\n")
             db.session.commit()
 
-            for _ in range(self.SOURCE_COUNT * self.multiplier):
+            print("Creating {:d} sources...".format(self.source_count))
+            for i in range(1, self.source_count + 1):
                 self.new_source()
+                if i % min(10, max(1, int(self.source_count / 10))) == 0:
+                    sys.stdout.write("{}\r{}".format(" " * len(str(self.source_count + 1)), i))
+            print("\n")
             db.session.commit()
 
-            for sid in self.sources[0::5]:
-                for _ in range(1, self.multiplier + 1):
+            print(
+                "Creating submissions ({:d} each) for each source...".format(
+                    self.submissions_per_source
+                )
+            )
+            for sid in self.sources:
+                for _ in range(1, self.submissions_per_source + 1):
                     self.new_submission(sid)
             db.session.commit()
 
-            for sid in self.sources[0::5]:
+            print("Starring {:.2f}% of all sources...".format(self.source_star_fraction * 100))
+            for sid in random.sample(
+                self.sources, int(self.source_count * self.source_star_fraction)
+            ):
                 self.new_source_star(sid)
             db.session.commit()
 
-            for jid in self.journalists[0::10]:
-                for sid in self.sources[0::10]:
-                    for _ in range(1, 3):
-                        self.new_reply(jid, sid)
+            print(
+                "Creating replies ({:d} each) for {:.2f}% of sources...".format(
+                    self.replies_per_source, self.source_reply_fraction * 100
+                )
+            )
+            for sid in random.sample(
+                self.sources, int(self.source_count * self.source_reply_fraction)
+            ):
+                jid = random.choice(self.journalists)
+                for _ in range(self.replies_per_source):
+                    self.new_reply(jid, sid)
             db.session.commit()
 
-            for jid in self.journalists[0::10]:
+            for jid in self.journalists:
                 self.new_journalist_login_attempt(jid)
             db.session.commit()
 
-            for sid in random.sample(self.sources, self.multiplier):
-                self.new_abandoned_submission(sid)
-
 
 def arg_parser():
     parser = ArgumentParser(
-        path.basename(__file__),
-        description='Loads data into the database for testing upgrades')
-    parser.add_argument('-m', '--multiplier', type=positive_int, default=25,
-                        help=('Factor to multiply the loaded data by '
-                              '(default 25)'))
+        path.basename(__file__), description="Loads data into the database for testing upgrades"
+    )
+    parser.add_argument(
+        "--journalist-count",
+        type=positive_int,
+        default=10,
+        help=("Number of journalists to create"),
+    )
+    parser.add_argument(
+        "--source-count", type=positive_int, default=50, help=("Number of sources to create")
+    )
+    parser.add_argument(
+        "--submissions-per-source",
+        type=positive_int,
+        default=1,
+        help=("Number of submissions to create for each source"),
+    )
+    parser.add_argument(
+        "--replies-per-source",
+        type=positive_int,
+        default=1,
+        help=("Number of replies to create for each source"),
+    )
+    parser.add_argument(
+        "--source-star-fraction",
+        type=fraction,
+        default=0.1,
+        help=("Fraction of sources to star"),
+    )
+    parser.add_argument(
+        "--source-reply-fraction",
+        type=fraction,
+        default=0.5,
+        help=("Fraction of sources to reply to"),
+    )
     return parser
 
 
 def main():
     args = arg_parser().parse_args()
-    print('Loading data. This make take a while.')
-    QaLoader(sdconfig, args.multiplier).load()
-
-
-if __name__ == '__main__':
+    print("Loading data. This may take a while.")
+    QaLoader(
+        sdconfig,
+        args.journalist_count,
+        args.source_count,
+        args.submissions_per_source,
+        args.replies_per_source,
+        args.source_star_fraction,
+        args.source_reply_fraction,
+    ).load()
+
+
+if __name__ == "__main__":
     try:
         main()
     except KeyboardInterrupt:
-        print('')  # for prompt on a newline
+        print("")  # for prompt on a newline
         sys.exit(1)

From 8684a45f94a97e340e18f116494ce153db0c1739 Mon Sep 17 00:00:00 2001
From: Kevin O Gorman <kevin.ogorman@gmail.com>
Date: Thu, 9 Apr 2020 16:03:11 -0400
Subject: [PATCH 2/3] fixed qa loader script test

---
 securedrop/tests/test_qa_loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/securedrop/tests/test_qa_loader.py b/securedrop/tests/test_qa_loader.py
index cb48bd35a8..66f2a50c61 100644
--- a/securedrop/tests/test_qa_loader.py
+++ b/securedrop/tests/test_qa_loader.py
@@ -5,4 +5,4 @@
 
 def test_load_data(journalist_app, config):
     # Use the journalist_app fixture to init the DB
-    QaLoader(config, multiplier=1).load()
+    QaLoader(config).load()

From e3ad82eef1c71a01efbacb3f8654fcf9b0c7cfb9 Mon Sep 17 00:00:00 2001
From: Kevin O Gorman <kevin.ogorman@gmail.com>
Date: Thu, 9 Apr 2020 16:13:10 -0400
Subject: [PATCH 3/3] updated Tor Browser version

---
 securedrop/dockerfiles/xenial/python3/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/securedrop/dockerfiles/xenial/python3/Dockerfile b/securedrop/dockerfiles/xenial/python3/Dockerfile
index d0b8fd676d..5033a51ad5 100644
--- a/securedrop/dockerfiles/xenial/python3/Dockerfile
+++ b/securedrop/dockerfiles/xenial/python3/Dockerfile
@@ -29,7 +29,7 @@ RUN curl -LO https://ftp.mozilla.org/pub/firefox/releases/${FF_ESR_VER}/linux-x8
 
 COPY ./tor_project_public.pub /opt/
 
-ENV TBB_VERSION 9.0.7
+ENV TBB_VERSION 9.0.9
 RUN gpg --import /opt/tor_project_public.pub && \
     wget  https://www.torproject.org/dist/torbrowser/${TBB_VERSION}/tor-browser-linux64-${TBB_VERSION}_en-US.tar.xz && \
     wget https://www.torproject.org/dist/torbrowser/${TBB_VERSION}/tor-browser-linux64-${TBB_VERSION}_en-US.tar.xz.asc && \