From d24f4dd7b492e76fa7119e03ba523abf80ffebd8 Mon Sep 17 00:00:00 2001 From: John Hensley Date: Wed, 23 Sep 2020 13:24:33 -0400 Subject: [PATCH] Add /seen endpoint, utils.mark_seen, submission type props Add new API endpoint for listing or marking source conversation items that have been seen by a journalist. Add utility method to mark a heterogenous collection of Submission and Reply objects seen. Add Submission.is_file and Submission.is_message to encapsulate the characterization based on filename. --- securedrop/create-dev-data.py | 5 +- securedrop/journalist_app/api.py | 84 +++++++++++- securedrop/journalist_app/col.py | 27 ++-- securedrop/journalist_app/utils.py | 73 +++++++---- securedrop/models.py | 67 ++++++---- securedrop/tests/conftest.py | 2 +- securedrop/tests/test_journalist.py | 6 + securedrop/tests/test_journalist_api.py | 164 ++++++++++++++++++++++++ securedrop/tests/utils/db_helper.py | 27 +++- 9 files changed, 387 insertions(+), 68 deletions(-) diff --git a/securedrop/create-dev-data.py b/securedrop/create-dev-data.py index 07e6e870e42..3f6ef3845f5 100755 --- a/securedrop/create-dev-data.py +++ b/securedrop/create-dev-data.py @@ -15,7 +15,7 @@ from sdconfig import config from db import db -from models import Journalist, Reply, Source, Submission +from models import Journalist, Reply, SeenReply, Source, Submission from specialstrings import strings @@ -138,6 +138,9 @@ def create_source_and_submissions( journalist = journalist_who_replied reply = Reply(journalist, source, fname) db.session.add(reply) + db.session.flush() + seen_reply = SeenReply(reply_id=reply.id, journalist_id=journalist.id) + db.session.add(seen_reply) db.session.commit() diff --git a/securedrop/journalist_app/api.py b/securedrop/journalist_app/api.py index f43b898e7af..c6463fcac05 100644 --- a/securedrop/journalist_app/api.py +++ b/securedrop/journalist_app/api.py @@ -1,7 +1,8 @@ +import collections.abc import json from datetime import datetime, timedelta -from typing import Tuple, Callable, Any +from typing import Tuple, Callable, Any, Set, Union import flask import werkzeug @@ -15,7 +16,8 @@ from db import db from journalist_app import utils -from models import (Journalist, Reply, Source, Submission, +from models import (Journalist, Reply, SeenFile, SeenMessage, + SeenReply, Source, Submission, LoginThrottledException, InvalidUsernameException, BadTokenException, WrongPasswordException) from sdconfig import SDConfig @@ -68,6 +70,7 @@ def get_endpoints() -> Tuple[flask.Response, int]: 'current_user_url': '/api/v1/user', 'submissions_url': '/api/v1/submissions', 'replies_url': '/api/v1/replies', + 'seen_url': '/api/v1/seen', 'auth_token_url': '/api/v1/token'} return jsonify(endpoints), 200 @@ -267,6 +270,9 @@ def all_source_replies(source_uuid: str) -> Tuple[flask.Response, int]: try: db.session.add(reply) + db.session.flush() + seen_reply = SeenReply(reply_id=reply.id, journalist_id=user.id) + db.session.add(seen_reply) db.session.add(source) db.session.commit() except IntegrityError as e: @@ -310,6 +316,80 @@ def get_all_replies() -> Tuple[flask.Response, int]: return jsonify( {'replies': [reply.to_json() for reply in replies if reply.source]}), 200 + @api.route("/seen", methods=["GET", "POST"]) + @token_required + def seen() -> Tuple[flask.Response, int]: + """ + Lists or marks the source conversation items that the journalist has seen. + """ + user = _authenticate_user_from_auth_header(request) + + if request.method == "GET": + seen_files = [ + { + "file_uuid": f.file.uuid, + "journalist_uuid": f.journalist.uuid if f.journalist_id else None + } + for f in SeenFile.query.all() + ] + seen_messages = [ + { + "message_uuid": f.message.uuid, + "journalist_uuid": f.journalist.uuid if f.journalist_id else None + } + for f in SeenMessage.query.all() + ] + seen_replies = [ + { + "reply_uuid": f.reply.uuid, + "journalist_uuid": f.journalist.uuid if f.journalist_id else None + } + for f in SeenReply.query.all() + ] + + return jsonify( + { + "files": seen_files, + "messages": seen_messages, + "replies": seen_replies, + } + ), 200 + + if request.method == "POST": + if request.json is None or not isinstance(request.json, collections.abc.Mapping): + abort(400, "Please send requests in valid JSON.") + + if not any(map(request.json.get, ["files", "messages", "replies"])): + abort(400, "Please specify the resources to mark seen.") + + # gather everything to be marked seen. if any don't exist, + # reject the request. + targets = set() # type: Set[Union[Submission, Reply]] + for file_uuid in request.json.get("files", []): + f = Submission.query.filter(Submission.uuid == file_uuid).one_or_none() + if f is None or not f.is_file: + abort(404, "file not found: {}".format(file_uuid)) + targets.add(f) + + for message_uuid in request.json.get("messages", []): + m = Submission.query.filter(Submission.uuid == message_uuid).one_or_none() + if m is None or not m.is_message: + abort(404, "message not found: {}".format(message_uuid)) + targets.add(m) + + for reply_uuid in request.json.get("replies", []): + r = Reply.query.filter(Reply.uuid == reply_uuid).one_or_none() + if r is None: + abort(404, "reply not found: {}".format(reply_uuid)) + targets.add(r) + + # now mark everything seen. + utils.mark_seen(list(targets), user) + + return jsonify({"message": "resources marked seen"}), 200 + + abort(405) + @api.route('/user', methods=['GET']) @token_required def get_current_user() -> Tuple[flask.Response, int]: diff --git a/securedrop/journalist_app/col.py b/securedrop/journalist_app/col.py index 108dddfb178..0c45619a1dc 100644 --- a/securedrop/journalist_app/col.py +++ b/securedrop/journalist_app/col.py @@ -1,7 +1,17 @@ # -*- coding: utf-8 -*- -from flask import (g, Blueprint, redirect, url_for, render_template, flash, - request, abort, send_file, current_app) +from flask import ( + Blueprint, + abort, + current_app, + flash, + g, + redirect, + render_template, + request, + send_file, + url_for, +) from flask_babel import gettext from sqlalchemy.exc import IntegrityError from sqlalchemy.orm.exc import NoResultFound @@ -82,28 +92,25 @@ def download_single_file(filesystem_id, fn): if '..' in fn or fn.startswith('/'): abort(404) - # mark as seen by the current user and update downloaded for submissions - journalist_id = g.get('user').id + # mark as seen by the current user try: - if fn.endswith('reply.gpg'): + journalist_id = g.get("user").id + if fn.endswith("reply.gpg"): reply = Reply.query.filter(Reply.filename == fn).one() seen_reply = SeenReply(reply_id=reply.id, journalist_id=journalist_id) db.session.add(seen_reply) - elif fn.endswith('-doc.gz.gpg') or fn.endswith("doc.zip.gpg"): + elif fn.endswith("-doc.gz.gpg") or fn.endswith("doc.zip.gpg"): file = Submission.query.filter(Submission.filename == fn).one() seen_file = SeenFile(file_id=file.id, journalist_id=journalist_id) db.session.add(seen_file) - Submission.query.filter(Submission.filename == fn).one().downloaded = True else: message = Submission.query.filter(Submission.filename == fn).one() seen_message = SeenMessage(message_id=message.id, journalist_id=journalist_id) db.session.add(seen_message) - Submission.query.filter(Submission.filename == fn).one().downloaded = True db.session.commit() except NoResultFound as e: - current_app.logger.error( - "Could not mark " + fn + " as downloaded: %s" % (e,)) + current_app.logger.error("Could not mark {} as seen: {}".format(fn, e)) except IntegrityError: pass # expected not to store that a file was seen by the same user multiple times diff --git a/securedrop/journalist_app/utils.py b/securedrop/journalist_app/utils.py index 638dbc5e61e..32af2e512b2 100644 --- a/securedrop/journalist_app/utils.py +++ b/securedrop/journalist_app/utils.py @@ -10,15 +10,29 @@ render_template, Markup, sessions, request) from flask_babel import gettext, ngettext from sqlalchemy.exc import IntegrityError -from sqlalchemy.sql.expression import false import i18n from db import db -from models import (get_one_or_else, Source, Journalist, InvalidUsernameException, - WrongPasswordException, FirstOrLastNameError, LoginThrottledException, - BadTokenException, SourceStar, PasswordError, SeenFile, SeenMessage, SeenReply, - Submission, RevokedToken, InvalidPasswordLength, Reply) +from models import ( + BadTokenException, + FirstOrLastNameError, + InvalidPasswordLength, + InvalidUsernameException, + Journalist, + LoginThrottledException, + PasswordError, + Reply, + RevokedToken, + SeenFile, + SeenMessage, + SeenReply, + Source, + SourceStar, + Submission, + WrongPasswordException, + get_one_or_else, +) from store import add_checksum_for_file from sdconfig import SDConfig @@ -150,6 +164,32 @@ def validate_hotp_secret(user: Journalist, otp_secret: str) -> bool: return True +def mark_seen(targets: List[Union[Submission, Reply]], user: Journalist) -> None: + """ + Marks a list of submissions or replies seen by the given journalist. + """ + for t in targets: + try: + if isinstance(t, Submission): + t.downloaded = True + if t.is_file: + sf = SeenFile(file_id=t.id, journalist_id=user.id) + db.session.add(sf) + elif t.is_message: + sm = SeenMessage(message_id=t.id, journalist_id=user.id) + db.session.add(sm) + db.session.commit() + elif isinstance(t, Reply): + sr = SeenReply(reply_id=t.id, journalist_id=user.id) + db.session.add(sr) + db.session.commit() + except IntegrityError as e: + db.session.rollback() + if 'UNIQUE constraint failed' in str(e): + continue + raise + + def download(zip_basename: str, submissions: List[Union[Submission, Reply]]) -> werkzeug.Response: """Send client contents of ZIP-file *zip_basename*-.zip containing *submissions*. The ZIP-file, being a @@ -163,27 +203,10 @@ def download(zip_basename: str, submissions: List[Union[Submission, Reply]]) -> """ zf = current_app.storage.get_bulk_archive(submissions, zip_directory=zip_basename) attachment_filename = "{}--{}.zip".format( - zip_basename, datetime.datetime.utcnow().strftime("%Y-%m-%d--%H-%M-%S")) - - # mark as seen by the current user and update downloaded for submissions - journalist_id = g.get('user').id - for item in submissions: - try: - if item.filename.endswith('reply.gpg'): - seen_reply = SeenReply(reply_id=item.id, journalist_id=journalist_id) - db.session.add(seen_reply) - elif item.filename.endswith('-doc.gz.gpg'): - seen_file = SeenFile(file_id=item.id, journalist_id=journalist_id) - db.session.add(seen_file) - item.downloaded = True - else: - seen_message = SeenMessage(message_id=item.id, journalist_id=journalist_id) - db.session.add(seen_message) - item.downloaded = True + zip_basename, datetime.datetime.utcnow().strftime("%Y-%m-%d--%H-%M-%S") + ) - db.session.commit() - except IntegrityError: - pass # expected not to store that a file was seen by the same user multiple times + mark_seen(submissions, g.user) return send_file( zf.name, diff --git a/securedrop/models.py b/securedrop/models.py index 58b2445a694..709ec27e8df 100644 --- a/securedrop/models.py +++ b/securedrop/models.py @@ -100,11 +100,10 @@ def journalist_filename(self) -> str: def documents_messages_count(self) -> 'Dict[str, int]': self.docs_msgs_count = {'messages': 0, 'documents': 0} for submission in self.submissions: - if submission.filename.endswith('msg.gpg'): - self.docs_msgs_count['messages'] += 1 - elif (submission.filename.endswith('doc.gz.gpg') or - submission.filename.endswith('doc.zip.gpg')): - self.docs_msgs_count['documents'] += 1 + if submission.is_message: + self.docs_msgs_count["messages"] += 1 + elif submission.is_file: + self.docs_msgs_count["documents"] += 1 return self.docs_msgs_count @property @@ -212,7 +211,23 @@ def __init__(self, source: Source, filename: str) -> None: def __repr__(self) -> str: return '' % (self.filename) - def to_json(self) -> 'Dict[str, Union[str, int, bool]]': + @property + def is_file(self) -> bool: + return self.filename.endswith("doc.gz.gpg") or self.filename.endswith("doc.zip.gpg") + + @property + def is_message(self) -> bool: + return self.filename.endswith("msg.gpg") + + def to_json(self) -> "Dict[str, Union[str, int, bool]]": + seen_by = { + f.journalist.uuid for f in SeenFile.query.filter(SeenFile.file_id == self.id) + if f.journalist + } + seen_by.update({ + m.journalist.uuid for m in SeenMessage.query.filter(SeenMessage.message_id == self.id) + if m.journalist + }) json_submission = { 'source_url': url_for('api.single_source', source_uuid=self.source.uuid) if self.source else None, @@ -221,11 +236,14 @@ def to_json(self) -> 'Dict[str, Union[str, int, bool]]': submission_uuid=self.uuid) if self.source else None, 'filename': self.filename, 'size': self.size, - 'is_read': self.downloaded, + "is_file": self.is_file, + "is_message": self.is_message, + "is_read": self.seen, 'uuid': self.uuid, 'download_url': url_for('api.download_submission', source_uuid=self.source.uuid, submission_uuid=self.uuid) if self.source else None, + 'seen_by': list(seen_by) } return json_submission @@ -284,17 +302,21 @@ def __init__(self, def __repr__(self) -> str: return '' % (self.filename) - def to_json(self) -> 'Dict[str, Union[str, int, bool]]': - username = "deleted" - first_name = "" - last_name = "" - uuid = "deleted" + def to_json(self) -> "Dict[str, Union[str, int, bool]]": + journalist_username = "deleted" + journalist_first_name = "" + journalist_last_name = "" + journalist_uuid = "deleted" if self.journalist: - username = self.journalist.username - first_name = self.journalist.first_name - last_name = self.journalist.last_name - uuid = self.journalist.uuid - json_submission = { + journalist_username = self.journalist.username + journalist_first_name = self.journalist.first_name + journalist_last_name = self.journalist.last_name + journalist_uuid = self.journalist.uuid + seen_by = [ + r.journalist.uuid for r in SeenReply.query.filter(SeenReply.reply_id == self.id) + if r.journalist + ] + json_reply = { 'source_url': url_for('api.single_source', source_uuid=self.source.uuid) if self.source else None, 'reply_url': url_for('api.single_reply', @@ -302,14 +324,15 @@ def to_json(self) -> 'Dict[str, Union[str, int, bool]]': reply_uuid=self.uuid) if self.source else None, 'filename': self.filename, 'size': self.size, - 'journalist_username': username, - 'journalist_first_name': first_name, - 'journalist_last_name': last_name, - 'journalist_uuid': uuid, + 'journalist_username': journalist_username, + 'journalist_first_name': journalist_first_name, + 'journalist_last_name': journalist_last_name, + 'journalist_uuid': journalist_uuid, 'uuid': self.uuid, 'is_deleted_by_source': self.deleted_by_source, + 'seen_by': seen_by } - return json_submission + return json_reply class SourceStar(db.Model): diff --git a/securedrop/tests/conftest.py b/securedrop/tests/conftest.py index 043b77bef2b..625589e86a6 100644 --- a/securedrop/tests/conftest.py +++ b/securedrop/tests/conftest.py @@ -208,7 +208,7 @@ def test_submissions(journalist_app): def test_files(journalist_app, test_journo): with journalist_app.app_context(): source, codename = utils.db_helper.init_source() - utils.db_helper.submit(source, 2) + utils.db_helper.submit(source, 2, submission_type="file") utils.db_helper.reply(test_journo['journalist'], source, 1) return {'source': source, 'codename': codename, diff --git a/securedrop/tests/test_journalist.py b/securedrop/tests/test_journalist.py index 3dfef28c5c4..a677cae8e01 100644 --- a/securedrop/tests/test_journalist.py +++ b/securedrop/tests/test_journalist.py @@ -2293,6 +2293,12 @@ def test_download_all_selected_sources(journalist_app, test_journo): source = i["source"] selected.append(source.filesystem_id) + # Download all submissions from all sources selected + resp = app.post( + url_for("col.process"), + data=dict(action="download-all", cols_selected=selected), + ) + # The download request was succesful, and the app returned a zipfile assert resp.status_code == 200 assert resp.content_type == "application/zip" diff --git a/securedrop/tests/test_journalist_api.py b/securedrop/tests/test_journalist_api.py index dc5c676cd3f..84e305721bf 100644 --- a/securedrop/tests/test_journalist_api.py +++ b/securedrop/tests/test_journalist_api.py @@ -1019,3 +1019,167 @@ def test_revoke_token(journalist_app, test_journo, journalist_api_token): resp = app.get(url_for('api.get_all_sources'), headers=get_api_headers(journalist_api_token)) assert resp.status_code == 403 + + +def test_seen(journalist_app, journalist_api_token, test_files, test_journo, test_submissions): + """ + Happy path for seen: marking things seen works. + """ + with journalist_app.test_client() as app: + replies_url = url_for('api.get_all_replies') + seen_url = url_for('api.seen') + submissions_url = url_for('api.get_all_submissions') + headers = get_api_headers(journalist_api_token) + + # check that /seen reports no submissions, but one reply + response = app.get(seen_url, headers=headers) + assert response.status_code == 200 + assert len(response.json["files"]) == 0 + assert len(response.json["messages"]) == 0 + assert len(response.json["replies"]) == 1 + + # check that /submissions contains no seen items + response = app.get(submissions_url, headers=headers) + assert response.status_code == 200 + assert not any([s["seen_by"] for s in response.json["submissions"]]) + + # check that /replies only contains seen items + response = app.get(replies_url, headers=headers) + assert response.status_code == 200 + assert all([r["seen_by"] for r in response.json["replies"]]) + + # now mark one of each type of conversation item seen + file_uuid = test_files['submissions'][0].uuid + msg_uuid = test_submissions['submissions'][0].uuid + reply_uuid = test_files['replies'][0].uuid + data = { + "files": [file_uuid], + "messages": [msg_uuid], + "replies": [reply_uuid], + } + response = app.post(seen_url, data=json.dumps(data), headers=headers) + assert response.status_code == 200 + assert response.json["message"] == "resources marked seen" + + # and check that they are now reported seen + response = app.get(seen_url, headers=headers) + assert response.status_code == 200 + + expected_data = { + "files": [ + { + "file_uuid": file_uuid, + "journalist_uuid": test_journo["uuid"] + } + ], + "messages": [ + { + "message_uuid": msg_uuid, + "journalist_uuid": test_journo["uuid"] + } + ], + "replies": [ + { + "reply_uuid": reply_uuid, + "journalist_uuid": test_journo["uuid"] + } + ], + } + assert expected_data == response.json + + # check that /submissions now contains the seen items + response = app.get(submissions_url, headers=headers) + assert response.status_code == 200 + assert [ + s for s in response.json["submissions"] + if s["is_file"] and s["uuid"] == file_uuid + and test_journo["uuid"] in s["seen_by"] + ] + assert [ + s for s in response.json["submissions"] + if s["is_message"] and s["uuid"] == msg_uuid + and test_journo["uuid"] in s["seen_by"] + ] + + # check that /replies still only contains one seen reply + response = app.get(replies_url, headers=headers) + assert response.status_code == 200 + assert len(response.json["replies"]) == 1 + assert all([r["seen_by"] for r in response.json["replies"]]) + + # now mark the same things seen. this should be fine. + response = app.post(seen_url, data=json.dumps(data), headers=headers) + assert response.status_code == 200 + assert response.json["message"] == "resources marked seen" + + # check that /submissions still only has the test journalist + # once in the seen_by list of the submissions we marked + response = app.get(submissions_url, headers=headers) + assert response.status_code == 200 + assert [ + s for s in response.json["submissions"] + if s["uuid"] in [file_uuid, msg_uuid] + and s["seen_by"] == [test_journo["uuid"]] + ] + + # check that /replies still only contains one seen reply + response = app.get(replies_url, headers=headers) + assert response.status_code == 200 + assert len(response.json["replies"]) == 1 + assert all([r["seen_by"] for r in response.json["replies"]]) + + +def test_seen_bad_requests(journalist_app, journalist_api_token): + """ + Check that /seen rejects invalid requests. + """ + with journalist_app.test_client() as app: + seen_url = url_for('api.seen') + headers = get_api_headers(journalist_api_token) + + response = app.get(seen_url, headers=headers) + assert response.status_code == 200 + + assert len(response.json["files"]) == 0 + assert len(response.json["messages"]) == 0 + assert len(response.json["replies"]) == 0 + + # invalid JSON + data = "not a mapping" + response = app.post(seen_url, data=json.dumps(data), headers=headers) + assert response.status_code == 400 + assert response.json["message"] == "Please send requests in valid JSON." + + # valid JSON, but with invalid content + data = {"valid mapping": False} + response = app.post(seen_url, data=json.dumps(data), headers=headers) + assert response.status_code == 400 + assert response.json["message"] == "Please specify the resources to mark seen." + + # unsupported HTTP method + response = app.head(seen_url, headers=headers) + assert response.status_code == 405 + + # nonexistent file + data = { + "files": ["not-a-file"], + } + response = app.post(seen_url, data=json.dumps(data), headers=headers) + assert response.status_code == 404 + assert response.json["message"] == "file not found: not-a-file" + + # nonexistent message + data = { + "messages": ["not-a-message"], + } + response = app.post(seen_url, data=json.dumps(data), headers=headers) + assert response.status_code == 404 + assert response.json["message"] == "message not found: not-a-message" + + # nonexistent reply + data = { + "replies": ["not-a-reply"], + } + response = app.post(seen_url, data=json.dumps(data), headers=headers) + assert response.status_code == 404 + assert response.json["message"] == "reply not found: not-a-reply" diff --git a/securedrop/tests/utils/db_helper.py b/securedrop/tests/utils/db_helper.py index c824de83eda..99e70f70503 100644 --- a/securedrop/tests/utils/db_helper.py +++ b/securedrop/tests/utils/db_helper.py @@ -4,6 +4,7 @@ """ import datetime import math +import io import os import random from typing import Dict, List @@ -81,6 +82,9 @@ def reply(journalist, source, num_replies): reply = Reply(journalist, source, fname) replies.append(reply) db.session.add(reply) + db.session.flush() + seen_reply = SeenReply(reply_id=reply.id, journalist_id=journalist.id) + db.session.add(seen_reply) db.session.commit() return replies @@ -148,7 +152,7 @@ def init_source(): return source, codename -def submit(source, num_submissions): +def submit(source, num_submissions, submission_type="message"): """Generates and submits *num_submissions* :class:`Submission`s on behalf of a :class:`Source` *source*. @@ -168,12 +172,21 @@ def submit(source, num_submissions): for _ in range(num_submissions): source.interaction_count += 1 source.pending = False - fpath = current_app.storage.save_message_submission( - source.filesystem_id, - source.interaction_count, - source.journalist_filename, - str(os.urandom(1)) - ) + if submission_type == "file": + fpath = current_app.storage.save_file_submission( + source.filesystem_id, + source.interaction_count, + source.journalist_filename, + "pipe.txt", + io.BytesIO(b"Ceci n'est pas une pipe.") + ) + else: + fpath = current_app.storage.save_message_submission( + source.filesystem_id, + source.interaction_count, + source.journalist_filename, + str(os.urandom(1)) + ) submission = Submission(source, fpath) submissions.append(submission) db.session.add(source)