Skip to content

Commit

Permalink
Improve download file handling
Browse files Browse the repository at this point in the history
Eliminate File.original_file column and hard links to downloaded
files. Now as files are downloaded and decrypted, File.filename will
be updated.

Instead of downloading directly into the root of the data directory,
create a directory therein for each downloadable object, named with
its UUID, and process downloaded files in that directory. This should
eliminate collisions if multiple sources upload documents with the
same names.
  • Loading branch information
rmol committed Jan 28, 2020
1 parent eb9ce9c commit 6813852
Show file tree
Hide file tree
Showing 12 changed files with 348 additions and 329 deletions.
71 changes: 71 additions & 0 deletions alembic/versions/fb657f2ee8a7_drop_file_original_filename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""drop File.original_filename
Revision ID: fb657f2ee8a7
Revises: 86b01b6290da
Create Date: 2020-01-23 18:55:09.857324
"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "fb657f2ee8a7"
down_revision = "86b01b6290da"
branch_labels = None
depends_on = None


def upgrade():
conn = op.get_bind()

op.rename_table("files", "original_files")

conn.execute(
"""
CREATE TABLE files (
id INTEGER NOT NULL,
uuid VARCHAR(36) NOT NULL,
filename VARCHAR(255) NOT NULL,
file_counter INTEGER NOT NULL,
size INTEGER NOT NULL,
download_url VARCHAR(255) NOT NULL,
is_downloaded BOOLEAN DEFAULT 0 NOT NULL,
is_read BOOLEAN DEFAULT 0 NOT NULL,
is_decrypted BOOLEAN,
source_id INTEGER NOT NULL,
CONSTRAINT pk_files PRIMARY KEY (id),
CONSTRAINT fk_files_source_id_sources FOREIGN KEY(source_id) REFERENCES sources (id),
CONSTRAINT uq_messages_source_id_file_counter UNIQUE (source_id, file_counter),
CONSTRAINT uq_files_uuid UNIQUE (uuid),
CONSTRAINT files_compare_is_downloaded_vs_is_decrypted CHECK (CASE WHEN is_downloaded = 0 THEN is_decrypted IS NULL ELSE 1 END),
CONSTRAINT ck_files_is_downloaded CHECK (is_downloaded IN (0, 1)),
CONSTRAINT ck_files_is_read CHECK (is_read IN (0, 1)),
CONSTRAINT ck_files_is_decrypted CHECK (is_decrypted IN (0, 1))
)
"""
)

conn.execute(
"""
INSERT INTO files
(id, uuid, filename, file_counter, size, download_url, is_downloaded,
is_decrypted, is_read, source_id)
SELECT id, uuid, filename, file_counter, size, download_url, is_downloaded,
is_decrypted, is_read, source_id
FROM original_files
"""
)

op.drop_table("original_files")


def downgrade():
op.add_column(
"files",
sa.Column(
"original_filename",
sa.VARCHAR(length=255),
server_default=sa.text("''"),
nullable=False,
),
)
61 changes: 43 additions & 18 deletions securedrop_client/api_jobs/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,14 @@ def call_api(self, api_client: API, session: Session) -> Any:
self._decrypt(os.path.join(self.data_dir, db_object.filename), db_object, session)
return db_object.uuid

self._download(api_client, db_object, session)
self._decrypt(os.path.join(self.data_dir, db_object.filename), db_object, session)
destination = self._download(api_client, db_object, session)
self._decrypt(destination, db_object, session)
return db_object.uuid

def _download(self,
api: API,
db_object: Union[File, Message, Reply],
session: Session) -> None:
session: Session) -> str:
'''
Download the encrypted file. Check file integrity and move it to the data directory before
marking it as downloaded.
Expand All @@ -188,9 +188,12 @@ def _download(self,
)
raise exception

shutil.move(download_path, os.path.join(self.data_dir, db_object.filename))
destination = db_object.location(self.data_dir)
os.makedirs(os.path.dirname(destination), mode=0o700, exist_ok=True)
shutil.move(download_path, destination)
mark_as_downloaded(type(db_object), db_object.uuid, session)
logger.info("File downloaded: {}".format(db_object.filename))
logger.info("File downloaded to {}".format(destination))
return destination
except BaseError as e:
logger.debug("Failed to download file: {}".format(db_object.filename))
raise e
Expand All @@ -207,7 +210,9 @@ def _decrypt(self,
mark_as_decrypted(
type(db_object), db_object.uuid, session, original_filename=original_filename
)
logger.info("File decrypted: {}".format(os.path.basename(filepath)))
logger.info("File decrypted: {} (decrypted file: {})".format(
os.path.basename(filepath), original_filename)
)
except CryptoError as e:
mark_as_decrypted(type(db_object), db_object.uuid, session, is_decrypted=False)
logger.debug("Failed to decrypt file: {}".format(os.path.basename(filepath)))
Expand Down Expand Up @@ -284,11 +289,21 @@ def call_decrypt(self, filepath: str, session: Session = None) -> str:
'''
with NamedTemporaryFile('w+') as plaintext_file:
self.gpg.decrypt_submission_or_reply(filepath, plaintext_file.name, is_doc=False)
set_message_or_reply_content(
model_type=Reply,
uuid=self.uuid,
session=session,
content=plaintext_file.read())
try:
set_message_or_reply_content(
model_type=Reply,
uuid=self.uuid,
session=session,
content=plaintext_file.read())
finally:
# clean up directory where decryption happened
try:
os.rmdir(os.path.dirname(filepath))
except Exception as e:
logger.warning(
"Error deleting decryption directory of message %s: %s", self.uuid, e
)

return ""


Expand Down Expand Up @@ -330,12 +345,21 @@ def call_decrypt(self, filepath: str, session: Session = None) -> str:
The return value is an empty string; messages have no original filename.
'''
with NamedTemporaryFile('w+') as plaintext_file:
self.gpg.decrypt_submission_or_reply(filepath, plaintext_file.name, is_doc=False)
set_message_or_reply_content(
model_type=Message,
uuid=self.uuid,
session=session,
content=plaintext_file.read())
try:
self.gpg.decrypt_submission_or_reply(filepath, plaintext_file.name, is_doc=False)
set_message_or_reply_content(
model_type=Message,
uuid=self.uuid,
session=session,
content=plaintext_file.read())
finally:
# clean up directory where decryption happened
try:
os.rmdir(os.path.dirname(filepath))
except Exception as e:
logger.warning(
"Error deleting decryption directory of message %s: %s", self.uuid, e
)
return ""


Expand Down Expand Up @@ -376,8 +400,9 @@ def call_decrypt(self, filepath: str, session: Session = None) -> str:
the file extensions, e.g. 1-impractical_thing-doc.gz.gpg -> 1-impractical_thing-doc
'''
fn_no_ext, _ = os.path.splitext(os.path.splitext(os.path.basename(filepath))[0])
plaintext_filepath = os.path.join(self.data_dir, fn_no_ext)
plaintext_filepath = os.path.join(os.path.dirname(filepath), fn_no_ext)
original_filename = self.gpg.decrypt_submission_or_reply(
filepath, plaintext_filepath, is_doc=True
)
logger.info("""Decrypted file "%s" to "%s" """, filepath, original_filename)
return original_filename
8 changes: 6 additions & 2 deletions securedrop_client/crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,12 @@ def decrypt_submission_or_reply(self,

# Store the plaintext in the file located at the specified plaintext_filepath
if is_doc:
original_filename = read_gzip_header_filename(out.name)
with gzip.open(out.name, 'rb') as infile, open(plaintext_filepath, 'wb') as outfile:
original_filename = read_gzip_header_filename(out.name) or plaintext_filepath
decrypt_path = os.path.join(
os.path.dirname(filepath),
os.path.basename(original_filename)
)
with gzip.open(out.name, 'rb') as infile, open(decrypt_path, 'wb') as outfile:
shutil.copyfileobj(infile, outfile)
else:
shutil.copy(out.name, plaintext_filepath)
Expand Down
50 changes: 40 additions & 10 deletions securedrop_client/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,19 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return '<Message {}>'.format(self.filename)

def location(self, data_dir: str) -> str:
'''
Return the full path to the Message's file.
'''
return os.path.abspath(
os.path.join(
data_dir,
self.__class__.__name__,
self.uuid,
self.filename,
)
)


class File(Base):

Expand All @@ -135,15 +148,6 @@ class File(Base):
uuid = Column(String(36), unique=True, nullable=False)
filename = Column(String(255), nullable=False)

# Files from the SecureDrop journalist API are gzipped, then
# encrypted with GPG. The gzip header contains the original
# filename, which makes it easier for the client to open the file
# with the right application. We'll record that filename here
# after we've downloaded, decrypted and extracted the file.
# If the header does not contain the filename for some reason,
# this should be the same as filename.
original_filename = Column(String(255), nullable=False, server_default="")

file_counter = Column(Integer, nullable=False)
size = Column(Integer, nullable=False)
download_url = Column(String(255), nullable=False)
Expand Down Expand Up @@ -179,13 +183,26 @@ def __str__(self) -> str:
Return something that's a useful string representation of the file.
"""
if self.is_downloaded:
return "File: {}".format(self.original_filename)
return "File: {}".format(self.filename)
else:
return '<Encrypted file on server>'

def __repr__(self) -> str:
return '<File {}>'.format(self.filename)

def location(self, data_dir: str) -> str:
'''
Return the full path to the File's file.
'''
return os.path.abspath(
os.path.join(
data_dir,
self.__class__.__name__,
self.uuid,
self.filename,
)
)


class Reply(Base):

Expand Down Expand Up @@ -246,6 +263,19 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return '<Reply {}>'.format(self.filename)

def location(self, data_dir: str) -> str:
'''
Return the full path to the Reply's file.
'''
return os.path.abspath(
os.path.join(
data_dir,
self.__class__.__name__,
self.uuid,
self.filename,
)
)


class DraftReply(Base):

Expand Down
12 changes: 6 additions & 6 deletions securedrop_client/gui/widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1911,7 +1911,7 @@ def __init__(
self.print_button.clicked.connect(self._on_print_clicked)

# File name or default string
self.file_name = SecureQLabel(self.file.original_filename)
self.file_name = SecureQLabel(self.file.filename)
self.file_name.setObjectName('file_name')
self.file_name.installEventFilter(self)
self.no_file_name = SecureQLabel('ENCRYPTED FILE ON SERVER')
Expand Down Expand Up @@ -1965,7 +1965,7 @@ def _on_file_downloaded(self, file_uuid: str) -> None:
if file_uuid == self.file.uuid:
self.file = self.controller.get_file(self.file.uuid)
if self.file.is_downloaded:
self.file_name.setText(self.file.original_filename)
self.file_name.setText(self.file.filename)
self.download_button.hide()
self.no_file_name.hide()
self.export_button.show()
Expand All @@ -1978,12 +1978,12 @@ def _on_export_clicked(self):
"""
Called when the export button is clicked.
"""
if not self.controller.downloaded_file_exists(self.file.uuid):
if not self.controller.downloaded_file_exists(self.file):
self.controller.sync_api()
return

dialog = ExportDialog(self.controller, self.file.uuid,
self.file.original_filename)
self.file.filename)
dialog.show()
dialog.export()
dialog.exec()
Expand All @@ -1993,7 +1993,7 @@ def _on_print_clicked(self):
"""
Called when the print button is clicked.
"""
if not self.controller.downloaded_file_exists(self.file.uuid):
if not self.controller.downloaded_file_exists(self.file):
self.controller.sync_api()
return

Expand All @@ -2012,7 +2012,7 @@ def _on_left_click(self):

if self.file.is_downloaded:
# Open the already downloaded file.
self.controller.on_file_open(self.file.uuid)
self.controller.on_file_open(self.file)
else:
if self.controller.api:
# Indicate in downloading state... but only after 0.3 seconds (i.e.
Expand Down
Loading

0 comments on commit 6813852

Please sign in to comment.