Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: verify reproducibility of gettext machine objects #1666

Merged
merged 12 commits into from
Sep 26, 2023
Merged
19 changes: 15 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ common-steps:
name: Install testing dependencies
command: |
set -e
apt update && apt install -y git gnupg libqt5x11extras5 make python3-tk python3-dev gnupg python3-venv sqlite3 xvfb
apt update && apt install -y git gnupg libarchive13 libmagic1 libqt5x11extras5 make python3-tk python3-dev gnupg python3-venv sqlite3 xvfb

- &configure_locales
run:
Expand Down Expand Up @@ -77,16 +77,26 @@ common-steps:
source .venv/bin/activate
make semgrep bandit

- &check_internationalization
- &check_source_strings
run:
name: Run internationalization check
name: Check that source strings are updated
command: |
set -e
export VERSION_CODENAME=$(~/project/scripts/codename)
make venv
source .venv/bin/activate
make check-strings

- &check_mo_repro
run:
name: Check that translation machine objects are reproducible
command: |
set -e
export VERSION_CODENAME=$(~/project/scripts/codename)
make venv
source .venv/bin/activate
make verify-mo

- &check_python_dependencies_for_vulnerabilities
run:
name: Check Python dependencies for known vulnerabilities
Expand Down Expand Up @@ -215,7 +225,8 @@ jobs:
steps:
- *install_testing_dependencies
- checkout
- *check_internationalization
- *check_source_strings
- *check_mo_repro

check-testing-requirements:
parameters: *parameters
Expand Down
22 changes: 18 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,27 @@ semgrep-local:

.PHONY: black
black: ## Format Python source code with black
@black ./
@black \
./ \
scripts/*.py

.PHONY: check-black
check-black: ## Check Python source code formatting with black
@black --check --diff ./
@black --check --diff \
./ \
scripts/*.py

.PHONY: isort
isort: ## Run isort to organize Python imports
@isort --skip-glob .venv ./
@isort --skip-glob .venv \
./ \
scripts/*.py

.PHONY: check-isort
check-isort: ## Check Python import organization with isort
@isort --skip-glob .venv --check-only --diff ./
@isort --skip-glob .venv --check-only --diff \
./ scripts \
/*.py

.PHONY: mypy
mypy: ## Run static type checker
Expand All @@ -80,6 +88,7 @@ mypy: ## Run static type checker
--show-error-codes \
--warn-unreachable \
--warn-unused-ignores \
scripts/*.py \
securedrop_client \
*.py

Expand Down Expand Up @@ -233,3 +242,8 @@ $(POT): securedrop_client
$^
@sed -i -e '/^"POT-Creation-Date/d' ${POT}

.PHONY: verify-mo
verify-mo: ## Verify that all gettext machine objects (.mo) are reproducible from their catalogs (.po).
@TERM=dumb scripts/verify-mo.py ${LOCALE_DIR}/*
@# All good; now clean up.
@git restore "${LOCALE_DIR}/**/*.po"
1,613 changes: 993 additions & 620 deletions requirements/dev-bookworm-requirements.txt

Large diffs are not rendered by default.

1,600 changes: 974 additions & 626 deletions requirements/dev-bullseye-requirements.txt

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions requirements/dev-sdw-requirements.in
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
-r requirements.in
babel
black
diffoscope
flake8
flaky
isort
MarkupSafe
mypy
Pillow>=9.0.1 # for mouseinfo, pyscreeze as of CVE-2022-{22817,24303}
pip-tools>=6.8.0 # for jazzband/piptools#1617
polib
PyAutoGUI
pyobjc-core;platform_system=="Darwin"
pyobjc;platform_system=="Darwin"
Expand All @@ -21,6 +23,8 @@ pytest-vcr
pytest-xdist>=3.0.2 # CVE-2022-42969
semgrep
toml
translate-toolkit
types-polib
types-python-dateutil
types-setuptools
wlc
1,600 changes: 974 additions & 626 deletions requirements/dev-sdw-requirements.txt

Large diffs are not rendered by default.

161 changes: 161 additions & 0 deletions scripts/verify-mo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env python3
"""
Verify the reproducibility of gettext machine objects (.mo) from catalogs
(.po).

Due to tool- and library-level idiosyncrasies, this happens in three stages:

1. Via polib: Overwrite metadata .mo → .po.
2. Via translate: Recompile the entire catalog .po → .mo.
3. Via diffoscope: Diff the new .mo against the old, heavily masked and
filtered to avoid false positives from stray entries in the "fuzzy"
and "obsolete" states.

In other words, the new .mo file should be identical (modulo stray entries) to
the original, meaning that the original .po/.mo pair differed only in their
metadata.
"""

import argparse
import os
import shlex
import subprocess
from collections.abc import Iterator
from pathlib import Path
from types import TracebackType
from typing import Optional, Set

import polib
from translate.tools.pocompile import convertmo

parser = argparse.ArgumentParser(
"""Verify the reproducibility of gettext machine objects (.mo) from catalogs (.po)."""
)
parser.add_argument(
"locale",
nargs="+",
help="""one or more locale directories, each of which must contain an "LC_MESSAGES" directory""",
)
parser.add_argument(
"--domain", default="messages", help="""the gettext domain to load (defaults to "messages")"""
)
args = parser.parse_args()


class CatalogVerifier:
"""Wrapper class for proving .mo → .po → .mo reproducibility."""

def __init__(self, path: Path, domain: str):
"""Set up the .po/.mo pair."""

self.path = path
self.po = polib.pofile(str(path / "LC_MESSAGES" / f"{domain}.po"))
self.mo = polib.mofile(str(path / "LC_MESSAGES" / f"{domain}.mo"))

def __enter__(self) -> "CatalogVerifier":
"""Prepare to generate the new .mo file to diff."""

self.mo_target = Path(f"{self.mo.fpath}.new")
return self

def __exit__(
self,
exc_type: Optional[type[BaseException]],
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> None:
"""Clean up."""

self.mo_target.unlink(missing_ok=True)

@property
def strays(self) -> Set[str]:
"""Return the set of stray (fuzzy or obsolete) entries to mask when
diffing this catalog."""

fuzzy = {
f"^{line.replace('#| ', '')}" # strip fuzzy marker
for e in self.po.fuzzy_entries()
for line in str(e).splitlines()
}
obsolete = {
f"^{line.replace('#~ ', '')}" # strip obsolete marker
for e in self.po.obsolete_entries()
for line in str(e).splitlines()
}

return fuzzy | obsolete

def diffoscope_args(self, a: Path, b: Path, filtered: bool = True) -> Iterator[str]:
"""Build up a diffoscope invocation that (with `filtered`) removes
false positives from the msgunfmt diff."""

yield f"diffoscope {a} {b}"

if not filtered:
return

yield "--diff-mask '^$'" # tell diffoscope to mask empty lines
for stray in self.strays:
yield f"--diff-mask {shlex.quote(stray)}" # tell diffoscope to mask strays
yield "| grep -Fv '[masked]'" # ignore things we've masked
yield "| grep -E '│ (-|\+)msg(id|str)'" # ignore context; we only care about real diffs

def diffoscope_call(
self, a: Path, b: Path, filtered: bool = True
) -> subprocess.CompletedProcess:
"""Call diffoscope and return the subprocess.CompletedProcess result
for further processing, *without* first checking whether it was
succesful."""

cmd = " ".join(self.diffoscope_args(a, b, filtered))

# We silence Bandit and Semgrep warnings on `shell=True`
# because we want to inherit the Python virtual environment
# in which we're invoked.
return subprocess.run( # nosec B602 nosemgrep: python.lang.security.audit.subprocess-shell-true.subprocess-shell-true
cmd,
capture_output=True,
env=os.environ,
shell=True,
)

def reproduce(self) -> None:
"""Overwrite metadata .mo → .po. Then rewrite the entire file .po →
.mo."""

self.po.metadata = self.mo.metadata
self.po.save(self.po.fpath)

with open(self.mo_target, "wb") as mo_target:
convertmo(self.po.fpath, mo_target, "")

def verify(self) -> None:
"""Run diffoscope for this catalog and error if there's any unmasked
diff."""

# Without filtering, diffoscope should return either 0 (no differences)
# or 1 (differences); anything else is an error.
test = self.diffoscope_call(Path(self.mo.fpath), Path(self.mo_target), filtered=False)
if test.returncode not in [0, 1]:
test.check_returncode()

# With filtering, since diffoscope will return 1 on differences
# (pre-filtering), and grep will return 1 on *no* differences
# (post-filtering), we can't count on result.returncode here.
result = self.diffoscope_call(Path(self.mo.fpath), Path(self.mo_target))
print(f"--> Verifying {self.path}: {result.args}")
if len(result.stdout) > 0:
raise Exception(result.stdout.decode("utf-8"))


print(f"--> Reproducing {len(args.locale)} path(s)")
for path in args.locale:
locale_dir = Path(path).resolve()
if not locale_dir.is_dir():
print(f'--> Skipping "{locale_dir}"')
continue

with CatalogVerifier(locale_dir, args.domain) as catalog:
catalog.reproduce()
catalog.verify()
2 changes: 1 addition & 1 deletion securedrop_client/api_jobs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def __repr__(self) -> str:

def __eq__(self, other: Any) -> bool:
# https://github.com/python/mypy/issues/2783
if self.uuid == getattr(other, "uuid", None) and type(self) == type(other):
if self.uuid == getattr(other, "uuid", None) and type(self) is type(other):
return True
else:
return False
1 change: 0 additions & 1 deletion securedrop_client/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@


class Config:

CONFIG_NAME = "config.json"

def __init__(self, journalist_key_fingerprint: str) -> None:
Expand Down
2 changes: 0 additions & 2 deletions securedrop_client/crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@


class CryptoError(Exception):

pass


Expand Down Expand Up @@ -208,7 +207,6 @@ def encrypt_to_source(self, source_uuid: str, data: str) -> str:
with tempfile.NamedTemporaryFile("w+") as content, tempfile.NamedTemporaryFile(
"w+"
) as stdout, tempfile.NamedTemporaryFile("w+") as stderr:

content.write(data)
content.seek(0)

Expand Down
7 changes: 0 additions & 7 deletions securedrop_client/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def make_session_maker(home: str) -> scoped_session:


class User(Base):

__tablename__ = "users"

id = Column(Integer, primary_key=True)
Expand Down Expand Up @@ -94,7 +93,6 @@ def __init__(self) -> None:


class Source(Base):

__tablename__ = "sources"

id = Column(Integer, primary_key=True)
Expand Down Expand Up @@ -200,7 +198,6 @@ def __init__(self, **kwargs: Any) -> None:


class Message(Base):

__tablename__ = "messages"
__table_args__ = (
UniqueConstraint("source_id", "file_counter", name="uq_messages_source_id_file_counter"),
Expand Down Expand Up @@ -321,7 +318,6 @@ def seen_by_list(self) -> Dict[str, User]:


class File(Base):

__tablename__ = "files"
__table_args__ = (
UniqueConstraint("source_id", "file_counter", name="uq_messages_source_id_file_counter"),
Expand Down Expand Up @@ -425,7 +421,6 @@ def seen_by(self, journalist_id: int) -> bool:


class Reply(Base):

__tablename__ = "replies"
__table_args__ = (
UniqueConstraint("source_id", "file_counter", name="uq_messages_source_id_file_counter"),
Expand Down Expand Up @@ -574,7 +569,6 @@ def explain(self, classname: str) -> str:


class DraftReply(Base):

__tablename__ = "draftreplies"

id = Column(Integer, primary_key=True)
Expand Down Expand Up @@ -646,7 +640,6 @@ def seen_by_list(self) -> Dict[str, User]:


class ReplySendStatus(Base):

__tablename__ = "replysendstatuses"

id = Column(Integer, primary_key=True)
Expand Down
1 change: 0 additions & 1 deletion securedrop_client/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def connect_signals(
print_preflight_check_requested: Optional[pyqtBoundSignal] = None,
print_requested: Optional[pyqtBoundSignal] = None,
) -> None:

# This instance can optionally react to events to prevent
# coupling it to dependent code.
if export_preflight_check_requested is not None:
Expand Down
1 change: 0 additions & 1 deletion securedrop_client/gui/base/dialogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@


class ModalDialog(QDialog):

DIALOG_CSS = resource_string(__name__, "dialogs.css").decode("utf-8")
BUTTON_CSS = resource_string(__name__, "dialog_button.css").decode("utf-8")
ERROR_DETAILS_CSS = resource_string(__name__, "dialog_message.css").decode("utf-8")
Expand Down
1 change: 0 additions & 1 deletion securedrop_client/gui/base/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ def update_image(self, filename: str, svg_size: Optional[QSize] = None) -> None:


class SecureQLabel(QLabel):

MAX_PREVIEW_LENGTH = 200

def __init__(
Expand Down
Loading