From bfed977e9a90027d520ef1df19a383a1cd83c434 Mon Sep 17 00:00:00 2001 From: Cory Francis Myers Date: Tue, 26 Sep 2023 17:37:02 -0700 Subject: [PATCH] build(verify-mo): verify reproducibility of gettext machine objects From freedomofpress/securedrop-client#1666, from which "verify-mo.py" has been copied verbatim. The "verify-mo" Make target is largely the same, except for some fancy (but temporary) "find" footwork to put "i18n_tool.py"'s outputs where "verify-mo.py" expects them. --- Makefile | 19 +++ devops/scripts/verify-mo.py | 161 ++++++++++++++++++ .../python3/develop-requirements.in | 2 + .../python3/develop-requirements.txt | 102 +++++++++++ 4 files changed, 284 insertions(+) create mode 100755 devops/scripts/verify-mo.py diff --git a/Makefile b/Makefile index 1110d739c66..b00d7169e6c 100644 --- a/Makefile +++ b/Makefile @@ -337,6 +337,9 @@ upgrade-destroy: ## Destroy an upgrade test environment. # ############## +DESKTOP_LOCALE_BASE=install_files/ansible-base/roles/tails-config/templates +DESKTOP_LOCALE_DIR=$(DESKTOP_LOCALE_BASE)/locale + .PHONY: translate translate: ## Update POT files from translated strings in source code. @echo "Updating translations..." @@ -369,6 +372,22 @@ endif cp securedrop/tests/functional/pageslayout/screenshots/en_US/*.png $${DOCS_REPO_DIR}/docs/images/manual/screenshots @echo +.PHONY: verify-mo +verify-mo: ## Verify that all gettext machine objects (.mo) are reproducible from their catalogs (.po). + # TODO(#6917): Once Weblate (rather than i18n_tool.py) is correctly filing + # both .po and .mo under $DESKTOP_LOCALE_DIR, remove this step. (See + # also: 76f3adeed90f4aaadbf0685e09dec6314367d5c0.) + @find ${DESKTOP_LOCALE_BASE} \ + -maxdepth 1 \ + -name "*.po" \ + -exec bash -c 'PO="$$(basename {} | sed \'s/.po//')"; cp ${DESKTOP_LOCALE_BASE}/$${PO}.po $(DESKTOP_LOCALE_DIR)/$${PO}/LC_MESSAGES/messages.po' \; + @TERM=dumb devops/scripts/verify-mo.py ${DESKTOP_LOCALE_DIR}/* + @# All good; now clean up. 
+ @# TODO(#6917): git restore "${LOCALE_DIR}/**/*.po" + @find ${DESKTOP_LOCALE_DIR} \ + -name "*.po" \ + -delete + ########### # diff --git a/devops/scripts/verify-mo.py b/devops/scripts/verify-mo.py new file mode 100755 index 00000000000..ffbdda71fe0 --- /dev/null +++ b/devops/scripts/verify-mo.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +""" +Verify the reproducibility of gettext machine objects (.mo) from catalogs +(.po). + +Due to tool- and library-level idiosyncrasies, this happens in three stages: + +1. Via polib: Overwrite metadata .mo → .po. +2. Via translate: Recompile the entire catalog .po → .mo. +3. Via diffoscope: Diff the new .mo against the old, heavily masked and + filtered to avoid false positives from stray entries in the "fuzzy" + and "obsolete" states. + +In other words, the new .mo file should be identical (modulo stray entries) to +the original, meaning that the original .po/.mo pair differed only in their +metadata. +""" + +import argparse +import os +import shlex +import subprocess +from collections.abc import Iterator +from pathlib import Path +from types import TracebackType +from typing import Optional, Set + +import polib +from translate.tools.pocompile import convertmo + +parser = argparse.ArgumentParser( + """Verify the reproducibility of gettext machine objects (.mo) from catalogs (.po).""" +) +parser.add_argument( + "locale", + nargs="+", + help="""one or more locale directories, each of which must contain an "LC_MESSAGES" directory""", +) +parser.add_argument( + "--domain", default="messages", help="""the gettext domain to load (defaults to "messages")""" +) +args = parser.parse_args() + + +class CatalogVerifier: + """Wrapper class for proving .mo → .po → .mo reproducibility.""" + + def __init__(self, path: Path, domain: str): + """Set up the .po/.mo pair.""" + + self.path = path + self.po = polib.pofile(str(path / "LC_MESSAGES" / f"{domain}.po")) + self.mo = polib.mofile(str(path / "LC_MESSAGES" / f"{domain}.mo")) + + def 
__enter__(self) -> "CatalogVerifier": + """Prepare to generate the new .mo file to diff.""" + + self.mo_target = Path(f"{self.mo.fpath}.new") + return self + + def __exit__( + self, + exc_type: Optional[type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: + """Clean up.""" + + self.mo_target.unlink(missing_ok=True) + + @property + def strays(self) -> Set[str]: + """Return the set of stray (fuzzy or obsolete) entries to mask when + diffing this catalog.""" + + fuzzy = { + f"^{line.replace('#| ', '')}" # strip fuzzy marker + for e in self.po.fuzzy_entries() + for line in str(e).splitlines() + } + obsolete = { + f"^{line.replace('#~ ', '')}" # strip obsolete marker + for e in self.po.obsolete_entries() + for line in str(e).splitlines() + } + + return fuzzy | obsolete + + def diffoscope_args(self, a: Path, b: Path, filtered: bool = True) -> Iterator[str]: + """Build up a diffoscope invocation that (with `filtered`) removes + false positives from the msgunfmt diff.""" + + yield f"diffoscope {a} {b}" + + if not filtered: + return + + yield "--diff-mask '^$'" # tell diffoscope to mask empty lines + for stray in self.strays: + yield f"--diff-mask {shlex.quote(stray)}" # tell diffoscope to mask strays + yield "| grep -Fv '[masked]'" # ignore things we've masked + yield "| grep -E '│ (-|\+)msg(id|str)'" # ignore context; we only care about real diffs + + def diffoscope_call( + self, a: Path, b: Path, filtered: bool = True + ) -> subprocess.CompletedProcess: + """Call diffoscope and return the subprocess.CompletedProcess result + for further processing, *without* first checking whether it was + successful.""" + + cmd = " ".join(self.diffoscope_args(a, b, filtered)) + + # We silence Bandit and Semgrep warnings on `shell=True` + # because we want to inherit the Python virtual environment + # in which we're invoked. 
+ return subprocess.run( # nosec B602 nosemgrep: python.lang.security.audit.subprocess-shell-true.subprocess-shell-true + cmd, + capture_output=True, + env=os.environ, + shell=True, + ) + + def reproduce(self) -> None: + """Overwrite metadata .mo → .po. Then rewrite the entire file .po → + .mo.""" + + self.po.metadata = self.mo.metadata + self.po.save(self.po.fpath) + + with open(self.mo_target, "wb") as mo_target: + convertmo(self.po.fpath, mo_target, "") + + def verify(self) -> None: + """Run diffoscope for this catalog and error if there's any unmasked + diff.""" + + # Without filtering, diffoscope should return either 0 (no differences) + # or 1 (differences); anything else is an error. + test = self.diffoscope_call(Path(self.mo.fpath), Path(self.mo_target), filtered=False) + if test.returncode not in [0, 1]: + test.check_returncode() + + # With filtering, since diffoscope will return 1 on differences + # (pre-filtering), and grep will return 1 on *no* differences + # (post-filtering), we can't count on result.returncode here. 
+ result = self.diffoscope_call(Path(self.mo.fpath), Path(self.mo_target)) + print(f"--> Verifying {self.path}: {result.args}") + if len(result.stdout) > 0: + raise Exception(result.stdout.decode("utf-8")) + + +print(f"--> Reproducing {len(args.locale)} path(s)") +for path in args.locale: + locale_dir = Path(path).resolve() + if not locale_dir.is_dir(): + print(f'--> Skipping "{locale_dir}"') + continue + + with CatalogVerifier(locale_dir, args.domain) as catalog: + catalog.reproduce() + catalog.verify() diff --git a/securedrop/requirements/python3/develop-requirements.in b/securedrop/requirements/python3/develop-requirements.in index 32a89871f2a..e888974bb75 100644 --- a/securedrop/requirements/python3/develop-requirements.in +++ b/securedrop/requirements/python3/develop-requirements.in @@ -21,6 +21,7 @@ netaddr # Now also pin pip due to https://github.com/jazzband/pip-tools/issues/853 pip>=21.3 pip-tools>=6.1.0 +polib psutil>=5.6.6 pylint>=2.7.0 # pyenchant is via pylint[spelling] @@ -38,5 +39,6 @@ semgrep>=0.98.0 setuptools>=56.0.0 shellcheck-py testinfra>=5.3.1 +translate-toolkit urllib3>=1.26.5 yamllint diff --git a/securedrop/requirements/python3/develop-requirements.txt b/securedrop/requirements/python3/develop-requirements.txt index c8cb3e3c36b..b165ae829c5 100644 --- a/securedrop/requirements/python3/develop-requirements.txt +++ b/securedrop/requirements/python3/develop-requirements.txt @@ -418,6 +418,100 @@ libarchive-c==4.0 \ --hash=sha256:a5b41ade94ba58b198d778e68000f6b7de41da768de7140c984f71d7fa8416e5 \ --hash=sha256:b7306dceaeeac199dd53a471ea9d39ed4be8e81962df99d628db7a12efbb9b52 # via diffoscope +lxml==4.9.3 \ + --hash=sha256:05186a0f1346ae12553d66df1cfce6f251589fea3ad3da4f3ef4e34b2d58c6a3 \ + --hash=sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d \ + --hash=sha256:081d32421db5df44c41b7f08a334a090a545c54ba977e47fd7cc2deece78809a \ + --hash=sha256:0a3d3487f07c1d7f150894c238299934a2a074ef590b583103a45002035be120 \ + 
--hash=sha256:0bfd0767c5c1de2551a120673b72e5d4b628737cb05414f03c3277bf9bed3305 \ + --hash=sha256:0c0850c8b02c298d3c7006b23e98249515ac57430e16a166873fc47a5d549287 \ + --hash=sha256:0e2cb47860da1f7e9a5256254b74ae331687b9672dfa780eed355c4c9c3dbd23 \ + --hash=sha256:120fa9349a24c7043854c53cae8cec227e1f79195a7493e09e0c12e29f918e52 \ + --hash=sha256:1247694b26342a7bf47c02e513d32225ededd18045264d40758abeb3c838a51f \ + --hash=sha256:141f1d1a9b663c679dc524af3ea1773e618907e96075262726c7612c02b149a4 \ + --hash=sha256:14e019fd83b831b2e61baed40cab76222139926b1fb5ed0e79225bc0cae14584 \ + --hash=sha256:1509dd12b773c02acd154582088820893109f6ca27ef7291b003d0e81666109f \ + --hash=sha256:17a753023436a18e27dd7769e798ce302963c236bc4114ceee5b25c18c52c693 \ + --hash=sha256:1e224d5755dba2f4a9498e150c43792392ac9b5380aa1b845f98a1618c94eeef \ + --hash=sha256:1f447ea5429b54f9582d4b955f5f1985f278ce5cf169f72eea8afd9502973dd5 \ + --hash=sha256:23eed6d7b1a3336ad92d8e39d4bfe09073c31bfe502f20ca5116b2a334f8ec02 \ + --hash=sha256:25f32acefac14ef7bd53e4218fe93b804ef6f6b92ffdb4322bb6d49d94cad2bc \ + --hash=sha256:2c74524e179f2ad6d2a4f7caf70e2d96639c0954c943ad601a9e146c76408ed7 \ + --hash=sha256:303bf1edce6ced16bf67a18a1cf8339d0db79577eec5d9a6d4a80f0fb10aa2da \ + --hash=sha256:3331bece23c9ee066e0fb3f96c61322b9e0f54d775fccefff4c38ca488de283a \ + --hash=sha256:3e9bdd30efde2b9ccfa9cb5768ba04fe71b018a25ea093379c857c9dad262c40 \ + --hash=sha256:411007c0d88188d9f621b11d252cce90c4a2d1a49db6c068e3c16422f306eab8 \ + --hash=sha256:42871176e7896d5d45138f6d28751053c711ed4d48d8e30b498da155af39aebd \ + --hash=sha256:46f409a2d60f634fe550f7133ed30ad5321ae2e6630f13657fb9479506b00601 \ + --hash=sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c \ + --hash=sha256:48d6ed886b343d11493129e019da91d4039826794a3e3027321c56d9e71505be \ + --hash=sha256:4930be26af26ac545c3dffb662521d4e6268352866956672231887d18f0eaab2 \ + --hash=sha256:4aec80cde9197340bc353d2768e2a75f5f60bacda2bab72ab1dc499589b3878c \ + 
--hash=sha256:4c28a9144688aef80d6ea666c809b4b0e50010a2aca784c97f5e6bf143d9f129 \ + --hash=sha256:4d2d1edbca80b510443f51afd8496be95529db04a509bc8faee49c7b0fb6d2cc \ + --hash=sha256:4dd9a263e845a72eacb60d12401e37c616438ea2e5442885f65082c276dfb2b2 \ + --hash=sha256:4f1026bc732b6a7f96369f7bfe1a4f2290fb34dce00d8644bc3036fb351a4ca1 \ + --hash=sha256:4fb960a632a49f2f089d522f70496640fdf1218f1243889da3822e0a9f5f3ba7 \ + --hash=sha256:50670615eaf97227d5dc60de2dc99fb134a7130d310d783314e7724bf163f75d \ + --hash=sha256:50baa9c1c47efcaef189f31e3d00d697c6d4afda5c3cde0302d063492ff9b477 \ + --hash=sha256:53ace1c1fd5a74ef662f844a0413446c0629d151055340e9893da958a374f70d \ + --hash=sha256:5515edd2a6d1a5a70bfcdee23b42ec33425e405c5b351478ab7dc9347228f96e \ + --hash=sha256:56dc1f1ebccc656d1b3ed288f11e27172a01503fc016bcabdcbc0978b19352b7 \ + --hash=sha256:578695735c5a3f51569810dfebd05dd6f888147a34f0f98d4bb27e92b76e05c2 \ + --hash=sha256:57aba1bbdf450b726d58b2aea5fe47c7875f5afb2c4a23784ed78f19a0462574 \ + --hash=sha256:57d6ba0ca2b0c462f339640d22882acc711de224d769edf29962b09f77129cbf \ + --hash=sha256:5c245b783db29c4e4fbbbfc9c5a78be496c9fea25517f90606aa1f6b2b3d5f7b \ + --hash=sha256:5c31c7462abdf8f2ac0577d9f05279727e698f97ecbb02f17939ea99ae8daa98 \ + --hash=sha256:64f479d719dc9f4c813ad9bb6b28f8390360660b73b2e4beb4cb0ae7104f1c12 \ + --hash=sha256:65299ea57d82fb91c7f019300d24050c4ddeb7c5a190e076b5f48a2b43d19c42 \ + --hash=sha256:6689a3d7fd13dc687e9102a27e98ef33730ac4fe37795d5036d18b4d527abd35 \ + --hash=sha256:690dafd0b187ed38583a648076865d8c229661ed20e48f2335d68e2cf7dc829d \ + --hash=sha256:6fc3c450eaa0b56f815c7b62f2b7fba7266c4779adcf1cece9e6deb1de7305ce \ + --hash=sha256:704f61ba8c1283c71b16135caf697557f5ecf3e74d9e453233e4771d68a1f42d \ + --hash=sha256:71c52db65e4b56b8ddc5bb89fb2e66c558ed9d1a74a45ceb7dcb20c191c3df2f \ + --hash=sha256:71d66ee82e7417828af6ecd7db817913cb0cf9d4e61aa0ac1fde0583d84358db \ + --hash=sha256:7d298a1bd60c067ea75d9f684f5f3992c9d6766fadbc0bcedd39750bf344c2f4 \ + 
--hash=sha256:8b77946fd508cbf0fccd8e400a7f71d4ac0e1595812e66025bac475a8e811694 \ + --hash=sha256:8d7e43bd40f65f7d97ad8ef5c9b1778943d02f04febef12def25f7583d19baac \ + --hash=sha256:8df133a2ea5e74eef5e8fc6f19b9e085f758768a16e9877a60aec455ed2609b2 \ + --hash=sha256:8ed74706b26ad100433da4b9d807eae371efaa266ffc3e9191ea436087a9d6a7 \ + --hash=sha256:92af161ecbdb2883c4593d5ed4815ea71b31fafd7fd05789b23100d081ecac96 \ + --hash=sha256:97047f0d25cd4bcae81f9ec9dc290ca3e15927c192df17331b53bebe0e3ff96d \ + --hash=sha256:9719fe17307a9e814580af1f5c6e05ca593b12fb7e44fe62450a5384dbf61b4b \ + --hash=sha256:9767e79108424fb6c3edf8f81e6730666a50feb01a328f4a016464a5893f835a \ + --hash=sha256:9a92d3faef50658dd2c5470af249985782bf754c4e18e15afb67d3ab06233f13 \ + --hash=sha256:9bb6ad405121241e99a86efff22d3ef469024ce22875a7ae045896ad23ba2340 \ + --hash=sha256:9e28c51fa0ce5674be9f560c6761c1b441631901993f76700b1b30ca6c8378d6 \ + --hash=sha256:aca086dc5f9ef98c512bac8efea4483eb84abbf926eaeedf7b91479feb092458 \ + --hash=sha256:ae8b9c6deb1e634ba4f1930eb67ef6e6bf6a44b6eb5ad605642b2d6d5ed9ce3c \ + --hash=sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c \ + --hash=sha256:b4e4bc18382088514ebde9328da057775055940a1f2e18f6ad2d78aa0f3ec5b9 \ + --hash=sha256:b6420a005548ad52154c8ceab4a1290ff78d757f9e5cbc68f8c77089acd3c432 \ + --hash=sha256:b86164d2cff4d3aaa1f04a14685cbc072efd0b4f99ca5708b2ad1b9b5988a991 \ + --hash=sha256:bb3bb49c7a6ad9d981d734ef7c7193bc349ac338776a0360cc671eaee89bcf69 \ + --hash=sha256:bef4e656f7d98aaa3486d2627e7d2df1157d7e88e7efd43a65aa5dd4714916cf \ + --hash=sha256:c0781a98ff5e6586926293e59480b64ddd46282953203c76ae15dbbbf302e8bb \ + --hash=sha256:c2006f5c8d28dee289f7020f721354362fa304acbaaf9745751ac4006650254b \ + --hash=sha256:c41bfca0bd3532d53d16fd34d20806d5c2b1ace22a2f2e4c0008570bf2c58833 \ + --hash=sha256:cd47b4a0d41d2afa3e58e5bf1f62069255aa2fd6ff5ee41604418ca925911d76 \ + --hash=sha256:cdb650fc86227eba20de1a29d4b2c1bfe139dc75a0669270033cb2ea3d391b85 \ + 
--hash=sha256:cef2502e7e8a96fe5ad686d60b49e1ab03e438bd9123987994528febd569868e \ + --hash=sha256:d27be7405547d1f958b60837dc4c1007da90b8b23f54ba1f8b728c78fdb19d50 \ + --hash=sha256:d37017287a7adb6ab77e1c5bee9bcf9660f90ff445042b790402a654d2ad81d8 \ + --hash=sha256:d3ff32724f98fbbbfa9f49d82852b159e9784d6094983d9a8b7f2ddaebb063d4 \ + --hash=sha256:d73d8ecf8ecf10a3bd007f2192725a34bd62898e8da27eb9d32a58084f93962b \ + --hash=sha256:dd708cf4ee4408cf46a48b108fb9427bfa00b9b85812a9262b5c668af2533ea5 \ + --hash=sha256:e3cd95e10c2610c360154afdc2f1480aea394f4a4f1ea0a5eacce49640c9b190 \ + --hash=sha256:e4da8ca0c0c0aea88fd46be8e44bd49716772358d648cce45fe387f7b92374a7 \ + --hash=sha256:eadfbbbfb41b44034a4c757fd5d70baccd43296fb894dba0295606a7cf3124aa \ + --hash=sha256:ed667f49b11360951e201453fc3967344d0d0263aa415e1619e85ae7fd17b4e0 \ + --hash=sha256:f3df3db1d336b9356dd3112eae5f5c2b8b377f3bc826848567f10bfddfee77e9 \ + --hash=sha256:f6bdac493b949141b733c5345b6ba8f87a226029cbabc7e9e121a413e49441e0 \ + --hash=sha256:fbf521479bcac1e25a663df882c46a641a9bff6b56dc8b0fafaebd2f66fb231b \ + --hash=sha256:fc9b106a1bf918db68619fdcd6d5ad4f972fdd19c01d19bdb6bf63f3589a9ec5 \ + --hash=sha256:fcdd00edfd0a3001e0181eab3e63bd5c74ad3e67152c84f93f13769a40e073a7 \ + --hash=sha256:fe4bda6bd4340caa6e5cf95e73f8fea5c4bfc55763dd42f1b50a94c1b4a2fbd4 + # via translate-toolkit markupsafe==2.0.1 \ --hash=sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298 \ --hash=sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64 \ @@ -577,6 +671,10 @@ pluggy==0.13.1 \ # via # molecule # pytest +polib==1.2.0 \ + --hash=sha256:1c77ee1b81feb31df9bca258cbc58db1bbb32d10214b173882452c73af06d62d \ + --hash=sha256:f3ef94aefed6e183e342a8a269ae1fc4742ba193186ad76f175938621dbfc26b + # via -r requirements/python3/develop-requirements.in poyo==0.4.1 \ --hash=sha256:103b4ee3e1c7765098fe1cabe43f828db2e2a6079646561a2117e1a809f352d6 \ 
--hash=sha256:230ec11c2f35a23410c1f0e474f09fa4e203686f40ab3adca7b039c845d8c325 @@ -909,6 +1007,10 @@ tqdm==4.64.0 \ --hash=sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d \ --hash=sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6 # via semgrep +translate-toolkit==3.10.1 \ + --hash=sha256:642e8597c55c3a31b8a6506194f0e64095965d5a1e7e0261d045a2865783a366 \ + --hash=sha256:ce0f7702259b4469868883d58176b65845dd1cf75179e0ebb9e95cdc65d5b2b0 + # via -r requirements/python3/develop-requirements.in tree-format==0.1.2 \ --hash=sha256:a538523aa78ae7a4b10003b04f3e1b37708e0e089d99c9d3b9e1c71384c9a7f9 \ --hash=sha256:b5056228dbedde1fb81b79f71fb0c23c98e9d365230df9b29af76e8d8003de11