From 79640ee5d80ce00dbaad7df87f18618f14719b5a Mon Sep 17 00:00:00 2001 From: John Hensley Date: Wed, 7 Oct 2020 17:37:33 -0400 Subject: [PATCH] Improve i18n_tool.py list-translators, update-from-weblate Allow the specification of a commit whose timestamp will be used as the starting point for gathering translator credits, instead of starting from the last commit that says "l10n: sync". That breaks badly if there are any source string changes during the release cycle that require another sync, so instead, just examine all the commits since the time of the specified revision, defaulting to the last release tag. Also, instead of always gathering translator contributions up to the tip of i18n/i18n for list-translators and update-from-weblate, allow the specification of a commit. This is intended to allow verification of these functions' results; release management tasks should use the default target of the i18n/i18n branch tip. --- securedrop/i18n_tool.py | 195 ++++++++++++++++++++++++++-------------- 1 file changed, 130 insertions(+), 65 deletions(-) diff --git a/securedrop/i18n_tool.py b/securedrop/i18n_tool.py index 01abab2cee..10b7cb1571 100755 --- a/securedrop/i18n_tool.py +++ b/securedrop/i18n_tool.py @@ -13,10 +13,7 @@ import textwrap from argparse import _SubParsersAction from typing import Optional -from typing import Union - from typing import Set - from typing import List import version @@ -30,7 +27,6 @@ class I18NTool: - # # The database of support language, indexed by the language code # used by weblate (i.e. whatever shows as CODE in @@ -41,7 +37,7 @@ class I18NTool: # display in the interface. # desktop: The language code used for dekstop icons. # - SUPPORTED_LANGUAGES = { + supported_languages = { 'ar': {'name': 'Arabic', 'desktop': 'ar', }, 'ca': {'name': 'Catalan', 'desktop': 'ca', }, 'cs': {'name': 'Czech', 'desktop': 'cs', }, @@ -62,12 +58,18 @@ class I18NTool: 'tr': {'name': 'Turkish', 'desktop': 'tr', }, 'zh_Hant': {'name': 'Chinese, Traditional', 'desktop': 'zh_Hant', }, } + release_tag_re = re.compile(r"^\d+\.\d+\.\d+$") + translated_commit_re = re.compile('Translated using Weblate') + updated_commit_re = re.compile(r'(?:updated from| (?:revision|commit):) (\w+)') - def file_is_modified(self, path: str) -> int: - dir = dirname(path) - return subprocess.call(['git', '-C', dir, 'diff', '--quiet', path]) + def file_is_modified(self, path: str) -> bool: + return bool(subprocess.call(['git', '-C', dirname(path), 'diff', '--quiet', path])) def ensure_i18n_remote(self, args: argparse.Namespace) -> None: + """ + Make sure we have a git remote for the i18n repo. + """ + k = {'_cwd': args.root} if b'i18n' not in git.remote(**k).stdout: git.remote.add('i18n', args.url, **k) @@ -219,7 +221,7 @@ def require_git_email_name(git_dir: str) -> bool: def update_docs(self, args: argparse.Namespace) -> None: l10n_content = u'.. GENERATED BY i18n_tool.py DO NOT EDIT:\n\n' - for (code, info) in sorted(I18NTool.SUPPORTED_LANGUAGES.items()): + for (code, info) in sorted(self.supported_languages.items()): l10n_content += '* ' + info['name'] + ' (``' + code + '``)\n' includes = abspath(join(args.docs_repo_dir, 'docs/includes')) l10n_txt = join(includes, 'l10n.txt') @@ -246,73 +248,90 @@ def set_update_docs_parser(self, subps: _SubParsersAction) -> None: parser.set_defaults(func=self.update_docs) def update_from_weblate(self, args: argparse.Namespace) -> None: + """ + Pull in updated translations from the i18n repo. + """ self.ensure_i18n_remote(args) - codes = list(I18NTool.SUPPORTED_LANGUAGES.keys()) + codes = list(self.supported_languages.keys()) if args.supported_languages: codes = args.supported_languages.split(',') for code in sorted(codes): - info = I18NTool.SUPPORTED_LANGUAGES[code] + info = self.supported_languages[code] + + def need_update(path: str) -> bool: + """ + Check if the file is different in the i18n repo. + """ - def need_update(p: str) -> Union[bool, int]: - exists = os.path.exists(join(args.root, p)) + exists = os.path.exists(join(args.root, path)) k = {'_cwd': args.root} - git.checkout('i18n/i18n', '--', p, **k) - git.reset('HEAD', '--', p, **k) + git.checkout(args.target, '--', path, **k) + git.reset('HEAD', '--', path, **k) if not exists: return True - else: - return self.file_is_modified(join(args.root, p)) - def add(p: str) -> None: - git('-C', args.root, 'add', p) + return self.file_is_modified(join(args.root, path)) + + def add(path: str) -> None: + """ + Add the file to the git index. + """ + git('-C', args.root, 'add', path) updated = False # - # Update messages + # Add changes to web .po files # - p = "securedrop/translations/{l}/LC_MESSAGES/messages.po".format( + path = "securedrop/translations/{l}/LC_MESSAGES/messages.po".format( l=code) # noqa: E741 - if need_update(p): - add(p) + if need_update(path): + add(path) updated = True # - # Update desktop + # Add changes to desktop .po files # desktop_code = info['desktop'] - p = join("install_files/ansible-base/roles", + path = join("install_files/ansible-base/roles", "tails-config/templates/{l}.po".format( l=desktop_code)) # noqa: E741 - if need_update(p): - add(p) + if need_update(path): + add(path) updated = True if updated: - self.upstream_commit(args, code) + self.commit_changes(args, code) - def translators(self, args: argparse.Namespace, path: str, commit_range: str) -> Set[str]: + def translators(self, args: argparse.Namespace, path: str, since: Optional[str]) -> Set[str]: """ Return the set of people who've modified a file in Weblate. Extracts all the authors of translation changes to the given - path in the given commit range. Translation changes are + path since the given timestamp. Translation changes are identified by the presence of "Translated using Weblate" in the commit message. """ - translation_re = re.compile('Translated using Weblate') - path_changes = git( - '--no-pager', '-C', args.root, - 'log', '--format=%aN\x1e%s', commit_range, '--', path, - _encoding='utf-8' - ) + if since: + path_changes = git( + '--no-pager', '-C', args.root, + 'log', '--format=%aN\x1e%s', '--since', since, args.target, '--', path, + _encoding='utf-8' + ) + else: + path_changes = git( + '--no-pager', '-C', args.root, + 'log', '--format=%aN\x1e%s', args.target, '--', path, + _encoding='utf-8' + ) path_changes = u"{}".format(path_changes) path_changes = [c.split('\x1e') for c in path_changes.strip().split('\n')] - path_changes = [c for c in path_changes if len(c) > 1 and translation_re.match(c[1])] - + path_changes = [ + c for c in path_changes if len(c) > 1 and self.translated_commit_re.match(c[1]) + ] path_authors = [c[0] for c in path_changes] return set(path_authors) - def upstream_commit(self, args: argparse.Namespace, code: str) -> None: + def commit_changes(self, args: argparse.Namespace, code: str) -> None: self.require_git_email_name(args.root) authors = set() # type: Set[str] diffs = u"{}".format(git('--no-pager', '-C', args.root, 'diff', '--name-only', '--cached')) @@ -321,18 +340,17 @@ def upstream_commit(self, args: argparse.Namespace, code: str) -> None: previous_message = u"{}".format(git( '--no-pager', '-C', args.root, 'log', '-n', '1', path, _encoding='utf-8')) - update_re = re.compile(r'(?:updated from| revision:) (\w+)') - m = update_re.search(previous_message) + m = self.updated_commit_re.search(previous_message) + origin = None if m: origin = m.group(1) - else: - origin = '' - authors |= self.translators(args, path, '{}..i18n/i18n'.format(origin)) + since = self.get_commit_timestamp(args.root, origin) + authors |= self.translators(args, path, since) authors_as_str = u"\n ".join(sorted(authors)) - current = git('-C', args.root, 'rev-parse', 'i18n/i18n') - info = I18NTool.SUPPORTED_LANGUAGES[code] + current = git('-C', args.root, 'rev-parse', args.target) + info = self.supported_languages[code] message = textwrap.dedent(u""" l10n: updated {name} ({code}) @@ -341,7 +359,7 @@ def upstream_commit(self, args: argparse.Namespace, code: str) -> None: updated from: repo: {remote} - revision: {current} + commit: {current} """).format( remote=args.url, name=info['name'], @@ -366,6 +384,14 @@ def set_update_from_weblate_parser(self, subps: _SubParsersAction) -> None: default=url, help=('URL of the weblate repository' ' (default {})'.format(url))) + parser.add_argument( + '--target', + default="i18n/i18n", + help=( + 'Commit on i18n branch at which to stop gathering translator contributions ' + '(default: i18n/i18n)' + ) + ) parser.add_argument( '--supported-languages', help='comma separated list of supported languages') @@ -387,12 +413,12 @@ def set_list_locales_parser(self, subps: _SubParsersAction) -> None: def list_locales(self, args: argparse.Namespace) -> None: if args.lines: - for l in sorted(list(self.SUPPORTED_LANGUAGES.keys()) + ['en_US']): + for l in sorted(list(self.supported_languages.keys()) + ['en_US']): print(l) elif args.python: - print(sorted(list(self.SUPPORTED_LANGUAGES.keys()) + ['en_US'])) + print(sorted(list(self.supported_languages.keys()) + ['en_US'])) else: - print(" ".join(sorted(list(self.SUPPORTED_LANGUAGES.keys()) + ['en_US']))) + print(" ".join(sorted(list(self.supported_languages.keys()) + ['en_US']))) def set_list_translators_parser(self, subps: _SubParsersAction) -> None: parser = subps.add_parser('list-translators', @@ -409,30 +435,64 @@ def set_list_translators_parser(self, subps: _SubParsersAction) -> None: default=url, help=('URL of the weblate repository' ' (default {})'.format(url))) + parser.add_argument( + '--target', + default="i18n/i18n", + help=( + 'Commit on i18n branch at which to stop gathering translator contributions ' + '(default: i18n/i18n)' + ) + ) + parser.add_argument( + '--since', + help=( + 'Gather translator contributions from the time of this commit ' + '(default: last release tag)' + ) + ) parser.add_argument( '--all', action="store_true", help=( "List everyone who's ever contributed, instead of just since the last " - "sync from Weblate." + "release or specified commit." ) ) parser.set_defaults(func=self.list_translators) - def get_last_sync(self) -> Optional[str]: - commits = git('--no-pager', 'log', '--format=%h:%s', 'i18n/i18n', _encoding='utf-8') - for commit in commits: - commit_hash, msg = commit.split(':', 1) - if msg.startswith("l10n: sync "): - return commit_hash - return None + def get_last_release(self, root: str) -> str: + """ + Returns the last release tag, e.g. 1.5.0. + """ + tags = subprocess.check_output( + ["git", "-C", root, "tag", "--list"] + ).decode("utf-8").splitlines() + release_tags = sorted([t.strip() for t in tags if self.release_tag_re.match(t)]) + if not release_tags: + raise ValueError("Could not find a release tag!") + return release_tags[-1] + + def get_commit_timestamp(self, root: str, commit: Optional[str]) -> str: + """ + Returns the UNIX timestamp of the given commit. + """ + cmd = ["git", "-C", root, "log", "-n", "1", '--pretty=format:%ct'] + if commit: + cmd.append(commit) + + timestamp = subprocess.check_output(cmd) + return timestamp.decode("utf-8").strip() def list_translators(self, args: argparse.Namespace) -> None: self.ensure_i18n_remote(args) app_template = "securedrop/translations/{}/LC_MESSAGES/messages.po" desktop_template = "install_files/ansible-base/roles/tails-config/templates/{}.po" - last_sync = self.get_last_sync() - for code, info in sorted(I18NTool.SUPPORTED_LANGUAGES.items()): + since = self.get_commit_timestamp(args.root, args.since) if not args.all else None + if args.all: + print("Listing all translators who have ever helped") + else: + print("Listing translators who have helped since {}".format(args.since)) + for code, info in sorted(self.supported_languages.items()): translators = set([]) paths = [ app_template.format(code), @@ -440,19 +500,24 @@ def list_translators(self, args: argparse.Namespace) -> None: ] for path in paths: try: - commit_range = "i18n/i18n" - if last_sync and not args.all: - commit_range = '{}..{}'.format(last_sync, commit_range) - t = self.translators(args, path, commit_range) + t = self.translators(args, path, since) translators.update(t) except Exception as e: print("Could not check git history of {}: {}".format(path, e), file=sys.stderr) - print(u"{} ({}):\n {}".format(code, info["name"], "\n ".join(sorted(translators)))) + print( + "{} ({}):{}".format( + code, info["name"], + "\n {}\n".format( + "\n ".join(sorted(translators))) if translators else "\n" + ) + ) def get_args(self) -> argparse.ArgumentParser: parser = argparse.ArgumentParser( prog=__file__, description='i18n tool for SecureDrop.') + parser.set_defaults(func=lambda args: parser.print_help()) + parser.add_argument('-v', '--verbose', action='store_true') subps = parser.add_subparsers()