From 9ea0d80db2d2b3398104b071568d07ca39fe85ea Mon Sep 17 00:00:00 2001 From: buhtz Date: Thu, 8 Feb 2024 09:37:58 +0100 Subject: [PATCH] Build: Validation of shortcut letters in translations takes groups into account Validation of shortcut letters in translations distinguishes between "shortcut groups". Two "shortcut groups" exist in Back In Time: 1) the menu bar entries in the main window and 2) the tabs in the manage profiles dialog. The script checks for redundant use of shortcut letters inside those groups (e.g. "B" in "&Backup" and "&Back In Time") and reports it. Additionally, updating po files from py-sources now removes obsolete strings. --- .codespellrc | 2 +- .travis.yml | 2 +- common/doc-dev/2_localization.md | 5 +- update_language_files.py | 167 +++++++++++++++++++++++-------- 4 files changed, 133 insertions(+), 43 deletions(-) diff --git a/.codespellrc b/.codespellrc index b7e0492a4..c4d28d1af 100644 --- a/.codespellrc +++ b/.codespellrc @@ -19,7 +19,7 @@ uri-ignore-words-list=mitre # Good to know about allowed/ignored words: # Codespell acts a bit unusual when it comes to case-sensitivity. # By default the word "Manuel" is an error and codespell recommends to -# modify it into "Manual". Tu allow this German name "Manuel" we have to +# modify it into "Manual". To allow this German name "Manuel" we have to # add "manual" (lower case!) to the "ignore-words-list". The upper-case # version do not work. # See: https://github.com/codespell-project/codespell/issues/3210 diff --git a/.travis.yml b/.travis.yml index f19a607f9..fd6848f0e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -51,7 +51,7 @@ jobs: - python: "3.9" - python: "3.10" - python: "3.11" - # Excluding temporarily because Pyhton 3.12 for ppc64le is not + # Excluding temporarily because Python 3.12 for ppc64le is not # available yet on Travis. 
- arch: ppc64le python: "3.12" diff --git a/common/doc-dev/2_localization.md b/common/doc-dev/2_localization.md index c1b77661e..67ead5706 100644 --- a/common/doc-dev/2_localization.md +++ b/common/doc-dev/2_localization.md @@ -35,8 +35,9 @@ GitHub. 6. Scan `py`-files for modified source strings via `./update_language_files.py source`. 7. Check via `git status` or `git diff`. The file `messages.pot` and all `po`-files should be modified. 8. Commit. -9. Create PR and merge into "dev". -10. Weblate ["Repository maintenance"](https://translate.codeberg.org/projects/backintime/#repository): +9. Optional: Check for redundant letters in "shortcut groups" via `./update_language_files.py shortcuts`. +10. Create PR and merge into "dev". +11. Weblate ["Repository maintenance"](https://translate.codeberg.org/projects/backintime/#repository): 1. Go to "Danger zone" and click on "Reset". 2. "Unlock" the project. diff --git a/update_language_files.py b/update_language_files.py index 2606a1975..8b40433af 100755 --- a/update_language_files.py +++ b/update_language_files.py @@ -30,6 +30,8 @@ PACKAGE_NAME = 'Back In Time' PACKAGE_VERSION = Path('VERSION').read_text('utf-8').strip() BUG_ADDRESS = 'https://github.com/bit-team/backintime' +# RegEx pattern: Character & followed by a word character (extract as group) +REX_SHORTCUT_LETTER = re.compile(r'&(\w)') def update_po_template(): @@ -104,7 +106,15 @@ def update_po_language_files(): f'{po_path}', f'{TEMPLATE_PO}' ] + run(cmd, check=True) + # remove obsolete entries ("#~ msgid) + cmd = [ + 'msgattrib', + '--no-obsolete', + f'--output-file={po_path}', + f'{po_path}' + ] run(cmd, check=True) @@ -278,7 +288,7 @@ def create_language_names_dict(language_codes: list) -> dict: 'Can not import package "babel". 
Please install it.') from exc # Source language (English) should be included - if not 'en' in language_codes: + if 'en' not in language_codes: language_codes.append('en') # Don't use defaultdict because pprint can't handle it @@ -324,59 +334,138 @@ def update_language_names() -> dict: return languages.names -def check_shortcuts(): - """Keyboard shortcuts are indicated via the & in front of a character - in an GUI string (e.g. a button or tab). As an example '&Exclude' and - '&Export' do not work because both of them indicate the 'E' as a - shortcut. +def get_shortcut_entries(po_file: polib.POFile) -> list[polib.POEntry]: + """Return list of po-file entries using a shortcut indicator ("&") + and are not obsolete. + """ + result = filter(lambda entry: entry.obsolete == 0 and + REX_SHORTCUT_LETTER.search(entry.msgid), po_file) + + return list(result) + + +def get_shortcut_groups() -> dict[str, list]: + """Return the currently used "shortcut groups" and validate if they are + up to date with the source strings in "messages.pot". + + Returns: + A dictionarie indexed by group names with list of source strings. + + Raises: + ValueError: If the shortcut indicator using source strings are + modified. + """ + + # Get all entries using a shortcut indicator + real = get_shortcut_entries(polib.pofile(TEMPLATE_PO)) + # Reduce to their source strings + real = [entry.msgid for entry in real] + + # Later this list is sliced into multiple groups + expect = [ + # Main window (menu bar) + '&Backup', + '&Restore', + '&Help', + # Manage profiles dialog (tabs) + '&General', + '&Include', + '&Exclude', + '&Auto-remove', + '&Options', + 'E&xpert Options', + ] - These situation can happen in translated strings and is not easy to - review or control. This function tries to find such redundancies in - the po-files. + # Plausibility check: + # Difference between the real and expected strings indicate + # modifications in the GUI and in the shortcut groups. 
+ if not sorted(real) == sorted(expect): + # This will happen when the source strings are somehow modified or + # some strings add or removed. + # SOLUTION: Look again into the GUI and its commit history what was + # modified. Update the "expect" list to it. + raise ValueError( + f'Source strings with GUI shortcuts in {TEMPLATE_PO} are not as ' + 'expected.\n' + f' Expected: {sorted(expect)}\n' + f' Real: {sorted(real)}') + + # WORKAROUND + # This source string is not a translateble string but has a shortcut + # letter. + # Dev note: From point of view of the translators it might make sense + # making that string translatable also. But then we risk that our projects + # name is translated for real. + expect = ['Back In &Time'] + expect + + return {'mainwindow': expect[:4], 'manageprofile': expect[4:]} - Review the output with care because it there is a high risk of false - positive warnings. + +def check_shortcuts(): + """Check for redundant used letters as shortcut indicators in translated + GUI strings. + + Keyboard shortcuts are indicated via the & in front of a character + in a GUI string (e.g. a button or tab). For example "B&ackup" can be + activated with pressing ALT+A. As another example the strings '&Exclude' + and '&Export' used in the same area of the GUI won't work because both of + them indicate the 'E' as a shortcut. They need to be unique. + + These situation can happen in translated strings in most cases translators + are not aware of that feature or problem. It is nearly impossible to + control this on the level of the translation platform. """ - # RegEx pattern: & followed by a word character (as group) - rex = re.compile(r'&(\w)') + groups = get_shortcut_groups() # each po file in the repository for po_path in list(LOCAL_DIR.rglob('**/*.po')): - print(f'\nProcessing {po_path}...') + + print(f'******* {po_path} *******') + # Remember shortcut relevant entries. - msgs = {} + real = {key: [] for key in groups} + + # WORKAROUND. 
See get_shortcut_groups() for details. + real['mainwindow'].append('Back In &Time') - # All characters used as shortcuts. 'T' is used in "Back In &Time" - # which is an untranslated string. - shortcuts = 'T' + # Entries using shortcut indicators + shortcut_entries = get_shortcut_entries(polib.pofile(po_path)) - # each entry in po-file - for entry in polib.pofile(po_path): + # Group the entries to their shortcut groups + for entry in shortcut_entries: + for groupname in real: + if entry.msgid in groups[groupname]: + real[groupname].append(entry.msgstr) - # Ignore untranslated or obsolete strings - if not entry.msgstr or entry.obsolete: - continue + # Each shortcut group... + for groupname in real: - # Source string contain "&" - if rex.search(entry.msgid): - # Collect the source string and its translation - msgs[entry.msgid] = entry.msgstr + # All shortcut letters used in that group + letters = '' - # Get shortcut character from translated string + # Collect letters + for trans in real[groupname]: try: - shortcuts = shortcuts + rex.search(entry.msgstr).groups()[0] + letters = letters \ + + REX_SHORTCUT_LETTER.search(trans).groups()[0] except AttributeError: - print('ATTENTION: Maybe missing shortcut in translated ' - f'string.\nmsgid={entry.msgid}\n' - f'msgstr={entry.msgstr}') - - # redundant shortcuts? - if len(shortcuts) > len(set(shortcuts)): - print(f'ATTENTION: Maybe redundant shortcuts in "{po_path}". ' - 'Please take a look.') - for key, msgs in msgs.items(): - print(f'{key}: {msgs}') + pass + + # Redundant shortcuts? set() do remove duplicates + if len(letters) > len(set(letters)): + err_msg = f'Maybe redundant shortcuts in "{po_path}".' + + # Missing shortcuts in translated strings? + if len(letters) < len(real[groupname]): + err_msg = err_msg + ' Maybe missing ones.' 
+ + err_msg = f'{err_msg} Please take a look.\n' \ + f' Group: {groupname}\n' \ + f' Source: {groups[groupname]}\n' \ + f' Translation: {real[groupname]}' + + print(err_msg) if __name__ == '__main__':