Skip to content

Commit

Permalink
Build: Validation of shortcut letters in translations take groups int…
Browse files Browse the repository at this point in the history
…o account

Validation of shortcut letters in translations distinguishes between "shortcut groups".

Two "shortcut groups" exist in Back In Time: 1) the menu bar entries in the main window and 2) the tabs in the manage profiles dialog. The script checks for redundant use of shortcut letters inside those groups (e.g. "B" in "&Backup" and "&Back In Time") and reports it.

Additionally, updating po files from py-sources now removes obsolete strings.
  • Loading branch information
buhtz authored Feb 8, 2024
1 parent c65fc91 commit 9ea0d80
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 43 deletions.
2 changes: 1 addition & 1 deletion .codespellrc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ uri-ignore-words-list=mitre
# Good to know about allowed/ignored words:
# Codespell acts a bit unusual when it comes to case-sensitivity.
# By default the word "Manuel" is an error and codespell recommends to
# modify it into "Manual". Tu allow this German name "Manuel" we have to
# modify it into "Manual". To allow this German name "Manuel" we have to
# add "manual" (lower case!) to the "ignore-words-list". The upper-case
# version do not work.
# See: https://github.com/codespell-project/codespell/issues/3210
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
- python: "3.9"
- python: "3.10"
- python: "3.11"
# Excluding temporarily because Pyhton 3.12 for ppc64le is not
# Excluding temporarily because Python 3.12 for ppc64le is not
# available yet on Travis.
- arch: ppc64le
python: "3.12"
Expand Down
5 changes: 3 additions & 2 deletions common/doc-dev/2_localization.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ GitHub.
6. Scan `py`-files for modified source strings via `./update_language_files.py source`.
7. Check via `git status` or `git diff`. The file `messages.pot` and all `po`-files should be modified.
8. Commit.
9. Create PR and merge into "dev".
10. Weblate ["Repository maintenance"](https://translate.codeberg.org/projects/backintime/#repository):
9. Optional: Check for redundant letters in "shortcut groups" via `./update_language_files.py shortcuts`.
10. Create PR and merge into "dev".
11. Weblate ["Repository maintenance"](https://translate.codeberg.org/projects/backintime/#repository):
1. Go to "Danger zone" and click on "Reset".
2. "Unlock" the project.

Expand Down
167 changes: 128 additions & 39 deletions update_language_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
PACKAGE_NAME = 'Back In Time'
PACKAGE_VERSION = Path('VERSION').read_text('utf-8').strip()
BUG_ADDRESS = 'https://github.com/bit-team/backintime'
# RegEx pattern: Character & followed by a word character (extract as group)
REX_SHORTCUT_LETTER = re.compile(r'&(\w)')


def update_po_template():
Expand Down Expand Up @@ -104,7 +106,15 @@ def update_po_language_files():
f'{po_path}',
f'{TEMPLATE_PO}'
]
run(cmd, check=True)

# remove obsolete entries ("#~ msgid")
cmd = [
'msgattrib',
'--no-obsolete',
f'--output-file={po_path}',
f'{po_path}'
]
run(cmd, check=True)


Expand Down Expand Up @@ -278,7 +288,7 @@ def create_language_names_dict(language_codes: list) -> dict:
'Can not import package "babel". Please install it.') from exc

# Source language (English) should be included
if not 'en' in language_codes:
if 'en' not in language_codes:
language_codes.append('en')

# Don't use defaultdict because pprint can't handle it
Expand Down Expand Up @@ -324,59 +334,138 @@ def update_language_names() -> dict:
return languages.names


def check_shortcuts():
"""Keyboard shortcuts are indicated via the & in front of a character
in an GUI string (e.g. a button or tab). As an example '&Exclude' and
'&Export' do not work because both of them indicate the 'E' as a
shortcut.
def get_shortcut_entries(po_file: polib.POFile) -> list[polib.POEntry]:
    """Collect the entries of a po-file that carry a shortcut indicator.

    An entry qualifies when it is not flagged as obsolete and its source
    string (``msgid``) contains an "&" followed by a word character.

    Args:
        po_file: The po-file whose entries are inspected.

    Returns:
        The qualifying entries, in the order they appear in ``po_file``.
    """
    return [
        entry for entry in po_file
        if entry.obsolete == 0 and REX_SHORTCUT_LETTER.search(entry.msgid)
    ]


def get_shortcut_groups() -> dict[str, list]:
"""Return the currently used "shortcut groups" and validate if they are
up to date with the source strings in "messages.pot".
Returns:
A dictionary indexed by group name, each holding a list of source strings.
Raises:
ValueError: If the source strings using a shortcut indicator have been
modified.
"""

# Get all entries using a shortcut indicator
real = get_shortcut_entries(polib.pofile(TEMPLATE_PO))
# Reduce to their source strings
real = [entry.msgid for entry in real]

# Later this list is sliced into multiple groups
expect = [
# Main window (menu bar)
'&Backup',
'&Restore',
'&Help',
# Manage profiles dialog (tabs)
'&General',
'&Include',
'&Exclude',
'&Auto-remove',
'&Options',
'E&xpert Options',
]

These situation can happen in translated strings and is not easy to
review or control. This function tries to find such redundancies in
the po-files.
# Plausibility check:
# Difference between the real and expected strings indicate
# modifications in the GUI and in the shortcut groups.
if not sorted(real) == sorted(expect):
# This will happen when the source strings are somehow modified or
# some strings are added or removed.
# SOLUTION: Look again into the GUI and its commit history to see what
# was modified. Update the "expect" list accordingly.
raise ValueError(
f'Source strings with GUI shortcuts in {TEMPLATE_PO} are not as '
'expected.\n'
f' Expected: {sorted(expect)}\n'
f' Real: {sorted(real)}')

# WORKAROUND
# This source string is not a translatable string but has a shortcut
# letter.
# Dev note: From the translators' point of view it might make sense to
# make that string translatable as well. But then we risk that our
# project's name actually gets translated.
expect = ['Back In &Time'] + expect

return {'mainwindow': expect[:4], 'manageprofile': expect[4:]}

Review the output with care because it there is a high risk of false
positive warnings.

def check_shortcuts():
"""Check for redundant used letters as shortcut indicators in translated
GUI strings.
Keyboard shortcuts are indicated via the & in front of a character
in a GUI string (e.g. a button or tab). For example "B&ackup" can be
activated with pressing ALT+A. As another example the strings '&Exclude'
and '&Export' used in the same area of the GUI won't work because both of
them indicate the 'E' as a shortcut. They need to be unique.
This situation can happen in translated strings because in most cases
translators are not aware of that feature or problem. It is nearly
impossible to control this on the level of the translation platform.
"""

# RegEx pattern: & followed by a word character (as group)
rex = re.compile(r'&(\w)')
groups = get_shortcut_groups()

# each po file in the repository
for po_path in list(LOCAL_DIR.rglob('**/*.po')):
print(f'\nProcessing {po_path}...')

print(f'******* {po_path} *******')

# Remember shortcut relevant entries.
msgs = {}
real = {key: [] for key in groups}

# WORKAROUND. See get_shortcut_groups() for details.
real['mainwindow'].append('Back In &Time')

# All characters used as shortcuts. 'T' is used in "Back In &Time"
# which is an untranslated string.
shortcuts = 'T'
# Entries using shortcut indicators
shortcut_entries = get_shortcut_entries(polib.pofile(po_path))

# each entry in po-file
for entry in polib.pofile(po_path):
# Group the entries to their shortcut groups
for entry in shortcut_entries:
for groupname in real:
if entry.msgid in groups[groupname]:
real[groupname].append(entry.msgstr)

# Ignore untranslated or obsolete strings
if not entry.msgstr or entry.obsolete:
continue
# Each shortcut group...
for groupname in real:

# Source string contain "&"
if rex.search(entry.msgid):
# Collect the source string and its translation
msgs[entry.msgid] = entry.msgstr
# All shortcut letters used in that group
letters = ''

# Get shortcut character from translated string
# Collect letters
for trans in real[groupname]:
try:
shortcuts = shortcuts + rex.search(entry.msgstr).groups()[0]
letters = letters \
+ REX_SHORTCUT_LETTER.search(trans).groups()[0]
except AttributeError:
print('ATTENTION: Maybe missing shortcut in translated '
f'string.\nmsgid={entry.msgid}\n'
f'msgstr={entry.msgstr}')

# redundant shortcuts?
if len(shortcuts) > len(set(shortcuts)):
print(f'ATTENTION: Maybe redundant shortcuts in "{po_path}". '
'Please take a look.')
for key, msgs in msgs.items():
print(f'{key}: {msgs}')
pass

# Redundant shortcuts? set() removes duplicates
if len(letters) > len(set(letters)):
err_msg = f'Maybe redundant shortcuts in "{po_path}".'

# Missing shortcuts in translated strings?
if len(letters) < len(real[groupname]):
err_msg = err_msg + ' Maybe missing ones.'

err_msg = f'{err_msg} Please take a look.\n' \
f' Group: {groupname}\n' \
f' Source: {groups[groupname]}\n' \
f' Translation: {real[groupname]}'

print(err_msg)


if __name__ == '__main__':
Expand Down

0 comments on commit 9ea0d80

Please sign in to comment.