diff --git a/contrib/devtools/README.md b/contrib/devtools/README.md index 3d1024c7a5..04fa02484f 100644 --- a/contrib/devtools/README.md +++ b/contrib/devtools/README.md @@ -120,18 +120,6 @@ If there are 'unsupported' symbols, the return value will be 1 a list like this .../64/test_bitcoin: symbol std::out_of_range::~out_of_range() from unsupported version GLIBCXX_3.4.15 .../64/test_bitcoin: symbol _ZNSt8__detail15_List_nod from unsupported version GLIBCXX_3.4.15 -update-translations.py -====================== - -Run this script from the root of the repository to update all translations from transifex. -It will do the following automatically: - -- fetch all translations -- post-process them into valid and committable format -- add missing translations to the build system (TODO) - -See doc/translation-process.md for more information. - circular-dependencies.py ======================== diff --git a/contrib/devtools/update-translations.py b/contrib/devtools/update-translations.py deleted file mode 100755 index 1b9d3a4c27..0000000000 --- a/contrib/devtools/update-translations.py +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2014 Wladimir J. van der Laan -# Distributed under the MIT software license, see the accompanying -# file COPYING or http://www.opensource.org/licenses/mit-license.php. -''' -Run this script from the root of the repository to update all translations from -transifex. -It will do the following automatically: - -- fetch all translations using the tx tool -- post-process them into valid and committable format - - remove invalid control characters - - remove location tags (makes diffs less noisy) - -TODO: -- auto-add new translations to the build system according to the translation process -''' -import subprocess -import re -import sys -import os -import io -import xml.etree.ElementTree as ET - -# Name of transifex tool -TX = 'tx' -# Name of source language file -SOURCE_LANG = 'bitcoin_en.ts' -# Directory with locale files -LOCALE_DIR = 'src/qt/locale' -# Minimum number of messages for translation to be considered at all -MIN_NUM_MESSAGES = 10 -# Regexp to check for Bitcoin addresses -ADDRESS_REGEXP = re.compile('([13]|bc1)[a-zA-Z0-9]{30,}') - -def check_at_repository_root(): - if not os.path.exists('.git'): - print('No .git directory found') - print('Execute this script at the root of the repository', file=sys.stderr) - sys.exit(1) - -def fetch_all_translations(): - if subprocess.call([TX, 'pull', '-f', '-a']): - print('Error while fetching translations', file=sys.stderr) - sys.exit(1) - -def find_format_specifiers(s): - '''Find all format specifiers in a string.''' - pos = 0 - specifiers = [] - while True: - percent = s.find('%', pos) - if percent < 0: - break - specifiers.append(s[percent+1]) - pos = percent+2 - return specifiers - -def split_format_specifiers(specifiers): - '''Split format specifiers between numeric (Qt) and others (strprintf)''' - numeric = [] - other = [] - for s in specifiers: - if s in {'1','2','3','4','5','6','7','8','9'}: - numeric.append(s) - else: - other.append(s) - - # If both numeric format specifiers and "others" are used, assume we're dealing - # with a Qt-formatted message. In the case of Qt formatting (see https://doc.qt.io/qt-5/qstring.html#arg) - # only numeric formats are replaced at all. This means "(percentage: %1%)" is valid, without needing - # any kind of escaping that would be necessary for strprintf. Without this, this function - # would wrongly detect '%)' as a printf format specifier. - if numeric: - other = [] - - # numeric (Qt) can be present in any order, others (strprintf) must be in specified order - return set(numeric),other - -def sanitize_string(s): - '''Sanitize string for printing''' - return s.replace('\n',' ') - -def check_format_specifiers(source, translation, errors, numerus): - source_f = split_format_specifiers(find_format_specifiers(source)) - # assert that no source messages contain both Qt and strprintf format specifiers - # if this fails, go change the source as this is hacky and confusing! - assert(not(source_f[0] and source_f[1])) - try: - translation_f = split_format_specifiers(find_format_specifiers(translation)) - except IndexError: - errors.append("Parse error in translation for '%s': '%s'" % (sanitize_string(source), sanitize_string(translation))) - return False - else: - if source_f != translation_f: - if numerus and source_f == (set(), ['n']) and translation_f == (set(), []) and translation.find('%') == -1: - # Allow numerus translations to omit %n specifier (usually when it only has one possible value) - return True - errors.append("Mismatch between '%s' and '%s'" % (sanitize_string(source), sanitize_string(translation))) - return False - return True - -def all_ts_files(suffix=''): - for filename in os.listdir(LOCALE_DIR): - # process only language files, and do not process source language - if not filename.endswith('.ts'+suffix) or filename == SOURCE_LANG+suffix: - continue - if suffix: # remove provided suffix - filename = filename[0:-len(suffix)] - filepath = os.path.join(LOCALE_DIR, filename) - yield(filename, filepath) - -FIX_RE = re.compile(b'[\x00-\x09\x0b\x0c\x0e-\x1f]') -def remove_invalid_characters(s): - '''Remove invalid characters from translation string''' - return FIX_RE.sub(b'', s) - -# Override cdata escape function to make our output match Qt's (optional, just for cleaner diffs for -# comparison, disable by default) -_orig_escape_cdata = None -def escape_cdata(text): - text = _orig_escape_cdata(text) - text = text.replace("'", ''') - text = text.replace('"', '"') - return text - -def contains_bitcoin_addr(text, errors): - if text is not None and ADDRESS_REGEXP.search(text) is not None: - errors.append('Translation "%s" contains a bitcoin address. This will be removed.' % (text)) - return True - return False - -def postprocess_translations(reduce_diff_hacks=False): - print('Checking and postprocessing...') - - if reduce_diff_hacks: - global _orig_escape_cdata - _orig_escape_cdata = ET._escape_cdata - ET._escape_cdata = escape_cdata - - for (filename,filepath) in all_ts_files(): - os.rename(filepath, filepath+'.orig') - - have_errors = False - for (filename,filepath) in all_ts_files('.orig'): - # pre-fixups to cope with transifex output - parser = ET.XMLParser(encoding='utf-8') # need to override encoding because 'utf8' is not understood only 'utf-8' - with open(filepath + '.orig', 'rb') as f: - data = f.read() - # remove control characters; this must be done over the entire file otherwise the XML parser will fail - data = remove_invalid_characters(data) - tree = ET.parse(io.BytesIO(data), parser=parser) - - # iterate over all messages in file - root = tree.getroot() - for context in root.findall('context'): - for message in context.findall('message'): - numerus = message.get('numerus') == 'yes' - source = message.find('source').text - translation_node = message.find('translation') - # pick all numerusforms - if numerus: - translations = [i.text for i in translation_node.findall('numerusform')] - else: - translations = [translation_node.text] - - for translation in translations: - if translation is None: - continue - errors = [] - valid = check_format_specifiers(source, translation, errors, numerus) and not contains_bitcoin_addr(translation, errors) - - for error in errors: - print('%s: %s' % (filename, error)) - - if not valid: # set type to unfinished and clear string if invalid - translation_node.clear() - translation_node.set('type', 'unfinished') - have_errors = True - - # Remove location tags - for location in message.findall('location'): - message.remove(location) - - # Remove entire message if it is an unfinished translation - if translation_node.get('type') == 'unfinished': - context.remove(message) - - # check if document is (virtually) empty, and remove it if so - num_messages = 0 - for context in root.findall('context'): - for message in context.findall('message'): - num_messages += 1 - if num_messages < MIN_NUM_MESSAGES: - print('Removing %s, as it contains only %i messages' % (filepath, num_messages)) - continue - - # write fixed-up tree - # if diff reduction requested, replace some XML to 'sanitize' to qt formatting - if reduce_diff_hacks: - out = io.BytesIO() - tree.write(out, encoding='utf-8') - out = out.getvalue() - out = out.replace(b' />', b'/>') - with open(filepath, 'wb') as f: - f.write(out) - else: - tree.write(filepath, encoding='utf-8') - return have_errors - -if __name__ == '__main__': - check_at_repository_root() - fetch_all_translations() - postprocess_translations() - diff --git a/doc/translation_process.md b/doc/translation_process.md index b9a10b6527..7212105f35 100644 --- a/doc/translation_process.md +++ b/doc/translation_process.md @@ -65,9 +65,9 @@ username = USERNAME The Transifex Bitcoin project config file is included as part of the repo. It can be found at `.tx/config`, however you shouldn’t need to change anything. ### Synchronising translations -To assist in updating translations, we have created a script to help. +To assist in updating translations, a helper script is available in the [maintainer-tools repo](https://github.com/bitcoin-core/bitcoin-maintainer-tools). -1. `python contrib/devtools/update-translations.py` +1. `python3 ../bitcoin-maintainer-tools/update-translations.py` 2. `git add` new translations from `src/qt/locale/` 3. Update `src/qt/bitcoin_locale.qrc` manually or via ```bash