From 697c4887a0c07ae78578b2eb56566e802804831c Mon Sep 17 00:00:00 2001
From: Nicolas Lance <60974685+ml4nC3@users.noreply.github.com>
Date: Wed, 26 Apr 2023 18:53:13 +0200
Subject: [PATCH] feat: add script to extract, sync, check, and integrate new
 translations (#814)

fix #813

Co-authored-by: nicolas <lance.nicolas@gmail.com>
Co-authored-by: Nathan Lovato <12694995+NathanLovato@users.noreply.github.com>
---
 i18n/check_and_integrate_translations.py | 184 +++++++++++++++++++++++
 i18n/extract.py                          |  24 ++-
 requirements.txt                         |   3 +
 3 files changed, 197 insertions(+), 14 deletions(-)
 create mode 100644 i18n/check_and_integrate_translations.py
 create mode 100644 requirements.txt

diff --git a/i18n/check_and_integrate_translations.py b/i18n/check_and_integrate_translations.py
new file mode 100644
index 00000000..3370ab0e
--- /dev/null
+++ b/i18n/check_and_integrate_translations.py
@@ -0,0 +1,184 @@
+import os
+import sys
+import argparse
+import subprocess
+import shutil
+import glob
+from match_and_merge_po_translations import parse_po_file
+from dataclasses import dataclass
+
+
+@dataclass
+class TranslationsData:
+    """Translation statistics gathered from the PO files of one language directory."""
+    language_code: str
+    directory_path: str
+    total_strings: int = 0
+    missing_translations: int = 0
+    fuzzy_translations: int = 0
+    _completion_rate: int = -1  # Cached result; -1 means "not computed yet".
+
+    def get_completion_rate(self) -> int:
+        """Return the completion percentage, cached after the first call; 0 if there are no strings."""
+        if self._completion_rate == -1:
+            todo_count = self.missing_translations + self.fuzzy_translations
+            # An empty catalog would divide by zero; report it as 0% complete instead.
+            total = self.total_strings
+            self._completion_rate = 100 - (100 * todo_count // total) if total > 0 else 0
+        return self._completion_rate
+
+
+@dataclass
+class Args:
+    translations_path: str  # Positional argument: path to the translations repository.
+    threshold: int  # -t/--threshold: minimum completion percentage to integrate a language.
+    skip_extract: bool  # -E/--skip-extract: skip string extraction and POT file generation.
+    skip_sync: bool  # -S/--skip-sync: skip syncing PO files with the reference POT files.
+
+
+def parse_command_line_arguments() -> Args:
+    """Builds the command line parser and returns the parsed values as an Args instance."""
+    description = (
+        "This script performs text extraction from the application and move "
+        "generated POT files in the translations project in order to compare "
+        "with translations source and output a translation completion "
+        "indicator for each language."
+    )
+    cli = argparse.ArgumentParser(description=description)
+    # Option names map onto the Args fields: "--skip-extract" becomes skip_extract, etc.
+    cli.add_argument(
+        "translations_path",
+        help="Relative or absolute path to the repository learn-gdscript-translations/.",
+    )
+    cli.add_argument(
+        "-t",
+        "--threshold",
+        type=int,
+        default=95,
+        help="Minimum completion percentage value for a language to be integrated.",
+    )
+    cli.add_argument(
+        "-E",
+        "--skip-extract",
+        action="store_true",
+        help="Skip the extraction of strings and POT files generation.",
+    )
+    cli.add_argument(
+        "-S",
+        "--skip-sync",
+        action="store_true",
+        help="Skip the synchronization and merge of PO files with the reference POT files.",
+    )
+    return Args(**vars(cli.parse_args()))
+
+
+def main():
+    """Run the translation workflow: extract, sync, report completion, integrate.
+
+    Exits with an error if the translations path is not a directory or if the
+    extraction or synchronization script fails.
+    """
+    args = parse_command_line_arguments()
+    # Checked up front: the POT files generated below are moved into this directory.
+    if not os.path.isdir(args.translations_path):
+        sys.exit(f"ERROR: {args.translations_path} is not a directory. Aborting script.")
+
+    I18N_DIRECTORY = os.path.dirname(os.path.realpath(__file__))
+
+    if args.skip_extract:
+        print("WARN: Skipping strings extraction and POT files generation...")
+    else:
+        print(
+            "INFO: Extracting translation strings from learn-gdscript source code and generating POT files..."
+        )
+        EXTRACT_SCRIPT_PATH = os.path.join(I18N_DIRECTORY, "extract.py")
+        # sys.executable runs the script with the same interpreter (and packages) as this one.
+        result = subprocess.run([sys.executable, EXTRACT_SCRIPT_PATH], capture_output=True)
+        # A process killed by a signal returns a negative code, hence != 0 rather than > 0.
+        if result.returncode != 0:
+            sys.exit(
+                "ERROR: Extraction scripts ended with errors. Aborting script.\n"
+                f"Script {EXTRACT_SCRIPT_PATH} output the following errors:\n"
+                f"Error code: {result.returncode}\n"
+                f"{result.stderr.decode('utf-8', errors='replace')}"
+            )
+
+        print("INFO: Moving POT files to translations folder")
+        for pot_file in glob.glob("*.pot", root_dir=I18N_DIRECTORY):
+            shutil.move(
+                os.path.join(I18N_DIRECTORY, pot_file),
+                os.path.join(args.translations_path, pot_file),
+            )
+
+    # Updating PO files with sync_translations.py
+    if args.skip_sync:
+        print("WARN: Skipping PO files merging with POT...")
+    else:
+        print("INFO: Running synchronization script")
+        sync_script_path = os.path.join(args.translations_path, "sync_translations.py")
+        sync_result = subprocess.run([sys.executable, sync_script_path])
+        # Completion rates computed from PO files that failed to sync would be wrong.
+        if sync_result.returncode != 0:
+            sys.exit(f"ERROR: {sync_script_path} failed with code {sync_result.returncode}.")
+
+    # Parsing and Analyzing PO files
+    print("INFO: Parsing PO files and counting missing translations")
+    languages_directories = [
+        os.path.join(args.translations_path, name)
+        for name in os.listdir(args.translations_path)
+        if name not in (".git", "images")
+        and os.path.isdir(os.path.join(args.translations_path, name))
+    ]
+
+    translation_datas = []
+    for language_directory in languages_directories:
+        data = TranslationsData(os.path.basename(language_directory), language_directory)
+        # Iterating through PO files entries in order to count missing and fuzzy translations
+        for po_file_name in glob.glob("*.po", root_dir=language_directory):
+            po_file = parse_po_file(os.path.join(language_directory, po_file_name))
+            data.total_strings += len(po_file.entries)
+            for entry in po_file.entries:
+                if entry.msgstr == "" and entry.msgid != "":
+                    # Case 1 : entry has no translated string whereas it has an id value
+                    data.missing_translations += 1
+                elif entry.is_fuzzy:
+                    # Case 2 : entry has a translation, but it is tagged as fuzzy by msgmerge
+                    data.fuzzy_translations += 1
+
+        if data.total_strings > 0:
+            translation_datas.append(data)
+
+    # Computing translations indicator values
+    print("INFO: Computing completion indicator for each language.")
+
+    # Sorting and Outputting results
+    for data in sorted(
+        translation_datas, key=TranslationsData.get_completion_rate, reverse=True
+    ):
+        print(
+            f"Language : {data.language_code} - translations are {data.get_completion_rate()}% complete, "
+            f"including {data.fuzzy_translations} fuzzy and {data.missing_translations} missing translations."
+        )
+
+    languages_to_integrate = [
+        data
+        for data in translation_datas
+        if data.get_completion_rate() >= args.threshold
+    ]
+    # Integrating translations in GD_Learn project
+    if languages_to_integrate:
+        print(
+            f"INFO: Integrating languages with translations above {args.threshold}% complete."
+        )
+        for data in languages_to_integrate:
+            print("Copying ", data.language_code)
+            destination = os.path.join(I18N_DIRECTORY, data.language_code)
+            if os.path.exists(destination):
+                shutil.rmtree(destination)
+            shutil.copytree(data.directory_path, destination)
+    else:
+        print("WARN: No language complete enough to be integrated.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/i18n/extract.py b/i18n/extract.py
index 984a80b7..6fb0a97f 100644
--- a/i18n/extract.py
+++ b/i18n/extract.py
@@ -116,22 +116,18 @@ def extract_babel_and_write(
         strip_comment_tags=False,
     )
 
-    # (filename, lineno, translation string, comments, context)
-    for message_data in extractor:
-        message_id = message_data[2]
+    # (filename, lineno, message, comments, context)
+    for message in extractor:
+        message_id = message[2]
         message_id = message_id.replace("\r\n", "\n")
 
-        # We split blocks into individual translation strings to make updating
-        # translations easier.
-        for line in message_id.split("\n"):
-            if line.strip() != "":
-                catalog.add(
-                    id=line,
-                    string="",
-                    locations=[(message_data[0], message_data[1])],
-                    auto_comments=message_data[3],
-                    context=message_data[4],
-                )
+        catalog.add(
+            id=message_id,
+            string="",
+            locations=[(message[0], message[1])],
+            auto_comments=message[3],
+            context=message[4],
+        )
 
     with open(output_file, "wb") as file:
         pofile.write_po(
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..2bf868ca
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+babel
+babel-godot
+datargs
\ No newline at end of file