Merge pull request #480 from axif0/pyICU

(New) Fixed pyICU capabilities for emoji functionalities
scribe-org · Oct 24, 2024 · 328d916 · 328d916
2 parents c13f50e + c35989f
commit 328d916
Show file tree

Hide file tree

Showing 3 changed files with 264 additions and 34 deletions.
diff --git a/src/scribe_data/check/check_pyicu.py b/src/scribe_data/check/check_pyicu.py
@@ -0,0 +1,211 @@
+"""
+Check to see if the requirements of the emoji process are installed.
+
+.. raw:: html
+    <!--
+    * Copyright (C) 2024 Scribe
+    *
+    * This program is free software: you can redistribute it and/or modify
+    * it under the terms of the GNU General Public License as published by
+    * the Free Software Foundation, either version 3 of the License, or
+    * (at your option) any later version.
+    *
+    * This program is distributed in the hope that it will be useful,
+    * but WITHOUT ANY WARRANTY; without even the implied warranty of
+    * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    * GNU General Public License for more details.
+    *
+    * You should have received a copy of the GNU General Public License
+    * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+    -->
+"""
+
+import os
+import platform  # added to check the OS
+import subprocess
+import sys
+from pathlib import Path
+
+import pkg_resources
+import requests
+
+
+def check_if_pyicu_installed():
+    installed_packages = {pkg.key for pkg in pkg_resources.working_set}
+
+    return "pyicu" in installed_packages
+
+
+def get_python_version_and_architecture():
+    """
+    Get the current Python version and architecture.
+
+    Returns
+    -------
+        python_version : str
+            The Python version in the format 'cpXY'.
+
+        architecture : str
+            The architecture tag ('win_amd64' or 'win32').
+    """
+    version = sys.version_info
+    python_version = f"cp{version.major}{version.minor}"
+    architecture = "win_amd64" if sys.maxsize > 2**32 else "win32"
+    return python_version, architecture
+
+
+def fetch_wheel_releases():
+    """
+    Fetch the release data for PyICU from GitHub.
+
+    Returns
+    -------
+        available_wheels : list
+            A list of tuples containing wheel file names and their download URLs.
+
+        total_size_mb : float
+            The total size of all available wheels in MB.
+    """
+    url = "https://api.github.com/repos/cgohlke/pyicu-build/releases"
+    response = requests.get(url)
+    response.raise_for_status()  # raise an error for bad responses
+
+    available_wheels = []
+    total_size_bytes = 0
+
+    for release in response.json():
+        for asset in release["assets"]:
+            if asset["name"].endswith(".whl"):
+                available_wheels.append((asset["name"], asset["browser_download_url"]))
+                total_size_bytes += asset["size"]
+
+    total_size_mb = total_size_bytes / (1024 * 1024)  # convert bytes to MB
+    return available_wheels, total_size_mb
+
+
+def download_wheel_file(wheel_url, output_dir):
+    """
+    Download the wheel file from the given URL.
+
+    Parameters
+    ----------
+        wheel_url : str
+            The URL of the wheel file to download.
+
+        output_dir : str
+            The directory to save the downloaded file.
+
+    Returns
+    -------
+        str : path to the downloaded wheel file.
+    """
+    response = requests.get(wheel_url)
+    response.raise_for_status()  # raise an error for bad responses
+
+    wheel_filename = os.path.basename(wheel_url)
+    wheel_path = os.path.join(output_dir, wheel_filename)
+
+    with open(wheel_path, "wb") as wheel_file:
+        wheel_file.write(response.content)
+
+    return wheel_path
+
+
+def find_matching_wheel(wheels, python_version, architecture):
+    """
+    Find the matching wheel file based on Python version and architecture.
+
+    Parameters
+    ----------
+        wheels : list
+            The list of available wheels.
+
+        python_version : str
+            The Python version (e.g., 'cp311').
+
+        architecture : str
+            The architecture type (e.g., 'win_amd64').
+
+    Returns
+    -------
+        str : The download URL of the matching wheel or None if not found.
+    """
+    return next(
+        (
+            download_url
+            for name, download_url in wheels
+            if python_version in name and architecture in name
+        ),
+        None,
+    )
+
+
+def check_and_install_pyicu():
+    package_name = "PyICU"
+    installed_packages = {pkg.key for pkg in pkg_resources.working_set}
+    if package_name.lower() not in installed_packages:
+        # print(f"{package_name} not found. Installing...")
+
+        # Fetch available wheels from GitHub to estimate download size.
+        wheels, total_size_mb = fetch_wheel_releases()
+
+        print(
+            f"{package_name} is not installed.\nIt will be downloaded from 'https://github.com/repos/cgohlke/pyicu'"
+            f"\nApproximately {total_size_mb:.2f} MB will be downloaded.\nDo you want to proceed? (Y/n)?"
+        )
+
+        user_input = input().strip().lower()
+        if user_input in ["", "y", "yes"]:
+            print("Proceeding with installation...")
+
+        else:
+            print("Installation aborted by the user.")
+            return False
+
+        # Check the operating system.
+        if platform.system() != "Windows":
+            # If not Windows, directly use pip to install PyICU.
+            try:
+                subprocess.run(
+                    [sys.executable, "-m", "pip", "install", package_name], check=True
+                )
+                print(f"{package_name} has been installed successfully.")
+
+            except subprocess.CalledProcessError as e:
+                print(f"Error occurred while installing {package_name}: {e}")
+                return False
+
+        else:
+            # Windows-specific installation using wheel files.
+            python_version, architecture = get_python_version_and_architecture()
+
+            # Find the matching wheel for the current Python version and architecture.
+            wheel_url = find_matching_wheel(wheels, python_version, architecture)
+
+            if not wheel_url:
+                print(
+                    "No matching wheel file found for your Python version and architecture."
+                )
+                return False
+
+            # Download the wheel file.
+            output_dir = Path.cwd()  # use the current directory for simplicity
+            wheel_path = download_wheel_file(wheel_url, output_dir)
+
+            # Install PyICU using pip.
+            try:
+                subprocess.run(
+                    [sys.executable, "-m", "pip", "install", wheel_path],
+                    check=True,
+                )
+                print(f"{package_name} has been installed successfully.")
+
+                # Remove the downloaded wheel file.
+                os.remove(wheel_path)
+                print(f"Removed temporary file: {wheel_path}")
+
+            except subprocess.CalledProcessError as e:
+                print(f"Error occurred while installing {package_name}: {e}")
+                return False
+
+    return True
diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py
@@ -160,12 +160,3 @@ def get_data(
 
         if interactive:
             return True
-
-    # Handle emoji keywords process failure.
-    elif data_type in {"emoji-keywords", "emoji_keywords"}:
-        print(
-            "\nThe Scribe-Data emoji functionality is powered by PyICU, which is currently not installed."
-        )
-        print(
-            "Please check the installation guide at https://github.com/scribe-org/Scribe-Data/blob/main/src/scribe_data/unicode/UNICODE_INSTALLTION.md for more information.\n"
-        )
diff --git a/src/scribe_data/unicode/generate_emoji_keywords.py b/src/scribe_data/unicode/generate_emoji_keywords.py
@@ -23,6 +23,10 @@
 import os
 from pathlib import Path
 
+from scribe_data.check.check_pyicu import (
+    check_and_install_pyicu,
+    check_if_pyicu_installed,
+)
 from scribe_data.unicode.process_unicode import gen_emoji_lexicon
 from scribe_data.utils import export_formatted_data, get_language_iso
 
@@ -31,29 +35,53 @@
 
 
 def generate_emoji(language, output_dir: str = None):
-    iso = get_language_iso(language=language)
-    path_to_cldr_annotations = (
-        Path(__file__).parent / "cldr-annotations-full" / "annotations"
-    )
-    if iso in os.listdir(path_to_cldr_annotations):
-        print(f"Emoji Generation for language {language} is supported")
-
-    else:
-        print(f"Emoji Generation for language {language} is not supported")
-        return
-
-    updated_path = output_dir[2:] if output_dir.startswith("./") else output_dir
-    export_dir = Path(updated_path) / language.capitalize()
-    export_dir.mkdir(parents=True, exist_ok=True)
-
-    if emoji_keywords_dict := gen_emoji_lexicon(
-        language=language,
-        emojis_per_keyword=EMOJI_KEYWORDS_DICT,
-    ):
-        export_formatted_data(
-            file_path=output_dir,
-            formatted_data=emoji_keywords_dict,
-            query_data_in_use=True,
-            language=language,
-            data_type=DATA_TYPE,
+    """
+    Generates emoji keywords for a specified language and exports the data to the given directory.
+
+    This function first checks and installs the PyICU package, which is necessary for the script to run.
+    If the installation is successful, it proceeds with generating emoji keywords based on the specified language.
+    The results are then exported to the provided output directory.
+
+    Parameters
+    ----------
+        language : str
+            The name of the language for which to generate emoji keywords (its ISO code is looked up internally).
+
+        output_dir : str, optional
+            The directory where the generated data will be saved.
+            Although it defaults to None, the body calls output_dir.startswith(), so a path should be passed.
+
+    Returns
+    -------
+        None: The function does not return any value but outputs data to the specified directory.
+    """
+    if check_and_install_pyicu() and check_if_pyicu_installed() is False:
+        print("Thank you.")
+
+    if check_if_pyicu_installed():
+        iso = get_language_iso(language=language)
+        path_to_cldr_annotations = (
+            Path(__file__).parent / "cldr-annotations-full" / "annotations"
         )
+        if iso in os.listdir(path_to_cldr_annotations):
+            print(f"Emoji Generation for language {language} is supported")
+
+        else:
+            print(f"Emoji Generation for language {language} is not supported")
+            return
+
+        updated_path = output_dir[2:] if output_dir.startswith("./") else output_dir
+        export_dir = Path(updated_path) / language.capitalize()
+        export_dir.mkdir(parents=True, exist_ok=True)
+
+        if emoji_keywords_dict := gen_emoji_lexicon(
+            language=language,
+            emojis_per_keyword=EMOJI_KEYWORDS_DICT,
+        ):
+            export_formatted_data(
+                file_path=output_dir,
+                formatted_data=emoji_keywords_dict,
+                query_data_in_use=True,
+                language=language.capitalize(),
+                data_type=DATA_TYPE,
+            )