From 24e0d3794654c46c1e905ec2131bdd39193f79da Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 17:20:34 +0200 Subject: [PATCH 001/112] 1rst commit for pypi packaging --- .env.example | 23 ---- .github/workflows/build-package.yaml | 56 +++++++++ .github/workflows/publish-package.yaml | 32 +++++ pyproject.toml | 29 +++++ src/{ => auto_po_lyglot}/__init__.py | 0 src/{ => auto_po_lyglot}/base.py | 17 ++- src/{ => auto_po_lyglot}/claude_client.py | 2 +- src/{ => auto_po_lyglot}/csv_extractor.py | 2 +- src/{ => auto_po_lyglot}/getenv.py | 52 +++----- .../openai_ollama_client.py | 2 +- src/{ => auto_po_lyglot}/po_main.py | 13 +- src/test_main.py | 119 ------------------ tests/__init__.py | 0 tests/settings.py | 66 ++++++++++ tests/test_main.py | 74 +++++++++++ 15 files changed, 296 insertions(+), 191 deletions(-) create mode 100644 .github/workflows/build-package.yaml create mode 100644 .github/workflows/publish-package.yaml create mode 100644 pyproject.toml rename src/{ => auto_po_lyglot}/__init__.py (100%) rename src/{ => auto_po_lyglot}/base.py (87%) rename src/{ => auto_po_lyglot}/claude_client.py (96%) rename src/{ => auto_po_lyglot}/csv_extractor.py (98%) rename src/{ => auto_po_lyglot}/getenv.py (68%) rename src/{ => auto_po_lyglot}/openai_ollama_client.py (97%) rename src/{ => auto_po_lyglot}/po_main.py (89%) delete mode 100755 src/test_main.py create mode 100644 tests/__init__.py create mode 100644 tests/settings.py create mode 100755 tests/test_main.py diff --git a/.env.example b/.env.example index c280465..f250e3d 100644 --- a/.env.example +++ b/.env.example @@ -102,26 +102,3 @@ Provide only the {target_language} translation." 
# Here you choose which prompt couple to use SYSTEM_PROMPT=${SYSTEM_PROMPT1} USER_PROMPT=${USER_PROMPT1} - -#####################################################################################" -# This section is used for testing purposes (used in test_main.py) -#####################################################################################" - -# Where tests results will be stored. Can be overriden on the command line -OUTPUT_DIRECTORY="./tests/output" -# Some ambiguous sentences in the ORIGINAL_LANGUAGE and their CONTEXT_LANGUAGE translations for testing -# Can be overriden on the command line with one (and only one) original phrase and its context translation -TEST_TRANSLATIONS="[ - {\"original_phrase\": \"She broke down\", \"context_translation\": \"Elle est tombée en panne\"}, - {\"original_phrase\": \"She broke down\", \"context_translation\": \"Elle s'est effondrée\"}, - {\"original_phrase\": \"bank\", \"context_translation\": \"rive\"}, - {\"original_phrase\": \"bank\", \"context_translation\": \"banque\"}, - {\"original_phrase\": \"He saw the light.\", \"context_translation\": \"Il a compris.\"}, - {\"original_phrase\": \"He saw the light.\", \"context_translation\": \"Il a vu la lumière.\"}, - {\"original_phrase\": \"She made a call.\", \"context_translation\": \"Elle a passé un appel.\"}, - {\"original_phrase\": \"She made a call.\", \"context_translation\": \"Elle a pris une décision.\"}, - {\"original_phrase\": \"They left the room.\", \"context_translation\": \"Ils ont quitté la pièce.\"}, - {\"original_phrase\": \"They left the room.\", \"context_translation\": \"Ils ont laissé la pièce en l'état.\"}, - {\"original_phrase\": \"He gave her a ring.\", \"context_translation\": \"Il lui a donné une bague.\"}, - {\"original_phrase\": \"He gave her a ring.\", \"context_translation\": \"Il lui a passé un coup de fil.\"} -]" diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml new file mode 100644 index 
0000000..13153e7 --- /dev/null +++ b/.github/workflows/build-package.yaml @@ -0,0 +1,56 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python package + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: 'pip' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest + pip install -r requirements.txt + # to allow dev package testing + pip install -e . + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics + - name: Make envfile + uses: SpicyPizza/create-envfile@v2.0.3 + with: + envkey_VERBOSE: true # optional + envkey_INPUT_PO: "tests/input/input.po" + envkey_ORIGINAL_LANGUAGE: "English" + envkey_CONTEXT_LANGUAGE: "French" + envkey_TARGET_LANGUAGES: "Italian" + envkey_LLM_CLIENT: "ollama" + envkey_LLM_MODEL: "gemma2:2b" + envkey_OLLAMA_BASE_URL: "http://localhost:11434/v1" + envkey_SYSTEM_PROMPT: $SYSTEM_PROMPT + envkey_USER_PROMPT: $USER_PROMPT + directory: tests + file_name: .env + fail_on_empty: false + sort_keys: false + - name: Test with pytest + run: | + pytest -s tests/ \ No newline at end of file diff --git a/.github/workflows/publish-package.yaml b/.github/workflows/publish-package.yaml new file mode 100644 index 0000000..4be1ad6 --- /dev/null +++ b/.github/workflows/publish-package.yaml @@ -0,0 +1,32 @@ +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_SECRET_TOKEN }} + \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e23824d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,29 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "auto-po-lyglot" +version = "1.0.0" +#dynamic = ["version"] +authors = [ + { name="Olivier LEVILLAIN", email="levillain.olivier@gmail.com" }, +] +description = "A Python package to translate po files using AI" +readme = 
"README.md" +requires-python = ">=3.10" +dependencies=["polib>=1.2.0", "openai>=1.12.0", "python-dotenv>=1.0.1", "anthropic>=0.34.1", "langcodes>=3.4.0"] +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Development Status :: 4 - Beta", + +] +keywords = ["python", "poet", "translation", "llm", "AI"] + +[project.urls] +Homepage = "https://github.com/leolivier/transpo" +Repository = "https://github.com/leolivier/transpo.git" +Issues = "https://github.com/leolivier/transpo/issues" +Download = "https://github.com/leolivier/transpo/archive/refs/tags/v1.0.0.tar.gz" \ No newline at end of file diff --git a/src/__init__.py b/src/auto_po_lyglot/__init__.py similarity index 100% rename from src/__init__.py rename to src/auto_po_lyglot/__init__.py diff --git a/src/base.py b/src/auto_po_lyglot/base.py similarity index 87% rename from src/base.py rename to src/auto_po_lyglot/base.py index 3bf7b6a..4dbbbad 100644 --- a/src/base.py +++ b/src/auto_po_lyglot/base.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod import logging from os import environ +import sys class TranspoException(Exception): @@ -63,12 +64,24 @@ def get_user_prompt(self, phrase, context_translation): } return format.format(**params) + def process_translation(self, raw_result): + translation_result = raw_result.split('\n') + translation = translation_result[0].strip(' "') + explanation = 'Not provided' + if len(translation_result) > 1: + translation_result.pop(0) + translation_result = [line for line in translation_result if line] + explanation = '\n'.join(translation_result) + + return translation, explanation + def translate(self, phrase, context_translation): if self.target_language is None: raise TranspoException("Error:target_language must be set before trying to translate anything") system_prompt = self.get_system_prompt() user_prompt = self.get_user_prompt(phrase, context_translation) - return 
self.get_translation(system_prompt, user_prompt) + raw_result = self.get_translation(system_prompt, user_prompt) + return self.process_translation(raw_result) class Logger(): @@ -81,7 +94,7 @@ def vprint(self, *args, **kwargs): """Print only if verbose is set""" if self.verbose_mode: print(*args, **kwargs) - # sys.stdout.flush() + sys.stdout.flush() def info(self, *args, **kwargs): self.logger.info(*args, **kwargs) diff --git a/src/claude_client.py b/src/auto_po_lyglot/claude_client.py similarity index 96% rename from src/claude_client.py rename to src/auto_po_lyglot/claude_client.py index 54a4a35..e57c9c3 100644 --- a/src/claude_client.py +++ b/src/auto_po_lyglot/claude_client.py @@ -1,5 +1,5 @@ from anthropic import Anthropic -from base import TranspoClient, TranspoException, Logger +from .base import TranspoClient, TranspoException, Logger logger = Logger(__name__) diff --git a/src/csv_extractor.py b/src/auto_po_lyglot/csv_extractor.py similarity index 98% rename from src/csv_extractor.py rename to src/auto_po_lyglot/csv_extractor.py index 822dc51..d0ed55e 100755 --- a/src/csv_extractor.py +++ b/src/auto_po_lyglot/csv_extractor.py @@ -4,7 +4,7 @@ import csv import sys import os -from base import Logger +from .base import Logger logger = Logger(__name__) diff --git a/src/getenv.py b/src/auto_po_lyglot/getenv.py similarity index 68% rename from src/getenv.py rename to src/auto_po_lyglot/getenv.py index 181165f..408c6d1 100755 --- a/src/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -1,10 +1,8 @@ #!/usr/bin/env python -import sys from dotenv import load_dotenv from os import environ -import json import argparse -from base import Logger +from .base import Logger logger = Logger(__name__) @@ -38,8 +36,12 @@ def parse_args(self, additional_args=None): type=str, help='the language into which the original phrase will be translated') parser.add_argument('--verbose', action='store_true', help='verbose mode') - for arg in additional_args: - 
parser.add_argument(arg.get('arg'), type=arg.get('type'), help=arg.get('help')) + if additional_args: + for arg in additional_args: + if arg.get('action'): + parser.add_argument(arg.get('arg'), action=arg.get('action'), help=arg.get('help')) + else: + parser.add_argument(arg.get('arg'), type=arg.get('type'), help=arg.get('help')) # Analyze the arguments return parser.parse_args() @@ -70,48 +72,28 @@ def __init__(self, additional_args=None): else: self.test_target_languages = environ.get('TARGET_LANGUAGES', 'Spanish').split(',') - # semi specific management for testing and for po files - for argument in additional_args: - arg = argument.get('arg')[2:] - # some ambiguous original sentences and their context translations for testing - if arg == 'original_phrase': - if args.original_phrase: - if not hasattr(args, 'context_translation'): - print("Error: context_translation must be set when original_phrase is set") - sys.exit(1) - - self.translations_testset = [{"original_phrase": args.original_phrase, - "context_translation": args.context_translation}] - else: - TEST_TRANSLATIONS = environ.get( - 'TEST_TRANSLATIONS', - """[{"original_phrase": "He gave her a ring.", "context_translation": "Il lui a donné une bague."}]""") - try: - self.translations_testset = json.loads(TEST_TRANSLATIONS) - except json.decoder.JSONDecodeError: - print("Error: TEST_TRANSLATIONS must be a valid JSON array\n", TEST_TRANSLATIONS) - sys.exit(1) - logger.debug(self.translations_testset) - elif arg == 'context_translation': - continue # already processed with original_phrase - else: - # for all other arguments, generic processing + # generic processing of additional arguments + if additional_args: + for argument in additional_args: + arg = argument.get('arg') + while arg.startswith('-'): + arg = arg[1:] val = getattr(args, arg) or environ.get(argument.get('env'), argument.get('default', None)) setattr(self, arg, val) def get_client(self): match self.llm_client: case 'ollama': - from 
openai_ollama_client import OllamaClient as LLMClient + from .openai_ollama_client import OllamaClient as LLMClient case 'openai': # uses OpenAI GPT-4o by default - from openai_ollama_client import OpenAIClient as LLMClient + from .openai_ollama_client import OpenAIClient as LLMClient case 'claude': # uses Claude Sonnet 3.5 by default - from claude_client import ClaudeClient as LLMClient + from .claude_client import ClaudeClient as LLMClient case 'claude_cached': # uses Claude Sonnet 3.5, cached mode for long system prompts - from claude_client import CachedClaudeClient as LLMClient + from .claude_client import CachedClaudeClient as LLMClient case _: raise Exception( f"LLM_CLIENT must be one of 'ollama', 'openai', 'claude' or 'claude_cached', not '{self.llm_client}'" diff --git a/src/openai_ollama_client.py b/src/auto_po_lyglot/openai_ollama_client.py similarity index 97% rename from src/openai_ollama_client.py rename to src/auto_po_lyglot/openai_ollama_client.py index d2051c4..6d9e176 100644 --- a/src/openai_ollama_client.py +++ b/src/auto_po_lyglot/openai_ollama_client.py @@ -1,4 +1,4 @@ -from base import TranspoClient, TranspoException +from .base import TranspoClient, TranspoException from openai import OpenAI diff --git a/src/po_main.py b/src/auto_po_lyglot/po_main.py similarity index 89% rename from src/po_main.py rename to src/auto_po_lyglot/po_main.py index 0fd907a..0295baa 100755 --- a/src/po_main.py +++ b/src/auto_po_lyglot/po_main.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -from getenv import TranspoParams +from .getenv import TranspoParams from pathlib import Path import polib -from base import Logger +from .base import Logger import langcodes logger = Logger(__name__) @@ -99,13 +99,8 @@ def main(): if entry.msgid and not entry.fuzzy: context_translation = entry.msgstr if entry.msgstr else entry.msgid original_phrase = entry.msgid - translation_result = client.translate(original_phrase, context_translation).split('\n') - translation = 
translation_result[0].strip('"') - explanation = 'Not provided' - if len(translation_result) > 1: - translation_result.pop(0) - translation_result = [line for line in translation_result if line] - explanation = '\n'.join(translation_result) + translation, explanation = client.translate(original_phrase, context_translation).split('\n') + if explanation: entry.comment = explanation # Update translation entry.msgstr = translation diff --git a/src/test_main.py b/src/test_main.py deleted file mode 100755 index bdf13b2..0000000 --- a/src/test_main.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python -from getenv import TranspoParams -from base import Logger - -logger = Logger(__name__) - - -class TestParams(TranspoParams): - description = """ -Generates a translation file using a given model and llm type. It reads the parameters from the command line, -and completes them when necessary from the content of .env in the same directory. -It iterates over a list of test translations containing the original phrase and its translation -within a context language, and for each target language, translates the original phrase -into the target language helped with the context translation, by using the provided client and -prompt implementation.""" - - -def get_outfile_name(model_name, params): - """ - Generates a unique output file name based on the given model name. - - Args: - model_name (str): The name of the model. - - Returns: - Path: A unique output file name in the format "{model_name}_output{i}.md". 
- """ - from pathlib import Path - p = Path(params.output_dir) - logger.vprint("Output directory:", p) - if not p.is_dir(): - raise ValueError(f"Output directory {p} does not exist.") - basefile_name = f"{model_name.replace(':', '-')}_output%i.md" - i = 0 - while True: - outfile_name = p / (basefile_name % i) - if not outfile_name.exists(): - logger.vprint("Output file:", outfile_name) - return outfile_name - i += 1 - - -def extract_csv_translations(output_file, params): - from csv_extractor import process_file - from pathlib import PurePath - import sys - csv_file = PurePath(output_file).with_suffix('.csv') - if not output_file.exists(): - print(f"Error: Input file '{output_file}' does not exist.") - sys.exit(1) - languages = [params.original_language, params.context_language] + params.test_target_languages - process_file(output_file, csv_file, languages) - logger.vprint("CSV extracted to file:", csv_file) - - -def main(): - """ - This is the main function of the program. It generates a translation file using a given model. - It iterates over a list of test translations containing the original phrase and its translation - within a context language, and for each target language, translates the original phrase - into the target language helped with the context translation, by using the provided client and - prompt implementation. - The translations are then written to an output file and printed to the console. - - Parameters: - None - - Returns: - None - """ - additional_args = [ - { - 'arg': '--output_dir', - 'env': 'OUTPUT_DIRECTORY', - 'type': str, - 'help': 'the directory where the output files will be stored', - 'default': '.' - }, - { - 'arg': '--original_phrase', - 'env': 'ORIGINAL_PHRASE', - 'type': str, - 'help': 'the sentence to be translated (otherwise, taken from .env). 
' - 'If this is provided, context_translation is required', - 'default': None - }, - { - 'arg': '--context_translation', - 'env': 'CONTEXT_TRANSLATION', - 'type': str, - 'help': 'the context translation related to the original phrase (otherwise, taken from .env)', - 'default': None - } - ] - - params = TestParams(additional_args) - - client = params.get_client() - - logger.vprint(f"Using model {client.params.model} for {params.original_language} -> {params.context_language} -> " - f"{params.test_target_languages} with an {params.llm_client} client") - outfile_name = get_outfile_name(client.params.model, params) - with outfile_name.open('w', newline='', encoding='utf-8') as outfile: - for tr in params.translations_testset: - for target_language in params.test_target_languages: - client.target_language = target_language - out = f""" -================= -{params.original_language}: "{tr['original_phrase']}", {params.context_language}: "{tr['context_translation']}", {target_language}: """ # noqa - logger.vprint(out, end='') - translation = client.translate(tr['original_phrase'], tr['context_translation']) - logger.vprint(translation) - outfile.write(out + translation) - outfile.close() - extract_csv_translations(outfile_name, params) - - -if __name__ == "__main__": - main() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/settings.py b/tests/settings.py new file mode 100644 index 0000000..3d66fe8 --- /dev/null +++ b/tests/settings.py @@ -0,0 +1,66 @@ +# Where tests results will be stored. 
Can be overriden on the command line +OUTPUT_DIRECTORY = "./tests/output" + +# Some ambiguous sentences in the ORIGINAL_LANGUAGE and their CONTEXT_LANGUAGE translations for testing +TEST_TRANSLATIONS = [ + { + "original_phrase": "She broke down", + "context_translation": "Elle est tombée en panne", + "target_translation": "Lei si è guastata" + }, + { + "original_phrase": "She broke down", + "context_translation": "Elle s'est effondrée", + "target_translation": "Lei si è sbandita" + }, + { + "original_phrase": "bank", + "context_translation": "rive", + "target_translation": "la banca" + }, + { + "original_phrase": "bank", + "context_translation": "banque", + "target_translation": "Banca" + }, + { + "original_phrase": "He saw the light.", + "context_translation": "Il a compris.", + "target_translation": "Lui è capitato la luce." + }, + { + "original_phrase": "He saw the light.", + "context_translation": "Il a vu la lumière.", + "target_translation": "Lui è stata vista la luce." + }, + { + "original_phrase": "She made a call.", + "context_translation": "Elle a passé un appel.", + "target_translation": "Lei ha fatto una chiamata." + }, + { + "original_phrase": "She made a call.", + "context_translation": "Elle a pris une décision.", + "target_translation": "Lei ha preso una decisione." + }, + { + "original_phrase": "They left the room.", + "context_translation": "Ils ont quitté la pièce.", + "target_translation": "Si sono andati dalla stanza." + }, + { + "original_phrase": "They left the room.", + "context_translation": "Ils ont laissé la pièce en l'état.", + "target_translation": "Si hanno lasciato la stanza." + }, + { + "original_phrase": "He gave her a ring.", + "context_translation": "Il lui a donné une bague.", + "target_translation": "Lui ha regalato un anello." + }, + { + "original_phrase": "He gave her a ring.", + "context_translation": "Il lui a passé un coup de fil.", + "target_translation": "Lui ha regalato un anello." 
+ } + ] diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100755 index 0000000..b49c943 --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +from auto_po_lyglot.getenv import TranspoParams +from auto_po_lyglot.base import Logger +from auto_po_lyglot.csv_extractor import process_file +from .settings import OUTPUT_DIRECTORY, TEST_TRANSLATIONS +from pathlib import PurePath, Path +import sys +import pytest + +logger = Logger(__name__) + + +@pytest.fixture(scope="class") +def params(): + return TranspoParams([ + {'arg': 'testdir', 'type': str, 'help': 'test directory'}, + {'arg': '-s', 'action': 'store_true', 'help': 'don\'t capture outputs'}, + ]) + + +@pytest.fixture(scope="class") +def llm_client(params): + return params.get_client() + + +@pytest.fixture(scope="class") +def output_file(llm_client): + p = Path(OUTPUT_DIRECTORY) + logger.vprint("Output directory:", p) + if not p.is_dir(): + raise ValueError(f"Output directory {p} does not exist.") + basefile_name = f"{llm_client.params.model.replace(':', '-')}_output%i.md" + i = 0 + while True: + outfile_name = p / (basefile_name % i) + if not outfile_name.exists(): + logger.vprint("Output file:", outfile_name) + return outfile_name + i += 1 + + +def extract_csv_translations(output_file, params): + csv_file = PurePath(output_file).with_suffix('.csv') + if not output_file.exists(): + print(f"Error: Input file '{output_file}' does not exist.") + sys.exit(1) + languages = [params.original_language, params.context_language] + params.test_target_languages + process_file(output_file, csv_file, languages) + logger.vprint("CSV extracted to file:", csv_file) + + +class TestTranspo: + @pytest.fixture(autouse=True, scope="class") + def setup(self, params, llm_client, output_file): + pass + + def test_main(self, params, llm_client, output_file): + + logger.vprint(f"Using model {llm_client.params.model} for {params.original_language} -> {params.context_language} -> " + 
f"{params.test_target_languages} with an {params.llm_client} client") + with output_file.open('w', newline='', encoding='utf-8') as outfile: + for target_language in params.test_target_languages: + llm_client.target_language = target_language + for tr in TEST_TRANSLATIONS: + out = f""" +================= +{params.original_language}: "{tr['original_phrase']}", {params.context_language}: "{tr['context_translation']}", {target_language}: """ # noqa + logger.vprint(out, end='') + translation, explanation = llm_client.translate(tr['original_phrase'], tr['context_translation']) + logger.vprint(translation) + outfile.write(f'{out} {translation}\n{explanation}\n\n') + assert translation == tr['target_translation'] + outfile.close() + extract_csv_translations(output_file, params) From 7eb20277f3869540a540b2b7500cbe833bfe7bcb Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 18:54:03 +0200 Subject: [PATCH 002/112] add ollama server --- .github/workflows/build-package.yaml | 10 +++++++--- .github/workflows/run-ollama.yaml | 29 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/run-ollama.yaml diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 13153e7..3206e2d 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -10,10 +10,14 @@ on: branches: [ "main" ] jobs: - build: - runs-on: ubuntu-latest + start-ollama-server: + uses: ./.github/workflows/run-ollama.yml + with: + model: phi3 + build: + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python 3.10 @@ -43,7 +47,7 @@ jobs: envkey_CONTEXT_LANGUAGE: "French" envkey_TARGET_LANGUAGES: "Italian" envkey_LLM_CLIENT: "ollama" - envkey_LLM_MODEL: "gemma2:2b" + envkey_LLM_MODEL: "phi3" envkey_OLLAMA_BASE_URL: "http://localhost:11434/v1" envkey_SYSTEM_PROMPT: $SYSTEM_PROMPT envkey_USER_PROMPT: $USER_PROMPT diff --git 
a/.github/workflows/run-ollama.yaml b/.github/workflows/run-ollama.yaml new file mode 100644 index 0000000..c481793 --- /dev/null +++ b/.github/workflows/run-ollama.yaml @@ -0,0 +1,29 @@ +name: reusable Ollama Setup + +on: + workflow_call: + inputs: + model: + required: true + type: string + description: "Name of Ollama model to be used" + +jobs: + setup-ollama: + runs-on: ubuntu-latest + steps: + - name: Install Ollama + run: | + curl -fsSL https://ollama.ai/install.sh | sh + ollama --version + + - name: Start Ollama service + run: | + ollama serve & + sleep 10 # Wait for service to start + + - name: Pull Ollama model + run: ollama pull ${{ inputs.model }} + + outputs: + ollama_ready: "true" \ No newline at end of file From ad0c6991caec0b4cfb1780464ce6687b56a61a68 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 18:55:13 +0200 Subject: [PATCH 003/112] added test branch --- .github/workflows/build-package.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 3206e2d..4a4731b 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -5,7 +5,7 @@ name: Python package on: push: - branches: [ "main" ] + branches: [ "main", "pypi-packaging" ] pull_request: branches: [ "main" ] From 8d24206744989f9ae682be960234d5f65ab83c83 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 18:56:44 +0200 Subject: [PATCH 004/112] typo --- .github/workflows/build-package.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 4a4731b..454f5c7 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -12,7 +12,7 @@ on: jobs: start-ollama-server: - uses: ./.github/workflows/run-ollama.yml + uses: ./.github/workflows/run-ollama.yaml with: model: phi3 From 
e4f96cdce981ca15f57df0924750ee62058866f5 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 18:59:22 +0200 Subject: [PATCH 005/112] missing output dir creation --- .github/workflows/build-package.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 454f5c7..f97d93c 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -57,4 +57,5 @@ jobs: sort_keys: false - name: Test with pytest run: | + mkdir -p tests/output pytest -s tests/ \ No newline at end of file From 8d0ea807b65bf6cb419358b0b714a891230d354c Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 19:06:30 +0200 Subject: [PATCH 006/112] wrong variable usage --- .github/workflows/build-package.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index f97d93c..38cce3f 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -11,10 +11,10 @@ on: jobs: - start-ollama-server: - uses: ./.github/workflows/run-ollama.yaml - with: - model: phi3 + # start-ollama-server: + # uses: ./.github/workflows/run-ollama.yaml + # with: + # model: phi3 build: runs-on: ubuntu-latest @@ -49,8 +49,8 @@ jobs: envkey_LLM_CLIENT: "ollama" envkey_LLM_MODEL: "phi3" envkey_OLLAMA_BASE_URL: "http://localhost:11434/v1" - envkey_SYSTEM_PROMPT: $SYSTEM_PROMPT - envkey_USER_PROMPT: $USER_PROMPT + envkey_SYSTEM_PROMPT: ${{ vars.SYSTEM_PROMPT }} + envkey_USER_PROMPT: ${{ vars.USER_PROMPT }} directory: tests file_name: .env fail_on_empty: false From c26c0307edd4efad69d428d0f4354536594269ce Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 19:09:38 +0200 Subject: [PATCH 007/112] traces to understand --- .github/workflows/build-package.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 38cce3f..e80e34a 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -58,4 +58,7 @@ jobs: - name: Test with pytest run: | mkdir -p tests/output + [[ -f .env ]] && cat .env || echo "No .env file found in root" + [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" + echo "SYSTEM_PROMPT=${{ vars.SYSTEM_PROMPT }}" pytest -s tests/ \ No newline at end of file From 8a9ebdcde55bf0435406417376df5d3396c18d2c Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 19:37:10 +0200 Subject: [PATCH 008/112] create the settings file for the 1rst run --- tests/test_main.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index b49c943..17349a2 100755 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -63,12 +63,20 @@ def test_main(self, params, llm_client, output_file): llm_client.target_language = target_language for tr in TEST_TRANSLATIONS: out = f""" -================= -{params.original_language}: "{tr['original_phrase']}", {params.context_language}: "{tr['context_translation']}", {target_language}: """ # noqa + {{ + "original_phrase": "{tr['original_phrase']}", # {params.original_language} + "context_translation": "{tr['context_translation']}", # {params.context_language} + "target_translation": """ logger.vprint(out, end='') translation, explanation = llm_client.translate(tr['original_phrase'], tr['context_translation']) - logger.vprint(translation) - outfile.write(f'{out} {translation}\n{explanation}\n\n') - assert translation == tr['target_translation'] + comment = explanation.replace('\n', '\n# ') + trans_exp = f"""{translation} # {target_language} + # {comment} + + }}, +""" + logger.vprint(trans_exp) + outfile.write(f'{out} {trans_exp}') + # assert translation == tr['target_translation'] outfile.close() 
extract_csv_translations(output_file, params) From 906d612341880d32d58d4bb32cc7a0ebe9bae918 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 19:37:24 +0200 Subject: [PATCH 009/112] more traces --- .github/workflows/build-package.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index e80e34a..687bb83 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -60,5 +60,5 @@ jobs: mkdir -p tests/output [[ -f .env ]] && cat .env || echo "No .env file found in root" [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" - echo "SYSTEM_PROMPT=${{ vars.SYSTEM_PROMPT }}" + echo "vars.SYSTEM_PROMPT=${{ vars.SYSTEM_PROMPT }}" pytest -s tests/ \ No newline at end of file From 6f137687f0bd978ec26873d1f8cbfa00ae611b7f Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 19:40:23 +0200 Subject: [PATCH 010/112] tries to execute pytest in tests dir to see .env --- .github/workflows/build-package.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 687bb83..eb05b91 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -58,7 +58,6 @@ jobs: - name: Test with pytest run: | mkdir -p tests/output - [[ -f .env ]] && cat .env || echo "No .env file found in root" - [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" - echo "vars.SYSTEM_PROMPT=${{ vars.SYSTEM_PROMPT }}" - pytest -s tests/ \ No newline at end of file + # [[ -f .env ]] && cat .env || echo "No .env file found in root" + # [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" + cd tests && pytest -s ./ \ No newline at end of file From 22f75b3ff67012874cbc4105585f51ec87c7b715 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 
2024 19:42:13 +0200 Subject: [PATCH 011/112] supposes the test runs in tests directory --- tests/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/settings.py b/tests/settings.py index 3d66fe8..177139b 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -1,5 +1,5 @@ # Where tests results will be stored. Can be overriden on the command line -OUTPUT_DIRECTORY = "./tests/output" +OUTPUT_DIRECTORY = "./output" # Some ambiguous sentences in the ORIGINAL_LANGUAGE and their CONTEXT_LANGUAGE translations for testing TEST_TRANSLATIONS = [ From 9f44860c3d7c19da58fc3eedf3c0b5c518fbf3be Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 19:51:24 +0200 Subject: [PATCH 012/112] more verbose --- src/auto_po_lyglot/getenv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index 408c6d1..c550b7f 100755 --- a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -50,7 +50,7 @@ def __init__(self, additional_args=None): "looks at args and returns an object with attributes of these args completed by the environ variables where needed" args = self.parse_args(additional_args) - load_dotenv(override=True) + load_dotenv(verbose=logger.verbose_mode, override=True) self.verbose = args.verbose or bool(environ.get('VERBOSE', False)) logger.set_verbose(self.verbose) From 5f24e0c83fa81fcbf353a96dcfdecc647bdabda3 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 20:03:15 +0200 Subject: [PATCH 013/112] force verbose loadenv --- src/auto_po_lyglot/getenv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index c550b7f..d2b7fb9 100755 --- a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -50,7 +50,7 @@ def __init__(self, additional_args=None): "looks at args and returns an object with attributes of these args completed by the environ 
variables where needed" args = self.parse_args(additional_args) - load_dotenv(verbose=logger.verbose_mode, override=True) + load_dotenv(verbose=True, override=True) self.verbose = args.verbose or bool(environ.get('VERBOSE', False)) logger.set_verbose(self.verbose) From 120f83049481e2e81b80e962fb940722b1b91100 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 20:15:48 +0200 Subject: [PATCH 014/112] use params in get_system_prompt also --- src/auto_po_lyglot/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/auto_po_lyglot/base.py b/src/auto_po_lyglot/base.py index 4dbbbad..ffcfb3d 100644 --- a/src/auto_po_lyglot/base.py +++ b/src/auto_po_lyglot/base.py @@ -34,16 +34,16 @@ def get_translation(self, phrase, context_translation): ... def get_system_prompt(self): - format = environ.get("SYSTEM_PROMPT", None) - logger.debug("system prompt format: ", format) + format = self.params.system_prompt if hasattr(self.params, 'system_prompt') else None if format is None: raise TranspoException("SYSTEM_PROMPT environment variable not set") - params = { + logger.debug("system prompt format: ", format) + prompt_params = { "original_language": self.params.original_language, "context_language": self.params.context_language, "target_language": self.target_language, } - system_prompt = format.format(**params) + system_prompt = format.format(**prompt_params) if self.first: logger.info("system prompt:\n", system_prompt) self.first = False From db03f9058117d9c9c673053f6db161a12d68b9a0 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 20:17:37 +0200 Subject: [PATCH 015/112] traces --- src/auto_po_lyglot/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/auto_po_lyglot/base.py b/src/auto_po_lyglot/base.py index ffcfb3d..0565443 100644 --- a/src/auto_po_lyglot/base.py +++ b/src/auto_po_lyglot/base.py @@ -36,6 +36,7 @@ def get_translation(self, phrase, context_translation): def get_system_prompt(self): 
format = self.params.system_prompt if hasattr(self.params, 'system_prompt') else None if format is None: + print(self.params) raise TranspoException("SYSTEM_PROMPT environment variable not set") logger.debug("system prompt format: ", format) prompt_params = { From e21dbc805aad3b2f876da4a630111f0cc08a1ee3 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 20:20:12 +0200 Subject: [PATCH 016/112] more traces --- src/auto_po_lyglot/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/base.py b/src/auto_po_lyglot/base.py index 0565443..705ba84 100644 --- a/src/auto_po_lyglot/base.py +++ b/src/auto_po_lyglot/base.py @@ -37,7 +37,9 @@ def get_system_prompt(self): format = self.params.system_prompt if hasattr(self.params, 'system_prompt') else None if format is None: print(self.params) - raise TranspoException("SYSTEM_PROMPT environment variable not set") + # raise TranspoException("SYSTEM_PROMPT environment variable not set") + print("SYSTEM_PROMPT environment variable not set") + sys.exit(1) logger.debug("system prompt format: ", format) prompt_params = { "original_language": self.params.original_language, From 54c7775332ec8a6f71c460b960188183b87cbb12 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 20:22:06 +0200 Subject: [PATCH 017/112] more more traces --- src/auto_po_lyglot/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/base.py b/src/auto_po_lyglot/base.py index 705ba84..251aea0 100644 --- a/src/auto_po_lyglot/base.py +++ b/src/auto_po_lyglot/base.py @@ -36,7 +36,7 @@ def get_translation(self, phrase, context_translation): def get_system_prompt(self): format = self.params.system_prompt if hasattr(self.params, 'system_prompt') else None if format is None: - print(self.params) + print(self.params._dict) # raise TranspoException("SYSTEM_PROMPT environment variable not set") print("SYSTEM_PROMPT environment variable not set") sys.exit(1) 
From bc454cdbef3f185b5402ac7b3a7dd41acfb688b1 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 20:23:40 +0200 Subject: [PATCH 018/112] typo --- src/auto_po_lyglot/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/base.py b/src/auto_po_lyglot/base.py index 251aea0..1f33bca 100644 --- a/src/auto_po_lyglot/base.py +++ b/src/auto_po_lyglot/base.py @@ -36,7 +36,7 @@ def get_translation(self, phrase, context_translation): def get_system_prompt(self): format = self.params.system_prompt if hasattr(self.params, 'system_prompt') else None if format is None: - print(self.params._dict) + print(self.params.__dict__) # raise TranspoException("SYSTEM_PROMPT environment variable not set") print("SYSTEM_PROMPT environment variable not set") sys.exit(1) From 55e017cfc871e465c5d19a0d3d387fdd50cc43f5 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 20:26:58 +0200 Subject: [PATCH 019/112] try another strategy for .env --- .github/workflows/build-package.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index eb05b91..a445deb 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -51,13 +51,13 @@ jobs: envkey_OLLAMA_BASE_URL: "http://localhost:11434/v1" envkey_SYSTEM_PROMPT: ${{ vars.SYSTEM_PROMPT }} envkey_USER_PROMPT: ${{ vars.USER_PROMPT }} - directory: tests + directory: . 
file_name: .env fail_on_empty: false sort_keys: false - name: Test with pytest run: | mkdir -p tests/output - # [[ -f .env ]] && cat .env || echo "No .env file found in root" - # [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" - cd tests && pytest -s ./ \ No newline at end of file + [[ -f .env ]] && cat .env || echo "No .env file found in root" + [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" + pytest -s ./tests \ No newline at end of file From 6c9c620040f1261f61c73a95b27dfdfb2f414855 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Mon, 26 Aug 2024 20:29:12 +0200 Subject: [PATCH 020/112] adapt settings to new strategy --- tests/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/settings.py b/tests/settings.py index 177139b..3d66fe8 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -1,5 +1,5 @@ # Where tests results will be stored. Can be overriden on the command line -OUTPUT_DIRECTORY = "./output" +OUTPUT_DIRECTORY = "./tests/output" # Some ambiguous sentences in the ORIGINAL_LANGUAGE and their CONTEXT_LANGUAGE translations for testing TEST_TRANSLATIONS = [ From 9364ade989ddb97a8191e4da9fe36d8b62661b22 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 18:39:58 +0200 Subject: [PATCH 021/112] added .venv --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1d5d999..8363bad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .env +.venv *.pyc __pycache__/ tests/output/ \ No newline at end of file From 6ab74f2654d4f7b0bc95bdd50874519b2e5d94e8 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 18:40:13 +0200 Subject: [PATCH 022/112] adapted to uv --- pyproject.toml | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e23824d..80e282b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,18 +12,29 @@ 
authors = [ description = "A Python package to translate po files using AI" readme = "README.md" requires-python = ">=3.10" -dependencies=["polib>=1.2.0", "openai>=1.12.0", "python-dotenv>=1.0.1", "anthropic>=0.34.1", "langcodes>=3.4.0"] +dependencies=[ + "polib>=1.2.0", + "openai>=1.12.0", + "python-dotenv>=1.0.1", + "anthropic>=0.34.1", + "langcodes>=3.4.0", + "pytest>=8.3.2", + "flake8>=7.1.1", +] classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Development Status :: 4 - Beta", - + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Development Status :: 4 - Beta", ] keywords = ["python", "poet", "translation", "llm", "AI"] +[tool.uv] +dev-dependencies = [ +] + [project.urls] Homepage = "https://github.com/leolivier/transpo" Repository = "https://github.com/leolivier/transpo.git" Issues = "https://github.com/leolivier/transpo/issues" -Download = "https://github.com/leolivier/transpo/archive/refs/tags/v1.0.0.tar.gz" \ No newline at end of file +Download = "https://github.com/leolivier/transpo/archive/refs/tags/v1.0.0.tar.gz" From 471a0ee573c7b54ef414317801021e70bf21647f Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 18:40:30 +0200 Subject: [PATCH 023/112] replaced by pyproject.toml --- requirements.txt | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 04522c1..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -polib>=1.2.0 -openai>=1.12.0 -python-dotenv>=1.0.1 -anthropic>=0.34.1 -langcodes>=3.4.0 \ No newline at end of file From 5396ed872cd1cc52754f9a1641af16b1e2f6df27 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 18:40:48 +0200 Subject: [PATCH 024/112] 1rst commit --- uv.lock | 738 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 738 insertions(+) create mode 100644 uv.lock diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..9463678 --- /dev/null +++ b/uv.lock @@ -0,0 +1,738 @@ +version = 1 +requires-python = ">=3.10" +resolution-markers = [ + "python_full_version < '3.13'", + "python_full_version >= '3.13'", +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + +[[package]] +name = "anthropic" +version = "0.34.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tokenizers" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/87/e2/98ff733ff75c1d371c029fb27eb9308f9c8e694749cea70382338a8e7e88/anthropic-0.34.1.tar.gz", hash = "sha256:69e822bd7a31ec11c2edb85f2147e8f0ee0cfd3288fea70b0ca8808b2f9bf91d", size = 901462 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/1c/1ce9edec76885badebacb4e31d42acffbdfd30dbaa839d5c378d57ac9aa9/anthropic-0.34.1-py3-none-any.whl", hash = "sha256:2fa26710809d0960d970f26cd0be3686437250a481edb95c33d837aa5fa24158", size = 891537 }, +] + +[[package]] +name = "anyio" +version = "4.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "idna" }, + { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e6/e3/c4c8d473d6780ef1853d630d581f70d655b4f8d7553c6997958c283039a2/anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94", size = 163930 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/a2/10639a79341f6c019dedc95bd48a4928eed9f1d1197f4c04f546fc7ae0ff/anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7", size = 86780 }, +] + +[[package]] +name = "auto-po-lyglot" +version = "1.0.0" +source = { editable = "." } +dependencies = [ + { name = "anthropic" }, + { name = "flake8" }, + { name = "langcodes" }, + { name = "openai" }, + { name = "polib" }, + { name = "pytest" }, + { name = "python-dotenv" }, +] + +[package.metadata] +requires-dist = [ + { name = "anthropic", specifier = ">=0.34.1" }, + { name = "flake8", specifier = ">=7.1.1" }, + { name = "langcodes", specifier = ">=3.4.0" }, + { name = "openai", specifier = ">=1.12.0" }, + { name = "polib", specifier = ">=1.2.0" }, + { name = "pytest", specifier = ">=8.3.2" }, + { name = "python-dotenv", specifier = ">=1.0.1" }, +] + +[[package]] +name = "certifi" +version = "2024.7.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/02/a95f2b11e207f68bc64d7aae9666fed2e2b3f307748d5123dffb72a1bbea/certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b", size = 164065 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1c/d5/c84e1a17bf61d4df64ca866a1c9a913874b4e9bdc131ec689a0ad013fb36/certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90", size = 162960 }, +] + +[[package]] +name = 
"charset-normalizer" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/63/09/c1bc53dab74b1816a00d8d030de5bf98f724c52c1635e07681d312f20be8/charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", size = 104809 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2b/61/095a0aa1a84d1481998b534177c8566fdc50bb1233ea9a0478cd3cc075bd/charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", size = 194219 }, + { url = "https://files.pythonhosted.org/packages/cc/94/f7cf5e5134175de79ad2059edf2adce18e0685ebdb9227ff0139975d0e93/charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", size = 122521 }, + { url = "https://files.pythonhosted.org/packages/46/6a/d5c26c41c49b546860cc1acabdddf48b0b3fb2685f4f5617ac59261b44ae/charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", size = 120383 }, + { url = "https://files.pythonhosted.org/packages/b8/60/e2f67915a51be59d4539ed189eb0a2b0d292bf79270410746becb32bc2c3/charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", size = 138223 }, + { url = "https://files.pythonhosted.org/packages/05/8c/eb854996d5fef5e4f33ad56927ad053d04dc820e4a3d39023f35cad72617/charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", size = 148101 }, + { url = 
"https://files.pythonhosted.org/packages/f6/93/bb6cbeec3bf9da9b2eba458c15966658d1daa8b982c642f81c93ad9b40e1/charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", size = 140699 }, + { url = "https://files.pythonhosted.org/packages/da/f1/3702ba2a7470666a62fd81c58a4c40be00670e5006a67f4d626e57f013ae/charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", size = 142065 }, + { url = "https://files.pythonhosted.org/packages/3f/ba/3f5e7be00b215fa10e13d64b1f6237eb6ebea66676a41b2bcdd09fe74323/charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", size = 144505 }, + { url = "https://files.pythonhosted.org/packages/33/c3/3b96a435c5109dd5b6adc8a59ba1d678b302a97938f032e3770cc84cd354/charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", size = 139425 }, + { url = "https://files.pythonhosted.org/packages/43/05/3bf613e719efe68fb3a77f9c536a389f35b95d75424b96b426a47a45ef1d/charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", size = 145287 }, + { url = "https://files.pythonhosted.org/packages/58/78/a0bc646900994df12e07b4ae5c713f2b3e5998f58b9d3720cce2aa45652f/charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", size = 149929 }, + { url = "https://files.pythonhosted.org/packages/eb/5c/97d97248af4920bc68687d9c3b3c0f47c910e21a8ff80af4565a576bd2f0/charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = 
"sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", size = 141605 }, + { url = "https://files.pythonhosted.org/packages/a8/31/47d018ef89f95b8aded95c589a77c072c55e94b50a41aa99c0a2008a45a4/charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", size = 142646 }, + { url = "https://files.pythonhosted.org/packages/ae/d5/4fecf1d58bedb1340a50f165ba1c7ddc0400252d6832ff619c4568b36cc0/charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", size = 92846 }, + { url = "https://files.pythonhosted.org/packages/a2/a0/4af29e22cb5942488cf45630cbdd7cefd908768e69bdd90280842e4e8529/charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", size = 100343 }, + { url = "https://files.pythonhosted.org/packages/68/77/02839016f6fbbf808e8b38601df6e0e66c17bbab76dff4613f7511413597/charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", size = 191647 }, + { url = "https://files.pythonhosted.org/packages/3e/33/21a875a61057165e92227466e54ee076b73af1e21fe1b31f1e292251aa1e/charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", size = 121434 }, + { url = "https://files.pythonhosted.org/packages/dd/51/68b61b90b24ca35495956b718f35a9756ef7d3dd4b3c1508056fa98d1a1b/charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", size = 118979 }, + { url = "https://files.pythonhosted.org/packages/e4/a6/7ee57823d46331ddc37dd00749c95b0edec2c79b15fc0d6e6efb532e89ac/charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", size = 136582 }, + { url = "https://files.pythonhosted.org/packages/74/f1/0d9fe69ac441467b737ba7f48c68241487df2f4522dd7246d9426e7c690e/charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", size = 146645 }, + { url = "https://files.pythonhosted.org/packages/05/31/e1f51c76db7be1d4aef220d29fbfa5dbb4a99165d9833dcbf166753b6dc0/charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", size = 139398 }, + { url = "https://files.pythonhosted.org/packages/40/26/f35951c45070edc957ba40a5b1db3cf60a9dbb1b350c2d5bef03e01e61de/charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", size = 140273 }, + { url = "https://files.pythonhosted.org/packages/07/07/7e554f2bbce3295e191f7e653ff15d55309a9ca40d0362fcdab36f01063c/charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", size = 142577 }, + { url = "https://files.pythonhosted.org/packages/d8/b5/eb705c313100defa57da79277d9207dc8d8e45931035862fa64b625bfead/charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", size = 137747 }, + { url = "https://files.pythonhosted.org/packages/19/28/573147271fd041d351b438a5665be8223f1dd92f273713cb882ddafe214c/charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", size = 143375 }, + { url = 
"https://files.pythonhosted.org/packages/cf/7c/f3b682fa053cc21373c9a839e6beba7705857075686a05c72e0f8c4980ca/charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", size = 148474 }, + { url = "https://files.pythonhosted.org/packages/1e/49/7ab74d4ac537ece3bc3334ee08645e231f39f7d6df6347b29a74b0537103/charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", size = 140232 }, + { url = "https://files.pythonhosted.org/packages/2d/dc/9dacba68c9ac0ae781d40e1a0c0058e26302ea0660e574ddf6797a0347f7/charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", size = 140859 }, + { url = "https://files.pythonhosted.org/packages/6c/c2/4a583f800c0708dd22096298e49f887b49d9746d0e78bfc1d7e29816614c/charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", size = 92509 }, + { url = "https://files.pythonhosted.org/packages/57/ec/80c8d48ac8b1741d5b963797b7c0c869335619e13d4744ca2f67fc11c6fc/charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", size = 99870 }, + { url = "https://files.pythonhosted.org/packages/d1/b2/fcedc8255ec42afee97f9e6f0145c734bbe104aac28300214593eb326f1d/charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", size = 192892 }, + { url = "https://files.pythonhosted.org/packages/2e/7d/2259318c202f3d17f3fe6438149b3b9e706d1070fe3fcbb28049730bb25c/charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", size = 122213 }, + { url = 
"https://files.pythonhosted.org/packages/3a/52/9f9d17c3b54dc238de384c4cb5a2ef0e27985b42a0e5cc8e8a31d918d48d/charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", size = 119404 }, + { url = "https://files.pythonhosted.org/packages/99/b0/9c365f6d79a9f0f3c379ddb40a256a67aa69c59609608fe7feb6235896e1/charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", size = 137275 }, + { url = "https://files.pythonhosted.org/packages/91/33/749df346e93d7a30cdcb90cbfdd41a06026317bfbfb62cd68307c1a3c543/charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", size = 147518 }, + { url = "https://files.pythonhosted.org/packages/72/1a/641d5c9f59e6af4c7b53da463d07600a695b9824e20849cb6eea8a627761/charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", size = 140182 }, + { url = "https://files.pythonhosted.org/packages/ee/fb/14d30eb4956408ee3ae09ad34299131fb383c47df355ddb428a7331cfa1e/charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", size = 141869 }, + { url = "https://files.pythonhosted.org/packages/df/3e/a06b18788ca2eb6695c9b22325b6fde7dde0f1d1838b1792a0076f58fe9d/charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", size = 144042 }, + { url = "https://files.pythonhosted.org/packages/45/59/3d27019d3b447a88fe7e7d004a1e04be220227760264cc41b405e863891b/charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", 
hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", size = 138275 }, + { url = "https://files.pythonhosted.org/packages/7b/ef/5eb105530b4da8ae37d506ccfa25057961b7b63d581def6f99165ea89c7e/charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", size = 144819 }, + { url = "https://files.pythonhosted.org/packages/a2/51/e5023f937d7f307c948ed3e5c29c4b7a3e42ed2ee0b8cdf8f3a706089bf0/charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", size = 149415 }, + { url = "https://files.pythonhosted.org/packages/24/9d/2e3ef673dfd5be0154b20363c5cdcc5606f35666544381bee15af3778239/charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", size = 141212 }, + { url = "https://files.pythonhosted.org/packages/5b/ae/ce2c12fcac59cb3860b2e2d76dc405253a4475436b1861d95fe75bdea520/charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", size = 142167 }, + { url = "https://files.pythonhosted.org/packages/ed/3a/a448bf035dce5da359daf9ae8a16b8a39623cc395a2ffb1620aa1bce62b0/charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", size = 93041 }, + { url = "https://files.pythonhosted.org/packages/b6/7c/8debebb4f90174074b827c63242c23851bdf00a532489fba57fef3416e40/charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", size = 100397 }, + { url = "https://files.pythonhosted.org/packages/28/76/e6222113b83e3622caa4bb41032d0b1bf785250607392e1b778aca0b8a7d/charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", 
size = 48543 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/09/35/2495c4ac46b980e4ca1f6ad6db102322ef3ad2410b79fdde159a4b0f3b92/exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc", size = 28883 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, +] + +[[package]] +name = "filelock" +version = "3.15.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url 
= "https://files.pythonhosted.org/packages/08/dd/49e06f09b6645156550fb9aee9cc1e59aba7efbc972d665a1bd6ae0435d4/filelock-3.15.4.tar.gz", hash = "sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb", size = 18007 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/f0/48285f0262fe47103a4a45972ed2f9b93e4c80b8fd609fa98da78b2a5706/filelock-3.15.4-py3-none-any.whl", hash = "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7", size = 16159 }, +] + +[[package]] +name = "flake8" +version = "7.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mccabe" }, + { name = "pycodestyle" }, + { name = "pyflakes" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/37/72/e8d66150c4fcace3c0a450466aa3480506ba2cae7b61e100a2613afc3907/flake8-7.1.1.tar.gz", hash = "sha256:049d058491e228e03e67b390f311bbf88fce2dbaa8fa673e7aea87b7198b8d38", size = 48054 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/42/65004373ac4617464f35ed15931b30d764f53cdd30cc78d5aea349c8c050/flake8-7.1.1-py2.py3-none-any.whl", hash = "sha256:597477df7860daa5aa0fdd84bf5208a043ab96b8e96ab708770ae0364dd03213", size = 57731 }, +] + +[[package]] +name = "fsspec" +version = "2024.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/90/b6/eba5024a9889fcfff396db543a34bef0ab9d002278f163129f9f01005960/fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49", size = 284584 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/44/73bea497ac69bafde2ee4269292fa3b41f1198f4bb7bbaaabde30ad29d4a/fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e", size = 177561 }, +] + +[[package]] +name = "h11" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f5/38/3af3d3633a34a3316095b39c8e8fb4853a28a536e55d347bd8d8e9a14b03/h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", size = 100418 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, +] + +[[package]] +name = "httpcore" +version = "1.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/b0/5e8b8674f8d203335a62fdfcfa0d11ebe09e23613c3391033cbba35f7926/httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61", size = 83234 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/d4/e5d7e4f2174f8a4d63c8897d79eb8fe2503f7ecc03282fee1fa2719c2704/httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5", size = 77926 }, +] + +[[package]] +name = "httpx" +version = "0.27.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, + { name = "sniffio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/82/08f8c936781f67d9e6b9eeb8a0c8b4e406136ea4c3d1f89a5db71d42e0e6/httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2", size = 144189 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/95/9377bcb415797e44274b51d46e3249eba641711cf3348050f76ee7b15ffc/httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0", size = 76395 }, +] + +[[package]] +name = "huggingface-hub" +version = "0.24.6" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/65/24/b98fce967b7d63700e5805b915012ba25bb538a81fcf11e97f3cc3f4f012/huggingface_hub-0.24.6.tar.gz", hash = "sha256:cc2579e761d070713eaa9c323e3debe39d5b464ae3a7261c39a9195b27bb8000", size = 349200 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/8f/d6718641c14d98a5848c6a24d2376028d292074ffade0702940a4b1dde76/huggingface_hub-0.24.6-py3-none-any.whl", hash = "sha256:a990f3232aa985fe749bc9474060cbad75e8b2f115f6665a9fda5b9c97818970", size = 417509 }, +] + +[[package]] +name = "idna" +version = "3.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/ac/e349c5e6d4543326c6883ee9491e3921e0d07b55fdf3cce184b40d63e72a/idna-3.8.tar.gz", hash = "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603", size = 189467 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/7e/d71db821f177828df9dea8c42ac46473366f191be53080e552e628aad991/idna-3.8-py3-none-any.whl", hash = "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac", size = 66894 }, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, +] + +[[package]] +name = "jiter" 
+version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/1a/aa64be757afc614484b370a4d9fc1747dc9237b37ce464f7f9d9ca2a3d38/jiter-0.5.0.tar.gz", hash = "sha256:1d916ba875bcab5c5f7d927df998c4cb694d27dceddf3392e58beaf10563368a", size = 158300 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/af/09/f659fc67d6aaa82c56432c4a7cc8365fff763acbf1c8f24121076617f207/jiter-0.5.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b599f4e89b3def9a94091e6ee52e1d7ad7bc33e238ebb9c4c63f211d74822c3f", size = 284126 }, + { url = "https://files.pythonhosted.org/packages/07/2d/5bdaddfefc44f91af0f3340e75ef327950d790c9f86490757ac8b395c074/jiter-0.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a063f71c4b06225543dddadbe09d203dc0c95ba352d8b85f1221173480a71d5", size = 299265 }, + { url = "https://files.pythonhosted.org/packages/74/bd/964485231deaec8caa6599f3f27c8787a54e9f9373ae80dcfbda2ad79c02/jiter-0.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acc0d5b8b3dd12e91dd184b87273f864b363dfabc90ef29a1092d269f18c7e28", size = 332178 }, + { url = "https://files.pythonhosted.org/packages/cf/4f/6353179174db10254549bbf2eb2c7ea102e59e0460ee374adb12071c274d/jiter-0.5.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c22541f0b672f4d741382a97c65609332a783501551445ab2df137ada01e019e", size = 342533 }, + { url = "https://files.pythonhosted.org/packages/76/6f/21576071b8b056ef743129b9dacf9da65e328b58766f3d1ea265e966f000/jiter-0.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63314832e302cc10d8dfbda0333a384bf4bcfce80d65fe99b0f3c0da8945a91a", size = 363469 }, + { url = "https://files.pythonhosted.org/packages/73/a1/9ef99a279c72a031dbe8a4085db41e3521ae01ab0058651d6ccc809a5e93/jiter-0.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a25fbd8a5a58061e433d6fae6d5298777c0814a8bcefa1e5ecfff20c594bd749", size = 379078 }, + { url = "https://files.pythonhosted.org/packages/41/6a/c038077509d67fe876c724bfe9ad15334593851a7def0d84518172bdd44a/jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:503b2c27d87dfff5ab717a8200fbbcf4714516c9d85558048b1fc14d2de7d8dc", size = 318943 }, + { url = "https://files.pythonhosted.org/packages/67/0d/d82673814eb38c208b7881581df596e680f8c2c003e2b80c25ca58975ee4/jiter-0.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d1f3d27cce923713933a844872d213d244e09b53ec99b7a7fdf73d543529d6d", size = 357394 }, + { url = "https://files.pythonhosted.org/packages/56/9e/cbd8f6612346c38cc42e41e35cda19ce78f5b12e4106d1186e8e95ee839b/jiter-0.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c95980207b3998f2c3b3098f357994d3fd7661121f30669ca7cb945f09510a87", size = 511080 }, + { url = "https://files.pythonhosted.org/packages/ff/33/135c0c33565b6d5c3010d047710837427dd24c9adbc9ca090f3f92df446e/jiter-0.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:afa66939d834b0ce063f57d9895e8036ffc41c4bd90e4a99631e5f261d9b518e", size = 492827 }, + { url = "https://files.pythonhosted.org/packages/68/c1/491a8ef682508edbaf2a32e41c1b1e34064078b369b0c2d141170999d1c9/jiter-0.5.0-cp310-none-win32.whl", hash = "sha256:f16ca8f10e62f25fd81d5310e852df6649af17824146ca74647a018424ddeccf", size = 195081 }, + { url = "https://files.pythonhosted.org/packages/31/20/8cda4faa9571affea6130b150289522a22329778bdfa45a7aab4e7edff95/jiter-0.5.0-cp310-none-win_amd64.whl", hash = "sha256:b2950e4798e82dd9176935ef6a55cf6a448b5c71515a556da3f6b811a7844f1e", size = 190977 }, + { url = "https://files.pythonhosted.org/packages/94/5f/3ac960ed598726aae46edea916e6df4df7ff6fe084bc60774b95cf3154e6/jiter-0.5.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d4c8e1ed0ef31ad29cae5ea16b9e41529eb50a7fba70600008e9f8de6376d553", size = 284131 }, + { url = 
"https://files.pythonhosted.org/packages/03/eb/2308fa5f5c14c97c4c7720fef9465f1fa0771826cddb4eec9866bdd88846/jiter-0.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c6f16e21276074a12d8421692515b3fd6d2ea9c94fd0734c39a12960a20e85f3", size = 299310 }, + { url = "https://files.pythonhosted.org/packages/3c/f6/dba34ca10b44715fa5302b8e8d2113f72eb00a9297ddf3fa0ae4fd22d1d1/jiter-0.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5280e68e7740c8c128d3ae5ab63335ce6d1fb6603d3b809637b11713487af9e6", size = 332282 }, + { url = "https://files.pythonhosted.org/packages/69/f7/64e0a7439790ec47f7681adb3871c9d9c45fff771102490bbee5e92c00b7/jiter-0.5.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:583c57fc30cc1fec360e66323aadd7fc3edeec01289bfafc35d3b9dcb29495e4", size = 342370 }, + { url = "https://files.pythonhosted.org/packages/55/31/1efbfff2ae8e4d919144c53db19b828049ad0622a670be3bbea94a86282c/jiter-0.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:26351cc14507bdf466b5f99aba3df3143a59da75799bf64a53a3ad3155ecded9", size = 363591 }, + { url = "https://files.pythonhosted.org/packages/30/c3/7ab2ca2276426a7398c6dfb651e38dbc81954c79a3bfbc36c514d8599499/jiter-0.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4829df14d656b3fb87e50ae8b48253a8851c707da9f30d45aacab2aa2ba2d614", size = 378551 }, + { url = "https://files.pythonhosted.org/packages/47/e7/5d88031cd743c62199b125181a591b1671df3ff2f6e102df85c58d8f7d31/jiter-0.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a42a4bdcf7307b86cb863b2fb9bb55029b422d8f86276a50487982d99eed7c6e", size = 319152 }, + { url = "https://files.pythonhosted.org/packages/4c/2d/09ea58e1adca9f0359f3d41ef44a1a18e59518d7c43a21f4ece9e72e28c0/jiter-0.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04d461ad0aebf696f8da13c99bc1b3e06f66ecf6cfd56254cc402f6385231c06", size = 357377 }, + { url = 
"https://files.pythonhosted.org/packages/7d/2f/83ff1058cb56fc3ff73e0d3c6440703ddc9cdb7f759b00cfbde8228fc435/jiter-0.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6375923c5f19888c9226582a124b77b622f8fd0018b843c45eeb19d9701c403", size = 511091 }, + { url = "https://files.pythonhosted.org/packages/ae/c9/4f85f97c9894382ab457382337aea0012711baaa17f2ed55c0ff25f3668a/jiter-0.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2cec323a853c24fd0472517113768c92ae0be8f8c384ef4441d3632da8baa646", size = 492948 }, + { url = "https://files.pythonhosted.org/packages/4d/f2/2e987e0eb465e064c5f52c2f29c8d955452e3b316746e326269263bfb1b7/jiter-0.5.0-cp311-none-win32.whl", hash = "sha256:aa1db0967130b5cab63dfe4d6ff547c88b2a394c3410db64744d491df7f069bb", size = 195183 }, + { url = "https://files.pythonhosted.org/packages/ab/59/05d1c3203c349b37c4dd28b02b9b4e5915a7bcbd9319173b4548a67d2e93/jiter-0.5.0-cp311-none-win_amd64.whl", hash = "sha256:aa9d2b85b2ed7dc7697597dcfaac66e63c1b3028652f751c81c65a9f220899ae", size = 191032 }, + { url = "https://files.pythonhosted.org/packages/aa/bd/c3950e2c478161e131bed8cb67c36aed418190e2a961a1c981e69954e54b/jiter-0.5.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9f664e7351604f91dcdd557603c57fc0d551bc65cc0a732fdacbf73ad335049a", size = 283511 }, + { url = "https://files.pythonhosted.org/packages/80/1c/8ce58d8c37a589eeaaa5d07d131fd31043886f5e77ab50c00a66d869a361/jiter-0.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:044f2f1148b5248ad2c8c3afb43430dccf676c5a5834d2f5089a4e6c5bbd64df", size = 296974 }, + { url = "https://files.pythonhosted.org/packages/4d/b8/6faeff9eed8952bed93a77ea1cffae7b946795b88eafd1a60e87a67b09e0/jiter-0.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:702e3520384c88b6e270c55c772d4bd6d7b150608dcc94dea87ceba1b6391248", size = 331897 }, + { url = 
"https://files.pythonhosted.org/packages/4f/54/1d9a2209b46d39ce6f0cef3ad87c462f9c50312ab84585e6bd5541292b35/jiter-0.5.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:528d742dcde73fad9d63e8242c036ab4a84389a56e04efd854062b660f559544", size = 342962 }, + { url = "https://files.pythonhosted.org/packages/2a/de/90360be7fc54b2b4c2dfe79eb4ed1f659fce9c96682e6a0be4bbe71371f7/jiter-0.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8cf80e5fe6ab582c82f0c3331df27a7e1565e2dcf06265afd5173d809cdbf9ba", size = 363844 }, + { url = "https://files.pythonhosted.org/packages/ba/ad/ef32b173191b7a53ea8a6757b80723cba321f8469834825e8c71c96bde17/jiter-0.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:44dfc9ddfb9b51a5626568ef4e55ada462b7328996294fe4d36de02fce42721f", size = 378709 }, + { url = "https://files.pythonhosted.org/packages/07/de/353ce53743c0defbbbd652e89c106a97dbbac4eb42c95920b74b5056b93a/jiter-0.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c451f7922992751a936b96c5f5b9bb9312243d9b754c34b33d0cb72c84669f4e", size = 319038 }, + { url = "https://files.pythonhosted.org/packages/3f/92/42d47310bf9530b9dece9e2d7c6d51cf419af5586ededaf5e66622d160e2/jiter-0.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:308fce789a2f093dca1ff91ac391f11a9f99c35369117ad5a5c6c4903e1b3e3a", size = 357763 }, + { url = "https://files.pythonhosted.org/packages/bd/8c/2bb76a9a84474d48fdd133d3445db8a4413da4e87c23879d917e000a9d87/jiter-0.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7f5ad4a7c6b0d90776fdefa294f662e8a86871e601309643de30bf94bb93a64e", size = 511031 }, + { url = "https://files.pythonhosted.org/packages/33/4f/9f23d79c0795e0a8e56e7988e8785c2dcda27e0ed37977256d50c77c6a19/jiter-0.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ea189db75f8eca08807d02ae27929e890c7d47599ce3d0a6a5d41f2419ecf338", size = 493042 }, + { url = 
"https://files.pythonhosted.org/packages/df/67/8a4f975aa834b8aecdb6b131422390173928fd47f42f269dcc32034ab432/jiter-0.5.0-cp312-none-win32.whl", hash = "sha256:e3bbe3910c724b877846186c25fe3c802e105a2c1fc2b57d6688b9f8772026e4", size = 195405 }, + { url = "https://files.pythonhosted.org/packages/15/81/296b1e25c43db67848728cdab34ac3eb5c5cbb4955ceb3f51ae60d4a5e3d/jiter-0.5.0-cp312-none-win_amd64.whl", hash = "sha256:a586832f70c3f1481732919215f36d41c59ca080fa27a65cf23d9490e75b2ef5", size = 189720 }, +] + +[[package]] +name = "langcodes" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "language-data" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/57/a8/eef8555bb38e9a41451deabd7962b68764f1681bfd43a73e37a0586d8d04/langcodes-3.4.0.tar.gz", hash = "sha256:ae5a77d1a01d0d1e91854a671890892b7ce9abb601ab7327fc5c874f899e1979", size = 190271 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/70/4058ab0ebb082b18d06888e711baed7f33354a5e0b363bb627586d8c323a/langcodes-3.4.0-py3-none-any.whl", hash = "sha256:10a4cc078b8e8937d8485d3352312a0a89a3125190db9f2bb2074250eef654e9", size = 182028 }, +] + +[[package]] +name = "language-data" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "marisa-trie" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/53/d3657025d32bfacc832769ab3c925f8f4ad2165cd2c8467c2446b21400d1/language_data-1.2.0.tar.gz", hash = "sha256:82a86050bbd677bfde87d97885b17566cfe75dad3ac4f5ce44b52c28f752e773", size = 5137321 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/5f/139464da89c49afcc8bb97ebad48818a535220ce01b1f24c61fb80dbe4d0/language_data-1.2.0-py3-none-any.whl", hash = "sha256:77d5cab917f91ee0b2f1aa7018443e911cf8985ef734ca2ba3940770f6a3816b", size = 5385777 }, +] + +[[package]] +name = "marisa-trie" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ff/3b/e3b9e08c393acc474e7a60df6f5e180af103ad25b0c29cee3ce2564447eb/marisa_trie-1.2.0.tar.gz", hash = "sha256:fedfc67497f8aa2757756b5cf493759f245d321fb78914ce125b6d75daa89b5f", size = 415819 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/79/8fbfbcdcbdacc0de2c4c9bd43d0183e4d18ad84669d9573fb8cf3ee69eb1/marisa_trie-1.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:61fab91fef677f0af0e818e61595f2334f7e0b3e122b24ec65889aae69ba468d", size = 361059 }, + { url = "https://files.pythonhosted.org/packages/8c/bf/b088cf63f633f27b7db3e9b8ca4c762f7e1ae51a80e65e9eff24ed908bbe/marisa_trie-1.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f5b3080316de735bd2b07265de5eea3ae176fa2fc60f9871aeaa9cdcddfc8f7", size = 191605 }, + { url = "https://files.pythonhosted.org/packages/6b/f8/1933a0d1c18a3c3d747653fb0f41764322f4b4dc039bbaf3dd4ec62e9831/marisa_trie-1.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:77bfde3287314e91e28d3a882c7b87519ef0ee104c921df72c7819987d5e4863", size = 174143 }, + { url = "https://files.pythonhosted.org/packages/f9/f3/5f777b96e1270faa50e9fb70815f609e79fc9770c7f44a77574de8c8c37b/marisa_trie-1.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4fbb1ec1d9e891060a0aee9f9c243acec63de1e197097a14850ba38ec8a4013", size = 1314687 }, + { url = "https://files.pythonhosted.org/packages/ab/28/b10fb434253724d6e2476a9232ce138fbb92bd9feb402217c71ce390ba10/marisa_trie-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e04e9c86fe8908b61c2aebb8444217cacaed15b93d2dccaac3849e36a6dc660", size = 1346557 }, + { url = "https://files.pythonhosted.org/packages/81/15/7f36ebc52fd2220e0f496b1f4ac129350cf04b5be0b98de9a1d259b0f00d/marisa_trie-1.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:5a7c75a508f44e40f7af8448d466045a97534adcbb026e63989407cefb9ebfa6", size = 1307170 }, + { url = "https://files.pythonhosted.org/packages/ab/8a/acb24216a365802fd901ad294ed9cf848b943323d58e4564cbfa0799c745/marisa_trie-1.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5321211647609869907e81b0230ad2dfdfa7e19fe1ee469b46304a622391e6a1", size = 2194723 }, + { url = "https://files.pythonhosted.org/packages/b2/1c/293010ccfad51f6e1176c24a0b547c73880fcac689e721f0d0b074daca50/marisa_trie-1.2.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:88660e6ee0f821872aaf63ba4b9a7513428b9cab20c69cc013c368bd72c3a4fe", size = 2356904 }, + { url = "https://files.pythonhosted.org/packages/14/56/24c6552d6b79560f0d28c12d2781b174d975750e6c847239b5a82eb1eaf7/marisa_trie-1.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4e4535fc5458de2b59789e574cdd55923d63de5612dc159d33941af79cd62786", size = 2290024 }, + { url = "https://files.pythonhosted.org/packages/10/7c/db6f389cbde0294fa84c4e4bdafd8485d3f7a66c978f311f138a566beb64/marisa_trie-1.2.0-cp310-cp310-win32.whl", hash = "sha256:bdd1d4d430e33abbe558971d1bd57da2d44ca129fa8a86924c51437dba5cb345", size = 130278 }, + { url = "https://files.pythonhosted.org/packages/7a/f6/53981a750463d6b0c380d15839578a71b17cd4b1d6f26d11fa5515ea5b96/marisa_trie-1.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:c729e2b8f9699874b1372b5a01515b340eda1292f5e08a3fe4633b745f80ad7a", size = 152358 }, + { url = "https://files.pythonhosted.org/packages/02/63/3aa7a8794bd0ec8e682cc3d67ef53d863c6c6847b80d24bd36b89d9b55ab/marisa_trie-1.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d62985a0e6f2cfeb36cd6afa0460063bbe83ef4bfd9afe189a99103487547210", size = 361865 }, + { url = "https://files.pythonhosted.org/packages/2c/e1/b3eb01992e3b326cb95989e41692be229b50bd7a7abc23a84e95cd2b9566/marisa_trie-1.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1890cc993149db4aa8242973526589e8133c3f92949b0ac74c2c9a6596707ae3", size = 192055 }, + { url = 
"https://files.pythonhosted.org/packages/8f/b2/ad317634caae0f38a84170abc08ac57e13fd8db16507b457b8fabee0e153/marisa_trie-1.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:26177cd0dadb7b44f47c17c40e16ac157c4d22ac7ed83b5a47f44713239e10d1", size = 174554 }, + { url = "https://files.pythonhosted.org/packages/95/9d/d0cad0d771e830fdfd51a0435c47a99bb44bd27d01831ec6fd11e4f3ac28/marisa_trie-1.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3425dc81d49a374be49e3a063cb6ccdf57973201b0a30127082acea50562a85e", size = 1384672 }, + { url = "https://files.pythonhosted.org/packages/09/a8/2a08ba5cc9040d478ea727abe95b00926c34f792065de95fb2db5fe922e8/marisa_trie-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:525b8df41a1a7337ed7f982eb63b704d7d75f047e30970fcfbe9cf6fc22c5991", size = 1412802 }, + { url = "https://files.pythonhosted.org/packages/ef/0b/f9404c2df8d35ade65478f9abb88d6ce7077c5deba6e69307085da81d0ce/marisa_trie-1.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c643c66bbde6a115e4ec8713c087a9fe9cb7b7c684e6af4cf448c120fa427ea4", size = 1362330 }, + { url = "https://files.pythonhosted.org/packages/f2/82/274869c51f619be33109d23191f03cd5f92f611ee973bc7b23e6295f1245/marisa_trie-1.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5a83fe83e0eab9154a2dc7c556898c86584b7779ddf4214c606fce4ceff07c13", size = 2257696 }, + { url = "https://files.pythonhosted.org/packages/d7/97/9752b5c109d2410749a00517fe7abd301d755ddf4489529db45721b7586f/marisa_trie-1.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:49701db6bb8f1ec0133abd95f0a4891cfd6f84f3bd019e343037e31a5a5b0210", size = 2417409 }, + { url = "https://files.pythonhosted.org/packages/48/1e/380877607af8a9668265be602d31cfc8d5e1bf6569ebe97221542f5cb222/marisa_trie-1.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a3f0562863deaad58c5dc3a51f706da92582bc9084189148a45f7a12fe261a51", size = 
2352055 }, + { url = "https://files.pythonhosted.org/packages/11/ce/19b0721caa8ec606062d56437ae3661364f18d850dc54472032bcda8dd85/marisa_trie-1.2.0-cp311-cp311-win32.whl", hash = "sha256:b08968ccad00f54f31e38516e4452fae59dd15a3fcee56aea3101ba2304680b3", size = 130035 }, + { url = "https://files.pythonhosted.org/packages/61/28/b93cd14cd422be8fc091bd454dd48edbf0c2333111183db38c8e5a13e468/marisa_trie-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3ef375491e7dd71a0a7e7bf288c88750942bd1ee0c379dcd6ad43e31af67d00", size = 152571 }, + { url = "https://files.pythonhosted.org/packages/6d/f1/96d2dffe115b6ca92f59a5d9cc8f7bd68715f50e38f132b92cad8bf88b2d/marisa_trie-1.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:39b88f126988ea83e8458259297d2b2f9391bfba8f4dc5d7a246813aae1c1def", size = 358929 }, + { url = "https://files.pythonhosted.org/packages/dc/ff/f00f14025d6afd87614e5ab40762e6bf33b49c3754105d62621e321cf7e8/marisa_trie-1.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ec167b006884a90d130ee30518a9aa44cb40211f702bf07031b2d7d4d1db569b", size = 189923 }, + { url = "https://files.pythonhosted.org/packages/da/21/d261bfde464531614bf5b747dc854de8a9eeb68c5dc2b49ab765174f19cb/marisa_trie-1.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b855e6286faef5411386bf9d676dfb545c09f7d109f197f347c9366aeb12f07", size = 173558 }, + { url = "https://files.pythonhosted.org/packages/2b/8a/9f8fae02c25a9dedb3c6815be273be4d82846307ebf5f74caaa12041bb80/marisa_trie-1.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cd287ff323224d87c2b739cba39614aac3737c95a254e0ff70e77d9b8df226d", size = 1354922 }, + { url = "https://files.pythonhosted.org/packages/56/6c/0a8e06daa6872e0d2a0ad4316f8382a29defc44bca14a9fc1fce2c3bc098/marisa_trie-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d8a1c0361165231f4fb915237470afc8cc4803c535f535f4fc42ca72855b124", size = 1391023 }, + { url = 
"https://files.pythonhosted.org/packages/8e/12/0680e07b7fe8c03332cf43c9833a7bf7515f8469f410dd02e51f00e10a03/marisa_trie-1.2.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3267f438d258d7d85ee3dde363c4f96c3196ca9cd9e63fe429a59543cc544b15", size = 1328760 }, + { url = "https://files.pythonhosted.org/packages/0d/69/2752c621f327a83e57da0db13a5070176cb7cbde565789b0384910a15522/marisa_trie-1.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7c87a0c2cccce12b07bfcb70708637c0816970282d966a1531ecda1a24bd1cc8", size = 2230772 }, + { url = "https://files.pythonhosted.org/packages/e1/8f/7744cd602c76fee9543898151b34b0dcd0200e51aa41be5b28fc20a5313a/marisa_trie-1.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d3c0e38f0501951e2322f7274a39b8e2344bbd91ceaa9da439f46022570ddc9d", size = 2384385 }, + { url = "https://files.pythonhosted.org/packages/79/6b/4f33d4eefeb3c8dbe6a24cbe20e1f9d96bcb3b261885fa80e386216d7f86/marisa_trie-1.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cd88a338c87e6dc130b6cea7b697580c21f0c83a8a8b46671cfecbb713d3fe24", size = 2329101 }, + { url = "https://files.pythonhosted.org/packages/cb/59/4a30fb46105fe84d30dcb913cb30375b77c473eaed13929d1802464e6a4d/marisa_trie-1.2.0-cp312-cp312-win32.whl", hash = "sha256:5cea60975184f03fbcff51339df0eb44d2abe106a1693983cc64415eb87b897b", size = 128696 }, + { url = "https://files.pythonhosted.org/packages/cd/93/372821821c62624a258f96de1add12ce032c1eca12eb2ed492c9b4462fcb/marisa_trie-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:b04a07b99b62b9bdf3eaf1d44571a3293ce249ce8971944e780c9c709593462f", size = 151131 }, + { url = "https://files.pythonhosted.org/packages/e3/48/81cbc54ec66e30d2199428d1227019f60b16e4d8e9cfbdb5864110a52d82/marisa_trie-1.2.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:035c4c8f3b313b4d7b7451ddd539da811a11077a9e359c6a0345f816b1bdccb3", size = 159998 }, + { url = 
"https://files.pythonhosted.org/packages/10/a1/eaea55807ac4258f1109193df9765808488c1203d8ae7a645b8d7fcd4c32/marisa_trie-1.2.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d4f05c2ee218a5ab09d269b640d06f9708b0cf37c842344cbdffb0661c74c472", size = 148019 }, + { url = "https://files.pythonhosted.org/packages/fc/ad/87dd9c792e3107112e56ac57bf080a14f68a38df688e5c943994c32a5851/marisa_trie-1.2.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92ac63e1519598de946c7d9346df3bb52ed96968eb3021b4e89b51d79bc72a86", size = 169764 }, + { url = "https://files.pythonhosted.org/packages/f3/ca/d63f8811e22ddc7f908c9618226b3e5370468bdca931bde7803760df90e0/marisa_trie-1.2.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:045f32eaeb5dcdb5beadb571ba616d7a34141764b616eebb4decce71b366f5fa", size = 184322 }, + { url = "https://files.pythonhosted.org/packages/42/5e/878f8331b17a654d512674af6e82449d67d168ddf4f415304f049af87434/marisa_trie-1.2.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb60c2f9897ce2bfc31a69ac25a040de4f8643ab2a339bb0ff1185e1a9dedaf8", size = 187999 }, + { url = "https://files.pythonhosted.org/packages/69/54/8f1255f6621aa355a8a1c8bae4f8e0b7494b64390dfca707ac91a58622d1/marisa_trie-1.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f19c5fcf23c02f1303deb69c67603ee37ed8f01de2d8b19f1716a6cf5afd5455", size = 141708 }, +] + +[[package]] +name = "mccabe" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/ff/0ffefdcac38932a54d2b5eed4e0ba8a408f215002cd178ad1df0f2806ff8/mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325", size = 9658 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/1a/1f68f9ba0c207934b35b86a8ca3aad8395a3d6dd7921c0686e23853ff5a9/mccabe-0.7.0-py2.py3-none-any.whl", hash 
= "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e", size = 7350 }, +] + +[[package]] +name = "openai" +version = "1.42.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8c/1f/310b0b5efb6178ad9f9ca4a80b2ead3cb7cbc16a1b843941bcf1c52dd884/openai-1.42.0.tar.gz", hash = "sha256:c9d31853b4e0bc2dc8bd08003b462a006035655a701471695d0bfdc08529cde3", size = 290549 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/9e/d77569d06e365f093977d94f305a395b7ac5ccd746016a2e8dd34c4e20c1/openai-1.42.0-py3-none-any.whl", hash = "sha256:dc91e0307033a4f94931e5d03cc3b29b9717014ad5e73f9f2051b6cb5eda4d80", size = 362858 }, +] + +[[package]] +name = "packaging" +version = "24.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/51/65/50db4dda066951078f0a96cf12f4b9ada6e4b811516bf0262c0f4f7064d4/packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", size = 148788 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124", size = 53985 }, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, +] + +[[package]] +name = "polib" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/10/9a/79b1067d27e38ddf84fe7da6ec516f1743f31f752c6122193e7bce38bdbf/polib-1.2.0.tar.gz", hash = "sha256:f3ef94aefed6e183e342a8a269ae1fc4742ba193186ad76f175938621dbfc26b", size = 161658 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/99/45bb1f9926efe370c6dbe324741c749658e44cb060124f28dad201202274/polib-1.2.0-py2.py3-none-any.whl", hash = "sha256:1c77ee1b81feb31df9bca258cbc58db1bbb32d10214b173882452c73af06d62d", size = 20634 }, +] + +[[package]] +name = "pycodestyle" +version = "2.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/aa/210b2c9aedd8c1cbeea31a50e42050ad56187754b34eb214c46709445801/pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521", size = 39232 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/d8/a211b3f85e99a0daa2ddec96c949cac6824bd305b040571b82a03dd62636/pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3", size = 31284 }, +] + +[[package]] +name = "pydantic" +version = "2.8.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8c/99/d0a5dca411e0a017762258013ba9905cd6e7baa9a3fd1fe8b6529472902e/pydantic-2.8.2.tar.gz", hash = "sha256:6f62c13d067b0755ad1c21a34bdd06c0c12625a22b0fc09c6b149816604f7c2a", size = 739834 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1f/fa/b7f815b8c9ad021c07f88875b601222ef5e70619391ade4a49234d12d278/pydantic-2.8.2-py3-none-any.whl", hash = "sha256:73ee9fddd406dc318b885c7a2eab8a6472b68b8fb5ba8150949fc3db939f23c8", size = 423875 }, +] + +[[package]] +name = "pydantic-core" +version = "2.20.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/12/e3/0d5ad91211dba310f7ded335f4dad871172b9cc9ce204f5a56d76ccd6247/pydantic_core-2.20.1.tar.gz", hash = "sha256:26ca695eeee5f9f1aeeb211ffc12f10bcb6f71e2989988fda61dabd65db878d4", size = 388371 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/9d/f30f080f745682e762512f3eef1f6e392c7d74a102e6e96de8a013a5db84/pydantic_core-2.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3acae97ffd19bf091c72df4d726d552c473f3576409b2a7ca36b2f535ffff4a3", size = 1837257 }, + { url = "https://files.pythonhosted.org/packages/f2/89/77e7aebdd4a235497ac1e07f0a99e9f40e47f6e0f6783fe30500df08fc42/pydantic_core-2.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41f4c96227a67a013e7de5ff8f20fb496ce573893b7f4f2707d065907bffdbd6", size = 1776715 }, + { url = "https://files.pythonhosted.org/packages/18/50/5a4e9120b395108c2a0441a425356c0d26a655d7c617288bec1c28b854ac/pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f239eb799a2081495ea659d8d4a43a8f42cd1fe9ff2e7e436295c38a10c286a", size = 1789023 }, + { url = "https://files.pythonhosted.org/packages/c7/e5/f19e13ba86b968d024b56aa53f40b24828652ac026e5addd0ae49eeada02/pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53e431da3fc53360db73eedf6f7124d1076e1b4ee4276b36fb25514544ceb4a3", size = 1775598 }, + { url = 
"https://files.pythonhosted.org/packages/c9/c7/f3c29bed28bd022c783baba5bf9946c4f694cb837a687e62f453c81eb5c6/pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1f62b2413c3a0e846c3b838b2ecd6c7a19ec6793b2a522745b0869e37ab5bc1", size = 1977691 }, + { url = "https://files.pythonhosted.org/packages/41/3e/f62c2a05c554fff34570f6788617e9670c83ed7bc07d62a55cccd1bc0be6/pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d41e6daee2813ecceea8eda38062d69e280b39df793f5a942fa515b8ed67953", size = 2693214 }, + { url = "https://files.pythonhosted.org/packages/ae/49/8a6fe79d35e2f3bea566d8ea0e4e6f436d4f749d7838c8e8c4c5148ae706/pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d482efec8b7dc6bfaedc0f166b2ce349df0011f5d2f1f25537ced4cfc34fd98", size = 2061047 }, + { url = "https://files.pythonhosted.org/packages/51/c6/585355c7c8561e11197dbf6333c57dd32f9f62165d48589b57ced2373d97/pydantic_core-2.20.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e93e1a4b4b33daed65d781a57a522ff153dcf748dee70b40c7258c5861e1768a", size = 1895106 }, + { url = "https://files.pythonhosted.org/packages/ce/23/829f6b87de0775919e82f8addef8b487ace1c77bb4cb754b217f7b1301b6/pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7c4ea22b6739b162c9ecaaa41d718dfad48a244909fe7ef4b54c0b530effc5a", size = 1968506 }, + { url = "https://files.pythonhosted.org/packages/ca/2f/f8ca8f0c40b3ee0a4d8730a51851adb14c5eda986ec09f8d754b2fba784e/pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4f2790949cf385d985a31984907fecb3896999329103df4e4983a4a41e13e840", size = 2110217 }, + { url = "https://files.pythonhosted.org/packages/bb/a0/1876656c7b17eb69cc683452cce6bb890dd722222a71b3de57ddb512f561/pydantic_core-2.20.1-cp310-none-win32.whl", hash = "sha256:5e999ba8dd90e93d57410c5e67ebb67ffcaadcea0ad973240fdfd3a135506250", size = 1709669 
}, + { url = "https://files.pythonhosted.org/packages/be/4a/576524eefa9b301c088c4818dc50ff1c51a88fe29efd87ab75748ae15fd7/pydantic_core-2.20.1-cp310-none-win_amd64.whl", hash = "sha256:512ecfbefef6dac7bc5eaaf46177b2de58cdf7acac8793fe033b24ece0b9566c", size = 1902386 }, + { url = "https://files.pythonhosted.org/packages/61/db/f6a724db226d990a329910727cfac43539ff6969edc217286dd05cda3ef6/pydantic_core-2.20.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d2a8fa9d6d6f891f3deec72f5cc668e6f66b188ab14bb1ab52422fe8e644f312", size = 1834507 }, + { url = "https://files.pythonhosted.org/packages/9b/83/6f2bfe75209d557ae1c3550c1252684fc1827b8b12fbed84c3b4439e135d/pydantic_core-2.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:175873691124f3d0da55aeea1d90660a6ea7a3cfea137c38afa0a5ffabe37b88", size = 1773527 }, + { url = "https://files.pythonhosted.org/packages/93/ef/513ea76d7ca81f2354bb9c8d7839fc1157673e652613f7e1aff17d8ce05d/pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37eee5b638f0e0dcd18d21f59b679686bbd18917b87db0193ae36f9c23c355fc", size = 1787879 }, + { url = "https://files.pythonhosted.org/packages/31/0a/ac294caecf235f0cc651de6232f1642bb793af448d1cfc541b0dc1fd72b8/pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25e9185e2d06c16ee438ed39bf62935ec436474a6ac4f9358524220f1b236e43", size = 1774694 }, + { url = "https://files.pythonhosted.org/packages/46/a4/08f12b5512f095963550a7cb49ae010e3f8f3f22b45e508c2cb4d7744fce/pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:150906b40ff188a3260cbee25380e7494ee85048584998c1e66df0c7a11c17a6", size = 1976369 }, + { url = "https://files.pythonhosted.org/packages/15/59/b2495be4410462aedb399071c71884042a2c6443319cbf62d00b4a7ed7a5/pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:8ad4aeb3e9a97286573c03df758fc7627aecdd02f1da04516a86dc159bf70121", size = 2691250 }, + { url = "https://files.pythonhosted.org/packages/3c/ae/fc99ce1ba791c9e9d1dee04ce80eef1dae5b25b27e3fc8e19f4e3f1348bf/pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3f3ed29cd9f978c604708511a1f9c2fdcb6c38b9aae36a51905b8811ee5cbf1", size = 2061462 }, + { url = "https://files.pythonhosted.org/packages/44/bb/eb07cbe47cfd638603ce3cb8c220f1a054b821e666509e535f27ba07ca5f/pydantic_core-2.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b0dae11d8f5ded51699c74d9548dcc5938e0804cc8298ec0aa0da95c21fff57b", size = 1893923 }, + { url = "https://files.pythonhosted.org/packages/ce/ef/5a52400553b8faa0e7f11fd7a2ba11e8d2feb50b540f9e7973c49b97eac0/pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:faa6b09ee09433b87992fb5a2859efd1c264ddc37280d2dd5db502126d0e7f27", size = 1966779 }, + { url = "https://files.pythonhosted.org/packages/4c/5b/fb37fe341344d9651f5c5f579639cd97d50a457dc53901aa8f7e9f28beb9/pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9dc1b507c12eb0481d071f3c1808f0529ad41dc415d0ca11f7ebfc666e66a18b", size = 2109044 }, + { url = "https://files.pythonhosted.org/packages/70/1a/6f7278802dbc66716661618807ab0dfa4fc32b09d1235923bbbe8b3a5757/pydantic_core-2.20.1-cp311-none-win32.whl", hash = "sha256:fa2fddcb7107e0d1808086ca306dcade7df60a13a6c347a7acf1ec139aa6789a", size = 1708265 }, + { url = "https://files.pythonhosted.org/packages/35/7f/58758c42c61b0bdd585158586fecea295523d49933cb33664ea888162daf/pydantic_core-2.20.1-cp311-none-win_amd64.whl", hash = "sha256:40a783fb7ee353c50bd3853e626f15677ea527ae556429453685ae32280c19c2", size = 1901750 }, + { url = "https://files.pythonhosted.org/packages/6f/47/ef0d60ae23c41aced42921728650460dc831a0adf604bfa66b76028cb4d0/pydantic_core-2.20.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = 
"sha256:595ba5be69b35777474fa07f80fc260ea71255656191adb22a8c53aba4479231", size = 1839225 }, + { url = "https://files.pythonhosted.org/packages/6a/23/430f2878c9cd977a61bb39f71751d9310ec55cee36b3d5bf1752c6341fd0/pydantic_core-2.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a4f55095ad087474999ee28d3398bae183a66be4823f753cd7d67dd0153427c9", size = 1768604 }, + { url = "https://files.pythonhosted.org/packages/9e/2b/ec4e7225dee79e0dc80ccc3c35ab33cc2c4bbb8a1a7ecf060e5e453651ec/pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9aa05d09ecf4c75157197f27cdc9cfaeb7c5f15021c6373932bf3e124af029f", size = 1789767 }, + { url = "https://files.pythonhosted.org/packages/64/b0/38b24a1fa6d2f96af3148362e10737ec073768cd44d3ec21dca3be40a519/pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e97fdf088d4b31ff4ba35db26d9cc472ac7ef4a2ff2badeabf8d727b3377fc52", size = 1772061 }, + { url = "https://files.pythonhosted.org/packages/5e/da/bb73274c42cb60decfa61e9eb0c9029da78b3b9af0a9de0309dbc8ff87b6/pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc633a9fe1eb87e250b5c57d389cf28998e4292336926b0b6cdaee353f89a237", size = 1974573 }, + { url = "https://files.pythonhosted.org/packages/c8/65/41693110fb3552556180460daffdb8bbeefb87fc026fd9aa4b849374015c/pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d573faf8eb7e6b1cbbcb4f5b247c60ca8be39fe2c674495df0eb4318303137fe", size = 2625596 }, + { url = "https://files.pythonhosted.org/packages/09/b3/a5a54b47cccd1ab661ed5775235c5e06924753c2d4817737c5667bfa19a8/pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26dc97754b57d2fd00ac2b24dfa341abffc380b823211994c4efac7f13b9e90e", size = 2099064 }, + { url = 
"https://files.pythonhosted.org/packages/52/fa/443a7a6ea54beaba45ff3a59f3d3e6e3004b7460bcfb0be77bcf98719d3b/pydantic_core-2.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:33499e85e739a4b60c9dac710c20a08dc73cb3240c9a0e22325e671b27b70d24", size = 1900345 }, + { url = "https://files.pythonhosted.org/packages/8e/e6/9aca9ffae60f9cdf0183069de3e271889b628d0fb175913fcb3db5618fb1/pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bebb4d6715c814597f85297c332297c6ce81e29436125ca59d1159b07f423eb1", size = 1968252 }, + { url = "https://files.pythonhosted.org/packages/46/5e/6c716810ea20a6419188992973a73c2fb4eb99cd382368d0637ddb6d3c99/pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:516d9227919612425c8ef1c9b869bbbee249bc91912c8aaffb66116c0b447ebd", size = 2119191 }, + { url = "https://files.pythonhosted.org/packages/06/fc/6123b00a9240fbb9ae0babad7a005d51103d9a5d39c957a986f5cdd0c271/pydantic_core-2.20.1-cp312-none-win32.whl", hash = "sha256:469f29f9093c9d834432034d33f5fe45699e664f12a13bf38c04967ce233d688", size = 1717788 }, + { url = "https://files.pythonhosted.org/packages/d5/36/e61ad5a46607a469e2786f398cd671ebafcd9fb17f09a2359985c7228df5/pydantic_core-2.20.1-cp312-none-win_amd64.whl", hash = "sha256:035ede2e16da7281041f0e626459bcae33ed998cca6a0a007a5ebb73414ac72d", size = 1898188 }, + { url = "https://files.pythonhosted.org/packages/49/75/40b0e98b658fdba02a693b3bacb4c875a28bba87796c7b13975976597d8c/pydantic_core-2.20.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0827505a5c87e8aa285dc31e9ec7f4a17c81a813d45f70b1d9164e03a813a686", size = 1838688 }, + { url = "https://files.pythonhosted.org/packages/75/02/d8ba2d4a266591a6a623c68b331b96523d4b62ab82a951794e3ed8907390/pydantic_core-2.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:19c0fa39fa154e7e0b7f82f88ef85faa2a4c23cc65aae2f5aea625e3c13c735a", size = 1768409 }, + { url = 
"https://files.pythonhosted.org/packages/91/ae/25ecd9bc4ce4993e99a1a3c9ab111c082630c914260e129572fafed4ecc2/pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa223cd1e36b642092c326d694d8bf59b71ddddc94cdb752bbbb1c5c91d833b", size = 1789317 }, + { url = "https://files.pythonhosted.org/packages/7a/80/72057580681cdbe55699c367963d9c661b569a1d39338b4f6239faf36cdc/pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c336a6d235522a62fef872c6295a42ecb0c4e1d0f1a3e500fe949415761b8a19", size = 1771949 }, + { url = "https://files.pythonhosted.org/packages/a2/be/d9bbabc55b05019013180f141fcaf3b14dbe15ca7da550e95b60c321009a/pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7eb6a0587eded33aeefea9f916899d42b1799b7b14b8f8ff2753c0ac1741edac", size = 1974392 }, + { url = "https://files.pythonhosted.org/packages/79/2d/7bcd938c6afb0f40293283f5f09988b61fb0a4f1d180abe7c23a2f665f8e/pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70c8daf4faca8da5a6d655f9af86faf6ec2e1768f4b8b9d0226c02f3d6209703", size = 2625565 }, + { url = "https://files.pythonhosted.org/packages/ac/88/ca758e979457096008a4b16a064509028e3e092a1e85a5ed6c18ced8da88/pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9fa4c9bf273ca41f940bceb86922a7667cd5bf90e95dbb157cbb8441008482c", size = 2098784 }, + { url = "https://files.pythonhosted.org/packages/eb/de/2fad6d63c3c42e472e985acb12ec45b7f56e42e6f4cd6dfbc5e87ee8678c/pydantic_core-2.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:11b71d67b4725e7e2a9f6e9c0ac1239bbc0c48cce3dc59f98635efc57d6dac83", size = 1900198 }, + { url = "https://files.pythonhosted.org/packages/fe/50/077c7f35b6488dc369a6d22993af3a37901e198630f38ac43391ca730f5b/pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = 
"sha256:270755f15174fb983890c49881e93f8f1b80f0b5e3a3cc1394a255706cabd203", size = 1968005 }, + { url = "https://files.pythonhosted.org/packages/5d/1f/f378631574ead46d636b9a04a80ff878b9365d4b361b1905ef1667d4182a/pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c81131869240e3e568916ef4c307f8b99583efaa60a8112ef27a366eefba8ef0", size = 2118920 }, + { url = "https://files.pythonhosted.org/packages/7a/ea/e4943f17df7a3031d709481fe4363d4624ae875a6409aec34c28c9e6cf59/pydantic_core-2.20.1-cp313-none-win32.whl", hash = "sha256:b91ced227c41aa29c672814f50dbb05ec93536abf8f43cd14ec9521ea09afe4e", size = 1717397 }, + { url = "https://files.pythonhosted.org/packages/13/63/b95781763e8d84207025071c0cec16d921c0163c7a9033ae4b9a0e020dc7/pydantic_core-2.20.1-cp313-none-win_amd64.whl", hash = "sha256:65db0f2eefcaad1a3950f498aabb4875c8890438bc80b19362cf633b87a8ab20", size = 1898013 }, + { url = "https://files.pythonhosted.org/packages/73/73/0c7265903f66cce39ed7ca939684fba344210cefc91ccc999cfd5b113fd3/pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a45f84b09ac9c3d35dfcf6a27fd0634d30d183205230a0ebe8373a0e8cfa0906", size = 1828190 }, + { url = "https://files.pythonhosted.org/packages/27/55/60b8b0e58b49ee3ed36a18562dd7c6bc06a551c390e387af5872a238f2ec/pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d02a72df14dfdbaf228424573a07af10637bd490f0901cee872c4f434a735b94", size = 1715252 }, + { url = "https://files.pythonhosted.org/packages/28/3d/d66314bad6bb777a36559195a007b31e916bd9e2c198f7bb8f4ccdceb4fa/pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2b27e6af28f07e2f195552b37d7d66b150adbaa39a6d327766ffd695799780f", size = 1782641 }, + { url = "https://files.pythonhosted.org/packages/9e/f5/f178f4354d0d6c1431a8f9ede71f3c4269ac4dc55d314fdb7555814276dc/pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:084659fac3c83fd674596612aeff6041a18402f1e1bc19ca39e417d554468482", size = 1928788 }, + { url = "https://files.pythonhosted.org/packages/9c/51/1f5e27bb194df79e30b593b608c66e881ed481241e2b9ed5bdf86d165480/pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:242b8feb3c493ab78be289c034a1f659e8826e2233786e36f2893a950a719bb6", size = 1886116 }, + { url = "https://files.pythonhosted.org/packages/ac/76/450d9258c58dc7c70b9e3aadf6bebe23ddd99e459c365e2adbde80e238da/pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:38cf1c40a921d05c5edc61a785c0ddb4bed67827069f535d794ce6bcded919fc", size = 1960125 }, + { url = "https://files.pythonhosted.org/packages/dd/9e/0309a7a4bea51771729515e413b3987be0789837de99087f7415e0db1f9b/pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e0bbdd76ce9aa5d4209d65f2b27fc6e5ef1312ae6c5333c26db3f5ade53a1e99", size = 2100407 }, + { url = "https://files.pythonhosted.org/packages/af/93/06d44e08277b3b818b75bd5f25e879d7693e4b7dd3505fde89916fcc9ca2/pydantic_core-2.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:254ec27fdb5b1ee60684f91683be95e5133c994cc54e86a0b0963afa25c8f8a6", size = 1914966 }, +] + +[[package]] +name = "pyflakes" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/f9/669d8c9c86613c9d568757c7f5824bd3197d7b1c6c27553bc5618a27cce2/pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f", size = 63788 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/d7/f1b7db88d8e4417c5d47adad627a93547f44bdc9028372dbd2313f34a855/pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a", size = 62725 }, +] + +[[package]] +name = "pytest" +version = "8.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"colorama", marker = "sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b4/8c/9862305bdcd6020bc7b45b1b5e7397a6caf1a33d3025b9a003b39075ffb2/pytest-8.3.2.tar.gz", hash = "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce", size = 1439314 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/f9/cf155cf32ca7d6fa3601bc4c5dd19086af4b320b706919d48a4c79081cf9/pytest-8.3.2-py3-none-any.whl", hash = "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5", size = 341802 }, +] + +[[package]] +name = "python-dotenv" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bc/57/e84d88dfe0aec03b7a2d4327012c1627ab5f03652216c63d49846d7a6c58/python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", size = 39115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/95/a3fac87cb7158e231b5a6012e438c647e1a87f09f8e0d123acec8ab8bf71/PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", size = 184199 }, + { url = "https://files.pythonhosted.org/packages/c7/7a/68bd47624dab8fd4afbfd3c48e3b79efe09098ae941de5b58abcbadff5cb/PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", size = 171758 }, + { url = "https://files.pythonhosted.org/packages/49/ee/14c54df452143b9ee9f0f29074d7ca5516a36edb0b4cc40c3f280131656f/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", size = 718463 }, + { url = "https://files.pythonhosted.org/packages/4d/61/de363a97476e766574650d742205be468921a7b532aa2499fcd886b62530/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", size = 719280 }, + { url = "https://files.pythonhosted.org/packages/6b/4e/1523cb902fd98355e2e9ea5e5eb237cbc5f3ad5f3075fa65087aa0ecb669/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", size = 751239 }, + { url = "https://files.pythonhosted.org/packages/b7/33/5504b3a9a4464893c32f118a9cc045190a91637b119a9c881da1cf6b7a72/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", size = 695802 }, + { url = "https://files.pythonhosted.org/packages/5c/20/8347dcabd41ef3a3cdc4f7b7a2aff3d06598c8779faa189cdbf878b626a4/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", size = 720527 }, + { url = "https://files.pythonhosted.org/packages/be/aa/5afe99233fb360d0ff37377145a949ae258aaab831bde4792b32650a4378/PyYAML-6.0.2-cp310-cp310-win32.whl", hash = 
"sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", size = 144052 }, + { url = "https://files.pythonhosted.org/packages/b5/84/0fa4b06f6d6c958d207620fc60005e241ecedceee58931bb20138e1e5776/PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", size = 161774 }, + { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612 }, + { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040 }, + { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829 }, + { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167 }, + { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952 }, + { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301 }, + { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638 }, + { url = "https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850 }, + { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980 }, + { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 }, + { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 }, + { url = 
"https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591 }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338 }, + { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309 }, + { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679 }, + { url = 
"https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428 }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361 }, + { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523 }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660 }, + { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 }, + { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + +[[package]] +name = "setuptools" +version = "73.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8d/37/f4d4ce9bc15e61edba3179f9b0f763fc6d439474d28511b11f0d95bab7a2/setuptools-73.0.1.tar.gz", hash = "sha256:d59a3e788ab7e012ab2c4baed1b376da6366883ee20d7a5fc426816e3d7b1193", size = 2526506 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6a/0270e295bf30c37567736b7fca10167640898214ff911273af37ddb95770/setuptools-73.0.1-py3-none-any.whl", hash = "sha256:b208925fcb9f7af924ed2dc04708ea89791e24bde0d3020b27df0e116088b34e", size = 2346588 }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, +] + +[[package]] +name = "tokenizers" +version = "0.20.0" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/3a/508a4875f69e12b08fb3dabfc746039fe763838ff45d6e42229ed09a41c2/tokenizers-0.20.0.tar.gz", hash = "sha256:39d7acc43f564c274085cafcd1dae9d36f332456de1a31970296a6b8da4eac8d", size = 337421 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/47/88f92fb433fe2fb59b35bbce28455095bcb7b40fff385223b1e7818cec38/tokenizers-0.20.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:6cff5c5e37c41bc5faa519d6f3df0679e4b37da54ea1f42121719c5e2b4905c0", size = 2624575 }, + { url = "https://files.pythonhosted.org/packages/fc/e5/74c6ab076de7d2d4d347e8781086117889d202628dfd5f5fba8ebefb1ea2/tokenizers-0.20.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:62a56bf75c27443432456f4ca5ca055befa95e25be8a28141cc495cac8ae4d6d", size = 2515759 }, + { url = "https://files.pythonhosted.org/packages/4e/f5/1087cb5100e704dce9a1419d6f3e8ac843c98efa11579c3287ddb036b476/tokenizers-0.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68cc7de6a63f09c4a86909c2597b995aa66e19df852a23aea894929c74369929", size = 2892020 }, + { url = "https://files.pythonhosted.org/packages/35/07/7004003098e3d442bba9b9821b78f34043248bdf6a78433846944b7d9a61/tokenizers-0.20.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:053c37ecee482cc958fdee53af3c6534286a86f5d35aac476f7c246830e53ae5", size = 2754734 }, + { url = "https://files.pythonhosted.org/packages/d0/61/9f3def0db2db72d8da6c4c318481a35c5c71172dad54ff3813f765ab2a45/tokenizers-0.20.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d7074aaabc151a6363fa03db5493fc95b423b2a1874456783989e96d541c7b6", size = 3009897 }, + { url = "https://files.pythonhosted.org/packages/c1/98/f4a9a18a4e2e254c6ed253b3e5344d8f48760d3af6813df4415446db1b4c/tokenizers-0.20.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:a11435780f2acd89e8fefe5e81cecf01776f6edb9b3ac95bcb76baee76b30b90", size = 3032295 }, + { url = "https://files.pythonhosted.org/packages/87/43/52b096d5aacb3eb698f1b791e8a6c1b7ecd39b17724c38312804b79429fa/tokenizers-0.20.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9a81cd2712973b007d84268d45fc3f6f90a79c31dfe7f1925e6732f8d2959987", size = 3328639 }, + { url = "https://files.pythonhosted.org/packages/fc/7e/794850f99752d1811952722c18652a5c0125b0ef595d9ed069d00da9a5db/tokenizers-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7dfd796ab9d909f76fb93080e1c7c8309f196ecb316eb130718cd5e34231c69", size = 2936169 }, + { url = "https://files.pythonhosted.org/packages/ea/3d/d573173b0cd78cd64e95b5c8f268f3a619877bc6a484b649d98af4de24bf/tokenizers-0.20.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:8029ad2aa8cb00605c9374566034c1cc1b15130713e0eb5afcef6cface8255c9", size = 8965441 }, + { url = "https://files.pythonhosted.org/packages/27/cb/76636123a5bc550c48aa8048def1ae3d86421723be2cca8f195f464c20f6/tokenizers-0.20.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ca4d54260ebe97d59dfa9a30baa20d0c4dd9137d99a8801700055c561145c24e", size = 9284485 }, + { url = "https://files.pythonhosted.org/packages/32/16/5eaa1405e15ca91a9e0f6c07963cd91f48daf8f999ff731b589078a4caa1/tokenizers-0.20.0-cp310-none-win32.whl", hash = "sha256:95ee16b57cec11b86a7940174ec5197d506439b0f415ab3859f254b1dffe9df0", size = 2125655 }, + { url = "https://files.pythonhosted.org/packages/63/90/84534f81ff1453a1bcc049b03ea6820ca7ab497519b79b129d7297bb4e60/tokenizers-0.20.0-cp310-none-win_amd64.whl", hash = "sha256:0a61a11e93eeadbf02aea082ffc75241c4198e0608bbbac4f65a9026851dcf37", size = 2326217 }, + { url = "https://files.pythonhosted.org/packages/a4/f6/ae042eeae413bae9af5adceed7fe6f30fb0abc9868a55916d4e07c8ea1fb/tokenizers-0.20.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:6636b798b3c4d6c9b1af1a918bd07c867808e5a21c64324e95318a237e6366c3", size = 2625296 }, + { url = "https://files.pythonhosted.org/packages/62/8b/dab4d716e9a00c1581443213283c9fdfdb982cdad6ecc046bae9c7e42fc8/tokenizers-0.20.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ec603e42eaf499ffd58b9258162add948717cf21372458132f14e13a6bc7172", size = 2516726 }, + { url = "https://files.pythonhosted.org/packages/95/1e/800e0896ea43ab86d70cfc6ed6a30d6aefcab498eff49db79cc92e08e1fe/tokenizers-0.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cce124264903a8ea6f8f48e1cc7669e5ef638c18bd4ab0a88769d5f92debdf7f", size = 2891801 }, + { url = "https://files.pythonhosted.org/packages/02/80/22ceab06d120df5b589f993248bceef177a932024ae8ee033ec3da5cc87f/tokenizers-0.20.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:07bbeba0231cf8de07aa6b9e33e9779ff103d47042eeeb859a8c432e3292fb98", size = 2753762 }, + { url = "https://files.pythonhosted.org/packages/22/7c/02431f0711162ab3994e4099b9ece4b6a00755e3180bf5dfe70da0c13836/tokenizers-0.20.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:06c0ca8397b35d38b83a44a9c6929790c1692957d88541df061cb34d82ebbf08", size = 3010928 }, + { url = "https://files.pythonhosted.org/packages/bc/14/193b7e58017e9592799498686df718c5f68bfb72205d3075ce9cdd441db7/tokenizers-0.20.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca6557ac3b83d912dfbb1f70ab56bd4b0594043916688e906ede09f42e192401", size = 3032435 }, + { url = "https://files.pythonhosted.org/packages/71/ae/c7fc7a614ce78cab7b8f82f7a24a074837cbc7e0086960cbe4801b2b3c83/tokenizers-0.20.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a5ad94c9e80ac6098328bee2e3264dbced4c6faa34429994d473f795ec58ef4", size = 3328437 }, + { url = 
"https://files.pythonhosted.org/packages/a5/0e/e4421e6b8c8b3ae093bef22faa28c50d7dbd654f661edc5f5880a93dbf10/tokenizers-0.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b5c7f906ee6bec30a9dc20268a8b80f3b9584de1c9f051671cb057dc6ce28f6", size = 2936532 }, + { url = "https://files.pythonhosted.org/packages/b9/08/ac9c8fe9c1f5b4ef89bcbf543cda890e76c2ea1c2e957bf77fd5fcf72b6c/tokenizers-0.20.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:31e087e9ee1b8f075b002bfee257e858dc695f955b43903e1bb4aa9f170e37fe", size = 8965273 }, + { url = "https://files.pythonhosted.org/packages/fb/71/b9626f9f5a33dd1d80bb6d3721f0a4b0b48ced0c702e65aad5c8c7c1ae7e/tokenizers-0.20.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c3124fb6f3346cb3d8d775375d3b429bf4dcfc24f739822702009d20a4297990", size = 9283768 }, + { url = "https://files.pythonhosted.org/packages/ba/78/70f79f939385579bb25f14cb14ab0eaa49e46a7d099577c2e08e3c3597d8/tokenizers-0.20.0-cp311-none-win32.whl", hash = "sha256:a4bb8b40ba9eefa621fdcabf04a74aa6038ae3be0c614c6458bd91a4697a452f", size = 2126085 }, + { url = "https://files.pythonhosted.org/packages/c0/3c/9228601e180b177755fd9f35cbb229c13f1919a55f07a602b1bd7d716470/tokenizers-0.20.0-cp311-none-win_amd64.whl", hash = "sha256:2b709d371f1fe60a28ef0c5c67815952d455ca7f34dbe7197eaaed3cc54b658e", size = 2327670 }, + { url = "https://files.pythonhosted.org/packages/ce/d4/152f9964cee16b43b9147212e925793df1a469324b29b4c7a6cb60280c99/tokenizers-0.20.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:15c81a17d0d66f4987c6ca16f4bea7ec253b8c7ed1bb00fdc5d038b1bb56e714", size = 2613552 }, + { url = "https://files.pythonhosted.org/packages/6e/99/594b518d44ba2b099753816a9c0c33dbdcf77cc3ec5b256690f70d7431c2/tokenizers-0.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6a531cdf1fb6dc41c984c785a3b299cb0586de0b35683842a3afbb1e5207f910", size = 2513918 }, + { url = 
"https://files.pythonhosted.org/packages/24/fa/77f0cf9b3c662b4de18953fb06126c424059f4b09ca2d1b720beabc6afde/tokenizers-0.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06caabeb4587f8404e0cd9d40f458e9cba3e815c8155a38e579a74ff3e2a4301", size = 2892465 }, + { url = "https://files.pythonhosted.org/packages/2d/e6/59abfc09f1dc23a47fd03dd8e3bf3fce67d9be2b8ba15a73c9a86b5a646c/tokenizers-0.20.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8768f964f23f5b9f50546c0369c75ab3262de926983888bbe8b98be05392a79c", size = 2750862 }, + { url = "https://files.pythonhosted.org/packages/0f/b2/f212ca05c1b246b9429905c18a4d68abacf2a35214eceedb1d65c6c37831/tokenizers-0.20.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:626403860152c816f97b649fd279bd622c3d417678c93b4b1a8909b6380b69a8", size = 3012971 }, + { url = "https://files.pythonhosted.org/packages/16/0b/099f5e5b97e8323837a5828f6d21f4bb2a3b529507dc19bd274e48e15825/tokenizers-0.20.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c1b88fa9e5ff062326f4bf82681da5a96fca7104d921a6bd7b1e6fcf224af26", size = 3038445 }, + { url = "https://files.pythonhosted.org/packages/62/7c/4e3cb25dc1c5eea6053752f55007071da6b33a96021e0cea4b45b6ef0908/tokenizers-0.20.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d7e559436a07dc547f22ce1101f26d8b2fad387e28ec8e7e1e3b11695d681d8", size = 3329352 }, + { url = "https://files.pythonhosted.org/packages/32/20/a8fe63317d4f3c015cbd5b6dec0ce08e2722685ca836ad4a44dec53d000f/tokenizers-0.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e48afb75e50449848964e4a67b0da01261dd3aa8df8daecf10db8fd7f5b076eb", size = 2938786 }, + { url = "https://files.pythonhosted.org/packages/06/e8/78f1c0f356d0a6e4e4e450e2419ace1918bfab875100c3047021a8261ba0/tokenizers-0.20.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:baf5d0e1ff44710a95eefc196dd87666ffc609fd447c5e5b68272a7c3d342a1d", size = 8967350 }, + { url = "https://files.pythonhosted.org/packages/e6/eb/3a1edfc1ffb876ffc1f668c8fa2b2ffb57edf8e9188af49218cf41f9cd9f/tokenizers-0.20.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e5e56df0e8ed23ba60ae3848c3f069a0710c4b197218fe4f89e27eba38510768", size = 9284785 }, + { url = "https://files.pythonhosted.org/packages/00/75/426a93399ba5e6e879215e1abb696adb83b1e2a98d65b47b8ba4262b3d17/tokenizers-0.20.0-cp312-none-win32.whl", hash = "sha256:ec53e5ecc142a82432f9c6c677dbbe5a2bfee92b8abf409a9ecb0d425ee0ce75", size = 2125012 }, + { url = "https://files.pythonhosted.org/packages/a5/45/9c19187645401ec30884379ada74aa6e71fb5eaf20485a82ea37a0fd3659/tokenizers-0.20.0-cp312-none-win_amd64.whl", hash = "sha256:f18661ece72e39c0dfaa174d6223248a15b457dbd4b0fc07809b8e6d3ca1a234", size = 2314154 }, + { url = "https://files.pythonhosted.org/packages/cd/99/dba2f18ba180aefddb65852d2cea69de607232f4cf1d999e789899d56c19/tokenizers-0.20.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:d68e15f1815357b059ec266062340c343ea7f98f7f330602df81ffa3474b6122", size = 2626438 }, + { url = "https://files.pythonhosted.org/packages/79/e6/eb28c3c7d23f3feaa9fb6ae16ff313210474b3c9f81689afe6d132915da0/tokenizers-0.20.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:23f9ecec637b9bc80da5f703808d29ed5329e56b5aa8d791d1088014f48afadc", size = 2517016 }, + { url = "https://files.pythonhosted.org/packages/18/2f/35f7fdbf1ae6fa3d0348531596a63651fdb117ff367e3dfe8a6be5f31f5a/tokenizers-0.20.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f830b318ee599e3d0665b3e325f85bc75ee2d2ca6285f52e439dc22b64691580", size = 2890784 }, + { url = "https://files.pythonhosted.org/packages/97/10/7b74d7e5663f886d058df470f14fd492078533a5aee52bf1553eed83a49d/tokenizers-0.20.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:b3dc750def789cb1de1b5a37657919545e1d9ffa667658b3fa9cb7862407a1b8", size = 3007139 }, + { url = "https://files.pythonhosted.org/packages/77/5a/a59c9f97000fce432e3728fbe32c23cf3dd9933255d76166101c2b12a916/tokenizers-0.20.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e26e6c755ae884c2ea6135cd215bdd0fccafe4ee62405014b8c3cd19954e3ab9", size = 2933499 }, + { url = "https://files.pythonhosted.org/packages/bd/7a/fde367e46596855e172c466655fc416d98be6c7ae792afdb5315ca38bed0/tokenizers-0.20.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:a1158c7174f427182e08baa2a8ded2940f2b4a3e94969a85cc9cfd16004cbcea", size = 8964991 }, + { url = "https://files.pythonhosted.org/packages/9f/fa/075959c7d901a55b2a3198d0ecfbc624c553f5ff8027bc4fac0aa6bab70a/tokenizers-0.20.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:6324826287a3fc198898d3dcf758fe4a8479e42d6039f4c59e2cedd3cf92f64e", size = 9284502 }, +] + +[[package]] +name = "tomli" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/3f/d7af728f075fb08564c5949a9c95e44352e23dee646869fa104a3b2060a3/tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f", size = 15164 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", size = 12757 }, +] + +[[package]] +name = "tqdm" +version = "4.66.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "platform_system == 'Windows'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/83/6ba9844a41128c62e810fddddd72473201f3eacde02046066142a2d96cc5/tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad", size = 
169504 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/5d/acf5905c36149bbaec41ccf7f2b68814647347b72075ac0b1fe3022fdc73/tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd", size = 78351 }, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, +] + +[[package]] +name = "urllib3" +version = "2.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/6d/fa469ae21497ddc8bc93e5877702dca7cb8f911e337aca7452b5724f1bb6/urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168", size = 292266 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/1c/89ffc63a9605b583d5df2be791a27bc1a42b7c32bab68d3c8f2f73a98cd4/urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472", size = 121444 }, +] From 9c64ee95d0fb5f3a04a1beb2313bc065648648ec Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 18:42:12 +0200 Subject: [PATCH 025/112] fixed system and user prompt setting and llm client is now a singleton --- src/auto_po_lyglot/getenv.py | 47 +++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index d2b7fb9..46fb6f7 100755 --- 
a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -48,13 +48,15 @@ def parse_args(self, additional_args=None): def __init__(self, additional_args=None): "looks at args and returns an object with attributes of these args completed by the environ variables where needed" + self._client = None + args = self.parse_args(additional_args) - load_dotenv(verbose=True, override=True) + load_dotenv(override=True) self.verbose = args.verbose or bool(environ.get('VERBOSE', False)) logger.set_verbose(self.verbose) - + # original language self.original_language = args.original_language or environ.get('ORIGINAL_LANGUAGE', 'English') # context translation language @@ -72,6 +74,9 @@ def __init__(self, additional_args=None): else: self.test_target_languages = environ.get('TARGET_LANGUAGES', 'Spanish').split(',') + self.system_prompt = environ.get('SYSTEM_PROMPT', None) + self.user_prompt = environ.get('USER_PROMPT', None) + # generic processing of additional arguments if additional_args: for argument in additional_args: @@ -82,20 +87,24 @@ def __init__(self, additional_args=None): setattr(self, arg, val) def get_client(self): - match self.llm_client: - case 'ollama': - from .openai_ollama_client import OllamaClient as LLMClient - case 'openai': - # uses OpenAI GPT-4o by default - from .openai_ollama_client import OpenAIClient as LLMClient - case 'claude': - # uses Claude Sonnet 3.5 by default - from .claude_client import ClaudeClient as LLMClient - case 'claude_cached': - # uses Claude Sonnet 3.5, cached mode for long system prompts - from .claude_client import CachedClaudeClient as LLMClient - case _: - raise Exception( - f"LLM_CLIENT must be one of 'ollama', 'openai', 'claude' or 'claude_cached', not '{self.llm_client}'" - ) - return LLMClient(self, "") + if not self._client: + + match self.llm_client: + case 'ollama': + from .openai_ollama_client import OllamaClient as LLMClient + case 'openai': + # uses OpenAI GPT-4o by default + from .openai_ollama_client 
import OpenAIClient as LLMClient + case 'claude': + # uses Claude Sonnet 3.5 by default + from .claude_client import ClaudeClient as LLMClient + case 'claude_cached': + # uses Claude Sonnet 3.5, cached mode for long system prompts + from .claude_client import CachedClaudeClient as LLMClient + case _: + raise Exception( + f"LLM_CLIENT must be one of 'ollama', 'openai', 'claude' or 'claude_cached', not '{self.llm_client}'" + ) + self._client = LLMClient(self, "") + + return self._client From d0723035185b7139766af260ebfd04712aadabe7 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 18:42:30 +0200 Subject: [PATCH 026/112] adapted to uv --- .github/workflows/build-package.yaml | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index a445deb..4eba59b 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -28,16 +28,14 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install flake8 pytest - pip install -r requirements.txt - # to allow dev package testing - pip install -e . + python -m pip install uv # install python manager + uv pip install -e . # to allow dev package testing - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + uv run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics + uv run flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics - name: Make envfile uses: SpicyPizza/create-envfile@v2.0.3 with: @@ -57,7 +55,6 @@ jobs: sort_keys: false - name: Test with pytest run: | - mkdir -p tests/output - [[ -f .env ]] && cat .env || echo "No .env file found in root" - [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" - pytest -s ./tests \ No newline at end of file + # [[ -f .env ]] && cat .env || echo "No .env file found in root" + # [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" + uv run pytest -s ./tests \ No newline at end of file From c376c38070cbb066aebf608237a686002a87dc41 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 18:42:57 +0200 Subject: [PATCH 027/112] create test dir if ot doesn't exist instead of raising error --- tests/test_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_main.py b/tests/test_main.py index 17349a2..c1ab751 100755 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -28,7 +28,7 @@ def output_file(llm_client): p = Path(OUTPUT_DIRECTORY) logger.vprint("Output directory:", p) if not p.is_dir(): - raise ValueError(f"Output directory {p} does not exist.") + p.mkdir(parents=True, exist_ok=True) basefile_name = f"{llm_client.params.model.replace(':', '-')}_output%i.md" i = 0 while True: From 08be21895232e5a79d1364b030db7c7b6e18380a Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 18:44:55 +0200 Subject: [PATCH 028/112] create venv --- .github/workflows/build-package.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 4eba59b..e3d2abc 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -29,6 +29,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install uv # install python manager + uv venv . 
# create a virtual environment uv pip install -e . # to allow dev package testing - name: Lint with flake8 run: | From 1e11807d724ff0779d14ba1fcf1d2793171e4fd3 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 18:55:02 +0200 Subject: [PATCH 029/112] new adaptation for uv venv --- .github/workflows/build-package.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index e3d2abc..f840462 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -28,8 +28,9 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install uv # install python manager - uv venv . # create a virtual environment + python -m pip install --upgrade uv # install python manager + uv sync . # create a virtual environment and install dependencies + source .venv/bin/activate # activate the virtual environment uv pip install -e . # to allow dev package testing - name: Lint with flake8 run: | From 0c33dfe859f4b837041b48bfd73d7001bca72c10 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 18:57:08 +0200 Subject: [PATCH 030/112] typo --- .github/workflows/build-package.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index f840462..0339de6 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -25,11 +25,11 @@ jobs: with: python-version: '3.10' cache: 'pip' - - name: Install dependencies + - name: Create Venv and Install dependencies run: | python -m pip install --upgrade pip python -m pip install --upgrade uv # install python manager - uv sync . # create a virtual environment and install dependencies + uv sync # create a virtual environment and install dependencies source .venv/bin/activate # activate the virtual environment uv pip install -e . 
# to allow dev package testing - name: Lint with flake8 From 3e1e514355a2b4dfdf3c0ec1fe80b7a76905ad76 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 19:02:46 +0200 Subject: [PATCH 031/112] limit flake8 to src and tests --- .github/workflows/build-package.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 0339de6..184e3bb 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -35,9 +35,9 @@ jobs: - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names - uv run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + uv run flake8 ./src ./tests --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - uv run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics + uv run flake8 ./src ./tests --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics - name: Make envfile uses: SpicyPizza/create-envfile@v2.0.3 with: From 5d5fad13e141f46f96510819dea9a7d0481c4d3c Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 19:12:37 +0200 Subject: [PATCH 032/112] brought back ollama startup + used env variable for lisibility --- .github/workflows/build-package.yaml | 35 ++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 184e3bb..3de7a75 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -9,12 +9,23 @@ on: pull_request: branches: [ "main" ] +env: + LLM_CLIENT: ollama + MODEL: phi3 + VERBOSE: true # optional + INPUT_PO: tests/input/input.po + ORIGINAL_LANGUAGE: English + CONTEXT_LANGUAGE: French + TARGET_LANGUAGES: 
Italian # comma separated list + OLLAMA_BASE_URL: "http://localhost:11434/v1" + + jobs: - # start-ollama-server: - # uses: ./.github/workflows/run-ollama.yaml - # with: - # model: phi3 + start-ollama-server: + uses: ./.github/workflows/run-ollama.yaml + with: + model: ${{ env.MODEL }} build: runs-on: ubuntu-latest @@ -41,14 +52,14 @@ jobs: - name: Make envfile uses: SpicyPizza/create-envfile@v2.0.3 with: - envkey_VERBOSE: true # optional - envkey_INPUT_PO: "tests/input/input.po" - envkey_ORIGINAL_LANGUAGE: "English" - envkey_CONTEXT_LANGUAGE: "French" - envkey_TARGET_LANGUAGES: "Italian" - envkey_LLM_CLIENT: "ollama" - envkey_LLM_MODEL: "phi3" - envkey_OLLAMA_BASE_URL: "http://localhost:11434/v1" + envkey_VERBOSE: ${{ env.VERBOSE }} # optional + envkey_INPUT_PO: ${{ env.INPUT_PO }} + envkey_ORIGINAL_LANGUAGE: ${{ env.ORIGINAL_LANGUAGE }} + envkey_CONTEXT_LANGUAGE: ${{ env.CONTEXT_LANGUAGE }} + envkey_TARGET_LANGUAGES: ${{ env.TARGET_LANGUAGES }} + envkey_LLM_CLIENT: ${{ env.LLM_CLIENT }} + envkey_LLM_MODEL: ${{ env.MODEL }} + envkey_OLLAMA_BASE_URL: ${{ env.OLLAMA_BASE_URL }} envkey_SYSTEM_PROMPT: ${{ vars.SYSTEM_PROMPT }} envkey_USER_PROMPT: ${{ vars.USER_PROMPT }} directory: . 
From a01c078712a0cce7b79e9330f02ebfaab9bf5fc4 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 19:30:53 +0200 Subject: [PATCH 033/112] trying another syntax randomly as no one can explain why it fails --- .github/workflows/build-package.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 3de7a75..027eb40 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -9,6 +9,7 @@ on: pull_request: branches: [ "main" ] + env: LLM_CLIENT: ollama MODEL: phi3 @@ -25,7 +26,7 @@ jobs: start-ollama-server: uses: ./.github/workflows/run-ollama.yaml with: - model: ${{ env.MODEL }} + model: $$MODEL build: runs-on: ubuntu-latest From d6fbc9f9a354106110d7ccb21b2455845f3d20b2 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 19:48:53 +0200 Subject: [PATCH 034/112] just a basic demo of the issue --- .github/workflows/run-test.yaml | 17 +++++++++++++++++ .github/workflows/test.yaml | 18 ++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 .github/workflows/run-test.yaml create mode 100644 .github/workflows/test.yaml diff --git a/.github/workflows/run-test.yaml b/.github/workflows/run-test.yaml new file mode 100644 index 0000000..5f83b91 --- /dev/null +++ b/.github/workflows/run-test.yaml @@ -0,0 +1,17 @@ +name: reusable workflow +on: + workflow_call: + inputs: + my_parameter: + required: true + type: string + description: "a parameter for testing" +jobs: + setup-ollama: + runs-on: ubuntu-latest + steps: + - name: simple echo + run: | + echo "reusable test done wit model ${{ inputs.my_parameter }}" + outputs: + test_ready: "true" \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..b598fbb --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,18 @@ +name: Python package +on: + push: + branches: ["*"] 
+env: + MY_PARAMETER: 'a value' +jobs: + start-reusable-job: + uses: ./.github/workflows/run-test.yaml + with: + my_parameter: ${{env.MY_PARAMETER}} + next-job: + runs-on: ubuntu-latest + needs: start-reusable-job + steps: + - name: test + run: | + echo "it works!" From ada1654a7c89ef31c9dbca1a13533a51f0404dc4 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 20:05:28 +0200 Subject: [PATCH 035/112] big hack trial --- .github/workflows/test.yaml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b598fbb..59f97ea 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -5,14 +5,24 @@ on: env: MY_PARAMETER: 'a value' jobs: + set-env: + runs-on: ubuntu-latest + outputs: + MY_PARAMETER: ${{ steps.set-output.outputs.MY_PARAMETER }} + steps: + - name: Set environment variable + id: set-output + run: echo "MY_PARAMETER=${{env.MY_PARAMETER}}" >> $GITHUB_ENV + start-reusable-job: + needs: set-env uses: ./.github/workflows/run-test.yaml with: - my_parameter: ${{env.MY_PARAMETER}} + my_parameter: ${{ needs.set-env.outputs.MY_PARAMETER }} next-job: runs-on: ubuntu-latest needs: start-reusable-job steps: - name: test run: | - echo "it works!" + echo "it works with ${{env.MY_PARAMETER}}!" 
From af989890b7f91f50261f4adca9699e8a3ddf697d Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 27 Aug 2024 20:08:00 +0200 Subject: [PATCH 036/112] better trace --- .github/workflows/run-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-test.yaml b/.github/workflows/run-test.yaml index 5f83b91..770d308 100644 --- a/.github/workflows/run-test.yaml +++ b/.github/workflows/run-test.yaml @@ -7,11 +7,11 @@ on: type: string description: "a parameter for testing" jobs: - setup-ollama: + reusable-job: runs-on: ubuntu-latest steps: - name: simple echo run: | - echo "reusable test done wit model ${{ inputs.my_parameter }}" + echo "reusable test done wit my_parameter= ${{ inputs.my_parameter }}" outputs: test_ready: "true" \ No newline at end of file From 75c9e545f2f20e718dbceea167eda8dee0faa5b5 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 28 Aug 2024 17:07:08 +0200 Subject: [PATCH 037/112] test with a mixtral suggestion (won't work) --- .github/workflows/run-test.yaml | 16 +++++++--------- .github/workflows/test.yaml | 32 ++++++++------------------------ 2 files changed, 15 insertions(+), 33 deletions(-) diff --git a/.github/workflows/run-test.yaml b/.github/workflows/run-test.yaml index 770d308..c2278a7 100644 --- a/.github/workflows/run-test.yaml +++ b/.github/workflows/run-test.yaml @@ -1,17 +1,15 @@ -name: reusable workflow +name: Reusable Workflow + on: workflow_call: inputs: - my_parameter: + my_param: required: true type: string - description: "a parameter for testing" + jobs: - reusable-job: + example-job: runs-on: ubuntu-latest steps: - - name: simple echo - run: | - echo "reusable test done wit my_parameter= ${{ inputs.my_parameter }}" - outputs: - test_ready: "true" \ No newline at end of file + - name: Print parameter + run: echo ${{ inputs.my_param }} \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 59f97ea..1d4e4c4 100644 --- 
a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,28 +1,12 @@ -name: Python package -on: - push: - branches: ["*"] +name: Main Workflow + +on: [push] + env: - MY_PARAMETER: 'a value' -jobs: - set-env: - runs-on: ubuntu-latest - outputs: - MY_PARAMETER: ${{ steps.set-output.outputs.MY_PARAMETER }} - steps: - - name: Set environment variable - id: set-output - run: echo "MY_PARAMETER=${{env.MY_PARAMETER}}" >> $GITHUB_ENV + PARAM: value - start-reusable-job: - needs: set-env +jobs: + call-reusable-workflow: uses: ./.github/workflows/run-test.yaml with: - my_parameter: ${{ needs.set-env.outputs.MY_PARAMETER }} - next-job: - runs-on: ubuntu-latest - needs: start-reusable-job - steps: - - name: test - run: | - echo "it works with ${{env.MY_PARAMETER}}!" + my_param: ${{ env.PARAM }} From 4cfb49b40a59374d92b6da99310dbefae6927d92 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 28 Aug 2024 17:10:25 +0200 Subject: [PATCH 038/112] another mixtral suggestion --- .github/workflows/test.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 1d4e4c4..158a5bb 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -2,11 +2,13 @@ name: Main Workflow on: [push] -env: - PARAM: value - jobs: call-reusable-workflow: - uses: ./.github/workflows/run-test.yaml - with: - my_param: ${{ env.PARAM }} + runs-on: ubuntu-latest + env: + PARAM: value + steps: + - name: Call reusable workflow + uses: ./.github/workflows/reusable.yml + with: + my_param: ${{ env.PARAM }} \ No newline at end of file From d2a13bd20e38198ace713017b9ae2410a304760c Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 28 Aug 2024 17:11:59 +0200 Subject: [PATCH 039/112] typo --- .github/workflows/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 158a5bb..16caea2 100644 --- 
a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -9,6 +9,6 @@ jobs: PARAM: value steps: - name: Call reusable workflow - uses: ./.github/workflows/reusable.yml + uses: ./.github/workflows/run-test.yml with: my_param: ${{ env.PARAM }} \ No newline at end of file From a429ae90fae6087d4bc75b7070452ca5a6fd64ae Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 28 Aug 2024 17:12:42 +0200 Subject: [PATCH 040/112] typo --- .github/workflows/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 16caea2..b71dcd0 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -9,6 +9,6 @@ jobs: PARAM: value steps: - name: Call reusable workflow - uses: ./.github/workflows/run-test.yml + uses: ./.github/workflows/run-test.yaml with: my_param: ${{ env.PARAM }} \ No newline at end of file From e6f1c9597ff4d34ba571d0576646f547ed7ad8af Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 28 Aug 2024 17:18:45 +0200 Subject: [PATCH 041/112] suggestion sonnet (won't work) --- .github/workflows/test.yaml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b71dcd0..2a631ef 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -2,13 +2,11 @@ name: Main Workflow on: [push] +env: + PARAM: value + jobs: call-reusable-workflow: - runs-on: ubuntu-latest - env: - PARAM: value - steps: - - name: Call reusable workflow - uses: ./.github/workflows/run-test.yaml - with: - my_param: ${{ env.PARAM }} \ No newline at end of file + uses: ./.github/workflows/run-test.yaml + with: + my_param: ${{ PARAM }} From c77ae011d092f3675126a02e92f7d5b741bdaef5 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 28 Aug 2024 17:24:31 +0200 Subject: [PATCH 042/112] last try --- .github/workflows/run-test.yaml | 6 +----- .github/workflows/test.yaml | 2 -- 2 
files changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/run-test.yaml b/.github/workflows/run-test.yaml index c2278a7..3dcdb77 100644 --- a/.github/workflows/run-test.yaml +++ b/.github/workflows/run-test.yaml @@ -2,14 +2,10 @@ name: Reusable Workflow on: workflow_call: - inputs: - my_param: - required: true - type: string jobs: example-job: runs-on: ubuntu-latest steps: - name: Print parameter - run: echo ${{ inputs.my_param }} \ No newline at end of file + run: echo "param value=${{ env.PARAM }}" \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2a631ef..a6381ea 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -8,5 +8,3 @@ env: jobs: call-reusable-workflow: uses: ./.github/workflows/run-test.yaml - with: - my_param: ${{ PARAM }} From 7d44e31b3187bb736b4350f82e4646c2672d8780 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 28 Aug 2024 17:30:56 +0200 Subject: [PATCH 043/112] remove tests yaml files --- .github/workflows/run-test.yaml | 11 ----------- .github/workflows/test.yaml | 10 ---------- 2 files changed, 21 deletions(-) delete mode 100644 .github/workflows/run-test.yaml delete mode 100644 .github/workflows/test.yaml diff --git a/.github/workflows/run-test.yaml b/.github/workflows/run-test.yaml deleted file mode 100644 index 3dcdb77..0000000 --- a/.github/workflows/run-test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: Reusable Workflow - -on: - workflow_call: - -jobs: - example-job: - runs-on: ubuntu-latest - steps: - - name: Print parameter - run: echo "param value=${{ env.PARAM }}" \ No newline at end of file diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml deleted file mode 100644 index a6381ea..0000000 --- a/.github/workflows/test.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: Main Workflow - -on: [push] - -env: - PARAM: value - -jobs: - call-reusable-workflow: - uses: ./.github/workflows/run-test.yaml From 
8ab743134b234afd527d02e53b29e03f8dacb2ac Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 28 Aug 2024 17:31:12 +0200 Subject: [PATCH 044/112] give up and duplicate model name --- .github/workflows/build-package.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 027eb40..f583e4b 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -12,7 +12,7 @@ on: env: LLM_CLIENT: ollama - MODEL: phi3 + MODEL: phi3 # WARNING!!! Needs to be duplicated in the start-ollama-server.yaml job! Can't pass ${{env.MODEL}} VERBOSE: true # optional INPUT_PO: tests/input/input.po ORIGINAL_LANGUAGE: English @@ -26,10 +26,11 @@ jobs: start-ollama-server: uses: ./.github/workflows/run-ollama.yaml with: - model: $$MODEL + model: phi3 build: runs-on: ubuntu-latest + needs: start-ollama-server steps: - uses: actions/checkout@v4 - name: Set up Python 3.10 From d4a9b5f3ea311902c2473889fccc36ce88063d0b Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 28 Aug 2024 17:44:25 +0200 Subject: [PATCH 045/112] another try for passing model based on https://github.com/orgs/community/discussions/26671 --- .github/workflows/build-package.yaml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index f583e4b..b670293 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -12,7 +12,7 @@ on: env: LLM_CLIENT: ollama - MODEL: phi3 # WARNING!!! Needs to be duplicated in the start-ollama-server.yaml job! 
Can't pass ${{env.MODEL}} + MODEL: phi3 VERBOSE: true # optional INPUT_PO: tests/input/input.po ORIGINAL_LANGUAGE: English @@ -23,10 +23,23 @@ env: jobs: + env-setup: + name: Setup Dynamic Environment Variables + runs-on: ubuntu-latest + outputs: + model: ${{ steps.set-output-defaults.outputs.model }} + steps: + - name: set outputs with default values + id: set-output-defaults + run: | + # If workflow_dispatch, use inputs (left), if other trigger, use default env (right) + echo "::set-output name=model::${{ env.MODEL || 'llama3.1:8b' }}" + start-ollama-server: + needs: [env-setup] uses: ./.github/workflows/run-ollama.yaml with: - model: phi3 + model: ${{needs.env-setup.outputs.model}} build: runs-on: ubuntu-latest From 26e70b3cab93f3a1eb409461df267ef9391b6322 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 28 Aug 2024 18:03:38 +0200 Subject: [PATCH 046/112] give up on using a reusable workflow for the ollama setup --- .github/workflows/build-package.yaml | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index b670293..3011158 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -23,27 +23,22 @@ env: jobs: - env-setup: - name: Setup Dynamic Environment Variables + setup-ollama: runs-on: ubuntu-latest - outputs: - model: ${{ steps.set-output-defaults.outputs.model }} steps: - - name: set outputs with default values - id: set-output-defaults - run: | - # If workflow_dispatch, use inputs (left), if other trigger, use default env (right) - echo "::set-output name=model::${{ env.MODEL || 'llama3.1:8b' }}" + - name: Install Ollama + run: | + curl -fsSL https://ollama.ai/install.sh | sh + ollama --version - start-ollama-server: - needs: [env-setup] - uses: ./.github/workflows/run-ollama.yaml - with: - model: ${{needs.env-setup.outputs.model}} + - name: Start Ollama service and pull the model + run: | 
+ ollama serve & + ollama pull ${{ env.MODEL }} build: runs-on: ubuntu-latest - needs: start-ollama-server + needs: setup-ollama steps: - uses: actions/checkout@v4 - name: Set up Python 3.10 From 9cc12d1e9c1d3e3e313eba509fe1c2412bc846c0 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Thu, 29 Aug 2024 19:14:36 +0200 Subject: [PATCH 047/112] use only one job and manage a cache --- .github/workflows/build-package.yaml | 127 ++++++++++++++++----------- 1 file changed, 76 insertions(+), 51 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 3011158..baed64a 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -23,61 +23,86 @@ env: jobs: - setup-ollama: + ollama-job: runs-on: ubuntu-latest steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Cache Ollama + uses: actions/cache@v4 + with: + path: | + ~/.ollama + /usr/local/bin/ollama + key: ${{ runner.os }}-ollama-${{ hashFiles('**/ollama-version.txt') }} + restore-keys: | + ${{ runner.os }}-ollama- + - name: Install Ollama run: | - curl -fsSL https://ollama.ai/install.sh | sh - ollama --version + if [ ! -f /usr/local/bin/ollama ]; then + curl https://ollama.ai/install.sh | sh + fi - - name: Start Ollama service and pull the model - run: | + - name: Start Ollama and wait for it to serve + run: | ollama serve & - ollama pull ${{ env.MODEL }} + sleep 10 - build: - runs-on: ubuntu-latest - needs: setup-ollama - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.10 - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'pip' - - name: Create Venv and Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install --upgrade uv # install python manager - uv sync # create a virtual environment and install dependencies - source .venv/bin/activate # activate the virtual environment - uv pip install -e . 
# to allow dev package testing - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - uv run flake8 ./src ./tests --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - uv run flake8 ./src ./tests --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics - - name: Make envfile - uses: SpicyPizza/create-envfile@v2.0.3 - with: - envkey_VERBOSE: ${{ env.VERBOSE }} # optional - envkey_INPUT_PO: ${{ env.INPUT_PO }} - envkey_ORIGINAL_LANGUAGE: ${{ env.ORIGINAL_LANGUAGE }} - envkey_CONTEXT_LANGUAGE: ${{ env.CONTEXT_LANGUAGE }} - envkey_TARGET_LANGUAGES: ${{ env.TARGET_LANGUAGES }} - envkey_LLM_CLIENT: ${{ env.LLM_CLIENT }} - envkey_LLM_MODEL: ${{ env.MODEL }} - envkey_OLLAMA_BASE_URL: ${{ env.OLLAMA_BASE_URL }} - envkey_SYSTEM_PROMPT: ${{ vars.SYSTEM_PROMPT }} - envkey_USER_PROMPT: ${{ vars.USER_PROMPT }} - directory: . - file_name: .env - fail_on_empty: false - sort_keys: false - - name: Test with pytest - run: | - # [[ -f .env ]] && cat .env || echo "No .env file found in root" - # [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" - uv run pytest -s ./tests \ No newline at end of file + - name: Cache Ollama models + uses: actions/cache@v4 + with: + path: ~/.ollama/models + key: ${{ runner.os }}-ollama-models-${{ hashFiles('**/model-list.txt') }} + + - name: Pull Ollama model + run: | + if [ ! 
-f ~/.ollama/models/${{ env.MODEL }}.bin ]; then + ollama pull ${{ env.MODEL }} + fi + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: 'pip' + + - name: Create Venv and Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install --upgrade uv # install python manager + uv sync # create a virtual environment and install dependencies + source .venv/bin/activate # activate the virtual environment + uv pip install -e . # to allow dev package testing + + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + uv run flake8 ./src ./tests --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + uv run flake8 ./src ./tests --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics + + - name: Make envfile + uses: SpicyPizza/create-envfile@v2.0.3 + with: + envkey_VERBOSE: ${{ env.VERBOSE }} # optional + envkey_INPUT_PO: ${{ env.INPUT_PO }} + envkey_ORIGINAL_LANGUAGE: ${{ env.ORIGINAL_LANGUAGE }} + envkey_CONTEXT_LANGUAGE: ${{ env.CONTEXT_LANGUAGE }} + envkey_TARGET_LANGUAGES: ${{ env.TARGET_LANGUAGES }} + envkey_LLM_CLIENT: ${{ env.LLM_CLIENT }} + envkey_LLM_MODEL: ${{ env.MODEL }} + envkey_OLLAMA_BASE_URL: ${{ env.OLLAMA_BASE_URL }} + envkey_SYSTEM_PROMPT: ${{ vars.SYSTEM_PROMPT }} + envkey_USER_PROMPT: ${{ vars.USER_PROMPT }} + directory: . 
+ file_name: .env + fail_on_empty: false + sort_keys: false + + - name: Test with pytest + run: | + # [[ -f .env ]] && cat .env || echo "No .env file found in root" + # [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" + uv run pytest -s ./tests \ No newline at end of file From bef02d315b64255fa3ac27e15a74a872b961c61e Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Thu, 29 Aug 2024 19:52:06 +0200 Subject: [PATCH 048/112] use 2 files to manage the github cache --- .github/workflows/build-package.yaml | 28 ++++++++++++++++++++++------ .github/workflows/model-list.txt | 2 ++ .github/workflows/ollama-version.txt | 3 +++ 3 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/model-list.txt create mode 100644 .github/workflows/ollama-version.txt diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index baed64a..9c32015 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -19,7 +19,13 @@ env: CONTEXT_LANGUAGE: French TARGET_LANGUAGES: Italian # comma separated list OLLAMA_BASE_URL: "http://localhost:11434/v1" - + # 2 files used to cache the Ollama version and model list + # so that they do not need to be downloaded every time + # Touch this file to force it to update Ollama + OLLAMA_VERSION_FILE: '.github/workflows/ollama-version.txt' + # Put in this file a list of all models you want to pull from Ollama, one per line. + # MODEL must be set to one of these + MODEL_LIST_FILE: '.github/workflows/model-list.txt' jobs: @@ -35,7 +41,7 @@ jobs: path: | ~/.ollama /usr/local/bin/ollama - key: ${{ runner.os }}-ollama-${{ hashFiles('**/ollama-version.txt') }} + key: ${{ runner.os }}-ollama-${{ hashFiles(env.OLLAMA_VERSION_FILE) }} restore-keys: | ${{ runner.os }}-ollama- @@ -44,6 +50,14 @@ jobs: if [ ! 
-f /usr/local/bin/ollama ]; then curl https://ollama.ai/install.sh | sh fi + # This can't work because we can't control the version of Ollama + # that is installed. So, just touch the OLLAMA_VERSION_FILE to force it to update + # INSTALLED_VERSION=$(ollama --version) + # DESIRED_VERSION=$(cat ${{ env.OLLAMA_VERSION_FILE }}) + # if [ "$INSTALLED_VERSION" != "$DESIRED_VERSION" ]; then + # echo "Updating Ollama to version $DESIRED_VERSION" + # curl https://ollama.ai/install.sh | sh + # fi - name: Start Ollama and wait for it to serve run: | @@ -54,13 +68,15 @@ jobs: uses: actions/cache@v4 with: path: ~/.ollama/models - key: ${{ runner.os }}-ollama-models-${{ hashFiles('**/model-list.txt') }} + key: ${{ runner.os }}-ollama-models-${{ hashFiles(env.MODEL_LIST_FILE) }} - name: Pull Ollama model run: | - if [ ! -f ~/.ollama/models/${{ env.MODEL }}.bin ]; then - ollama pull ${{ env.MODEL }} - fi + while IFS= read -r model || [[ -n "$model" ]]; do + if [ ! -f ~/.ollama/models/${model}.bin ]; then + ollama pull $model + fi + done < ${{ env.MODEL_LIST_FILE }} - name: Set up Python 3.10 uses: actions/setup-python@v5 diff --git a/.github/workflows/model-list.txt b/.github/workflows/model-list.txt new file mode 100644 index 0000000..6bc2083 --- /dev/null +++ b/.github/workflows/model-list.txt @@ -0,0 +1,2 @@ +phi3:latest +gemma2:2b \ No newline at end of file diff --git a/.github/workflows/ollama-version.txt b/.github/workflows/ollama-version.txt new file mode 100644 index 0000000..4ad50f4 --- /dev/null +++ b/.github/workflows/ollama-version.txt @@ -0,0 +1,3 @@ +# Touch this file or change its contents +# to force the GitHub workflow to install the +# latest version of Ollama. 
\ No newline at end of file From 60353c788bb10ddd43155215870afa519c52c0b7 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Thu, 29 Aug 2024 20:16:33 +0200 Subject: [PATCH 049/112] added assert again --- tests/test_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_main.py b/tests/test_main.py index c1ab751..4d7e43b 100755 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -77,6 +77,6 @@ def test_main(self, params, llm_client, output_file): """ logger.vprint(trans_exp) outfile.write(f'{out} {trans_exp}') - # assert translation == tr['target_translation'] + assert translation == tr['target_translation'] outfile.close() extract_csv_translations(output_file, params) From 44a85c6d8a4b09ac475537556db1d4296f86288c Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Thu, 29 Aug 2024 20:17:16 +0200 Subject: [PATCH 050/112] restrict the number of tests on Github Actions to 3 --- tests/settings.py | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/tests/settings.py b/tests/settings.py index 3d66fe8..46988f3 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -1,8 +1,31 @@ # Where tests results will be stored. Can be overriden on the command line +from os import environ + + OUTPUT_DIRECTORY = "./tests/output" # Some ambiguous sentences in the ORIGINAL_LANGUAGE and their CONTEXT_LANGUAGE translations for testing TEST_TRANSLATIONS = [ + { + "original_phrase": "He gave her a ring.", + "context_translation": "Il lui a donné une bague.", + "target_translation": "Lui ha regalato un anello." + }, + { + "original_phrase": "She made a call.", + "context_translation": "Elle a pris une décision.", + "target_translation": "Lei ha preso una decisione." + }, + { + "original_phrase": "They left the room.", + "context_translation": "Ils ont quitté la pièce.", + "target_translation": "Si sono andati dalla stanza." 
+ }, + { + "original_phrase": "He gave her a ring.", + "context_translation": "Il lui a passé un coup de fil.", + "target_translation": "Lui ha regalato un anello." + }, { "original_phrase": "She broke down", "context_translation": "Elle est tombée en panne", @@ -38,29 +61,13 @@ "context_translation": "Elle a passé un appel.", "target_translation": "Lei ha fatto una chiamata." }, - { - "original_phrase": "She made a call.", - "context_translation": "Elle a pris une décision.", - "target_translation": "Lei ha preso una decisione." - }, - { - "original_phrase": "They left the room.", - "context_translation": "Ils ont quitté la pièce.", - "target_translation": "Si sono andati dalla stanza." - }, { "original_phrase": "They left the room.", "context_translation": "Ils ont laissé la pièce en l'état.", "target_translation": "Si hanno lasciato la stanza." }, - { - "original_phrase": "He gave her a ring.", - "context_translation": "Il lui a donné une bague.", - "target_translation": "Lui ha regalato un anello." - }, - { - "original_phrase": "He gave her a ring.", - "context_translation": "Il lui a passé un coup de fil.", - "target_translation": "Lui ha regalato un anello." 
- } ] + +# use only 3 first translations for github actions +if environ.get("GITHUB_ACTIONS") == "true": + TEST_TRANSLATIONS = TEST_TRANSLATIONS[:3] From fcd737de4b051450b9cc46da1762b8c93a32ebe1 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Thu, 29 Aug 2024 20:17:51 +0200 Subject: [PATCH 051/112] ollama list after pulling for checking what models are installed --- .github/workflows/build-package.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 9c32015..177b524 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -77,6 +77,7 @@ jobs: ollama pull $model fi done < ${{ env.MODEL_LIST_FILE }} + ollama list - name: Set up Python 3.10 uses: actions/setup-python@v5 From 90032c0b99e46e135bf7bd7c4a7dbd7344f50a20 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Thu, 29 Aug 2024 20:18:07 +0200 Subject: [PATCH 052/112] flake8 --- src/auto_po_lyglot/getenv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index 46fb6f7..5bd20b2 100755 --- a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -56,7 +56,7 @@ def __init__(self, additional_args=None): self.verbose = args.verbose or bool(environ.get('VERBOSE', False)) logger.set_verbose(self.verbose) - + # original language self.original_language = args.original_language or environ.get('ORIGINAL_LANGUAGE', 'English') # context translation language From fd815c686569f662cf061e72ff7d1d4bd12a8a35 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Fri, 30 Aug 2024 17:37:49 +0200 Subject: [PATCH 053/112] added code for debugging the cache --- .github/workflows/build-package.yaml | 77 +++++++++++++++++++++------- 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 177b524..290ae0e 100644 --- 
a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -12,7 +12,7 @@ on: env: LLM_CLIENT: ollama - MODEL: phi3 + MODEL: gemma2:2b VERBOSE: true # optional INPUT_PO: tests/input/input.po ORIGINAL_LANGUAGE: English @@ -33,52 +33,91 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v3 + + - name: Display file contents + run: | + echo "Ollama version file content:" + cat ${{ env.OLLAMA_VERSION_FILE }} + echo "Model list file content:" + cat ${{ env.MODEL_LIST_FILE }} - name: Cache Ollama - uses: actions/cache@v4 + uses: actions/cache@v3 + id: cache-ollama with: - path: | - ~/.ollama - /usr/local/bin/ollama + path: ~/.ollama key: ${{ runner.os }}-ollama-${{ hashFiles(env.OLLAMA_VERSION_FILE) }} restore-keys: | ${{ runner.os }}-ollama- - - name: Install Ollama + - name: Check Ollama cache + run: | + if [ -f ~/.ollama/bin/ollama ]; then + echo "Ollama cache hit" + ls -l ~/.ollama/bin/ollama + else + echo "Ollama cache miss" + fi + + - name: Install or Use Cached Ollama run: | - if [ ! -f /usr/local/bin/ollama ]; then + if [ ! -f ~/.ollama/bin/ollama ]; then + echo "Installing Ollama" curl https://ollama.ai/install.sh | sh + mkdir -p ~/.ollama/bin + cp /usr/local/bin/ollama ~/.ollama/bin/ollama + else + echo "Using cached Ollama" fi - # This can't work because we can't control the version of Ollama - # that is installed. 
So, just touch the OLLAMA_VERSION_FILE to force it to update - # INSTALLED_VERSION=$(ollama --version) - # DESIRED_VERSION=$(cat ${{ env.OLLAMA_VERSION_FILE }}) - # if [ "$INSTALLED_VERSION" != "$DESIRED_VERSION" ]; then - # echo "Updating Ollama to version $DESIRED_VERSION" - # curl https://ollama.ai/install.sh | sh - # fi + sudo ln -sf ~/.ollama/bin/ollama /usr/local/bin/ollama + ollama --version - name: Start Ollama and wait for it to serve - run: | + run: | ollama serve & sleep 10 - name: Cache Ollama models - uses: actions/cache@v4 + uses: actions/cache@v3 + id: cache-models with: path: ~/.ollama/models key: ${{ runner.os }}-ollama-models-${{ hashFiles(env.MODEL_LIST_FILE) }} - - name: Pull Ollama model + - name: Check models cache + run: | + if [ -d ~/.ollama/models ]; then + echo "Models cache hit" + ls -l ~/.ollama/models + else + echo "Models cache miss" + fi + + - name: Pull Ollama models run: | while IFS= read -r model || [[ -n "$model" ]]; do if [ ! -f ~/.ollama/models/${model}.bin ]; then + echo "Pulling model: $model" ollama pull $model + else + echo "Model already cached: $model" fi done < ${{ env.MODEL_LIST_FILE }} ollama list + - name: Debug cache + if: always() + run: | + echo "Ollama cache restored: ${{ steps.cache-ollama.outputs.cache-hit }}" + echo "Models cache restored: ${{ steps.cache-models.outputs.cache-hit }}" + echo "Ollama version:" + ollama --version + echo "Available models:" + ollama list + echo "Ollama directory content:" + ls -R ~/.ollama + - name: Set up Python 3.10 uses: actions/setup-python@v5 with: From f391d5daff90580b00eec075322b6585a65d15a2 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Fri, 30 Aug 2024 17:56:39 +0200 Subject: [PATCH 054/112] more cache debugging --- .github/workflows/build-package.yaml | 30 +++++++++++----------------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 290ae0e..7c1a285 100644 --- 
a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -35,12 +35,12 @@ jobs: - name: Checkout code uses: actions/checkout@v3 - - name: Display file contents + - name: Display Ollama version run: | echo "Ollama version file content:" cat ${{ env.OLLAMA_VERSION_FILE }} - echo "Model list file content:" - cat ${{ env.MODEL_LIST_FILE }} + echo "Ollama version hash:" + echo ${{ hashFiles(env.OLLAMA_VERSION_FILE) }} - name: Cache Ollama uses: actions/cache@v3 @@ -51,13 +51,11 @@ jobs: restore-keys: | ${{ runner.os }}-ollama- - - name: Check Ollama cache + - name: Debug Cache Ollama run: | - if [ -f ~/.ollama/bin/ollama ]; then - echo "Ollama cache hit" - ls -l ~/.ollama/bin/ollama - else - echo "Ollama cache miss" + echo "Cache hit: ${{ steps.cache-ollama.outputs.cache-hit }}" + if [ "${{ steps.cache-ollama.outputs.cache-hit }}" != 'true' ]; then + echo "Cache miss. This is normal if this is the first run or if the Ollama version has changed." fi - name: Install or Use Cached Ollama @@ -85,13 +83,11 @@ jobs: path: ~/.ollama/models key: ${{ runner.os }}-ollama-models-${{ hashFiles(env.MODEL_LIST_FILE) }} - - name: Check models cache + - name: Debug Cache Models run: | - if [ -d ~/.ollama/models ]; then - echo "Models cache hit" - ls -l ~/.ollama/models - else - echo "Models cache miss" + echo "Models cache hit: ${{ steps.cache-models.outputs.cache-hit }}" + if [ "${{ steps.cache-models.outputs.cache-hit }}" != 'true' ]; then + echo "Models cache miss. This is normal if this is the first run or if the model list has changed." 
fi - name: Pull Ollama models @@ -106,11 +102,9 @@ jobs: done < ${{ env.MODEL_LIST_FILE }} ollama list - - name: Debug cache + - name: Debug final state if: always() run: | - echo "Ollama cache restored: ${{ steps.cache-ollama.outputs.cache-hit }}" - echo "Models cache restored: ${{ steps.cache-models.outputs.cache-hit }}" echo "Ollama version:" ollama --version echo "Available models:" From e607638ad7a2e63301f1e42d82ae76f14db0bb5a Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Fri, 30 Aug 2024 18:28:27 +0200 Subject: [PATCH 055/112] just for testing the cache --- tests/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/settings.py b/tests/settings.py index 46988f3..b96445b 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -68,6 +68,6 @@ }, ] -# use only 3 first translations for github actions +# use only 2 first translations for github actions if environ.get("GITHUB_ACTIONS") == "true": - TEST_TRANSLATIONS = TEST_TRANSLATIONS[:3] + TEST_TRANSLATIONS = TEST_TRANSLATIONS[:2] From 13339a00fe76f7f95cbf5359ea2631e11f94b573 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 3 Sep 2024 16:57:21 +0200 Subject: [PATCH 056/112] added script generation --- pyproject.toml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 80e282b..a1f1169 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,3 +38,13 @@ Homepage = "https://github.com/leolivier/transpo" Repository = "https://github.com/leolivier/transpo.git" Issues = "https://github.com/leolivier/transpo/issues" Download = "https://github.com/leolivier/transpo/archive/refs/tags/v1.0.0.tar.gz" + +[tool.hatch.build.targets.wheel] +packages = ["src"] + +[project.scripts] +auto_po_lyglot = "src.auto_po_lyglot.po_main:main" + +[tool.hatch.build.targets.wheel.force-include] +"src" = "auto_po_lyglot" + From a605f9826c35310529c490df14e847ef52e7d594 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 3 Sep 2024 
19:51:42 +0200 Subject: [PATCH 057/112] first commit --- unset_env | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 unset_env diff --git a/unset_env b/unset_env new file mode 100644 index 0000000..a2336a2 --- /dev/null +++ b/unset_env @@ -0,0 +1,14 @@ +# make sure your env is clean by running `source unset_env` +# warning, this will unset the API keys so if they are not in your .env file, comment the 2 first lines... +unset ANTHROPIC_API_KEY +unset OPENAI_API_KEY +unset USER_PROMPT2 +unset USER_PROMPT1 +unset SYSTEM_PROMPT1 +unset SYSTEM_PROMPT2 +unset USER_PROMPT +unset SYSTEM_PROMPT +unset ORIGINAL_LANGUAGE +unset LLM_MODEL +unset TARGET_LANGUAGES +unset LLM_CLIENT From 8460b67443d49785993cb0213831f12d12d6e46e Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 3 Sep 2024 19:52:00 +0200 Subject: [PATCH 058/112] 1rst commit --- src/auto_po_lyglot/examples.py | 89 ++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 src/auto_po_lyglot/examples.py diff --git a/src/auto_po_lyglot/examples.py b/src/auto_po_lyglot/examples.py new file mode 100644 index 0000000..d495221 --- /dev/null +++ b/src/auto_po_lyglot/examples.py @@ -0,0 +1,89 @@ +# This file contains some examples of translations in the different languages. These examples will be embedded in the +# system prompt as a guide to the LLM so they must be highly accurate. +# You can specify here 3 kind of examples: basic ones, ambiguous ones and po placeholder ones. +# The examples are providing English, Italian, Spanish, German, Portuguese and French translations and for ambiguous +# examples, orginal and contextual translations are only provided for English/French couple. +# You can another language by simply adding an entry in *ALL* corresponding lists. For ambiguous examples, you can also +# provide other couples than English/French; +# Basic examples is just a list of translations in different languages for the same simple phrase. 
+basic_examples = [ + { + "English": "Hello", + "French": "Bonjour", + "Italian": "Ciao", + "Spanish": "Hola", + "German": "Hallo", + "Portuguese": "Ola" + }, + { + "English": "Goodbye", + "French": "Au revoir", + "Italian": "Arrivederci", + "Spanish": "Adios", + "German": "Auf Wiedersehen", + "Portuguese": "Tchau" + }, +] + +# Ambiguous examples is a list of translations in different languages for one original phrase and its contextual translation. +ambiguous_examples = [ + { + "original_language": "English", + "context_language": "French", + "explanation": """ +Explanation: This {target_language} translation reflects the meaning of the French phrase, which indicates that the person +made a phone call, not that he gave a ring. The English phrase "He gave her a ring" can be ambiguous, as it can mean both +"giving a ring" and "making a phone call" colloquially. The French translation makes it clear that it is a phone call, so +the {target_language} version "{target_translation}" follows this interpretation.""", + "English": "He gave her a ring.", + "French": "Il lui a passé un coup de fil.", + "Italian": "Le ha fatto una telefonata.", + "Spanish": "Le llamó por teléfono.", + "German": "Er hat sie angerufen.", + "Portuguese": "Ele telefonou-lhe." + }, + { + "original_language": "French", + "context_language": "English", + "explanation": """ +Dans ce contexte, "s'effondrer" fait référence à une rupture émotionnelle plutôt qu'à une défaillance +mécanique, comme le confirme la traduction anglaise "broke down". La traduction {target_language} "{target_translation}" +reflète ce sens de rupture émotionnelle ou physique.""", + "French": "Elle s'est effondrée", + "English": "She broke down", + "Italian": "Si è crollata", + "Spanish": "Ella se derrumbó", + "German": "Sie brach zusammen", + "Portuguese": "Ela se derrubou." + }, +] + +# PO placeholder examples is a list of translations in different languages a sentence containing a set of placeholders. 
+# The placeholders should represent the different forms of mlaceholers supported by po files ie %(something)s, {something} +# and %s or %d. +po_placeholder_examples = [ + { + "English": "%(follower_name)s has created a new %(followed_type)s: %(followed_object_name)s", + "French": "%(follower_name)s a créé un nouveau %(followed_type)s: %(followed_object_name)s", + "Italian": "%(follower_name)s ha creato un nuovo %(followed_type)s: %(followed_object_name)s", + "Spanish": "%(follower_name)s ha creado un nuevo %(followed_type)s: %(followed_object_name)s", + "German": "%(follower_name)s hat ein neues %(followed_type)s erstellt: %(followed_object_name)s", + "Portuguese": "%(follower_name)s criou um novo %(followed_type)s: %(followed_object_name)s" + }, + { + "English": "{follower_name} has created a new {followed_type}: {followed_object_name}", + "French": "{follower_name} a créé un nouveau {followed_type}: {followed_object_name}", + "Italian": "{follower_name} ha creato un nuovo {followed_type}: {followed_object_name}", + "Spanish": "{follower_name} ha creado un nuevo {followed_type}: {followed_object_name}", + "German": "{follower_name} hat ein neues {followed_type} erstellt: {followed_object_name}", + "Portuguese": "{follower_name} criou um novo {followed_type}: {followed_object_name}" + }, + { + "English": "%s has created a new %s: %s", + "French": "%s a créé un nouveau %s: %s", + "Italian": "%s ha creato un nuovo %s: %s", + "Spanish": "%s ha creado un nuevo %s: %s", + "German": "%s hat ein neues %s erstellt: %s", + "Portuguese": "%s criou um novo %s: %s" + }, +] From bd702ec6a9373f27f03c4855cfa06eb4c6a8b284 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 3 Sep 2024 20:11:12 +0200 Subject: [PATCH 059/112] the system prompt now can take a lot of parameters --- src/auto_po_lyglot/base.py | 67 ++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/src/auto_po_lyglot/base.py b/src/auto_po_lyglot/base.py index 
1f33bca..835dda1 100644 --- a/src/auto_po_lyglot/base.py +++ b/src/auto_po_lyglot/base.py @@ -2,6 +2,7 @@ import logging from os import environ import sys +from .examples import po_placeholder_examples, basic_examples, ambiguous_examples class TranspoException(Exception): @@ -41,14 +42,66 @@ def get_system_prompt(self): print("SYSTEM_PROMPT environment variable not set") sys.exit(1) logger.debug("system prompt format: ", format) - prompt_params = { - "original_language": self.params.original_language, - "context_language": self.params.context_language, - "target_language": self.target_language, - } + try: + basic_exemple = basic_examples[0] + assert self.params.original_language in basic_exemple + assert self.params.context_language in basic_exemple + assert self.target_language in basic_exemple + simple_original_phrase = basic_exemple[self.params.original_language] + simple_context_translation = basic_exemple[self.params.context_language] + simple_target_translation = basic_exemple[self.target_language] + for ambiguous_example in ambiguous_examples: + if ambiguous_example['original_language'] == self.params.original_language and \ + ambiguous_example['context_language'] == self.params.context_language: + assert self.params.original_language in ambiguous_example + assert self.params.context_language in ambiguous_example + assert self.target_language in ambiguous_example + ambiguous_original_phrase = ambiguous_example[self.params.original_language] + ambiguous_context_translation = ambiguous_example[self.params.context_language] + ambiguous_target_translation = ambiguous_example[self.target_language] + ambiguous_explanation = ambiguous_example['explanation'] + ambiguous_target_translation = ambiguous_example[self.target_language] + break + if ambiguous_original_phrase is None: + raise TranspoException("ambiguous_examples.py does not contain an ambiguous example for these languages") + + # PO placeholders + assert len(po_placeholder_examples) == 3 + for 
po_placeholder_example in po_placeholder_examples: + assert self.params.original_language in po_placeholder_example + assert self.params.context_language in po_placeholder_example + assert self.target_language in po_placeholder_example + + prompt_params = { + "original_language": self.params.original_language, + "context_language": self.params.context_language, + "target_language": self.target_language, + "simple_original_phrase": simple_original_phrase, + "simple_context_translation": simple_context_translation, + "simple_target_translation": simple_target_translation, + "ambiguous_original_phrase": ambiguous_original_phrase, + "ambiguous_context_translation": ambiguous_context_translation, + "ambiguous_target_translation": ambiguous_target_translation, + "po_placeholder_original_phrase_1": po_placeholder_examples[0][self.params.original_language], + "po_placeholder_context_translation_1": po_placeholder_examples[0][self.params.context_language], + "po_placeholder_target_translation_1": po_placeholder_examples[0][self.target_language], + "po_placeholder_original_phrase_2": po_placeholder_examples[1][self.params.original_language], + "po_placeholder_context_translation_2": po_placeholder_examples[1][self.params.context_language], + "po_placeholder_target_translation_2": po_placeholder_examples[1][self.target_language], + "po_placeholder_original_phrase_3": po_placeholder_examples[2][self.params.original_language], + "po_placeholder_context_translation_3": po_placeholder_examples[2][self.params.context_language], + "po_placeholder_target_translation_3": po_placeholder_examples[2][self.target_language], + } + except KeyError as e: + raise TranspoException(f"examples.py does not contain an example for these piece: {str(e)}") + + # first format the explanation then add it to the params before formatting the prompt + explanation_params = prompt_params.copy() + explanation_params["target_translation"] = ambiguous_target_translation + 
prompt_params["ambiguous_explanation"] = ambiguous_explanation.format(**explanation_params) system_prompt = format.format(**prompt_params) if self.first: - logger.info("system prompt:\n", system_prompt) + logger.vprint("First system prompt:\n", system_prompt) self.first = False else: logger.debug("system prompt:\n", system_prompt) @@ -70,7 +123,7 @@ def get_user_prompt(self, phrase, context_translation): def process_translation(self, raw_result): translation_result = raw_result.split('\n') translation = translation_result[0].strip(' "') - explanation = 'Not provided' + explanation = None if len(translation_result) > 1: translation_result.pop(0) translation_result = [line for line in translation_result if line] From f2a28bcef18af9408b5afb7b05317bafd3e60bee Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 3 Sep 2024 20:11:40 +0200 Subject: [PATCH 060/112] added a retry mechanism when the api is overloaded --- src/auto_po_lyglot/claude_client.py | 56 +++++++++++++++++++---------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/src/auto_po_lyglot/claude_client.py b/src/auto_po_lyglot/claude_client.py index e57c9c3..89e59a9 100644 --- a/src/auto_po_lyglot/claude_client.py +++ b/src/auto_po_lyglot/claude_client.py @@ -1,3 +1,4 @@ +from time import sleep from anthropic import Anthropic from .base import TranspoClient, TranspoException, Logger @@ -35,24 +36,41 @@ def get_translation(self, system_prompt, user_prompt): class CachedClaudeClient(ClaudeClient): + first = True def get_translation(self, system_prompt, user_prompt): - try: - # uses a beta endpoint, changes in the future - response = self.client.beta.prompt_caching.messages.create( - model=self.params.model, - max_tokens=1024, - temperature=0.2, - system=[ - { - "type": "text", - "text": system_prompt, - "cache_control": {"type": "ephemeral"} - } - ], - messages=[{"role": "user", "content": user_prompt}], - ) - logger.info("claude cached usage", response.usage) - return 
response.content[0].text - except Exception as e: - raise TranspoException(str(e)) + retries = 0 + next_retry_in = 1 + max_retries = 5 + while retries < max_retries: + try: + # uses a beta endpoint, changes in the future + response = self.client.beta.prompt_caching.messages.create( + model=self.params.model, + max_tokens=1024, + temperature=0.2, + system=[ + { + "type": "text", + "text": system_prompt, + "cache_control": {"type": "ephemeral"} + } + ], + messages=[{"role": "user", "content": user_prompt}], + ) + if self.first: + self.first = False + logger.vprint("claude cached usage", response.usage) + else: + logger.info("claude cached usage", response.usage) + return response.content[0].text + except Exception as e: + if "overloaded_error" in str(e): + logger.vprint(f"claude cached overloaded error, next retry in {next_retry_in} seconds") + next_retry_in = 2 ** retries + if next_retry_in > 60: # should never happen with max_retries = 5 + next_retry_in = 60 + sleep(next_retry_in) + retries += 1 + continue + raise TranspoException(str(e)) From 506d6a2f0bd0a871782cb280476e475410b27ea5 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 3 Sep 2024 20:12:11 +0200 Subject: [PATCH 061/112] added log level setting --- src/auto_po_lyglot/getenv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index 5bd20b2..4d8d2b0 100755 --- a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +import logging from dotenv import load_dotenv from os import environ import argparse @@ -54,6 +55,8 @@ def __init__(self, additional_args=None): load_dotenv(override=True) + self.log_level = environ.get('LOG_LEVEL', 'WARNING') + logging.set_levelbasicConfig(self.log_level) self.verbose = args.verbose or bool(environ.get('VERBOSE', False)) logger.set_verbose(self.verbose) From ea1c2e32e505ef6f45839bd95ae161e96b1643e5 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: 
Tue, 3 Sep 2024 20:13:25 +0200 Subject: [PATCH 062/112] added model in outfilename = better traces --- src/auto_po_lyglot/po_main.py | 47 ++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/src/auto_po_lyglot/po_main.py b/src/auto_po_lyglot/po_main.py index 0295baa..a5a3e1c 100755 --- a/src/auto_po_lyglot/po_main.py +++ b/src/auto_po_lyglot/po_main.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +from time import sleep from .getenv import TranspoParams from pathlib import Path import polib @@ -32,16 +33,17 @@ def get_outfile_name(model_name, input_po, target_language, context_language): parent = p.parent grandparent = parent.parent context_lang_code = get_language_code(context_language) + target_code = get_language_code(target_language) if parent.name == 'LC_MESSAGES' and grandparent.name == context_lang_code: # we're in something like .../locale//LC_MESSAGES/file.po # let's try to build the same with the target language code - target_code = get_language_code(target_language) dir = grandparent.parent / target_code / 'LC_MESSAGES' # create the directory if it doesn't exist dir.mkdir(parents=True, exist_ok=True) outfile = dir / p.name - else: # otherwise, just add the target language code - outfile = p.with_suffix('_{target_code}.po') + else: # otherwise, just add the model name and the target language code in the file name + model_name = model_name.replace(':', '-') + outfile = p.with_suffix(f'.{model_name}.{target_code}.po') logger.vprint("Output file:", outfile) if outfile.exists(): @@ -50,7 +52,7 @@ def get_outfile_name(model_name, input_po, target_language, context_language): i_outfile = outfile # append a number to the filename while i_outfile.exists(): - i_outfile = outfile.with_suffix('-{i}.po') + i_outfile = outfile.with_suffix(f'.{i}.po') i += 1 outfile = i_outfile logger.vprint("Output file:", outfile) @@ -95,22 +97,27 @@ def main(): output_file = get_outfile_name(client.params.model, params.input_po, 
target_language, params.context_language) # Load input .po file po = polib.pofile(params.input_po) - for entry in po: - if entry.msgid and not entry.fuzzy: - context_translation = entry.msgstr if entry.msgstr else entry.msgid - original_phrase = entry.msgid - translation, explanation = client.translate(original_phrase, context_translation).split('\n') - if explanation: - entry.comment = explanation - # Update translation - entry.msgstr = translation - logger.vprint(f"""================== -English: "{original_phrase}" -French: "{context_translation}" -{target_language}: "{translation}" -Comment:{explanation} -""") - # Save the new .po file + try: + for entry in po: + if entry.msgid and not entry.fuzzy: + context_translation = entry.msgstr if entry.msgstr else entry.msgid + original_phrase = entry.msgid + translation, explanation = client.translate(original_phrase, context_translation) + # Add explanation to comment + if explanation: + entry.comment = explanation + # Update translation + entry.msgstr = translation + logger.vprint(f"""================== + {params.original_language}: "{original_phrase}" + {params.context_language}: "{context_translation}" + {target_language}: "{translation}" + Comment:{explanation if explanation else ''} + """) + sleep(1.0) # Sleep for 1 second to avoid rate limiting + except Exception as e: + logger.vprint(f"Error: {e}") + # Save the new .po file even if there was an error to not lose what was translated po.save(output_file) From 43f72e07d324f0d8ad63b24bc2048ad346422d76 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Tue, 3 Sep 2024 20:23:05 +0200 Subject: [PATCH 063/112] big upgrade of the system prompt, added log level, added Portuguese in the default target languages list --- .env.example | 100 ++++++++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 45 deletions(-) diff --git a/.env.example b/.env.example index f250e3d..32d841b 100644 --- a/.env.example +++ b/.env.example @@ -3,6 +3,9 @@ # Set 
to True if you want verbose output (recommended at the beginning) VERBOSE=False +# set log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) +LOG_LEVEL=INFO + # input.po file context # The input file itself. Usually provided on the command line but can also be set in the .env INPUT_PO=tests/input/input.po @@ -33,62 +36,69 @@ OLLAMA_BASE_URL="http://localhost:11434/v1" # the target languages to test for translation. Give a list of comma separated languages # Can be overriden on the command line (only one laguage in this case) -TARGET_LANGUAGES=Italian,Spanish,German +TARGET_LANGUAGES=Italian,Spanish,German,Portuguese # Two prebuilt system and user prompts, you can create your own ones using new numbers and change the choice below # The first one uses a very long and detailed system prompt and is quite efficient.If you find a better prompt, # please open a PR and provide it to the community -SYSTEM_PROMPT1="You are a helpful, smart translation assistant. You will be given an {original_language} sentence -to be translated to {target_language}. You will also be given a {context_language} translation -for this {original_language} sentence that you will consider for desambiguating the meaning of the -{original_language} sentence. Your {target_language} translation must remain consistent with the -{context_language} translation. +SYSTEM_PROMPT1="You are a highly skilled translator with expertise in {original_language}, {context_language}, and {target_language}. +Your task is to accurately translate the {original_language} text the user provides into {target_language} while preserving the meaning, +tone, and nuance of the original text. +As the provided sentences can be short and ambiguous, the user will also provide an accurate {context_language} translation for this {original_language} +sentence. Please, consider this {context_language} translation for desambiguating the meaning of the {original_language} sentence. 
Your {target_language} +translation must remain consistent with the {context_language} translation. Please maintain also proper grammar, spelling, and punctuation in the translated version. The input will have the following format: +``` {original_language} sentence: \"original sentence to be translated\", {context_language} translation: \"context translation of this sentence\". -Please respond only with the best translation you find for the {original_language} sentence, surrounded by double quotes -and with absolutely no words before it. -Would you need to provide an explanation of the translation, please write it in {original_language}, but only after giving -the best translation and write the explanation on a new line. For example, supposing the original language is English, the context translation is in French, -and the target language is Italien, you would receive as input: -English sentence: \"He gave her a ring.\", French translation: \"Il lui a passé un coup de fil.\" - -and your output would be: -\"Le ha dato un anello\" - -Would you need to provide an explanation of the translation, your output would be: -\"Le ha fatto una telefonata.\" -Explanation: This Italian translation reflects the meaning of the French phrase, which indicates that the person made a phone call, not that he gave a ring. -The English phrase \"He gave her a ring\" can be ambiguous, as it can mean both \"giving a ring\" and \"making a phone call\" colloquially. -The French translation makes it clear that it is a phone call, so the Italian version follows this interpretation. - -Another input example: -English sentence: \"She broke down.\", French translation: \"Elle est tombée en panne.\" - +``` +Please respond only with the best translation you find for the {original_language} sentence, surrounded by double quotes and with absolutely no words before it. 
+Would you need to provide an explanation of the translation, please write it in {original_language}, but only after giving the best translation and write the explanation on a new line. +For example, if you would receive as input: +``` +{original_language}: \"{simple_original_phrase}\", {context_language} translation: \"{simple_context_translation}\" +``` +your output in {target_language} would be: +``` +\"{simple_target_translation}\" +``` + +Another input example with an ambiguous original sentence for which you need an explanation: +``` +{original_language} sentence: \"{ambiguous_original_phrase}\", {context_language} translation: \"{ambiguous_context_translation}\" +``` and your output would be, assuming an explanation is needed: -\"Si è guastata\" -Explanation: This translation refers to a vehicle or machinery that has stopped working, consistent with the French version that uses \"tomber en panne\", -an idiomatic expression for mechanical failure. - -Now, supposing the original language was Italian, the context translation was a German one, and the target language is Spanish, you would receive as input: -Italian sentence: \"Hanno lasciato la stanza.\", German translation: \"Sie verließen den Raum.\" - -and your output would be: -\"Ellos salieron de la habitación.\" - +``` +\"{ambiguous_target_translation}\" +{ambiguous_explanation} +``` Also, sometimes, the sentence to be translated and its context translation will contain placheholders that you are not allowed to translate and must keep in the same place in your translation. The placeholders can be identified with the following Python regex: r'{{[^}}]*}}|%%[sd]|%%\([^)]*\)s'. Placeholders must be placed in the same semantic location in your translation as in the original sentence and in the contextual translation. -Sometimes, the name of the placeholders can be relevant for understanding the sentence. 
For instance, this input: -English sentence: \"%%(follower_name)s has created a new %%(followed_type)s: %%(followed_object_name)s\", French translation: \"%%(follower_name)s a créé un nouveau %%(followed_type)s: %%(followed_object_name)s\" - -would be translated in Italian into: -\"%%(follower_name)s ha creato un nuovo %%(followed_type)s: %%(followed_object_name)s\" - +Sometimes, the name of the placeholders can be relevant for understanding the sentence so you can use them to understand the contex but it is very important +that you do not translate them and you keep them in the right place in your translation. For instance, this input: +``` +{original_language} sentence: \"{po_placeholder_original_phrase_1}\\", {context_language} translation: \"{po_placeholder_context_translation_1}\" +``` +would be translated in {target_language} into: +``` +\"{po_placeholder_target_translation_1}\" +``` and, using another placheolder format: -English sentence: \"{{follower_name}} has created a new {{followed_type}}: {{followed_object_name}}\", French translation: \"{{follower_name}} a créé un nouveau {{followed_type}}: {{followed_object_name}}\" - -would be translated in Italian into: -\"{{follower_name}} ha creato un nuovo {{followed_type}}: {{followed_object_name}}\" +``` +{original_language} sentence: \"{po_placeholder_original_phrase_2}\\", {context_language} translation: \"{po_placeholder_context_translation_2}\" +``` +would be translated in {target_language} into: +``` +\"{po_placeholder_target_translation_2}\" +``` +Yet another format: +``` +{original_language} sentence: \"{po_placeholder_original_phrase_3}\\", {context_language} translation: \"{po_placeholder_context_translation_3}\" +``` +would be translated in {target_language} into: +``` +\"{po_placeholder_target_translation_3}\" +``` " USER_PROMPT1="{original_language} sentence: \"{original_phrase}\", {context_language} translation: \"{context_translation}\"" From 655436eb40f6136a05374b8fc72ef2895c49ca21 Mon Sep 17 
00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 4 Sep 2024 19:05:52 +0200 Subject: [PATCH 064/112] 1rst commit: ignore output po files --- tests/input/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/input/.gitignore diff --git a/tests/input/.gitignore b/tests/input/.gitignore new file mode 100644 index 0000000..5d99c46 --- /dev/null +++ b/tests/input/.gitignore @@ -0,0 +1,2 @@ +*.po +!input.po From 878e5534fa6fe8dd66b04f157c4991677fc23ece Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 4 Sep 2024 19:25:16 +0200 Subject: [PATCH 065/112] remove uv as it looks it does work properly with `pip install -e .` --- .github/workflows/build-package.yaml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 7c1a285..762113c 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -121,17 +121,14 @@ jobs: - name: Create Venv and Install dependencies run: | python -m pip install --upgrade pip - python -m pip install --upgrade uv # install python manager - uv sync # create a virtual environment and install dependencies - source .venv/bin/activate # activate the virtual environment - uv pip install -e . # to allow dev package testing + pip install -e . # to allow dev package testing - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names - uv run flake8 ./src ./tests --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 ./src ./tests --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. 
The GitHub editor is 127 chars wide - uv run flake8 ./src ./tests --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics + flake8 ./src ./tests --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics - name: Make envfile uses: SpicyPizza/create-envfile@v2.0.3 @@ -155,4 +152,4 @@ jobs: run: | # [[ -f .env ]] && cat .env || echo "No .env file found in root" # [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" - uv run pytest -s ./tests \ No newline at end of file + pytest -s ./tests \ No newline at end of file From 5cf74252c1caaea20df367653c270ba9522f9c10 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 4 Sep 2024 20:12:05 +0200 Subject: [PATCH 066/112] publish to pytest as pip install -e . doesn't work --- .github/workflows/build-package.yaml | 248 ++++++++++++++------------- 1 file changed, 128 insertions(+), 120 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 762113c..62c730e 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -29,127 +29,135 @@ env: jobs: - ollama-job: + test-with-ollama: runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Display Ollama version - run: | - echo "Ollama version file content:" - cat ${{ env.OLLAMA_VERSION_FILE }} - echo "Ollama version hash:" - echo ${{ hashFiles(env.OLLAMA_VERSION_FILE) }} - - - name: Cache Ollama - uses: actions/cache@v3 - id: cache-ollama - with: - path: ~/.ollama - key: ${{ runner.os }}-ollama-${{ hashFiles(env.OLLAMA_VERSION_FILE) }} - restore-keys: | - ${{ runner.os }}-ollama- - - - name: Debug Cache Ollama - run: | - echo "Cache hit: ${{ steps.cache-ollama.outputs.cache-hit }}" - if [ "${{ steps.cache-ollama.outputs.cache-hit }}" != 'true' ]; then - echo "Cache miss. This is normal if this is the first run or if the Ollama version has changed." 
- fi - - name: Install or Use Cached Ollama - run: | - if [ ! -f ~/.ollama/bin/ollama ]; then - echo "Installing Ollama" - curl https://ollama.ai/install.sh | sh - mkdir -p ~/.ollama/bin - cp /usr/local/bin/ollama ~/.ollama/bin/ollama + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest build + + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 ./src ./tests --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 ./src ./tests --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics + + - name: Make envfile + uses: SpicyPizza/create-envfile@v2.0.3 + with: + envkey_VERBOSE: ${{ env.VERBOSE }} # optional + envkey_INPUT_PO: ${{ env.INPUT_PO }} + envkey_ORIGINAL_LANGUAGE: ${{ env.ORIGINAL_LANGUAGE }} + envkey_CONTEXT_LANGUAGE: ${{ env.CONTEXT_LANGUAGE }} + envkey_TARGET_LANGUAGES: ${{ env.TARGET_LANGUAGES }} + envkey_LLM_CLIENT: ${{ env.LLM_CLIENT }} + envkey_LLM_MODEL: ${{ env.MODEL }} + envkey_OLLAMA_BASE_URL: ${{ env.OLLAMA_BASE_URL }} + envkey_SYSTEM_PROMPT: ${{ vars.SYSTEM_PROMPT }} + envkey_USER_PROMPT: ${{ vars.USER_PROMPT }} + directory: . 
+ file_name: .env + fail_on_empty: false + sort_keys: false + + - name: Build package + run: python -m build + + - name: Publish package distributions to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + user: __token__ + password: ${{ secrets.TEST_PYPI_SECRET_TOKEN }} + + - name: Display Ollama version + run: | + echo "Ollama version file content:" + cat ${{ env.OLLAMA_VERSION_FILE }} + echo "Ollama version hash:" + echo ${{ hashFiles(env.OLLAMA_VERSION_FILE) }} + + - name: Cache Ollama + uses: actions/cache@v3 + id: cache-ollama + with: + path: ~/.ollama + key: ${{ runner.os }}-ollama-${{ hashFiles(env.OLLAMA_VERSION_FILE) }} + restore-keys: | + ${{ runner.os }}-ollama- + + - name: Debug Cache Ollama + run: | + echo "Cache hit: ${{ steps.cache-ollama.outputs.cache-hit }}" + if [ "${{ steps.cache-ollama.outputs.cache-hit }}" != 'true' ]; then + echo "Cache miss. This is normal if this is the first run or if the Ollama version has changed." + fi + + - name: Install or Use Cached Ollama + run: | + if [ ! -f ~/.ollama/bin/ollama ]; then + echo "Installing Ollama" + curl https://ollama.ai/install.sh | sh + mkdir -p ~/.ollama/bin + cp /usr/local/bin/ollama ~/.ollama/bin/ollama + else + echo "Using cached Ollama" + fi + sudo ln -sf ~/.ollama/bin/ollama /usr/local/bin/ollama + ollama --version + + - name: Start Ollama and wait for it to serve + run: | + ollama serve & + sleep 10 + + - name: Cache Ollama models + uses: actions/cache@v3 + id: cache-models + with: + path: ~/.ollama/models + key: ${{ runner.os }}-ollama-models-${{ hashFiles(env.MODEL_LIST_FILE) }} + + - name: Debug Cache Models + run: | + echo "Models cache hit: ${{ steps.cache-models.outputs.cache-hit }}" + if [ "${{ steps.cache-models.outputs.cache-hit }}" != 'true' ]; then + echo "Models cache miss. This is normal if this is the first run or if the model list has changed." 
+ fi + + - name: Pull Ollama models + run: | + while IFS= read -r model || [[ -n "$model" ]]; do + if [ ! -f ~/.ollama/models/${model}.bin ]; then + echo "Pulling model: $model" + ollama pull $model else - echo "Using cached Ollama" + echo "Model already cached: $model" fi - sudo ln -sf ~/.ollama/bin/ollama /usr/local/bin/ollama - ollama --version - - - name: Start Ollama and wait for it to serve - run: | - ollama serve & - sleep 10 - - - name: Cache Ollama models - uses: actions/cache@v3 - id: cache-models - with: - path: ~/.ollama/models - key: ${{ runner.os }}-ollama-models-${{ hashFiles(env.MODEL_LIST_FILE) }} - - - name: Debug Cache Models - run: | - echo "Models cache hit: ${{ steps.cache-models.outputs.cache-hit }}" - if [ "${{ steps.cache-models.outputs.cache-hit }}" != 'true' ]; then - echo "Models cache miss. This is normal if this is the first run or if the model list has changed." - fi - - - name: Pull Ollama models - run: | - while IFS= read -r model || [[ -n "$model" ]]; do - if [ ! -f ~/.ollama/models/${model}.bin ]; then - echo "Pulling model: $model" - ollama pull $model - else - echo "Model already cached: $model" - fi - done < ${{ env.MODEL_LIST_FILE }} - ollama list - - - name: Debug final state - if: always() - run: | - echo "Ollama version:" - ollama --version - echo "Available models:" - ollama list - echo "Ollama directory content:" - ls -R ~/.ollama - - - name: Set up Python 3.10 - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'pip' - - - name: Create Venv and Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e . # to allow dev package testing - - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 ./src ./tests --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. 
The GitHub editor is 127 chars wide - flake8 ./src ./tests --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics - - - name: Make envfile - uses: SpicyPizza/create-envfile@v2.0.3 - with: - envkey_VERBOSE: ${{ env.VERBOSE }} # optional - envkey_INPUT_PO: ${{ env.INPUT_PO }} - envkey_ORIGINAL_LANGUAGE: ${{ env.ORIGINAL_LANGUAGE }} - envkey_CONTEXT_LANGUAGE: ${{ env.CONTEXT_LANGUAGE }} - envkey_TARGET_LANGUAGES: ${{ env.TARGET_LANGUAGES }} - envkey_LLM_CLIENT: ${{ env.LLM_CLIENT }} - envkey_LLM_MODEL: ${{ env.MODEL }} - envkey_OLLAMA_BASE_URL: ${{ env.OLLAMA_BASE_URL }} - envkey_SYSTEM_PROMPT: ${{ vars.SYSTEM_PROMPT }} - envkey_USER_PROMPT: ${{ vars.USER_PROMPT }} - directory: . - file_name: .env - fail_on_empty: false - sort_keys: false - - - name: Test with pytest - run: | - # [[ -f .env ]] && cat .env || echo "No .env file found in root" - # [[ -f tests/.env ]] && cat tests/.env || echo "No .env file found in tests" - pytest -s ./tests \ No newline at end of file + done < ${{ env.MODEL_LIST_FILE }} + ollama list + + - name: Debug final state + if: always() + run: | + echo "Ollama version:" + ollama --version + echo "Available models:" + ollama list + echo "Ollama directory content:" + ls -R ~/.ollama + + - name: Test with pytest + run: | + pytest -s ./tests \ No newline at end of file From 629e7764c09e1ad270930e86384a76c5fcfc072f Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 4 Sep 2024 20:19:26 +0200 Subject: [PATCH 067/112] added install from test pypi --- .github/workflows/build-package.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 62c730e..84a7b37 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -81,6 +81,12 @@ jobs: user: __token__ password: ${{ secrets.TEST_PYPI_SECRET_TOKEN }} + - name: Test with pytest (only for testing, to be removed and tested with Ollama if 
connection error) + run: | + pip install -i https://test.pypi.org/simple/ auto-po-lyglot + pip list | grep auto-po-lyglot + pytest -s ./tests + - name: Display Ollama version run: | echo "Ollama version file content:" @@ -160,4 +166,6 @@ jobs: - name: Test with pytest run: | + pip install -i https://test.pypi.org/simple/ auto-po-lyglot + pip list | grep auto-po-lyglot pytest -s ./tests \ No newline at end of file From 4b9d48e048799531548c37ae58a7b71e94ddc286 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Wed, 4 Sep 2024 20:28:16 +0200 Subject: [PATCH 068/112] changed version number to be able to publish on test pypi --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a1f1169..87d7b26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "auto-po-lyglot" -version = "1.0.0" +version = "1.0.1" #dynamic = ["version"] authors = [ { name="Olivier LEVILLAIN", email="levillain.olivier@gmail.com" }, From db4be7701a6aac0d81b04910635d330f4765bf81 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 18:57:54 +0200 Subject: [PATCH 069/112] removed default prompts and verbose --- .env.example | 97 ++++++++++------------------------------------------ 1 file changed, 18 insertions(+), 79 deletions(-) diff --git a/.env.example b/.env.example index 32d841b..c86a209 100644 --- a/.env.example +++ b/.env.example @@ -1,14 +1,15 @@ # .env.example file to be copied to .evn file and adapted to your needs -# Set to True if you want verbose output (recommended at the beginning) -VERBOSE=False - -# set log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) +# set log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). This can be overriden on the +# command line (-v = INFO, -vv = DEBUG). Default is WARNING. LOG_LEVEL=INFO # input.po file context # The input file itself. 
Usually provided on the command line but can also be set in the .env -INPUT_PO=tests/input/input.po +# INPUT_PO=tests/input/input.po +# The output file. Usually provided on the command line but can also be set in the .env. Use auto_po_lyglot -h to see +# how the file name is computed if not provided. +# OUTPUT_PO=tests/output/output.po # Primary language (msgids). Can be overriden on the command line ORIGINAL_LANGUAGE=English @@ -38,77 +39,15 @@ OLLAMA_BASE_URL="http://localhost:11434/v1" # Can be overriden on the command line (only one laguage in this case) TARGET_LANGUAGES=Italian,Spanish,German,Portuguese -# Two prebuilt system and user prompts, you can create your own ones using new numbers and change the choice below -# The first one uses a very long and detailed system prompt and is quite efficient.If you find a better prompt, -# please open a PR and provide it to the community -SYSTEM_PROMPT1="You are a highly skilled translator with expertise in {original_language}, {context_language}, and {target_language}. -Your task is to accurately translate the {original_language} text the user provides into {target_language} while preserving the meaning, -tone, and nuance of the original text. -As the provided sentences can be short and ambiguous, the user will also provide an accurate {context_language} translation for this {original_language} -sentence. Please, consider this {context_language} translation for desambiguating the meaning of the {original_language} sentence. Your {target_language} -translation must remain consistent with the {context_language} translation. Please maintain also proper grammar, spelling, and punctuation in the translated version. -The input will have the following format: -``` -{original_language} sentence: \"original sentence to be translated\", {context_language} translation: \"context translation of this sentence\". 
-``` -Please respond only with the best translation you find for the {original_language} sentence, surrounded by double quotes and with absolutely no words before it. -Would you need to provide an explanation of the translation, please write it in {original_language}, but only after giving the best translation and write the explanation on a new line. -For example, if you would receive as input: -``` -{original_language}: \"{simple_original_phrase}\", {context_language} translation: \"{simple_context_translation}\" -``` -your output in {target_language} would be: -``` -\"{simple_target_translation}\" -``` - -Another input example with an ambiguous original sentence for which you need an explanation: -``` -{original_language} sentence: \"{ambiguous_original_phrase}\", {context_language} translation: \"{ambiguous_context_translation}\" -``` -and your output would be, assuming an explanation is needed: -``` -\"{ambiguous_target_translation}\" -{ambiguous_explanation} -``` -Also, sometimes, the sentence to be translated and its context translation will contain placheholders that you are not allowed to translate -and must keep in the same place in your translation. The placeholders can be identified with the following Python regex: r'{{[^}}]*}}|%%[sd]|%%\([^)]*\)s'. -Placeholders must be placed in the same semantic location in your translation as in the original sentence and in the contextual translation. -Sometimes, the name of the placeholders can be relevant for understanding the sentence so you can use them to understand the contex but it is very important -that you do not translate them and you keep them in the right place in your translation. 
For instance, this input: -``` -{original_language} sentence: \"{po_placeholder_original_phrase_1}\\", {context_language} translation: \"{po_placeholder_context_translation_1}\" -``` -would be translated in {target_language} into: -``` -\"{po_placeholder_target_translation_1}\" -``` -and, using another placheolder format: -``` -{original_language} sentence: \"{po_placeholder_original_phrase_2}\\", {context_language} translation: \"{po_placeholder_context_translation_2}\" -``` -would be translated in {target_language} into: -``` -\"{po_placeholder_target_translation_2}\" -``` -Yet another format: -``` -{original_language} sentence: \"{po_placeholder_original_phrase_3}\\", {context_language} translation: \"{po_placeholder_context_translation_3}\" -``` -would be translated in {target_language} into: -``` -\"{po_placeholder_target_translation_3}\" -``` -" -USER_PROMPT1="{original_language} sentence: \"{original_phrase}\", {context_language} translation: \"{context_translation}\"" - -SYSTEM_PROMPT2="You are a helpful assistant that translates text." -USER_PROMPT2="Translate the following {original_language} sentence into {target_language}, -considering the provided {context_language} context for disambiguation:\n -{original_language}: '{phrase}'\n -{context_language} context: '{context_translation}'\n -Provide only the {target_language} translation." - -# Here you choose which prompt couple to use -SYSTEM_PROMPT=${SYSTEM_PROMPT1} -USER_PROMPT=${USER_PROMPT1} +# One prebuilt system and user prompts are provided by default in `default_prompts.py`. If you want, you can create +# below your own system and user prompts. 
The system prompt can use the following placeholders: +# {original_language}, {context_language}, {target_language}, {simple_original_phrase}, {simple_context_translation}, +# {simple_target_translation}, {ambiguous_original_phrase}, {ambiguous_context_translation}, {ambiguous_target_translation}, +# {ambiguous_explanation}, {po_placeholder_original_phrase_1}, {po_placeholder_original_phrase_2}, {po_placeholder_original_phrase_3}, +# {po_placeholder_context_translation_1}, {po_placeholder_context_translation_2}, {po_placeholder_context_translation_3}, +# {po_placeholder_target_translation_1}, {po_placeholder_target_translation_2}, {po_placeholder_target_translation_3}. +# (all phrases, explanations and translations are taken from the examples below), +#SYSTEM_PROMPT="You are a highly skilled translator with expertise in {original_language}, {context_language}, and {target_language}..." +# The user prompt can use only the following placeholders: {original_language}, {original_phrase}, {context_language}, {context_translation}, +# also taken from the examples below. 
+#USER_PROMPT="{original_language} sentence: \"{original_phrase}\", {context_language} translation: \"{context_translation}\"" From 4c5ca0404e07f784a8791fd8a26478003e2b58c5 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 18:59:32 +0200 Subject: [PATCH 070/112] fixed package build --- pyproject.toml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 87d7b26..2eb01fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,11 +40,13 @@ Issues = "https://github.com/leolivier/transpo/issues" Download = "https://github.com/leolivier/transpo/archive/refs/tags/v1.0.0.tar.gz" [tool.hatch.build.targets.wheel] -packages = ["src"] +packages = ["src/auto_po_lyglot"] [project.scripts] -auto_po_lyglot = "src.auto_po_lyglot.po_main:main" +auto_po_lyglot = "auto_po_lyglot.po_main:main" [tool.hatch.build.targets.wheel.force-include] "src" = "auto_po_lyglot" +[tool.hatch.build] +include = ["src"] From 29dd7d53008ce4936f7c04425836884c5cfcbe9c Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 19:00:08 +0200 Subject: [PATCH 071/112] Update log level --- .github/workflows/build-package.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 84a7b37..1d0bd75 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -13,7 +13,7 @@ on: env: LLM_CLIENT: ollama MODEL: gemma2:2b - VERBOSE: true # optional + LOG_LEVEL: INFO # optional INPUT_PO: tests/input/input.po ORIGINAL_LANGUAGE: English CONTEXT_LANGUAGE: French @@ -56,7 +56,7 @@ jobs: - name: Make envfile uses: SpicyPizza/create-envfile@v2.0.3 with: - envkey_VERBOSE: ${{ env.VERBOSE }} # optional + envkey_LOG_LEVEL: ${{ env.LOG_LEVEL }} # optional envkey_INPUT_PO: ${{ env.INPUT_PO }} envkey_ORIGINAL_LANGUAGE: ${{ env.ORIGINAL_LANGUAGE }} envkey_CONTEXT_LANGUAGE: ${{ env.CONTEXT_LANGUAGE }} From 
14dfce73d2edd1dc3685fcdbeff7b0de4db73ed3 Mon Sep 17 00:00:00 2001
From: Olivier LEVILLAIN
Date: Sat, 7 Sep 2024 19:00:25 +0200
Subject: [PATCH 072/112] export choice

---
 src/auto_po_lyglot/__init__.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/auto_po_lyglot/__init__.py b/src/auto_po_lyglot/__init__.py
index e69de29..02f2ce7 100644
--- a/src/auto_po_lyglot/__init__.py
+++ b/src/auto_po_lyglot/__init__.py
@@ -0,0 +1,15 @@
+from .getenv import ParamsLoader
+from .csv_extractor import extract_csv
+from .openai_ollama_client import OpenAIAPICompatibleClient, OpenAIClient, OllamaClient
+from .claude_client import ClaudeClient, CachedClaudeClient
+from .base import TranspoClient
+
+__all__ = [
+  'ParamsLoader',
+  'OpenAIAPICompatibleClient',
+  'OpenAIClient',
+  'OllamaClient',
+  'ClaudeClient',
+  'CachedClaudeClient',
+  'TranspoClient',
+  'extract_csv']

From 34132368b5b70feefc943ed4e71a5a1df8dca942 Mon Sep 17 00:00:00 2001
From: Olivier LEVILLAIN
Date: Sat, 7 Sep 2024 19:01:50 +0200
Subject: [PATCH 073/112] removed specific logger

---
 src/auto_po_lyglot/claude_client.py | 11 ++++++-----
 src/auto_po_lyglot/csv_extractor.py | 10 +++++-----
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/auto_po_lyglot/claude_client.py b/src/auto_po_lyglot/claude_client.py
index 89e59a9..a85fa63 100644
--- a/src/auto_po_lyglot/claude_client.py
+++ b/src/auto_po_lyglot/claude_client.py
@@ -1,8 +1,9 @@
 from time import sleep
 from anthropic import Anthropic
-from .base import TranspoClient, TranspoException, Logger
+from .base import TranspoClient, TranspoException
+import logging

-logger = Logger(__name__)
+logger = logging.getLogger(__name__)


 class ClaudeClient(TranspoClient):
@@ -60,13 +61,13 @@ def get_translation(self, system_prompt, user_prompt):
       )
       if self.first:
         self.first = False
-        logger.vprint("claude cached usage", response.usage)
+        logger.info(f"claude cached usage: {response.usage}")
       else:
-        logger.info("claude cached usage", 
response.usage) + logger.debug(f"claude cached usage: {response.usage}") return response.content[0].text except Exception as e: if "overloaded_error" in str(e): - logger.vprint(f"claude cached overloaded error, next retry in {next_retry_in} seconds") + logger.info(f"claude cached overloaded error, next retry in {next_retry_in} seconds") next_retry_in = 2 ** retries if next_retry_in > 60: # should never happen with max_retries = 5 next_retry_in = 60 diff --git a/src/auto_po_lyglot/csv_extractor.py b/src/auto_po_lyglot/csv_extractor.py index d0ed55e..965a4dd 100755 --- a/src/auto_po_lyglot/csv_extractor.py +++ b/src/auto_po_lyglot/csv_extractor.py @@ -4,9 +4,9 @@ import csv import sys import os -from .base import Logger +import logging -logger = Logger(__name__) +logger = logging.getLogger(__name__) def extract_translation(line): @@ -17,7 +17,7 @@ def extract_translation(line): return res -def process_file(input_file, output_file, languages=["English", "French", "Italian", "Spanish", "German"]): +def extract_csv(input_file, output_file, languages=["English", "French", "Italian", "Spanish", "German"]): translations = {} current_key = None @@ -58,8 +58,8 @@ def main(): print(f"Error: Input file '{input_file}' does not exist.") sys.exit(1) - process_file(input_file, output_file) - logger.vprint(f"Conversion complete. CSV file created : {output_file}") + extract_csv(input_file, output_file) + logger.info(f"Conversion complete. 
CSV file created : {output_file}") if __name__ == "__main__": From e155abad8bd3b71254a375efc6c34dce2e75bed6 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 19:02:40 +0200 Subject: [PATCH 074/112] removed Logger class + default prompts --- src/auto_po_lyglot/base.py | 86 ++++---------------------------------- 1 file changed, 8 insertions(+), 78 deletions(-) diff --git a/src/auto_po_lyglot/base.py b/src/auto_po_lyglot/base.py index 835dda1..6767d9a 100644 --- a/src/auto_po_lyglot/base.py +++ b/src/auto_po_lyglot/base.py @@ -1,8 +1,9 @@ from abc import ABC, abstractmethod import logging from os import environ -import sys -from .examples import po_placeholder_examples, basic_examples, ambiguous_examples +from .default_prompt import system_prompt, user_prompt, po_placeholder_examples, basic_examples, ambiguous_examples + +logger = logging.getLogger(__name__) class TranspoException(Exception): @@ -15,7 +16,7 @@ def __init__(self, params, target_language=None): # target language can be set later but before any translation. # it can also be changed by the user at any time, the prompt will be updated automatically self.target_language = target_language - logger.info(f"TranspoClient using model {self.params.model}") + logger.debug(f"TranspoClient using model {self.params.model}") self.first = True @abstractmethod @@ -35,12 +36,7 @@ def get_translation(self, phrase, context_translation): ... 
def get_system_prompt(self): - format = self.params.system_prompt if hasattr(self.params, 'system_prompt') else None - if format is None: - print(self.params.__dict__) - # raise TranspoException("SYSTEM_PROMPT environment variable not set") - print("SYSTEM_PROMPT environment variable not set") - sys.exit(1) + format = self.params.system_prompt if hasattr(self.params, 'system_prompt') else system_prompt logger.debug("system prompt format: ", format) try: basic_exemple = basic_examples[0] @@ -101,14 +97,14 @@ def get_system_prompt(self): prompt_params["ambiguous_explanation"] = ambiguous_explanation.format(**explanation_params) system_prompt = format.format(**prompt_params) if self.first: - logger.vprint("First system prompt:\n", system_prompt) + logger.info(f"First system prompt:\n{system_prompt}") self.first = False else: - logger.debug("system prompt:\n", system_prompt) + logger.debug(f"System prompt:\n{system_prompt}") return system_prompt def get_user_prompt(self, phrase, context_translation): - format = environ.get("USER_PROMPT", None) + format = environ.get("USER_PROMPT", None) or user_prompt if format is None: raise TranspoException("USER_PROMPT environment variable not set") params = { @@ -138,69 +134,3 @@ def translate(self, phrase, context_translation): user_prompt = self.get_user_prompt(phrase, context_translation) raw_result = self.get_translation(system_prompt, user_prompt) return self.process_translation(raw_result) - - -class Logger(): - verbose_mode = False - - def __init__(self, name): - self.logger = logging.getLogger(name) - - def vprint(self, *args, **kwargs): - """Print only if verbose is set""" - if self.verbose_mode: - print(*args, **kwargs) - sys.stdout.flush() - - def info(self, *args, **kwargs): - self.logger.info(*args, **kwargs) - - def debug(self, *args, **kwargs): - self.logger.debug(*args, **kwargs) - - def error(self, *args, **kwargs): - self.logger.error(*args, **kwargs) - - def warning(self, *args, **kwargs): - 
self.logger.warning(*args, **kwargs) - - def critical(self, *args, **kwargs): - self.logger.critical(*args, **kwargs) - - def exception(self, *args, **kwargs): - self.logger.exception(*args, **kwargs) - - @classmethod - def set_verbose(cls, verbose): - cls.verbose_mode = verbose - - def set_level(self, level): - self.logger.setLevel(level) - - def get_level(self): - return self.logger.getEffectiveLevel() - - def set_format(self, format): - self.logger.handlers[0].setFormatter(logging.Formatter(format)) - - def get_format(self): - return self.logger.handlers[0].formatter - - def set_file(self, filename): - self.logger.addHandler(logging.FileHandler(filename)) - - def get_file(self): - return self.logger.handlers[0] - - def remove_file(self): - self.logger.removeHandler(self.logger.handlers[0]) - - def remove_console(self): - self.logger.removeHandler(self.logger.handlers[1]) - - def remove_all(self): - self.logger.removeHandler(self.logger.handlers[0]) - self.logger.removeHandler(self.logger.handlers[1]) - - -logger = Logger(__name__) From c29485dc5b61b98a0ccb699af292d0129c8688ae Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 19:03:31 +0200 Subject: [PATCH 075/112] renamed and added default prompts --- src/auto_po_lyglot/default_prompts.py | 175 ++++++++++++++++++++++++++ src/auto_po_lyglot/examples.py | 89 ------------- 2 files changed, 175 insertions(+), 89 deletions(-) create mode 100644 src/auto_po_lyglot/default_prompts.py delete mode 100644 src/auto_po_lyglot/examples.py diff --git a/src/auto_po_lyglot/default_prompts.py b/src/auto_po_lyglot/default_prompts.py new file mode 100644 index 0000000..f41329c --- /dev/null +++ b/src/auto_po_lyglot/default_prompts.py @@ -0,0 +1,175 @@ +# This file contains the default system and user prompts for the LLMs. These prompts can be overriden in the .env file. +# It also contains some examples of translations in the different languages. See after the prompts. 
+ +system_prompt = """ +You are a highly skilled translator with expertise in {original_language}, {context_language}, and {target_language}. +Your task is to accurately translate the {original_language} text the user provides into {target_language} while preserving +the meaning, tone, and nuance of the original text. +As the provided sentences can be short and ambiguous, the user will also provide an accurate {context_language} translation +for this {original_language} sentence. Please, consider this {context_language} translation for desambiguating the meaning +of the {original_language} sentence. Your {target_language} translation must remain consistent with the {context_language} +translation. Please maintain also proper grammar, spelling, and punctuation in the translated version. +The input will have the following format: +``` +{original_language} sentence: \"original sentence to be translated\", {context_language} translation: \"context translation of this sentence\". +``` +Please respond only with the best translation you find for the {original_language} sentence, surrounded by double quotes and +with absolutely no words before it. +Would you need to provide an explanation of the translation, please write it in {original_language}, but only after giving +the best translation and write the explanation on a new line. 
+For example, if you would receive as input: +``` +{original_language}: \"{simple_original_phrase}\", {context_language} translation: \"{simple_context_translation}\" +``` +your output in {target_language} would be: +``` +\"{simple_target_translation}\" +``` + +Another input example with an ambiguous original sentence for which you need an explanation: +``` +{original_language} sentence: \"{ambiguous_original_phrase}\", {context_language} translation: \"{ambiguous_context_translation}\" +``` +and your output would be, assuming an explanation is needed: +``` +\"{ambiguous_target_translation}\" +{ambiguous_explanation} +``` +Also, sometimes, the sentence to be translated and its context translation will contain placheholders that you are not allowed +to translate and must keep in the same place in your translation. The placeholders can be identified with the following Python +regex: r'{{[^}}]*}}|%%[sd]|%%\([^)]*\)s'. +Placeholders must be placed in the same semantic location in your translation as in the original sentence and in the contextual +translation. Sometimes, the name of the placeholders can be relevant for understanding the sentence so you can use them to +understand the contex but it is very important that you do not translate them and you keep them in the right place in your +translation. 
For instance, this input: +``` +{original_language} sentence: \"{po_placeholder_original_phrase_1}\\", {context_language} translation: \"{po_placeholder_context_translation_1}\" +``` +would be translated in {target_language} into: +``` +\"{po_placeholder_target_translation_1}\" +``` +and, using another placheolder format: +``` +{original_language} sentence: \"{po_placeholder_original_phrase_2}\\", {context_language} translation: \"{po_placeholder_context_translation_2}\" +``` +would be translated in {target_language} into: +``` +\"{po_placeholder_target_translation_2}\" +``` +Yet another format: +``` +{original_language} sentence: \"{po_placeholder_original_phrase_3}\\", {context_language} translation: \"{po_placeholder_context_translation_3}\" +``` +would be translated in {target_language} into: +``` +\"{po_placeholder_target_translation_3}\" +``` +""" + +user_prompt = "{original_language} sentence: \"{original_phrase}\", {context_language} translation: \"{context_translation}\"" + +###################################################################################### +# EXAMPLES OF TRANSLATIONS IN DIFFERENT LANGUAGES # +###################################################################################### + +# The values in the examples below will be embedded in the system and user prompts as a guide to the LLM so they must be +# highly accurate. +# You can specify here 3 kind of examples: basic ones, ambiguous ones and po placeholder ones. +# All examples are providing English, Italian, Spanish, German, Portuguese and French translations. +# You can provide another language by simply adding an entry in *ALL* corresponding lists. +# For ambiguous examples, orginal and contextual translations are only provided for English/French couple. 
+# You can also provide other originale/contextual couples than English/French for ambiguous examples + +# ========= BASIC EXAMPLES ============================================================= +# Basic examples is just a list of translations in different languages for the same simple phrase. +# The examples are providing English, French, Italian, Spanish, German and Portuguese translations. +# They are used to fill the simple_original_phrase, simple_context_translation and simple_target_translation placeholders in +# the system prompt +basic_examples = [ + { + "English": "Hello", + "French": "Bonjour", + "Italian": "Ciao", + "Spanish": "Hola", + "German": "Hallo", + "Portuguese": "Ola" + }, + { + "English": "Goodbye", + "French": "Au revoir", + "Italian": "Arrivederci", + "Spanish": "Adios", + "German": "Auf Wiedersehen", + "Portuguese": "Tchau" + }, +] + +# ========= AMBIGUOUS EXAMPLES ============================================================= +# Ambiguous examples is a list of translations in different languages for one original phrase and its contextual translation. +# These examples are used to fill the ambiguous_original_phrase, ambiguous_context_translation, ambiguous_target_translation +# and ambiguous_explanation placeholders in the system prompt +ambiguous_examples = [ + { + "original_language": "English", + "context_language": "French", + "explanation": """ +Explanation: This {target_language} translation reflects the meaning of the French phrase, which indicates that the person +made a phone call, not that he gave a ring. The English phrase "He gave her a ring" can be ambiguous, as it can mean both +"giving a ring" and "making a phone call" colloquially. 
The French translation makes it clear that it is a phone call, so +the {target_language} version "{target_translation}" follows this interpretation.""", + "English": "He gave her a ring.", + "French": "Il lui a passé un coup de fil.", + "Italian": "Le ha fatto una telefonata.", + "Spanish": "Le llamó por teléfono.", + "German": "Er hat sie angerufen.", + "Portuguese": "Ele telefonou-lhe." + }, + { + "original_language": "French", + "context_language": "English", + "explanation": """ +Dans ce contexte, "s'effondrer" fait référence à une rupture émotionnelle plutôt qu'à une défaillance +mécanique, comme le confirme la traduction anglaise "broke down". La traduction {target_language} "{target_translation}" +reflète ce sens de rupture émotionnelle ou physique.""", + "French": "Elle s'est effondrée", + "English": "She broke down", + "Italian": "Si è crollata", + "Spanish": "Ella se derrumbó", + "German": "Sie brach zusammen", + "Portuguese": "Ela se derrubou." + }, +] + +# ========= PO PLACHEHOLDER EXAMPLES ============================================================= + +# PO placeholder examples is a list of translations in different languages a sentence containing a set of placeholders. +# The placeholders should represent the different forms of mlaceholers supported by po files ie %(something)s, {something} +# and %s or %d. 
The examples are used to fill the po_placeholder_original_phrase_N, po_placeholder_context_translation_N, +# po_placeholder_target_translation_N placeholders in the system prompt +po_placeholder_examples = [ + { + "English": "%(follower_name)s has created a new %(followed_type)s: %(followed_object_name)s", + "French": "%(follower_name)s a créé un nouveau %(followed_type)s: %(followed_object_name)s", + "Italian": "%(follower_name)s ha creato un nuovo %(followed_type)s: %(followed_object_name)s", + "Spanish": "%(follower_name)s ha creado un nuevo %(followed_type)s: %(followed_object_name)s", + "German": "%(follower_name)s hat ein neues %(followed_type)s erstellt: %(followed_object_name)s", + "Portuguese": "%(follower_name)s criou um novo %(followed_type)s: %(followed_object_name)s" + }, + { + "English": "{follower_name} has created a new {followed_type}: {followed_object_name}", + "French": "{follower_name} a créé un nouveau {followed_type}: {followed_object_name}", + "Italian": "{follower_name} ha creato un nuovo {followed_type}: {followed_object_name}", + "Spanish": "{follower_name} ha creado un nuevo {followed_type}: {followed_object_name}", + "German": "{follower_name} hat ein neues {followed_type} erstellt: {followed_object_name}", + "Portuguese": "{follower_name} criou um novo {followed_type}: {followed_object_name}" + }, + { + "English": "%s has created a new %s: %s", + "French": "%s a créé un nouveau %s: %s", + "Italian": "%s ha creato un nuovo %s: %s", + "Spanish": "%s ha creado un nuevo %s: %s", + "German": "%s hat ein neues %s erstellt: %s", + "Portuguese": "%s criou um novo %s: %s" + }, +] diff --git a/src/auto_po_lyglot/examples.py b/src/auto_po_lyglot/examples.py deleted file mode 100644 index d495221..0000000 --- a/src/auto_po_lyglot/examples.py +++ /dev/null @@ -1,89 +0,0 @@ -# This file contains some examples of translations in the different languages. 
These examples will be embedded in the -# system prompt as a guide to the LLM so they must be highly accurate. -# You can specify here 3 kind of examples: basic ones, ambiguous ones and po placeholder ones. -# The examples are providing English, Italian, Spanish, German, Portuguese and French translations and for ambiguous -# examples, orginal and contextual translations are only provided for English/French couple. -# You can another language by simply adding an entry in *ALL* corresponding lists. For ambiguous examples, you can also -# provide other couples than English/French; -# Basic examples is just a list of translations in different languages for the same simple phrase. -basic_examples = [ - { - "English": "Hello", - "French": "Bonjour", - "Italian": "Ciao", - "Spanish": "Hola", - "German": "Hallo", - "Portuguese": "Ola" - }, - { - "English": "Goodbye", - "French": "Au revoir", - "Italian": "Arrivederci", - "Spanish": "Adios", - "German": "Auf Wiedersehen", - "Portuguese": "Tchau" - }, -] - -# Ambiguous examples is a list of translations in different languages for one original phrase and its contextual translation. -ambiguous_examples = [ - { - "original_language": "English", - "context_language": "French", - "explanation": """ -Explanation: This {target_language} translation reflects the meaning of the French phrase, which indicates that the person -made a phone call, not that he gave a ring. The English phrase "He gave her a ring" can be ambiguous, as it can mean both -"giving a ring" and "making a phone call" colloquially. The French translation makes it clear that it is a phone call, so -the {target_language} version "{target_translation}" follows this interpretation.""", - "English": "He gave her a ring.", - "French": "Il lui a passé un coup de fil.", - "Italian": "Le ha fatto una telefonata.", - "Spanish": "Le llamó por teléfono.", - "German": "Er hat sie angerufen.", - "Portuguese": "Ele telefonou-lhe." 
- }, - { - "original_language": "French", - "context_language": "English", - "explanation": """ -Dans ce contexte, "s'effondrer" fait référence à une rupture émotionnelle plutôt qu'à une défaillance -mécanique, comme le confirme la traduction anglaise "broke down". La traduction {target_language} "{target_translation}" -reflète ce sens de rupture émotionnelle ou physique.""", - "French": "Elle s'est effondrée", - "English": "She broke down", - "Italian": "Si è crollata", - "Spanish": "Ella se derrumbó", - "German": "Sie brach zusammen", - "Portuguese": "Ela se derrubou." - }, -] - -# PO placeholder examples is a list of translations in different languages a sentence containing a set of placeholders. -# The placeholders should represent the different forms of mlaceholers supported by po files ie %(something)s, {something} -# and %s or %d. -po_placeholder_examples = [ - { - "English": "%(follower_name)s has created a new %(followed_type)s: %(followed_object_name)s", - "French": "%(follower_name)s a créé un nouveau %(followed_type)s: %(followed_object_name)s", - "Italian": "%(follower_name)s ha creato un nuovo %(followed_type)s: %(followed_object_name)s", - "Spanish": "%(follower_name)s ha creado un nuevo %(followed_type)s: %(followed_object_name)s", - "German": "%(follower_name)s hat ein neues %(followed_type)s erstellt: %(followed_object_name)s", - "Portuguese": "%(follower_name)s criou um novo %(followed_type)s: %(followed_object_name)s" - }, - { - "English": "{follower_name} has created a new {followed_type}: {followed_object_name}", - "French": "{follower_name} a créé un nouveau {followed_type}: {followed_object_name}", - "Italian": "{follower_name} ha creato un nuovo {followed_type}: {followed_object_name}", - "Spanish": "{follower_name} ha creado un nuevo {followed_type}: {followed_object_name}", - "German": "{follower_name} hat ein neues {followed_type} erstellt: {followed_object_name}", - "Portuguese": "{follower_name} criou um novo {followed_type}: 
{followed_object_name}" - }, - { - "English": "%s has created a new %s: %s", - "French": "%s a créé un nouveau %s: %s", - "Italian": "%s ha creato un nuovo %s: %s", - "Spanish": "%s ha creado un nuevo %s: %s", - "German": "%s hat ein neues %s erstellt: %s", - "Portuguese": "%s criou um novo %s: %s" - }, -] From cf41753d696e90eab5a910ec8fe8b6c319e650b6 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 19:04:45 +0200 Subject: [PATCH 076/112] use default prompts --- src/auto_po_lyglot/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/base.py b/src/auto_po_lyglot/base.py index 6767d9a..de3c0ba 100644 --- a/src/auto_po_lyglot/base.py +++ b/src/auto_po_lyglot/base.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod import logging from os import environ -from .default_prompt import system_prompt, user_prompt, po_placeholder_examples, basic_examples, ambiguous_examples +from .default_prompts import system_prompt, user_prompt, po_placeholder_examples, basic_examples, ambiguous_examples logger = logging.getLogger(__name__) From c1bd1f0415d8266e1def61dfba2ada2630fa3748 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 19:05:54 +0200 Subject: [PATCH 077/112] removed logger, use new input/output params, show prompts command --- src/auto_po_lyglot/po_main.py | 46 +++++++++++++++++------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/auto_po_lyglot/po_main.py b/src/auto_po_lyglot/po_main.py index a5a3e1c..c9c6ca6 100755 --- a/src/auto_po_lyglot/po_main.py +++ b/src/auto_po_lyglot/po_main.py @@ -1,12 +1,15 @@ #!/usr/bin/env python -from time import sleep -from .getenv import TranspoParams -from pathlib import Path -import polib -from .base import Logger + import langcodes +import logging +import polib +from pathlib import Path +from time import sleep + +from .getenv import ParamsLoader +from .default_prompts import system_prompt, user_prompt -logger = 
Logger(__name__)
+logger = logging.getLogger(__name__)


 def get_language_code(language_name):
@@ -45,9 +48,9 @@ def get_outfile_name(model_name, input_po, target_language, context_language):
     model_name = model_name.replace(':', '-')

   outfile = p.with_suffix(f'.{model_name}.{target_code}.po')
-  logger.vprint("Output file:", outfile)
+  logger.info(f"Output file: {outfile}")
   if outfile.exists():
-    logger.vprint("Output file already exists, won't overwrite.")
+    logger.info("Output file already exists, won't overwrite.")
     i = 0
     i_outfile = outfile
     # append a number to the filename
@@ -55,7 +58,7 @@
       i_outfile = outfile.with_suffix(f'.{i}.po')
       i += 1
     outfile = i_outfile
-    logger.vprint("Output file:", outfile)
+    logger.info(f"Corrected output file: {outfile}")
   return outfile


@@ -76,25 +79,20 @@ def main():
     None
   """
-  additional_args = [
-    {
-      'arg': '--input_po',
-      'env': 'INPUT_PO',
-      'type': str,
-      'help': 'the .po file containing the msgids (phrases to be translated) and msgstrs (context translations)',
-      'default': 'tests/input/input.po'
-    },
-  ]
+  params = ParamsLoader()

-  params = TranspoParams(additional_args)
+  if params.show_prompts:
+    print(f">>>>>>>>>>System prompt:\n{system_prompt}\n\n>>>>>>>>>>>>User prompt:\n{user_prompt}")
+    exit(0)

   client = params.get_client()
-  logger.vprint(f"Using model {client.params.model} to translate {params.input_po} from {params.original_language} -> "
-               f"{params.context_language} -> {params.test_target_languages} with an {params.llm_client} client")
+  logger.info(f"Using model {client.params.model} to translate {params.input_po} from {params.original_language} -> "
+             f"{params.context_language} -> {params.test_target_languages} with an {params.llm_client} client")

   for target_language in params.test_target_languages:
     client.target_language = target_language
-    output_file = get_outfile_name(client.params.model, params.input_po, target_language, 
params.context_language) + output_file = params.output_po or get_outfile_name(client.params.model, params.input_po, + target_language, params.context_language) # Load input .po file po = polib.pofile(params.input_po) try: @@ -108,7 +106,7 @@ def main(): entry.comment = explanation # Update translation entry.msgstr = translation - logger.vprint(f"""================== + logger.info(f"""================== {params.original_language}: "{original_phrase}" {params.context_language}: "{context_translation}" {target_language}: "{translation}" @@ -116,7 +114,7 @@ def main(): """) sleep(1.0) # Sleep for 1 second to avoid rate limiting except Exception as e: - logger.vprint(f"Error: {e}") + logger.info(f"Error: {e}, trying next translation") # Save the new .po file even if there was an error to not lose what was translated po.save(output_file) From 9293b003408b716b3fe37173c9f4df1810c395fc Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 19:07:28 +0200 Subject: [PATCH 078/112] removed specific logger, added new params (-i, -o, -s, -vv) --- src/auto_po_lyglot/getenv.py | 45 ++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index 4d8d2b0..a968a28 100755 --- a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -3,12 +3,11 @@ from dotenv import load_dotenv from os import environ import argparse -from .base import Logger -logger = Logger(__name__) +logger = logging.getLogger(__name__) -class TranspoParams: +class ParamsLoader: description = """ Creates a .po translation file based on an existing one using a given model and llm type. It reads the parameters from the command line and completes them if necessary from the .env in the same directory. 
@@ -19,11 +18,14 @@ class TranspoParams: def parse_args(self, additional_args=None): parser = argparse.ArgumentParser(description=self.description) # Add arguments - parser.add_argument('--llm', + parser.add_argument('-p', '--show_prompts', + action='store_true', + help='show the prompts used for translations and exits') + parser.add_argument('-l', '--llm', type=str, help='Le type of LLM you want to use. Can be openai, ollama, claude or claude_cached. ' 'For openai or claude[_cached], you need to set the api key in the environment') - parser.add_argument('--model', + parser.add_argument('-m', '--model', type=str, help='the name of the model to use. If not provided, a default model ' 'will be used, based on the chosen client') @@ -36,7 +38,20 @@ def parse_args(self, additional_args=None): parser.add_argument('--target_language', type=str, help='the language into which the original phrase will be translated') - parser.add_argument('--verbose', action='store_true', help='verbose mode') + parser.add_argument('-i', '--input_po', + type=str, + help='the .po file containing the msgids (phrases to be translated) ' + 'and msgstrs (context translations)') + parser.add_argument('-o', '--output_po', + type=str, + help='the .po file where the translated results will be written. If not provided, ' + 'it will be created in the same directory as the input_po except if the input po file has ' + 'the specific format .../locale//LC_MESSAGES/. 
' + 'In this case, the output po file will be created as ' + '.../locale//LC_MESSAGES/.') + + parser.add_argument('-v', '--verbose', action='store_true', help='verbose mode') + parser.add_argument('-vv', '--debug', action='store_true', help='verbose mode') if additional_args: for arg in additional_args: if arg.get('action'): @@ -53,12 +68,19 @@ def __init__(self, additional_args=None): args = self.parse_args(additional_args) + if args.show_prompts: + self.show_prompts = True + return # will exit just after showing prompts, no need to continue + load_dotenv(override=True) - self.log_level = environ.get('LOG_LEVEL', 'WARNING') - logging.set_levelbasicConfig(self.log_level) - self.verbose = args.verbose or bool(environ.get('VERBOSE', False)) - logger.set_verbose(self.verbose) + if args.debug or (not args.verbose and environ.get('LOG_LEVEL', None) == 'DEBUG'): + self.log_level = logging.DEBUG + elif args.verbose or environ.get('LOG_LEVEL', None) == 'INFO': + self.log_level = logging.INFO + else: + self.log_level = logging.WARNING + logging.getLogger().setLevel(self.log_level) # original language self.original_language = args.original_language or environ.get('ORIGINAL_LANGUAGE', 'English') @@ -80,6 +102,9 @@ def __init__(self, additional_args=None): self.system_prompt = environ.get('SYSTEM_PROMPT', None) self.user_prompt = environ.get('USER_PROMPT', None) + self.input_po = args.input_po or environ.get('INPUT_PO', None) + self.output_po = args.output_po or environ.get('OUTPUT_PO', None) + # generic processing of additional arguments if additional_args: for argument in additional_args: From 4f25c78abd6ad989c710d0be6287773e3a9b9155 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 19:07:56 +0200 Subject: [PATCH 079/112] adapted tests --- tests/test_main.py | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index 4d7e43b..49e3fb1 100755 --- 
a/tests/test_main.py +++ b/tests/test_main.py @@ -1,18 +1,14 @@ #!/usr/bin/env python -from auto_po_lyglot.getenv import TranspoParams -from auto_po_lyglot.base import Logger -from auto_po_lyglot.csv_extractor import process_file +from auto_po_lyglot import ParamsLoader +# from auto_po_lyglot import process_file from .settings import OUTPUT_DIRECTORY, TEST_TRANSLATIONS -from pathlib import PurePath, Path -import sys +from pathlib import Path import pytest -logger = Logger(__name__) - @pytest.fixture(scope="class") def params(): - return TranspoParams([ + return ParamsLoader([ {'arg': 'testdir', 'type': str, 'help': 'test directory'}, {'arg': '-s', 'action': 'store_true', 'help': 'don\'t capture outputs'}, ]) @@ -26,7 +22,7 @@ def llm_client(params): @pytest.fixture(scope="class") def output_file(llm_client): p = Path(OUTPUT_DIRECTORY) - logger.vprint("Output directory:", p) + print("Output directory:", p) if not p.is_dir(): p.mkdir(parents=True, exist_ok=True) basefile_name = f"{llm_client.params.model.replace(':', '-')}_output%i.md" @@ -34,19 +30,19 @@ def output_file(llm_client): while True: outfile_name = p / (basefile_name % i) if not outfile_name.exists(): - logger.vprint("Output file:", outfile_name) + print("Output file:", outfile_name) return outfile_name i += 1 -def extract_csv_translations(output_file, params): - csv_file = PurePath(output_file).with_suffix('.csv') - if not output_file.exists(): - print(f"Error: Input file '{output_file}' does not exist.") - sys.exit(1) - languages = [params.original_language, params.context_language] + params.test_target_languages - process_file(output_file, csv_file, languages) - logger.vprint("CSV extracted to file:", csv_file) +# def extract_csv_translations(output_file, params): +# csv_file = PurePath(output_file).with_suffix('.csv') +# if not output_file.exists(): +# print(f"Error: Input file '{output_file}' does not exist.") +# sys.exit(1) +# languages = [params.original_language, params.context_language] + 
params.test_target_languages +# process_file(output_file, csv_file, languages) +# print("CSV extracted to file:", csv_file) class TestTranspo: @@ -56,8 +52,8 @@ def setup(self, params, llm_client, output_file): def test_main(self, params, llm_client, output_file): - logger.vprint(f"Using model {llm_client.params.model} for {params.original_language} -> {params.context_language} -> " - f"{params.test_target_languages} with an {params.llm_client} client") + print(f"Using model {llm_client.params.model} for {params.original_language} -> {params.context_language} -> " + f"{params.test_target_languages} with an {params.llm_client} client") with output_file.open('w', newline='', encoding='utf-8') as outfile: for target_language in params.test_target_languages: llm_client.target_language = target_language @@ -67,7 +63,7 @@ def test_main(self, params, llm_client, output_file): "original_phrase": "{tr['original_phrase']}", # {params.original_language} "context_translation": "{tr['context_translation']}", # {params.context_language} "target_translation": """ - logger.vprint(out, end='') + print(out, end='') translation, explanation = llm_client.translate(tr['original_phrase'], tr['context_translation']) comment = explanation.replace('\n', '\n# ') trans_exp = f"""{translation} # {target_language} @@ -75,8 +71,8 @@ def test_main(self, params, llm_client, output_file): }}, """ - logger.vprint(trans_exp) + print(trans_exp) outfile.write(f'{out} {trans_exp}') assert translation == tr['target_translation'] outfile.close() - extract_csv_translations(output_file, params) + # extract_csv_translations(output_file, params) From 6b9be098713a15792dbca084d086aad13d9438b7 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 19:10:15 +0200 Subject: [PATCH 080/112] noqa on prompt --- src/auto_po_lyglot/default_prompts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/default_prompts.py b/src/auto_po_lyglot/default_prompts.py index 
f41329c..0b8a8e2 100644 --- a/src/auto_po_lyglot/default_prompts.py +++ b/src/auto_po_lyglot/default_prompts.py @@ -65,7 +65,7 @@ ``` \"{po_placeholder_target_translation_3}\" ``` -""" +""" # noqa user_prompt = "{original_language} sentence: \"{original_phrase}\", {context_language} translation: \"{context_translation}\"" From ffd6762c7e82a2764b4cba9b66dfdb5636e748f9 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 19:17:52 +0200 Subject: [PATCH 081/112] changed ambiguous variable names --- src/auto_po_lyglot/base.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/auto_po_lyglot/base.py b/src/auto_po_lyglot/base.py index de3c0ba..c6bd031 100644 --- a/src/auto_po_lyglot/base.py +++ b/src/auto_po_lyglot/base.py @@ -1,7 +1,12 @@ from abc import ABC, abstractmethod import logging from os import environ -from .default_prompts import system_prompt, user_prompt, po_placeholder_examples, basic_examples, ambiguous_examples +from .default_prompts import ( + system_prompt as default_system_prompt, + user_prompt as default_user_prompt, + po_placeholder_examples, + basic_examples, + ambiguous_examples) logger = logging.getLogger(__name__) @@ -36,7 +41,7 @@ def get_translation(self, phrase, context_translation): ... 
def get_system_prompt(self): - format = self.params.system_prompt if hasattr(self.params, 'system_prompt') else system_prompt + format = self.params.system_prompt if hasattr(self.params, 'system_prompt') else default_system_prompt logger.debug("system prompt format: ", format) try: basic_exemple = basic_examples[0] @@ -104,7 +109,7 @@ def get_system_prompt(self): return system_prompt def get_user_prompt(self, phrase, context_translation): - format = environ.get("USER_PROMPT", None) or user_prompt + format = environ.get("USER_PROMPT", None) or default_user_prompt if format is None: raise TranspoException("USER_PROMPT environment variable not set") params = { From d0054e139d7877f803f54368f2e4f887902616f5 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 19:19:09 +0200 Subject: [PATCH 082/112] typo --- src/auto_po_lyglot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/__init__.py b/src/auto_po_lyglot/__init__.py index 02f2ce7..a03b7c1 100644 --- a/src/auto_po_lyglot/__init__.py +++ b/src/auto_po_lyglot/__init__.py @@ -2,7 +2,7 @@ from .csv_extractor import extract_csv from .openai_ollama_client import OpenAIAPICompatibleClient, OpenAIClient, OllamaClient from .claude_client import ClaudeClient, CachedClaudeClient -from base import TranspoClient +from .base import TranspoClient __all__ = [ 'ParamsLoader', From 7082a6ac547e18f43c9a5fadc20bea10bcc382a4 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 20:32:21 +0200 Subject: [PATCH 083/112] missing f-strings on logging --- src/auto_po_lyglot/po_main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/auto_po_lyglot/po_main.py b/src/auto_po_lyglot/po_main.py index c9c6ca6..0857f7a 100755 --- a/src/auto_po_lyglot/po_main.py +++ b/src/auto_po_lyglot/po_main.py @@ -48,7 +48,7 @@ def get_outfile_name(model_name, input_po, target_language, context_language): model_name = model_name.replace(':', '-') outfile = 
p.with_suffix(f'.{model_name}.{target_code}.po') - logger.info("Output file: {outfile}") + logger.info(f"Output file: {outfile}") if outfile.exists(): logger.info("Output file already exists, won't overwrite.") i = 0 @@ -58,7 +58,7 @@ def get_outfile_name(model_name, input_po, target_language, context_language): i_outfile = outfile.with_suffix(f'.{i}.po') i += 1 outfile = i_outfile - logger.info("Corrected output file: {outfile}") + logger.info(f"Corrected output file: {outfile}") return outfile @@ -94,6 +94,8 @@ def main(): output_file = params.output_po or get_outfile_name(client.params.model, params.input_po, target_language, params.context_language) # Load input .po file + assert params.input_po, "Input .po file not provided" + assert Path(params.input_po).exists(), f"Input .po file {params.input_po} does not exist" po = polib.pofile(params.input_po) try: for entry in po: From 32e9880f0c6ce9cb60c0f6f083d80e553c0a63cf Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 20:37:13 +0200 Subject: [PATCH 084/112] fixed set log level --- src/auto_po_lyglot/getenv.py | 44 +++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index a968a28..5094c3e 100755 --- a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -3,10 +3,44 @@ from dotenv import load_dotenv from os import environ import argparse +import sys logger = logging.getLogger(__name__) +def set_all_loggers_level(level): + logger.info(f"Setting all loggers to level {logging.getLevelName(level)}") + + handler = logging.StreamHandler(sys.stderr) + handler.setLevel(level) + formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s') + handler.setFormatter(formatter) + + for name in logging.root.manager.loggerDict: + if not name.startswith('auto_po_lyglot.'): + continue + nlogger = logging.getLogger(name) + nlogger.handlers = [] + nlogger.addHandler(handler) + 
nlogger.setLevel(level) + nlogger.propagate = False + + root = logging.getLogger() + root.handlers = [] + root.addHandler(handler) + root.setLevel(level) + + +# def inspect_logger(logger): +# print(f"Logger: {logger.name}") +# print(f" Level: {logging.getLevelName(logger.level)}") +# print(f" Propagate: {logger.propagate}") +# print(" Handlers:") +# for idx, handler in enumerate(logger.handlers): +# print(f" Handler {idx}: {type(handler).__name__}") +# print(f" Level: {logging.getLevelName(handler.level)}") + + class ParamsLoader: description = """ Creates a .po translation file based on an existing one using a given model and llm type. @@ -51,7 +85,7 @@ def parse_args(self, additional_args=None): '.../locale//LC_MESSAGES/.') parser.add_argument('-v', '--verbose', action='store_true', help='verbose mode') - parser.add_argument('-vv', '--debug', action='store_true', help='verbose mode') + parser.add_argument('-vv', '--debug', action='store_true', help='debug mode') if additional_args: for arg in additional_args: if arg.get('action'): @@ -68,9 +102,11 @@ def __init__(self, additional_args=None): args = self.parse_args(additional_args) - if args.show_prompts: + if args.show_prompts: self.show_prompts = True return # will exit just after showing prompts, no need to continue + else: + self.show_prompts = False load_dotenv(override=True) @@ -80,7 +116,7 @@ def __init__(self, additional_args=None): self.log_level = logging.INFO else: self.log_level = logging.WARNING - logging.getLogger().setLevel(self.log_level) + set_all_loggers_level(self.log_level) # original language self.original_language = args.original_language or environ.get('ORIGINAL_LANGUAGE', 'English') @@ -114,6 +150,8 @@ def __init__(self, additional_args=None): val = getattr(args, arg) or environ.get(argument.get('env'), argument.get('default', None)) setattr(self, arg, val) + logger.debug(f"Params: {self.__dict__}") + def get_client(self): if not self._client: From 981921a07f81052282763cdd69a229ed09c62ab7 
Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 20:37:30 +0200 Subject: [PATCH 085/112] fixed wrong message --- src/auto_po_lyglot/po_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/po_main.py b/src/auto_po_lyglot/po_main.py index 0857f7a..0ff2acc 100755 --- a/src/auto_po_lyglot/po_main.py +++ b/src/auto_po_lyglot/po_main.py @@ -116,7 +116,7 @@ def main(): """) sleep(1.0) # Sleep for 1 second to avoid rate limiting except Exception as e: - logger.info(f"Error: {e}, trying next translation") + logger.info(f"Error: {e}") # Save the new .po file even if there was an error to not lose what was translated po.save(output_file) From 34f1b59da7697e3fdaf13bba76b279a36fb564c7 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 21:14:06 +0200 Subject: [PATCH 086/112] fixed get prompt --- src/auto_po_lyglot/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/auto_po_lyglot/base.py b/src/auto_po_lyglot/base.py index c6bd031..91701b6 100644 --- a/src/auto_po_lyglot/base.py +++ b/src/auto_po_lyglot/base.py @@ -1,6 +1,5 @@ from abc import ABC, abstractmethod import logging -from os import environ from .default_prompts import ( system_prompt as default_system_prompt, user_prompt as default_user_prompt, @@ -41,8 +40,9 @@ def get_translation(self, phrase, context_translation): ... 
def get_system_prompt(self): - format = self.params.system_prompt if hasattr(self.params, 'system_prompt') else default_system_prompt + format = self.params.system_prompt or default_system_prompt logger.debug("system prompt format: ", format) + # print("default system prompt format: ", default_system_prompt) try: basic_exemple = basic_examples[0] assert self.params.original_language in basic_exemple @@ -109,7 +109,7 @@ def get_system_prompt(self): return system_prompt def get_user_prompt(self, phrase, context_translation): - format = environ.get("USER_PROMPT", None) or default_user_prompt + format = self.params.user_prompt or default_user_prompt if format is None: raise TranspoException("USER_PROMPT environment variable not set") params = { From f6e1145d054f0d92ffbaf20fd39654635a80795e Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 21:14:22 +0200 Subject: [PATCH 087/112] removed backslahes --- src/auto_po_lyglot/default_prompts.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/auto_po_lyglot/default_prompts.py b/src/auto_po_lyglot/default_prompts.py index 0b8a8e2..e26683a 100644 --- a/src/auto_po_lyglot/default_prompts.py +++ b/src/auto_po_lyglot/default_prompts.py @@ -11,7 +11,7 @@ translation. Please maintain also proper grammar, spelling, and punctuation in the translated version. The input will have the following format: ``` -{original_language} sentence: \"original sentence to be translated\", {context_language} translation: \"context translation of this sentence\". +{original_language} sentence: "original sentence to be translated", {context_language} translation: "context translation of this sentence". ``` Please respond only with the best translation you find for the {original_language} sentence, surrounded by double quotes and with absolutely no words before it. @@ -19,20 +19,20 @@ the best translation and write the explanation on a new line. 
For example, if you would receive as input: ``` -{original_language}: \"{simple_original_phrase}\", {context_language} translation: \"{simple_context_translation}\" +{original_language}: "{simple_original_phrase}", {context_language} translation: "{simple_context_translation}" ``` your output in {target_language} would be: ``` -\"{simple_target_translation}\" +"{simple_target_translation}" ``` Another input example with an ambiguous original sentence for which you need an explanation: ``` -{original_language} sentence: \"{ambiguous_original_phrase}\", {context_language} translation: \"{ambiguous_context_translation}\" +{original_language} sentence: "{ambiguous_original_phrase}", {context_language} translation: "{ambiguous_context_translation}" ``` and your output would be, assuming an explanation is needed: ``` -\"{ambiguous_target_translation}\" +"{ambiguous_target_translation}" {ambiguous_explanation} ``` Also, sometimes, the sentence to be translated and its context translation will contain placheholders that you are not allowed @@ -43,31 +43,31 @@ understand the contex but it is very important that you do not translate them and you keep them in the right place in your translation. 
For instance, this input: ``` -{original_language} sentence: \"{po_placeholder_original_phrase_1}\\", {context_language} translation: \"{po_placeholder_context_translation_1}\" +{original_language} sentence: "{po_placeholder_original_phrase_1}", {context_language} translation: "{po_placeholder_context_translation_1}" ``` would be translated in {target_language} into: ``` -\"{po_placeholder_target_translation_1}\" +"{po_placeholder_target_translation_1}" ``` and, using another placheolder format: ``` -{original_language} sentence: \"{po_placeholder_original_phrase_2}\\", {context_language} translation: \"{po_placeholder_context_translation_2}\" +{original_language} sentence: "{po_placeholder_original_phrase_2}", {context_language} translation: "{po_placeholder_context_translation_2}" ``` would be translated in {target_language} into: ``` -\"{po_placeholder_target_translation_2}\" +"{po_placeholder_target_translation_2}" ``` Yet another format: ``` -{original_language} sentence: \"{po_placeholder_original_phrase_3}\\", {context_language} translation: \"{po_placeholder_context_translation_3}\" +{original_language} sentence: "{po_placeholder_original_phrase_3}", {context_language} translation: "{po_placeholder_context_translation_3}" ``` would be translated in {target_language} into: ``` -\"{po_placeholder_target_translation_3}\" +"{po_placeholder_target_translation_3}" ``` """ # noqa -user_prompt = "{original_language} sentence: \"{original_phrase}\", {context_language} translation: \"{context_translation}\"" +user_prompt = """{original_language} sentence: "{original_phrase}", {context_language} translation: "{context_translation}" """ ###################################################################################### # EXAMPLES OF TRANSLATIONS IN DIFFERENT LANGUAGES # From c260dbe817ee660dd169acf5bb848f9a92576f90 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sat, 7 Sep 2024 21:14:36 +0200 Subject: [PATCH 088/112] traces on prompts --- 
src/auto_po_lyglot/getenv.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index 5094c3e..f1d231f 100755 --- a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -136,7 +136,12 @@ def __init__(self, additional_args=None): self.test_target_languages = environ.get('TARGET_LANGUAGES', 'Spanish').split(',') self.system_prompt = environ.get('SYSTEM_PROMPT', None) + if self.system_prompt: + logger.debug(f"SYSTEM_PROMPT environment variable is set to '{self.system_prompt}'") + self.user_prompt = environ.get('USER_PROMPT', None) + if self.user_prompt: + logger.debug(f"USER_PROMPT environment variable is set to '{self.user_prompt}'") self.input_po = args.input_po or environ.get('INPUT_PO', None) self.output_po = args.output_po or environ.get('OUTPUT_PO', None) From 796b2f84f5b42a2087fad37a34ea5a62fa304218 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 11:22:01 +0200 Subject: [PATCH 089/112] v1.0.2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2eb01fa..8f47f0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "auto-po-lyglot" -version = "1.0.1" +version = "1.0.2" #dynamic = ["version"] authors = [ { name="Olivier LEVILLAIN", email="levillain.olivier@gmail.com" }, From 81ebb7a63bab8b579a17550d145bd8fc3e32c4c7 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 11:26:27 +0200 Subject: [PATCH 090/112] removed upload to test pypi --- .github/workflows/build-package.yaml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 1d0bd75..bf4bfa2 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -71,19 +71,9 @@ jobs: fail_on_empty: false sort_keys: false - - name: Build 
package - run: python -m build - - - name: Publish package distributions to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - repository-url: https://test.pypi.org/legacy/ - user: __token__ - password: ${{ secrets.TEST_PYPI_SECRET_TOKEN }} - - name: Test with pytest (only for testing, to be removed and tested with Ollama if connection error) run: | - pip install -i https://test.pypi.org/simple/ auto-po-lyglot + pip install . pip list | grep auto-po-lyglot pytest -s ./tests From 7f79371d9bedb53fd16712c64c00b87963dd28ef Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 11:28:27 +0200 Subject: [PATCH 091/112] removed blocking trace step --- .github/workflows/build-package.yaml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index bf4bfa2..04b51bc 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -71,12 +71,6 @@ jobs: fail_on_empty: false sort_keys: false - - name: Test with pytest (only for testing, to be removed and tested with Ollama if connection error) - run: | - pip install . - pip list | grep auto-po-lyglot - pytest -s ./tests - - name: Display Ollama version run: | echo "Ollama version file content:" @@ -156,6 +150,5 @@ jobs: - name: Test with pytest run: | - pip install -i https://test.pypi.org/simple/ auto-po-lyglot - pip list | grep auto-po-lyglot + pip install . 
pytest -s ./tests \ No newline at end of file From 016994948c7120d5e36329fb71a6b455b877f98c Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 11:48:42 +0200 Subject: [PATCH 092/112] removed vars for prompts + trace of .env before running tests --- .github/workflows/build-package.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 04b51bc..eb86282 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -13,7 +13,7 @@ on: env: LLM_CLIENT: ollama MODEL: gemma2:2b - LOG_LEVEL: INFO # optional + LOG_LEVEL: INFO INPUT_PO: tests/input/input.po ORIGINAL_LANGUAGE: English CONTEXT_LANGUAGE: French @@ -56,7 +56,7 @@ jobs: - name: Make envfile uses: SpicyPizza/create-envfile@v2.0.3 with: - envkey_LOG_LEVEL: ${{ env.LOG_LEVEL }} # optional + envkey_LOG_LEVEL: ${{ env.LOG_LEVEL }} envkey_INPUT_PO: ${{ env.INPUT_PO }} envkey_ORIGINAL_LANGUAGE: ${{ env.ORIGINAL_LANGUAGE }} envkey_CONTEXT_LANGUAGE: ${{ env.CONTEXT_LANGUAGE }} @@ -64,8 +64,6 @@ jobs: envkey_LLM_CLIENT: ${{ env.LLM_CLIENT }} envkey_LLM_MODEL: ${{ env.MODEL }} envkey_OLLAMA_BASE_URL: ${{ env.OLLAMA_BASE_URL }} - envkey_SYSTEM_PROMPT: ${{ vars.SYSTEM_PROMPT }} - envkey_USER_PROMPT: ${{ vars.USER_PROMPT }} directory: . file_name: .env fail_on_empty: false @@ -151,4 +149,6 @@ jobs: - name: Test with pytest run: | pip install . 
+ echo "Running pytest with .env file:" + cat .env pytest -s ./tests \ No newline at end of file From 005fd2471adafba3001364d40d483b5adcb25ee2 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 11:53:23 +0200 Subject: [PATCH 093/112] traces of loaded params at INFO level --- src/auto_po_lyglot/getenv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index f1d231f..b869018 100755 --- a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -155,7 +155,7 @@ def __init__(self, additional_args=None): val = getattr(args, arg) or environ.get(argument.get('env'), argument.get('default', None)) setattr(self, arg, val) - logger.debug(f"Params: {self.__dict__}") + logger.info(f"Loaded Params: {self.__dict__}") def get_client(self): if not self._client: From 8a31c2643dcd93ddacb6b5e698443739aaaab046 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 11:58:44 +0200 Subject: [PATCH 094/112] reintroduced trace step before loading ollama for testing params --- .github/workflows/build-package.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index eb86282..6ab098f 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -69,6 +69,13 @@ jobs: fail_on_empty: false sort_keys: false + - name: Test with pytest (remove this step after testing params are ok) + run: | + pip install . 
+ echo "Running pytest with .env file:" + cat .env + pytest -s ./tests + - name: Display Ollama version run: | echo "Ollama version file content:" From fd5fa31bde7115e241f8d8187d406dbaa1e486dd Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 12:21:42 +0200 Subject: [PATCH 095/112] added warning in the doc --- tests/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/settings.py b/tests/settings.py index b96445b..b6ea391 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -5,6 +5,7 @@ OUTPUT_DIRECTORY = "./tests/output" # Some ambiguous sentences in the ORIGINAL_LANGUAGE and their CONTEXT_LANGUAGE translations for testing +# WARNING: assumes target language is Italian! TEST_TRANSLATIONS = [ { "original_phrase": "He gave her a ring.", From f1d5df825d5d1d284b2637bc1555e5c12ad5700d Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 12:22:18 +0200 Subject: [PATCH 096/112] measure percentage of translated messages --- src/auto_po_lyglot/po_main.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/auto_po_lyglot/po_main.py b/src/auto_po_lyglot/po_main.py index 0ff2acc..156487c 100755 --- a/src/auto_po_lyglot/po_main.py +++ b/src/auto_po_lyglot/po_main.py @@ -98,6 +98,7 @@ def main(): assert Path(params.input_po).exists(), f"Input .po file {params.input_po} does not exist" po = polib.pofile(params.input_po) try: + nb_translations = 0 for entry in po: if entry.msgid and not entry.fuzzy: context_translation = entry.msgstr if entry.msgstr else entry.msgid @@ -115,10 +116,14 @@ def main(): Comment:{explanation if explanation else ''} """) sleep(1.0) # Sleep for 1 second to avoid rate limiting + nb_translations += 1 except Exception as e: logger.info(f"Error: {e}") # Save the new .po file even if there was an error to not lose what was translated po.save(output_file) + percent_translated = round(nb_translations / len(po) * 100, 2) + logger.info(f"Saved {output_file}, translated {nb_translations} entries 
out " + f"of {len(po)} entries ({percent_translated}%)") if __name__ == "__main__": From d13d17164de1eca145420b15ca513154b0aea5c5 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 12:23:10 +0200 Subject: [PATCH 097/112] changed model and target language to check if taken into account --- .github/workflows/build-package.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 6ab098f..dae9f4a 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -17,7 +17,7 @@ env: INPUT_PO: tests/input/input.po ORIGINAL_LANGUAGE: English CONTEXT_LANGUAGE: French - TARGET_LANGUAGES: Italian # comma separated list + TARGET_LANGUAGES: German # comma separated list OLLAMA_BASE_URL: "http://localhost:11434/v1" # 2 files used to cache the Ollama version and model list # so that they do not need to be downloaded every time From 4722530819857eae561417718d71ed0cb670be53 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 12:27:01 +0200 Subject: [PATCH 098/112] changed model name to see if taken into account --- .github/workflows/build-package.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index dae9f4a..ae441ff 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -12,12 +12,12 @@ on: env: LLM_CLIENT: ollama - MODEL: gemma2:2b + MODEL: phi3 LOG_LEVEL: INFO INPUT_PO: tests/input/input.po ORIGINAL_LANGUAGE: English CONTEXT_LANGUAGE: French - TARGET_LANGUAGES: German # comma separated list + TARGET_LANGUAGES: Italian # comma separated list OLLAMA_BASE_URL: "http://localhost:11434/v1" # 2 files used to cache the Ollama version and model list # so that they do not need to be downloaded every time From 5552be6264152623304a99c9efb284f2ab6f1976 Mon Sep 17 00:00:00 2001 From: 
Olivier LEVILLAIN Date: Sun, 8 Sep 2024 12:32:01 +0200 Subject: [PATCH 099/112] new traces to understand why model is none --- .github/workflows/build-package.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index ae441ff..3c863fd 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -12,7 +12,7 @@ on: env: LLM_CLIENT: ollama - MODEL: phi3 + LLM_MODEL: phi3 LOG_LEVEL: INFO INPUT_PO: tests/input/input.po ORIGINAL_LANGUAGE: English @@ -24,7 +24,7 @@ env: # Touch this file to force it to update Ollama OLLAMA_VERSION_FILE: '.github/workflows/ollama-version.txt' # Put in this file a list of all models you want to pull from Ollama, one per line. - # MODEL must be set to one of these + # LLM_MODEL must be set to one of these MODEL_LIST_FILE: '.github/workflows/model-list.txt' jobs: @@ -62,7 +62,7 @@ jobs: envkey_CONTEXT_LANGUAGE: ${{ env.CONTEXT_LANGUAGE }} envkey_TARGET_LANGUAGES: ${{ env.TARGET_LANGUAGES }} envkey_LLM_CLIENT: ${{ env.LLM_CLIENT }} - envkey_LLM_MODEL: ${{ env.MODEL }} + envkey_LLM_MODEL: ${{ env.LLM_MODEL }} envkey_OLLAMA_BASE_URL: ${{ env.OLLAMA_BASE_URL }} directory: . file_name: .env @@ -74,6 +74,8 @@ jobs: pip install . 
echo "Running pytest with .env file:" cat .env + echo "Running pytest with environment variables:" + env | grep -E 'LOG_LEVEL|INPUT_PO|ORIGINAL_LANGUAGE|CONTEXT_LANGUAGE|TARGET_LANGUAGES|LLM_CLIENT|LLM_MODEL|OLLAMA_BASE_URL' pytest -s ./tests - name: Display Ollama version From c0377ab49a40426fc8059bd9fc04b1eae8cec00a Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 12:38:37 +0200 Subject: [PATCH 100/112] removed test step --- .github/workflows/build-package.yaml | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 3c863fd..1ed598c 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -69,15 +69,6 @@ jobs: fail_on_empty: false sort_keys: false - - name: Test with pytest (remove this step after testing params are ok) - run: | - pip install . - echo "Running pytest with .env file:" - cat .env - echo "Running pytest with environment variables:" - env | grep -E 'LOG_LEVEL|INPUT_PO|ORIGINAL_LANGUAGE|CONTEXT_LANGUAGE|TARGET_LANGUAGES|LLM_CLIENT|LLM_MODEL|OLLAMA_BASE_URL' - pytest -s ./tests - - name: Display Ollama version run: | echo "Ollama version file content:" @@ -160,4 +151,7 @@ jobs: pip install . echo "Running pytest with .env file:" cat .env - pytest -s ./tests \ No newline at end of file + # EXTREMELY WEIRD: if you remove these 2 lines, the test fails because LLM_MODEL is not set. 
+ echo "Running pytest with environment variables:" + env | grep -E 'LOG_LEVEL|INPUT_PO|ORIGINAL_LANGUAGE|CONTEXT_LANGUAGE|TARGET_LANGUAGES|LLM_CLIENT|LLM_MODEL|OLLAMA_BASE_URL' + pytest -s ./tests From 619ffcde99cc483372891769f4ff74b3cf0599fd Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 16:01:48 +0200 Subject: [PATCH 101/112] removed tries folder --- tries/ollama-chat.py | 42 ------------------------- tries/requirements.txt | 6 ---- tries/transpo_prompts.py | 33 -------------------- tries/transpo_try1.py | 38 ----------------------- tries/transpo_try2.py | 60 ------------------------------------ tries/transpo_try3.py | 30 ------------------ tries/transpo_try4-openai.py | 37 ---------------------- 7 files changed, 246 deletions(-) delete mode 100755 tries/ollama-chat.py delete mode 100644 tries/requirements.txt delete mode 100644 tries/transpo_prompts.py delete mode 100644 tries/transpo_try1.py delete mode 100644 tries/transpo_try2.py delete mode 100644 tries/transpo_try3.py delete mode 100644 tries/transpo_try4-openai.py diff --git a/tries/ollama-chat.py b/tries/ollama-chat.py deleted file mode 100755 index 38c0d1c..0000000 --- a/tries/ollama-chat.py +++ /dev/null @@ -1,42 +0,0 @@ -import requests -import json -import gradio as gr - -url = "http://localhost:11434/api/generate" - -headers = { - 'Content-Type': 'application/json', -} - -conversation_history = [] - -def generate_response(prompt): - conversation_history.append(prompt) - - full_prompt = "\n".join(conversation_history) - - data = { - "model": "mistral", - "stream": False, - "prompt": full_prompt, - } - - response = requests.post(url, headers=headers, data=json.dumps(data)) - - if response.status_code == 200: - response_text = response.text - data = json.loads(response_text) - actual_response = data["response"] - conversation_history.append(actual_response) - return actual_response - else: - print("Error:", response.status_code, response.text) - return None - -iface = 
gr.Interface( - fn=generate_response, - inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."), - outputs="text" -) - -iface.launch() diff --git a/tries/requirements.txt b/tries/requirements.txt deleted file mode 100644 index 42cbfe8..0000000 --- a/tries/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -polib -transformers -torch -sentencepiece -sacremoses -protobuf diff --git a/tries/transpo_prompts.py b/tries/transpo_prompts.py deleted file mode 100644 index 78a60a6..0000000 --- a/tries/transpo_prompts.py +++ /dev/null @@ -1,33 +0,0 @@ -from base import TranspoPrompts - - -class TranspoPromptsImpl1(TranspoPrompts): - - def get_system_prompt(self): - return f""" -You are a helpful, smart translation assistant. You will be given an {self.original_language} sentence -to be translated to {self.target_language}. You will also be given a {self.context_language} translation -for this {self.original_language} sentence that you will consider for desambiguating the meaning of the -{self.original_language} sentence. Your {self.target_language} translation must be consistent with the -{self.context_language} translation. -Please respond with the best translation you find for the {self.context_language} sentence. If you need to provide -an explanation of the translation, please do so but only after giving the best translation and on another line.""" - - def get_user_prompt(self, phrase, context_translation): - return (f"""{self.original_language} sentence: "{phrase}", {self.context_language} translation:""" - f""" "{context_translation}", {self.target_language}:""") - - -class TranspoPromptsImpl2(TranspoPrompts): - - def get_system_prompt(self): - return "You are a helpful assistant that translates text." 
- - def get_user_prompt(self, phrase, context_translation): - return ( - f"Translate the following {self.original_language} sentence into {self.target_language}," - f"considering the provided {self.context_language} context for disambiguation:\n" - f"{self.original_language}: '{phrase}'\n" - f"{self.context_language} context: '{context_translation}'\n" - f"Provide only the {self.target_language} translation." - ) diff --git a/tries/transpo_try1.py b/tries/transpo_try1.py deleted file mode 100644 index 57fa867..0000000 --- a/tries/transpo_try1.py +++ /dev/null @@ -1,38 +0,0 @@ -import polib -from transformers import pipeline - - -def translate_po_file(input_file, output_file): - # Load multilingual translation template - translator = pipeline("translation", model="facebook/mbart-large-50-many-to-many-mmt") - - # Load input .po file - po = polib.pofile(input_file) - - # Browse each entry and translate - for entry in po: - if entry.msgid and not entry.fuzzy: - # Preparing the context and the text to be translated - context = entry.msgstr if entry.msgstr else entry.msgid - text_to_translate = entry.msgid - - # Building the prompt - prompt = f"Translate to Spanish. 
Context: {context}\nText: {text_to_translate}" - - # Translate into Spanish - translation = translator(prompt, src_lang="en_XX", tgt_lang="es_XX")[0]['translation_text'] - - # Extract the translated part (after "Text: ") - translation = translation.split("Text: ")[-1].strip() - - # Update translation - entry.msgstr = translation - - # Save the new .po file - po.save(output_file) - - -# Example of use -input_file = "input.po" -output_file = "output.es.po" -translate_po_file(input_file, output_file) \ No newline at end of file diff --git a/tries/transpo_try2.py b/tries/transpo_try2.py deleted file mode 100644 index a430c62..0000000 --- a/tries/transpo_try2.py +++ /dev/null @@ -1,60 +0,0 @@ -import polib -from transformers import pipeline -import re - -regex = re.compile(r'{[^}]*}|%[sd]|%\([^)]*\)s') - - -def replace_vars(text): - placeholders = {} - for i, match in enumerate(re.finditer(regex, text)): - placeholder = f'__VAR{i}__' - placeholders[placeholder] = match.group() - text = text.replace(match.group(), placeholder, 1) - return text, placeholders - - -def restore_vars(text, placeholders): - for placeholder, original in placeholders.items(): - text = text.replace(placeholder, original) - return text - - -def translate_po_file(input_file, output_file): - # Load multilingual translation template - translator = pipeline("translation", model="facebook/mbart-large-50-many-to-many-mmt") - - # Load input .po file - po = polib.pofile(input_file) - - # Browse each entry and translate - for entry in po: - if entry.msgid and not entry.fuzzy: - # Preparing the context and the text to be translated - context = entry.msgstr if entry.msgstr else entry.msgid - context, ctxt_placeholders = replace_vars(context) - text_to_translate = entry.msgid - text_to_translate_no_placeholders, placeholders = replace_vars(text_to_translate) - - # Building the prompt - prompt = f"Translate to Spanish. 
Context: {context}\nText: {text_to_translate_no_placeholders}" - - # Translate into Spanish - translation = translator(prompt, src_lang="en_XX", tgt_lang="es_XX")[0]['translation_text'] - - # Extract the translated part (after "Text: ") - translation = translation.split("Text: ")[-1].strip() - - # Reinsert placeholders into the translated text - translation = restore_vars(translation, placeholders) - - # Update translation - entry.msgstr = translation - - print(f"Text to translate: {text_to_translate}\nContext: {context}\nTranslation: {translation}") - # Save the new .po file - po.save(output_file) - - -# Example of use -translate_po_file("input.po", "output.es.po") \ No newline at end of file diff --git a/tries/transpo_try3.py b/tries/transpo_try3.py deleted file mode 100644 index fa1a486..0000000 --- a/tries/transpo_try3.py +++ /dev/null @@ -1,30 +0,0 @@ -from transformers import AutoTokenizer, AutoModelForSeq2SeqLM - -# Charger le modèle mT5 et le tokenizer -model_name = "google/mt5-large" -tokenizer = AutoTokenizer.from_pretrained(model_name) -model = AutoModelForSeq2SeqLM.from_pretrained(model_name) - -# Phrase en anglais à traduire -text_to_translate = "He went to the bank" - -# Traduction contextuelle en français (connue) -context_translation = "Il est allé au bord de la rivière" - -# Création du prompt avec une structure claire -prompt = ( - f"Translate the English sentence to Spanish using the French context provided.\n\n" - f"English: {text_to_translate}\n" - f"French context: {context_translation}\n" - f"Spanish:" -) - -# Tokeniser le prompt -inputs = tokenizer(prompt, return_tensors="pt") - -# Générer la traduction en espagnol -translated_tokens = model.generate(**inputs) -translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True) - -# Afficher la traduction en espagnol -print(translated_text) diff --git a/tries/transpo_try4-openai.py b/tries/transpo_try4-openai.py deleted file mode 100644 index cfe5736..0000000 --- 
a/tries/transpo_try4-openai.py +++ /dev/null @@ -1,37 +0,0 @@ -import openai -# from dotenv import load_dotenv -# load_dotenv() - -# English sentence to translate -text_to_translate = "She made a call." - -# Context provided in French -context_translation = "Elle a pris une décision." - -# Prompt for GPT-4 -prompt = ( - f"Translate the following English sentence into Spanish, considering the provided French context for disambiguation:\n" - f"English: '{text_to_translate}'\n" - f"French context: '{context_translation}'\n" - f"Provide only the Spanish translation." -) - -# Call the OpenAI API to generate the response -client = openai.OpenAI() - - -response = client.chat.completions.create( - model="gpt-4o-2024-08-06", - messages=[ - {"role": "system", "content": "You are a helpful assistant that translates text."}, - {"role": "user", "content": prompt} - ], - max_tokens=60, - n=1, - stop=None, - temperature=0.3 -) - -# Display Spanish translation -spanish_translation = response.choices[0].message.content.strip() -print(spanish_translation) From a873e2a017a3e8eabb1e842f503074c60412f7d2 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 16:03:37 +0200 Subject: [PATCH 102/112] added a temparture parameter --- .env.example | 4 ++- src/auto_po_lyglot/claude_client.py | 4 +-- src/auto_po_lyglot/getenv.py | 31 ++++++++++++++-------- src/auto_po_lyglot/openai_ollama_client.py | 2 +- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/.env.example b/.env.example index c86a209..9f762d3 100644 --- a/.env.example +++ b/.env.example @@ -38,7 +38,9 @@ OLLAMA_BASE_URL="http://localhost:11434/v1" # the target languages to test for translation. Give a list of comma separated languages # Can be overriden on the command line (only one laguage in this case) TARGET_LANGUAGES=Italian,Spanish,German,Portuguese - +# set the temperature of the LLM (ie its randomness). Value between 0 and 1. The higher the value, the more "creative" the translation. 
+# Can be overriden on the command line (-t) +TEMPERATURE=0.2 # One prebuilt system and user prompts are provided by default in `default_prompts.py`. If you want, you can create # below your own system and user prompts. The system prompt can use the following placeholders: # {original_language}, {context_language}, {target_language}, {simple_original_phrase}, {simple_context_translation}, diff --git a/src/auto_po_lyglot/claude_client.py b/src/auto_po_lyglot/claude_client.py index a85fa63..35b9bdc 100644 --- a/src/auto_po_lyglot/claude_client.py +++ b/src/auto_po_lyglot/claude_client.py @@ -17,7 +17,7 @@ def get_translation(self, system_prompt, user_prompt): message = self.client.messages.create( model=self.params.model, max_tokens=1000, - temperature=0.2, + temperature=self.params.temperature, system=system_prompt, messages=[ { @@ -49,7 +49,7 @@ def get_translation(self, system_prompt, user_prompt): response = self.client.beta.prompt_caching.messages.create( model=self.params.model, max_tokens=1024, - temperature=0.2, + temperature=self.params.temperature, system=[ { "type": "text", diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index b869018..dd3c869 100755 --- a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -54,38 +54,45 @@ def parse_args(self, additional_args=None): # Add arguments parser.add_argument('-p', '--show_prompts', action='store_true', - help='show the prompts used for translations and exits') + help='show the prompts used for translation and exits') parser.add_argument('-l', '--llm', type=str, help='Le type of LLM you want to use. Can be openai, ollama, claude or claude_cached. ' - 'For openai or claude[_cached], you need to set the api key in the environment') + 'For openai or claude[_cached], you need to set the api key in the environment. ' + 'Supersedes LLM_CLIENT in .env. Default is ollama') parser.add_argument('-m', '--model', type=str, - help='the name of the model to use. 
If not provided, a default model ' - 'will be used, based on the chosen client') + help='the name of the model to use. Supersedes LLM_MODEL in .env. If not provided at all, ' + 'a default model will be used, based on the chosen client') + parser.add_argument('-t', '--temperature', + type=float, + help='the temperature of the model. Supersedes TEMPERATURE in .env. If not provided at all, ' + 'a default value of 0.2 will be used') parser.add_argument('--original_language', type=str, - help='the language of the original phrase') + help='the language of the original phrase. Supersedes ORIGINAL_LANGUAGE in .env. ') parser.add_argument('--context_language', type=str, - help='the language of the context translation') + help='the language of the context translation. Supersedes CONTEXT_LANGUAGE in .env. ') parser.add_argument('--target_language', type=str, - help='the language into which the original phrase will be translated') + help='the language into which the original phrase will be translated. Supersedes ' + 'TARGET_LANGUAGE in .env. ') parser.add_argument('-i', '--input_po', type=str, help='the .po file containing the msgids (phrases to be translated) ' - 'and msgstrs (context translations)') + 'and msgstrs (context translations). Supersedes INPUT_PO in .env.') parser.add_argument('-o', '--output_po', type=str, help='the .po file where the translated results will be written. If not provided, ' 'it will be created in the same directory as the input_po except if the input po file has ' 'the specific format .../locale//LC_MESSAGES/. ' 'In this case, the output po file will be created as ' - '.../locale//LC_MESSAGES/.') + '.../locale//LC_MESSAGES/. Supersedes ' + 'OUTPUT_PO in .env.') - parser.add_argument('-v', '--verbose', action='store_true', help='verbose mode') - parser.add_argument('-vv', '--debug', action='store_true', help='debug mode') + parser.add_argument('-v', '--verbose', action='store_true', help='verbose mode. 
Equivalent to LOG_LEVEL=INFO in .env') + parser.add_argument('-vv', '--debug', action='store_true', help='debug mode. Equivalent to LOG_LEVEL=DEBUG in .env') if additional_args: for arg in additional_args: if arg.get('action'): @@ -143,6 +150,8 @@ def __init__(self, additional_args=None): if self.user_prompt: logger.debug(f"USER_PROMPT environment variable is set to '{self.user_prompt}'") + self.temperature = args.temperature or float(environ.get('TEMPERATURE', 0.2)) + self.input_po = args.input_po or environ.get('INPUT_PO', None) self.output_po = args.output_po or environ.get('OUTPUT_PO', None) diff --git a/src/auto_po_lyglot/openai_ollama_client.py b/src/auto_po_lyglot/openai_ollama_client.py index 6d9e176..1c651b3 100644 --- a/src/auto_po_lyglot/openai_ollama_client.py +++ b/src/auto_po_lyglot/openai_ollama_client.py @@ -25,7 +25,7 @@ def get_translation(self, system_prompt, user_prompt): {"role": "user", "content": user_prompt}, ], # max_tokens=2000, - temperature=0.2, + temperature=self.params.temperature, stream=False ) return response.choices[0].message.content.strip() From cbd8f9e82fb86a3cc9a2a2c5a1fada78cbcdc936 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 16:04:12 +0200 Subject: [PATCH 103/112] allows having different test resuts depending on the used model --- tests/settings.py | 10 ++++++++-- tests/test_main.py | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/settings.py b/tests/settings.py index b6ea391..1e2e3e8 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -10,12 +10,18 @@ { "original_phrase": "He gave her a ring.", "context_translation": "Il lui a donné une bague.", - "target_translation": "Lui ha regalato un anello." + "target_translation": { + "gemma2:2b": "Lui ha regalato un anello.", + "phi3": "Ha dato a lei una collana." + } }, { "original_phrase": "She made a call.", "context_translation": "Elle a pris une décision.", - "target_translation": "Lei ha preso una decisione." 
+ "target_translation": { + "gemma2:2b": "Lei ha preso una decisione.", + "phi3": "Ha preso una decisione." + } }, { "original_phrase": "They left the room.", diff --git a/tests/test_main.py b/tests/test_main.py index 49e3fb1..1fc99fb 100755 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -73,6 +73,9 @@ def test_main(self, params, llm_client, output_file): """ print(trans_exp) outfile.write(f'{out} {trans_exp}') - assert translation == tr['target_translation'] + if type(tr['target_translation']) is str: + assert translation == tr['target_translation'] + else: + assert translation == tr['target_translation'][llm_client.params.model] outfile.close() # extract_csv_translations(output_file, params) From 7d31fdb7172c395d40625b747f9a43fd07aeda9e Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 16:15:33 +0200 Subject: [PATCH 104/112] Doc updated --- README.md | 74 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 8fb7736..8d65cac 100644 --- a/README.md +++ b/README.md @@ -6,27 +6,34 @@ For instance, you have a .po file containing msgids in English and msgstrs in Fr This can work with OpenAI (provided you have an OpenAI API key) or Anthropic Claude (provided you have an Anthropic AIP key) or Ollama (here, you'll run your Ollama server locally and be able to use any model that Ollama can run - depending obviously on your hardware capabilities, and that for free!). # Install -1. Create a python virtual env using pipenv or conda or whatever virtual env manager you prefer. eg: - - `conda create -n transpo python=3.10 && conda activate transpo` + +## Prerequisite +* You must have python>=3.10 installed +* While not required, it is highly recommended that you create a python virtual env, if you don't already have one, using pipenv or conda or whatever virtual env manager you prefer. 
eg: + `conda create -n auto_po_lyglot python=3.10 && conda activate auto_po_lyglot` +or + `python -m venv ~/auto_po_lyglot && source ~/auto_po_lyglot/bin/activate` + +## Install from PyPi +* Install the module from PyPi: + `pip install --upgrade auto_po_lyglot` + +## Install from sources 1. Fork the repo: - - `git clone https://github.com/leolivier/transpo.git` -1. cd to the transpo folder: - - `cd transpo` + `git clone https://github.com/leolivier/transpo.git auto_po_lyglot` +1. cd to the auto_po_lyglot folder: + `cd auto_po_lyglot` 1. Install the dependencies: - `pip install -r requirements` 1. check if po_main.py is executable otherwise: - `chmod a+x po_main.py` # Configuration -## `.env` file -Transpo uses a mix of command line arguments and `.env` file to be as flexible as possible; -Put in the `.env` file all parameters that don't change very often and use the command line to override their values when needed. +auto_po_lyglot uses a mix of command line arguments and `.env` file to be as flexible as possible; + +Most parameters can be given directly on the command line, but you can put in a `.env` file all parameters that don't change very often and use the command line only to override their values when needed. +## `.env` file The `.env` file can be created by copying the `.env.example` file to `.env`: `cp .env.example .env` Then edit the `.env` file and adapt it to your needs. Specifically: @@ -34,39 +41,40 @@ Then edit the `.env` file and adapt it to your needs. Specifically: Variables are: * `LLM_CLIENT`: possible values are 'ollama', 'openai', 'claude' or 'claude_cached' (claude_cached is advantageous for very big system prompts ie more than 1024 tokens with sonnet) * `LLM_MODEL`: default models are gpt-4o-2024-08-06 for OpenAI, claude-3-5-sonnet-20240620 for claude and claude_cached, llama3.1:8b for ollama. + * `TEMPERATURE`: the temperature provided to the LLM. 
Default is 0.2 If you choose OpenAI our Claude, you can also put in the .env file the API keys for the LLM: * `OPENAI_API_KEY` for OpenAI * `ANTHROPIC_API_KEY` for Claude -* usually, the language of the msgids and the one for the initial translation of the msgstrs will always be the same based on your own language knowledge. Especially if your native language is not English, you will probably use English as your source language and your native language as your 1st translation. Variables are: +* Usually, the language of the msgids and the one for the initial translation of the msgstrs will always be the same based on your own language knowledge. Especially if your native language is not English, you will probably use English as your source language and your native language as your 1st translation. Variables are: * `ORIGINAL_LANGUAGE` for the language used in msgids * `CONTEXT_LANGUAGE` for the langauge used in the 1rst translation * `TARGET_LANGUAGES` is a comma separated list of languages in which the .po file must be translated. Usually provide by the command line -* also, this is the place where you can tune the prompt for the LLM. The ones provided work quite well, but if you can do better, please open a PR and provide your prompt with the LLM on which you tested it and attach the original and translated .po files; - Variables used are `SYSTEM_PROMPT` and `USER_PROMPT` but you can create several ones and at the end of the file select of on them as in the .env.example file. -* `VERBOSE`=True or False drives verbose mode -* Other possible variables are more for testing with hardcode value and test_main.py... +* also, this is the place where you can tune the prompt for the LLM. The default ones provided work quite well, but if you can do better, please open a PR and provide your prompt with the LLM on which you tested it and attach the original and translated .po files; + Variables used are `SYSTEM_PROMPT` and `USER_PROMPT`. 
+* `LOG_LEVEL` sets the log level (values are DEBUG, INFO, WARNING, ERROR, CRITICAL). This can be overriden on the command line (-v = INFO, -vv = DEBUG) ## Tool arguments -usage: `po_main.py [-h] [--llm LLM] [--model MODEL] [--original_language ORIGINAL_LANGUAGE] [--context_language CONTEXT_LANGUAGE] - [--target_language TARGET_LANGUAGE] [--verbose] [--input_po INPUT_PO]` - +usage: auto_po_lyglot [-h] [-p] [-l LLM] [-m MODEL] [-t TEMPERATURE] [--original_language ORIGINAL_LANGUAGE] [--context_language CONTEXT_LANGUAGE] + [--target_language TARGET_LANGUAGE] [-i INPUT_PO] [-o OUTPUT_PO] [-v] [-vv] Creates a .po translation file based on an existing one using a given model and llm type. It reads the parameters from the command line and completes them if necessary from the .env in the same directory. It iterates over the provided target languages, and for each language iterates over the entries of the input po file and, using the provided client, model and prompt, translates the original phrase into the target language with the help of the context translation. -| option | can be used to | supersedes variable in the .env file | -|----------------------------------------|---------------------------------|--------------------------------------| -| -h, --help | show this help message and exit | | -| --verbose | verbose mode | VERBOSE=True or False | -| --input_po INPUT_PO | the .po file containing the msgids (phrases to be translated) and msgstrs (context translations) | INPUT_PO | -| --llm LLM | Le type of LLM you want to use. Can be openai, ollama, claude or claude_cached. For openai or claude[_cached], you need to set the proper api key in the environment or in the .env file | LLM_CLIENT | -| --model MODEL | the name of the model to use. 
If not provided, a default model will be used, based on the chosen client | LLM_MODEL | -| --original_language ORIGINAL_LANGUAGE | the language of the original phrase | ORIGINAL_LANGUAGE | -| --context_language CONTEXT_LANGUAGE | the language of the context translation | CONTEXT_LANGUAGE | -| --target_language TARGET_LANGUAGE | the language into which the original phrase will be translated | TARGET_LANGUAGES (which is an array) | +| option | can be used to | supersedes variable in the .env file | default value | +|----------------------------------------|---------------------------------|--------------------------------------|----------------| +| -h, --help | show this help message and exit | | | +| -v, --verbose | verbose mode | LOG_LEVEL=INFO | LOG_LEVEL=WARN | +| -vv, --debug | debug mode | LOG_LEVEL=DEBUG | LOG_LEVEL=WARN | +| -p, --show_prompts | show the prompts used for translation and exits | | | +| -i, --input_po INPUT_PO | the .po file containing the msgids (phrases to be translated) and msgstrs (context translations) | INPUT_PO | | +| -o, --output_po OUTPUT_PO | the .po file where the translated results will be written. If not provided, it will be created in the same directory as the input_po except if the input po file has the specific format .../locale//LC_MESSAGES/\. In this case, the output po file will be created as .../locale/\/LC_MESSAGES/\. | OUTPUT_PO | see doc | +| -l, --llm LLM | Le type of LLM you want to use. Can be openai, ollama, claude or claude_cached. For openai or claude[_cached], you need to set the proper api key in the environment or in the .env file | LLM_CLIENT | ollama | +| -m, --model MODEL | the name of the model to use. If not provided, a default model will be used, based on the chosen client | LLM_MODEL | see doc | +| -t, --temperature TEMPERATURE | the temperature of the model. 
If not provided at all, a default value of 0.2 will be used | TEMPERATURE | 0.2 | +| --original_language ORIGINAL_LANGUAGE | the language of the original phrase | ORIGINAL_LANGUAGE | | +| --context_language CONTEXT_LANGUAGE | the language of the context translation | CONTEXT_LANGUAGE | | +| --target_language TARGET_LANGUAGE | the language into which the original phrase will be translated | TARGET_LANGUAGES (which is an array) | | # TO DO -* Create scripts for running the tool on each OS (transpo.bat, transpo.sh, ...) -* Publish as a library on PyPi * Create a small website to submit a po file and get it translated using po_main.py From 3af98e1ca65e13bfaf8b9611a10c0a4454b31e47 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 20:00:09 +0200 Subject: [PATCH 105/112] retry managing cache --- .github/workflows/build-package.yaml | 46 ++++++++++++---------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index 1ed598c..f98b475 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -12,7 +12,7 @@ on: env: LLM_CLIENT: ollama - LLM_MODEL: phi3 + LLM_MODEL: gemma2:2b # or phi3 LOG_LEVEL: INFO INPUT_PO: tests/input/input.po ORIGINAL_LANGUAGE: English @@ -80,29 +80,22 @@ jobs: uses: actions/cache@v3 id: cache-ollama with: - path: ~/.ollama + path: /usr/local/bin/ollama key: ${{ runner.os }}-ollama-${{ hashFiles(env.OLLAMA_VERSION_FILE) }} restore-keys: | ${{ runner.os }}-ollama- - - name: Debug Cache Ollama + - name: Install Ollama (not cached) + if : steps.cache-ollama.outputs.cache-hit != 'true' run: | - echo "Cache hit: ${{ steps.cache-ollama.outputs.cache-hit }}" - if [ "${{ steps.cache-ollama.outputs.cache-hit }}" != 'true' ]; then - echo "Cache miss. This is normal if this is the first run or if the Ollama version has changed." - fi + echo "Cache miss. 
This is normal if this is the first run or if the Ollama version has changed." + echo "Installing Ollama" + curl https://ollama.ai/install.sh | sh - - name: Install or Use Cached Ollama + - name: Use Cached Ollama + if : steps.cache-ollama.outputs.cache-hit == 'true' run: | - if [ ! -f ~/.ollama/bin/ollama ]; then - echo "Installing Ollama" - curl https://ollama.ai/install.sh | sh - mkdir -p ~/.ollama/bin - cp /usr/local/bin/ollama ~/.ollama/bin/ollama - else - echo "Using cached Ollama" - fi - sudo ln -sf ~/.ollama/bin/ollama /usr/local/bin/ollama + echo "Cache Hit. No need to reinstall Ollama. Version=" ollama --version - name: Start Ollama and wait for it to serve @@ -117,15 +110,10 @@ jobs: path: ~/.ollama/models key: ${{ runner.os }}-ollama-models-${{ hashFiles(env.MODEL_LIST_FILE) }} - - name: Debug Cache Models - run: | - echo "Models cache hit: ${{ steps.cache-models.outputs.cache-hit }}" - if [ "${{ steps.cache-models.outputs.cache-hit }}" != 'true' ]; then - echo "Models cache miss. This is normal if this is the first run or if the model list has changed." - fi - - - name: Pull Ollama models + - name: Pull Ollama models (not cached) + if: steps.cache-models.outputs.cache-hit != 'true' run: | + echo "Models cache miss. This is normal if this is the first run or if the model list has changed." while IFS= read -r model || [[ -n "$model" ]]; do if [ ! -f ~/.ollama/models/${model}.bin ]; then echo "Pulling model: $model" @@ -136,6 +124,12 @@ jobs: done < ${{ env.MODEL_LIST_FILE }} ollama list + - name: Reuse Ollama cached models + if: steps.cache-models.outputs.cache-hit == 'true' + run: | + echo "Models cache hit! No need to re-pull them." + ollama list + - name: Debug final state if: always() run: | @@ -154,4 +148,4 @@ jobs: # EXTREMELY WEIRD: if you remove these 2 lines, the test fails because LLM_MODEL is not set. 
echo "Running pytest with environment variables:" env | grep -E 'LOG_LEVEL|INPUT_PO|ORIGINAL_LANGUAGE|CONTEXT_LANGUAGE|TARGET_LANGUAGES|LLM_CLIENT|LLM_MODEL|OLLAMA_BASE_URL' - pytest -s ./tests + pytest -s -v ./tests From f3b1acee944379a59eec084be41058e57281e7c7 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 20:00:31 +0200 Subject: [PATCH 106/112] fix issue when argument env param not set --- src/auto_po_lyglot/getenv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auto_po_lyglot/getenv.py b/src/auto_po_lyglot/getenv.py index dd3c869..340f005 100755 --- a/src/auto_po_lyglot/getenv.py +++ b/src/auto_po_lyglot/getenv.py @@ -161,7 +161,7 @@ def __init__(self, additional_args=None): arg = argument.get('arg') while arg.startswith('-'): arg = arg[1:] - val = getattr(args, arg) or environ.get(argument.get('env'), argument.get('default', None)) + val = getattr(args, arg) or environ.get(argument.get('env', 'UNDEFINED_VARIABLE'), argument.get('default', None)) setattr(self, arg, val) logger.info(f"Loaded Params: {self.__dict__}") From 4ced1fd29e268dc3137048c28ce88e66447fae00 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 20:00:54 +0200 Subject: [PATCH 107/112] values for gemma2:2b and phi3 --- tests/settings.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/tests/settings.py b/tests/settings.py index 1e2e3e8..a77dc24 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -11,7 +11,7 @@ "original_phrase": "He gave her a ring.", "context_translation": "Il lui a donné une bague.", "target_translation": { - "gemma2:2b": "Lui ha regalato un anello.", + "gemma2:2b": "Lui ha dato una collana.", "phi3": "Ha dato a lei una collana." } }, @@ -26,52 +26,62 @@ { "original_phrase": "They left the room.", "context_translation": "Ils ont quitté la pièce.", - "target_translation": "Si sono andati dalla stanza." 
+ "target_translation": { + "gemma2:2b": "Si sono andati dalla stanza.", + "phi3": "Hanno lasciato la stanza." + } }, { "original_phrase": "He gave her a ring.", "context_translation": "Il lui a passé un coup de fil.", - "target_translation": "Lui ha regalato un anello." + "target_translation": { + "gemma2:2b": "Gli ha passato un colpo di telefono.", + "phi3": "Ha fatto una telefonata." + } }, { "original_phrase": "She broke down", "context_translation": "Elle est tombée en panne", - "target_translation": "Lei si è guastata" + "target_translation": { + "gemma2:2b": "Si è spenta", "phi3": "È crollata." + } }, { "original_phrase": "She broke down", "context_translation": "Elle s'est effondrée", - "target_translation": "Lei si è sbandita" + "target_translation": { + "gemma2:2b": "Si è spezzata", "phi3": "Si è rotta" + } }, { "original_phrase": "bank", "context_translation": "rive", - "target_translation": "la banca" + "target_translation": {"gemma2:2b": "Banca", "phi3": ""} }, { "original_phrase": "bank", "context_translation": "banque", - "target_translation": "Banca" + "target_translation": {"gemma2:2b": "Banca", "phi3": "Banca"} }, { "original_phrase": "He saw the light.", "context_translation": "Il a compris.", - "target_translation": "Lui è capitato la luce." + "target_translation": {"gemma2:2b": "Ha visto la luce.", "phi3": "Ha capito il sole."} }, { "original_phrase": "He saw the light.", "context_translation": "Il a vu la lumière.", - "target_translation": "Lui è stata vista la luce." + "target_translation": {"gemma2:2b": "Ha visto la luce.", "phi3": "Ha visto la luce."} }, { "original_phrase": "She made a call.", "context_translation": "Elle a passé un appel.", - "target_translation": "Lei ha fatto una chiamata." 
+ "target_translation": {"gemma2:2b": "Ha fatto una chiamata.", "phi3": "Le ha fatto una telefonata."} }, { "original_phrase": "They left the room.", "context_translation": "Ils ont laissé la pièce en l'état.", - "target_translation": "Si hanno lasciato la stanza." + "target_translation": {"gemma2:2b": "Si sono andati dalla stanza.", "phi3": "Hanno lasciato il salotto in stato puro."} }, ] From b34b673539170edb491cdfebe20ad741d79ffe7f Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 20:16:37 +0200 Subject: [PATCH 108/112] updated one value --- tests/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/settings.py b/tests/settings.py index a77dc24..4275951 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -11,7 +11,7 @@ "original_phrase": "He gave her a ring.", "context_translation": "Il lui a donné une bague.", "target_translation": { - "gemma2:2b": "Lui ha dato una collana.", + "gemma2:2b": "Lui ha regalato una collana.", "phi3": "Ha dato a lei una collana." 
} }, From fc692ab90bbed999ae9fbbd7db7abda7a6dbc2d2 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 20:16:56 +0200 Subject: [PATCH 109/112] added gen test only --- tests/test_main.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index 1fc99fb..08eaead 100755 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +import os from auto_po_lyglot import ParamsLoader # from auto_po_lyglot import process_file from .settings import OUTPUT_DIRECTORY, TEST_TRANSLATIONS @@ -51,6 +52,11 @@ def setup(self, params, llm_client, output_file): pass def test_main(self, params, llm_client, output_file): + if 'GENTESTVALUES' in os.environ: + print("Skipping test_main because GENTESTVALUES is set: only generating test values") + gentestonly = True + else: + gentestonly = False print(f"Using model {llm_client.params.model} for {params.original_language} -> {params.context_language} -> " f"{params.test_target_languages} with an {params.llm_client} client") @@ -58,8 +64,7 @@ def test_main(self, params, llm_client, output_file): for target_language in params.test_target_languages: llm_client.target_language = target_language for tr in TEST_TRANSLATIONS: - out = f""" - {{ + out = f""" {{ "original_phrase": "{tr['original_phrase']}", # {params.original_language} "context_translation": "{tr['context_translation']}", # {params.context_language} "target_translation": """ @@ -68,11 +73,13 @@ def test_main(self, params, llm_client, output_file): comment = explanation.replace('\n', '\n# ') trans_exp = f"""{translation} # {target_language} # {comment} - }}, """ print(trans_exp) outfile.write(f'{out} {trans_exp}') + outfile.flush() + if gentestonly: # no assert if gentestonly + continue if type(tr['target_translation']) is str: assert translation == tr['target_translation'] else: From 3c958133ad0369bb79bb66fec98c54218bb46de6 Mon Sep 17 00:00:00 2001 From: Olivier 
LEVILLAIN Date: Sun, 8 Sep 2024 20:23:34 +0200 Subject: [PATCH 110/112] remove last change --- tests/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/settings.py b/tests/settings.py index 4275951..a77dc24 100644 --- a/tests/settings.py +++ b/tests/settings.py @@ -11,7 +11,7 @@ "original_phrase": "He gave her a ring.", "context_translation": "Il lui a donné une bague.", "target_translation": { - "gemma2:2b": "Lui ha regalato una collana.", + "gemma2:2b": "Lui ha dato una collana.", "phi3": "Ha dato a lei una collana." } }, From 90ee633a715e0a10eea7313cebfe1988ca5eca9b Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 20:23:53 +0200 Subject: [PATCH 111/112] removed restore-key on ollama cache --- .github/workflows/build-package.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build-package.yaml b/.github/workflows/build-package.yaml index f98b475..1be3de0 100644 --- a/.github/workflows/build-package.yaml +++ b/.github/workflows/build-package.yaml @@ -82,8 +82,6 @@ jobs: with: path: /usr/local/bin/ollama key: ${{ runner.os }}-ollama-${{ hashFiles(env.OLLAMA_VERSION_FILE) }} - restore-keys: | - ${{ runner.os }}-ollama- - name: Install Ollama (not cached) if : steps.cache-ollama.outputs.cache-hit != 'true' From ac157a65811cf8ba9c62e8eace075fbd2caa5537 Mon Sep 17 00:00:00 2001 From: Olivier LEVILLAIN Date: Sun, 8 Sep 2024 20:30:04 +0200 Subject: [PATCH 112/112] 1.1.0 to deliver new version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8f47f0a..77c39d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "auto-po-lyglot" -version = "1.0.2" +version = "1.1.0" #dynamic = ["version"] authors = [ { name="Olivier LEVILLAIN", email="levillain.olivier@gmail.com" },