Merge pull request #4 from leolivier:pypi-packaging
Pypi-packaging and a lot of other major enhancements
leolivier authored Sep 9, 2024
2 parents 1d2d7ee + ac157a6 commit 59649d3
Showing 36 changed files with 1,999 additions and 942 deletions.
118 changes: 23 additions & 95 deletions .env.example
@@ -1,11 +1,15 @@
# .env.example file to be copied to a .env file and adapted to your needs

# Set to True if you want verbose output (recommended at the beginning)
VERBOSE=False
# set log level (DEBUG, INFO, WARNING, ERROR, CRITICAL). This can be overridden on the
# command line (-v = INFO, -vv = DEBUG). Default is WARNING.
LOG_LEVEL=INFO

# input.po file context
# The input file itself. Usually provided on the command line but can also be set in the .env
INPUT_PO=tests/input/input.po
# INPUT_PO=tests/input/input.po
# The output file. Usually provided on the command line but can also be set in the .env. Use auto_po_lyglot -h to see
# how the file name is computed if not provided.
# OUTPUT_PO=tests/output/output.po

# Primary language (msgids). Can be overridden on the command line
ORIGINAL_LANGUAGE=English
@@ -33,95 +37,19 @@ OLLAMA_BASE_URL="http://localhost:11434/v1"

# the target languages to test for translation. Give a list of comma separated languages
# Can be overridden on the command line (only one language in this case)
TARGET_LANGUAGES=Italian,Spanish,German

# Two prebuilt system and user prompts; you can create your own ones using new numbers and change the choice below.
# The first one uses a very long and detailed system prompt and is quite efficient. If you find a better prompt,
# please open a PR and provide it to the community.
SYSTEM_PROMPT1="You are a helpful, smart translation assistant. You will be given an {original_language} sentence
to be translated to {target_language}. You will also be given a {context_language} translation
for this {original_language} sentence that you will consider for disambiguating the meaning of the
{original_language} sentence. Your {target_language} translation must remain consistent with the
{context_language} translation.
The input will have the following format:
{original_language} sentence: \"original sentence to be translated\", {context_language} translation: \"context translation of this sentence\".
Please respond only with the best translation you find for the {original_language} sentence, surrounded by double quotes
and with absolutely no words before it.
Should you need to provide an explanation of the translation, please write it in {original_language}, but only after giving
the best translation, and write the explanation on a new line. For example, supposing the original language is English, the context translation is in French,
and the target language is Italian, you would receive as input:
English sentence: \"He gave her a ring.\", French translation: \"Il lui a passé un coup de fil.\"

and your output would be:
\"Le ha dato un anello\"

Should you need to provide an explanation of the translation, your output would be:
\"Le ha fatto una telefonata.\"
Explanation: This Italian translation reflects the meaning of the French phrase, which indicates that the person made a phone call, not that he gave a ring.
The English phrase \"He gave her a ring\" can be ambiguous, as it can mean both \"giving a ring\" and \"making a phone call\" colloquially.
The French translation makes it clear that it is a phone call, so the Italian version follows this interpretation.

Another input example:
English sentence: \"She broke down.\", French translation: \"Elle est tombée en panne.\"

and your output would be, assuming an explanation is needed:
\"Si è guastata\"
Explanation: This translation refers to a vehicle or machinery that has stopped working, consistent with the French version that uses \"tomber en panne\",
an idiomatic expression for mechanical failure.

Now, supposing the original language is Italian, the context translation is in German, and the target language is Spanish, you would receive as input:
Italian sentence: \"Hanno lasciato la stanza.\", German translation: \"Sie verließen den Raum.\"

and your output would be:
\"Ellos salieron de la habitación.\"

Also, sometimes the sentence to be translated and its context translation will contain placeholders that you are not allowed to translate
and must keep in the same place in your translation. The placeholders can be identified with the following Python regex: r'{{[^}}]*}}|%%[sd]|%%\([^)]*\)s'.
Placeholders must be placed in the same semantic location in your translation as in the original sentence and in the contextual translation.
Sometimes, the names of the placeholders can be relevant for understanding the sentence. For instance, this input:
English sentence: \"%%(follower_name)s has created a new %%(followed_type)s: %%(followed_object_name)s\", French translation: \"%%(follower_name)s a créé un nouveau %%(followed_type)s: %%(followed_object_name)s\"

would be translated in Italian into:
\"%%(follower_name)s ha creato un nuovo %%(followed_type)s: %%(followed_object_name)s\"

and, using another placeholder format:
English sentence: \"{{follower_name}} has created a new {{followed_type}}: {{followed_object_name}}\", French translation: \"{{follower_name}} a créé un nouveau {{followed_type}}: {{followed_object_name}}\"

would be translated in Italian into:
\"{{follower_name}} ha creato un nuovo {{followed_type}}: {{followed_object_name}}\"
"
USER_PROMPT1="{original_language} sentence: \"{original_phrase}\", {context_language} translation: \"{context_translation}\""

SYSTEM_PROMPT2="You are a helpful assistant that translates text."
USER_PROMPT2="Translate the following {original_language} sentence into {target_language},
considering the provided {context_language} context for disambiguation:\n
{original_language}: '{phrase}'\n
{context_language} context: '{context_translation}'\n
Provide only the {target_language} translation."

# Here you choose which prompt couple to use
SYSTEM_PROMPT=${SYSTEM_PROMPT1}
USER_PROMPT=${USER_PROMPT1}

#####################################################################################
# This section is used for testing purposes (used in test_main.py)
#####################################################################################

# Where test results will be stored. Can be overridden on the command line
OUTPUT_DIRECTORY="./tests/output"
# Some ambiguous sentences in the ORIGINAL_LANGUAGE and their CONTEXT_LANGUAGE translations for testing
# Can be overridden on the command line with one (and only one) original phrase and its context translation
TEST_TRANSLATIONS="[
{\"original_phrase\": \"She broke down\", \"context_translation\": \"Elle est tombée en panne\"},
{\"original_phrase\": \"She broke down\", \"context_translation\": \"Elle s'est effondrée\"},
{\"original_phrase\": \"bank\", \"context_translation\": \"rive\"},
{\"original_phrase\": \"bank\", \"context_translation\": \"banque\"},
{\"original_phrase\": \"He saw the light.\", \"context_translation\": \"Il a compris.\"},
{\"original_phrase\": \"He saw the light.\", \"context_translation\": \"Il a vu la lumière.\"},
{\"original_phrase\": \"She made a call.\", \"context_translation\": \"Elle a passé un appel.\"},
{\"original_phrase\": \"She made a call.\", \"context_translation\": \"Elle a pris une décision.\"},
{\"original_phrase\": \"They left the room.\", \"context_translation\": \"Ils ont quitté la pièce.\"},
{\"original_phrase\": \"They left the room.\", \"context_translation\": \"Ils ont laissé la pièce en l'état.\"},
{\"original_phrase\": \"He gave her a ring.\", \"context_translation\": \"Il lui a donné une bague.\"},
{\"original_phrase\": \"He gave her a ring.\", \"context_translation\": \"Il lui a passé un coup de fil.\"}
]"
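Since TEST_TRANSLATIONS is stored as a JSON string, a test can load it straight from the environment. A hedged sketch (the real consumption happens in test_main.py, whose code is not shown in this diff; the fallback value here is illustrative):

```python
import json
import os

# Illustrative: in the real project the .env file is presumably loaded first
# (e.g. via python-dotenv); here we seed a fallback value for demonstration.
os.environ.setdefault(
    "TEST_TRANSLATIONS",
    '[{"original_phrase": "bank", "context_translation": "rive"}]',
)
pairs = json.loads(os.environ["TEST_TRANSLATIONS"])
for p in pairs:
    print(p["original_phrase"], "->", p["context_translation"])
```

Keeping the value as a single-line-capable JSON array is what lets one .env variable carry a whole list of test cases.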
TARGET_LANGUAGES=Italian,Spanish,German,Portuguese
# set the temperature of the LLM (i.e. its randomness). Value between 0 and 1. The higher the value, the more "creative" the translation.
# Can be overridden on the command line (-t)
TEMPERATURE=0.2
# One prebuilt system and user prompts are provided by default in `default_prompts.py`. If you want, you can create
# below your own system and user prompts. The system prompt can use the following placeholders:
# {original_language}, {context_language}, {target_language}, {simple_original_phrase}, {simple_context_translation},
# {simple_target_translation}, {ambiguous_original_phrase}, {ambiguous_context_translation}, {ambiguous_target_translation},
# {ambiguous_explanation}, {po_placeholder_original_phrase_1}, {po_placeholder_original_phrase_2}, {po_placeholder_original_phrase_3},
# {po_placeholder_context_translation_1}, {po_placeholder_context_translation_2}, {po_placeholder_context_translation_3},
# {po_placeholder_target_translation_1}, {po_placeholder_target_translation_2}, {po_placeholder_target_translation_3}.
# (all phrases, explanations and translations are taken from the examples below).
#SYSTEM_PROMPT="You are a highly skilled translator with expertise in {original_language}, {context_language}, and {target_language}..."
# The user prompt can use only the following placeholders: {original_language}, {original_phrase}, {context_language}, {context_translation},
# also taken from the examples below.
#USER_PROMPT="{original_language} sentence: \"{original_phrase}\", {context_language} translation: \"{context_translation}\""
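The user-prompt placeholders above are plain `str.format()` fields. A minimal sketch of how they are likely substituted (values taken from the examples earlier in this file; the actual formatting code lives in the package and may differ):

```python
# Hypothetical substitution of the USER_PROMPT placeholders via str.format().
USER_PROMPT = ('{original_language} sentence: "{original_phrase}", '
               '{context_language} translation: "{context_translation}"')

prompt = USER_PROMPT.format(
    original_language="English",
    original_phrase="He gave her a ring.",
    context_language="French",
    context_translation="Il lui a passé un coup de fil.",
)
print(prompt)
# → English sentence: "He gave her a ring.", French translation: "Il lui a passé un coup de fil."
```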
149 changes: 149 additions & 0 deletions .github/workflows/build-package.yaml
@@ -0,0 +1,149 @@
# This workflow will install Python dependencies, run tests and lint with Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python package

on:
push:
branches: [ "main", "pypi-packaging" ]
pull_request:
branches: [ "main" ]


env:
LLM_CLIENT: ollama
LLM_MODEL: gemma2:2b # or phi3
LOG_LEVEL: INFO
INPUT_PO: tests/input/input.po
ORIGINAL_LANGUAGE: English
CONTEXT_LANGUAGE: French
TARGET_LANGUAGES: Italian # comma separated list
OLLAMA_BASE_URL: "http://localhost:11434/v1"
# 2 files used to cache the Ollama version and model list
# so that they do not need to be downloaded every time
# Touch this file to force the workflow to update Ollama
OLLAMA_VERSION_FILE: '.github/workflows/ollama-version.txt'
# Put in this file a list of all models you want to pull from Ollama, one per line.
# LLM_MODEL must be set to one of these
MODEL_LIST_FILE: '.github/workflows/model-list.txt'

jobs:

test-with-ollama:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest build
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 ./src ./tests --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 ./src ./tests --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 2 --statistics
- name: Make envfile
uses: SpicyPizza/[email protected]
with:
envkey_LOG_LEVEL: ${{ env.LOG_LEVEL }}
envkey_INPUT_PO: ${{ env.INPUT_PO }}
envkey_ORIGINAL_LANGUAGE: ${{ env.ORIGINAL_LANGUAGE }}
envkey_CONTEXT_LANGUAGE: ${{ env.CONTEXT_LANGUAGE }}
envkey_TARGET_LANGUAGES: ${{ env.TARGET_LANGUAGES }}
envkey_LLM_CLIENT: ${{ env.LLM_CLIENT }}
envkey_LLM_MODEL: ${{ env.LLM_MODEL }}
envkey_OLLAMA_BASE_URL: ${{ env.OLLAMA_BASE_URL }}
directory: .
file_name: .env
fail_on_empty: false
sort_keys: false

- name: Display Ollama version
run: |
echo "Ollama version file content:"
cat ${{ env.OLLAMA_VERSION_FILE }}
echo "Ollama version hash:"
echo ${{ hashFiles(env.OLLAMA_VERSION_FILE) }}
- name: Cache Ollama
uses: actions/cache@v3
id: cache-ollama
with:
path: /usr/local/bin/ollama
key: ${{ runner.os }}-ollama-${{ hashFiles(env.OLLAMA_VERSION_FILE) }}

- name: Install Ollama (not cached)
if: steps.cache-ollama.outputs.cache-hit != 'true'
run: |
echo "Cache miss. This is normal if this is the first run or if the Ollama version has changed."
echo "Installing Ollama"
curl https://ollama.ai/install.sh | sh
- name: Use Cached Ollama
if: steps.cache-ollama.outputs.cache-hit == 'true'
run: |
echo "Cache Hit. No need to reinstall Ollama. Version="
ollama --version
- name: Start Ollama and wait for it to serve
run: |
ollama serve &
sleep 10
- name: Cache Ollama models
uses: actions/cache@v3
id: cache-models
with:
path: ~/.ollama/models
key: ${{ runner.os }}-ollama-models-${{ hashFiles(env.MODEL_LIST_FILE) }}

- name: Pull Ollama models (not cached)
if: steps.cache-models.outputs.cache-hit != 'true'
run: |
echo "Models cache miss. This is normal if this is the first run or if the model list has changed."
while IFS= read -r model || [[ -n "$model" ]]; do
if [ ! -f ~/.ollama/models/${model}.bin ]; then
echo "Pulling model: $model"
ollama pull $model
else
echo "Model already cached: $model"
fi
done < ${{ env.MODEL_LIST_FILE }}
ollama list
- name: Reuse Ollama cached models
if: steps.cache-models.outputs.cache-hit == 'true'
run: |
echo "Models cache hit! No need to re-pull them."
ollama list
- name: Debug final state
if: always()
run: |
echo "Ollama version:"
ollama --version
echo "Available models:"
ollama list
echo "Ollama directory content:"
ls -R ~/.ollama
- name: Test with pytest
run: |
pip install .
echo "Running pytest with .env file:"
cat .env
# EXTREMELY WEIRD: if you remove these 2 lines, the test fails because LLM_MODEL is not set.
echo "Running pytest with environment variables:"
env | grep -E 'LOG_LEVEL|INPUT_PO|ORIGINAL_LANGUAGE|CONTEXT_LANGUAGE|TARGET_LANGUAGES|LLM_CLIENT|LLM_MODEL|OLLAMA_BASE_URL'
pytest -s -v ./tests
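The workflow above waits a fixed 10 seconds after `ollama serve &`. A hedged alternative is to poll the Ollama HTTP endpoint until it answers (the URL matches the OLLAMA_BASE_URL host configured above; `wait_for_ollama` is an illustrative helper, not part of the project):

```python
import time
import urllib.error
import urllib.request

def wait_for_ollama(url="http://localhost:11434", timeout=60):
    """Poll the Ollama root endpoint until it responds or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            urllib.request.urlopen(url, timeout=2)
            return True  # server answered; it is ready to serve requests
        except (urllib.error.URLError, OSError):
            time.sleep(1)  # not up yet; retry
    return False
```

Polling avoids both wasted seconds on fast runners and flaky failures on slow ones.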
2 changes: 2 additions & 0 deletions .github/workflows/model-list.txt
@@ -0,0 +1,2 @@
phi3:latest
gemma2:2b
3 changes: 3 additions & 0 deletions .github/workflows/ollama-version.txt
@@ -0,0 +1,3 @@
# Touch this file or change its contents
# to force the GitHub workflow to install the
# latest version of Ollama.
32 changes: 32 additions & 0 deletions .github/workflows/publish-package.yaml
@@ -0,0 +1,32 @@
name: Upload Python Package

on:
release:
types: [published]

permissions:
contents: read

jobs:
deploy:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build package
run: python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
with:
user: __token__
password: ${{ secrets.PYPI_SECRET_TOKEN }}

29 changes: 29 additions & 0 deletions .github/workflows/run-ollama.yaml
@@ -0,0 +1,29 @@
name: Reusable Ollama Setup

on:
workflow_call:
inputs:
model:
required: true
type: string
description: "Name of Ollama model to be used"

jobs:
setup-ollama:
runs-on: ubuntu-latest
steps:
- name: Install Ollama
run: |
curl -fsSL https://ollama.ai/install.sh | sh
ollama --version
- name: Start Ollama service
run: |
ollama serve &
sleep 10 # Wait for service to start
- name: Pull Ollama model
run: ollama pull ${{ inputs.model }}

outputs:
ollama_ready: "true"
1 change: 1 addition & 0 deletions .gitignore
@@ -1,4 +1,5 @@
.env
.venv
*.pyc
__pycache__/
tests/output/