[GSK-1763] Adding python 3.11 support

Contributes to GSK-1762
Giskard-AI · Sep 21, 2023 · 38e8bba · 38e8bba
1 parent b42ff78
commit 38e8bba
Show file tree

Hide file tree

Showing 7 changed files with 5,932 additions and 68 deletions.
diff --git a/.github/workflows/build_backend.yml b/.github/workflows/build_backend.yml
@@ -123,30 +123,30 @@ jobs:
         run: ./gradlew :backend:integrationTest --info -Ptestcontainers
 
   build-python:
-    name: "Python ${{ matrix.python-version }}${{ matrix.pydantic_v1 && ' (Pydantic V1)' || ''}} on ${{ matrix.os }}${{matrix.experimental && ' (Non failing)' || '' }}"
+    name: "Python ${{ matrix.python-version }}${{ matrix.pydantic_v2 && ' (Pydantic V2)' || ''}} on ${{ matrix.os }}${{matrix.experimental && ' (Non failing)' || '' }}"
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false # Do not stop when any job fails
       matrix:
-        python-version: [ "3.8", "3.9", "3.10" ]
+        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
         os: [ubuntu-latest]
-        experimental: [false]
-        pydantic_v1: [false]
+        pydantic_v2: [false]
         # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
         include:
           - python-version: "3.10"
             os: windows-2019
-            experimental: false
-            pydantic_v1: false
+            pydantic_v2: false
           - python-version: "3.10"
-            os: ubuntu-latest
-            experimental: false
-            pydantic_v1: true
+            os: windows-2022
+            pydantic_v2: false
           - python-version: "3.10"
             os: macos-latest
-            experimental: false
-            pydantic_v1: false
-    continue-on-error: ${{ matrix.experimental }} # https://ncorti.com/blog/howto-github-actions-build-matrix
+            pydantic_v2: false
+          - python-version: "3.10"
+            os: ubuntu-latest
+            pydantic_v2: true
+
+    continue-on-error: false # https://ncorti.com/blog/howto-github-actions-build-matrix
     steps:
       - name: Checkout code
         uses: actions/[email protected]
@@ -184,18 +184,18 @@ jobs:
         working-directory: python-client 
         run: pdm run lint
 
-      - name: Install pydantic v1
-        if: ${{ matrix.pydantic_v1 }}
+      - name: Install pydantic v2
+        if: ${{ matrix.pydantic_v2 }}
         working-directory: python-client 
         run: |
           pdm run pip uninstall pydantic pydantic_core -y
-          pdm run pip install "pydantic<2"
+          pdm run pip install "pydantic>=2,<3"  # PEP 440: version clauses must be comma-separated
 
       - name: Check Pydantic installed version
         working-directory: python-client 
         run: |
           pdm run pip freeze | grep '^pydantic'
-          pdm run pip freeze | grep -q '^pydantic==${{ matrix.pydantic_v1 && '1' || '2' }}\.'
+          pdm run pip freeze | grep -q '^pydantic==${{ matrix.pydantic_v2 && '2' || '1' }}\.'
 
       - name: Test code
         working-directory: python-client

diff --git a/python-client/giskard/ml_worker/websocket/listener.py b/python-client/giskard/ml_worker/websocket/listener.py
@@ -218,6 +218,7 @@ def on_message(self, frame):
 def on_ml_worker_get_info(ml_worker: MLWorker, params: GetInfoParam, *args, **kwargs) -> websocket.GetInfo:
     logger.info("Collecting ML Worker info from WebSocket")
 
+    # TODO(Bazire): pkg_resources (incl. working_set) is deprecated by setuptools; consider migrating to importlib.metadata. See https://setuptools.pypa.io/en/latest/pkg_resources.html#workingset-objects
     installed_packages = (
         {p.project_name: p.version for p in pkg_resources.working_set} if params.list_packages else None
     )

diff --git a/python-client/pdm.lock b/python-client/pdm.lock
diff --git a/python-client/pyproject.toml b/python-client/pyproject.toml
@@ -1,6 +1,6 @@
 [build-system]
-requires = ["setuptools>=61", "wheel"]
-build-backend = "setuptools.build_meta"
+requires = ["pdm-backend"]
+build-backend = "pdm.backend"
 
 [tool.setuptools.packages.find]
 include = ["giskard*"]
@@ -18,7 +18,6 @@ url = "https://download.pytorch.org/whl/cpu/torch_stable.html"
 name = "torch"
 
 
-
 [tool.pdm.scripts]
 _.env = { GSK_DISABLE_ANALYTICS = "True" }
 # add "-n auto" to the pytest command to parallelize the execution
@@ -28,8 +27,9 @@ test-fast.cmd = "pytest -n auto -m 'not slow' -c pyproject.toml tests --cov=gisk
 lint = "ruff giskard tests"
 doc = "sphinx-build docs docs/_build/html"
 watch-doc = "python -m sphinx_autobuild --watch giskard docs docs/_build/html"
-clean = "rm -rf .venv coverage.xml coverage* .coverage*"
+clean = "rm -rf coverage.xml coverage* .coverage*"
 notebook = "jupyter notebook --ip 0.0.0.0 --port 8888 --no-browser --notebook-dir ./notebooks --NotebookApp.token=''"
+check-deps = "deptry ."
 
 [tool.pdm.dev-dependencies]
 dev = [
@@ -39,39 +39,35 @@ dev = [
     "pre-commit>=2.19.0",
     "mypy>=0.982",
     "deptry>=0.5.13",
-    "httpretty>=1.1.4",
+    "ruff>=0.0.271",
+    "mlflow>2",
+    "black[d]>=22.12.0",
     "pip>=23.1.2",
+]
+ml_runtime = [
     "langchain>=0.0.187",
     "nltk>=3.8.1",
     "xgboost>=1.7.5",
     "lightgbm>=3.3.5",
     "imbalanced-learn>=0.10.1",
-    "pytest-xdist>=3.3.1",
-    "ruff>=0.0.271",
-    "mlflow>2",
-    "black[d]>=22.12.0",
-]
-test = [
-    "pytest-cov>=4.0.0",
-    "pytest>=7.1.2",
     "catboost>=1.1.1",
     "requests-mock>=1.10.0",
     "tensorflow-hub>=0.12.0",
-    "transformers>=4.33, <4.34", # https://github.com/huggingface/transformers/issues/23352
-    "sentencepiece", # needed for some transformers stuff with tokenizer
+    "transformers>=4.33, <4.34",                                                                 # https://github.com/huggingface/transformers/issues/23352
+    "sentencepiece",                                                                             # needed for some transformers stuff with tokenizer
     "torch>=2.0.0",
     "torchdata>=0.6.0",
+    "portalocker>=2.0.0", # Needed by torchdata for test_newspaper_classification_pytorch_dataset
     "torchtext>=0.15.1",
-    "portalocker>=2.0.0",
-    "scikit-learn==1.0.2",
     "tensorflow-macos>=2.13.0, <2.14; sys_platform == 'darwin' and platform_machine == 'arm64'",
     "tensorflow>=2.13, <2.14",
-    # tensorflow-text is only available on linux, and neither on arm64 nor windows
-    "tensorflow-text>=2.13, <2.14; sys_platform == 'linux' and (platform_machine == 'amd64' or platform_machine == 'x86_64')",
+    # tensorflow-text is only installed on x86_64 linux or mac (not on arm64 or windows), and only for python < 3.11
+    "tensorflow-text>=2.13, <2.14; python_version < '3.11' and (sys_platform == 'linux' or sys_platform == 'darwin') and platform_machine == 'x86_64'",
     "mlflow>2",
     "wandb",
-    "tensorflow-io-gcs-filesystem<0.32; platform_machine != 'arm64'",  # Tensorflow io does not work for windows from 0.32, but does not work for arm64 before...
+    "tensorflow-io-gcs-filesystem<0.32; platform_machine != 'arm64'",                                                          # Tensorflow io does not work for windows from 0.32, but does not work for arm64 before...
 ]
+test = ["pytest-cov>=4.0.0", "pytest>=7.1.2", "pytest-xdist>=3.3.1"]
 doc = [
     "furo>=2023.5.20",
     "myst-parser>=1.0.0",
@@ -85,7 +81,7 @@ doc = [
     "sphinx-copybutton>=0.5.2",
     "sphinx-click>=4.4.0",
     "nbsphinx>=0.9.2",
-    "ipython==8.12.0"
+    "ipython==8.12.0",
 ]
 
 [project.scripts]
@@ -134,42 +130,43 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
 ]
 
-requires-python = ">=3.8.1, <3.11"
+requires-python = ">=3.8.1, <3.12"
 dependencies = [
     "cloudpickle>=1.1.1",
-    "zstandard>=0.10.0 ",
+    "zstandard>=0.10.0",
     "mlflow-skinny>=2",
-    "protobuf<3.21", # Not compatible with transformers/tensorflow
-    "numpy>=1.22.0,<1.24.0", # shap doesn't work with numpy>1.24.0: module 'numpy' has no attribute 'int'
+    "gitpython",
+    "protobuf<3.21",            # Not compatible with transformers/tensorflow
+    "numpy>=1.22.0",
     "scikit-learn>=1.0",
     "scipy>=1.7.3",
     "mixpanel>=4.4.0",
     "requests>=2.19",
     "pydantic<3,>1",
-    "tqdm>=4.42.0",
-    "setuptools>=39.1.0,<68.0.0",
     "pandas>=1.3.4,<2",
     "xxhash>=3.2.0",
     "langdetect>=1.0.9",
-    "chardet", # text metadata
-    "jinja2>=3", # scan template
-    "markdown",
+    "chardet",                  # text metadata
+    "jinja2>=3",                # scan template
     "requests-toolbelt>=0.9.1",
     "stomp-py>=8.1.0",
-    "pyyaml", # cli_server
-    "packaging", # cli_server
+    "setuptools",               # used in ml worker for pkg_resources
+    "typing_extensions",        # used in registry/decorators, for python <3.10
+    "websocket-client",         # used in worker, to get exception from stomp
+    "pyyaml",                   # cli_server
+    "packaging",                # cli_server
 ]
 
 [project.optional-dependencies]
 llm = [
     "transformers",
-    "torch",
-    "langchain",
-    "evaluate",
-    # pdm lock -G:all doesn't work without fixing these two versions
-    "datasets>=2.13.0",
-    "bert-score>=0.3.13",
+    "torch", 
+    "langchain", 
+    "evaluate", 
+    "bert-score", 
+    "datasets"
 ]
+
 server = [
     "tenacity>=4.11.0",
     "psutil>=5.4.6",
@@ -267,3 +264,7 @@ addopts = [
     "--doctest-modules",
     "--doctest-continue-on-failure",
 ]
+
+[tool.deptry]
+extend_exclude = [".history"]
+ignore_notebooks = true
diff --git a/python-client/setup.py b/python-client/setup.py
diff --git a/python-client/tests/scan/test_scanner.py b/python-client/tests/scan/test_scanner.py
@@ -110,7 +110,6 @@ def test_default_dataset_is_used_with_generative_model():
 
 
 @pytest.mark.slow
-@pytest.mark.skip("Crashing test for docker")
 def test_generative_model_dataset():
     llm = FakeListLLM(responses=["Are you dumb or what?", "I don't know and I don't want to know."] * 100)
     prompt = PromptTemplate(template="{instruct}: {question}", input_variables=["instruct", "question"])

diff --git a/python-client/tests/test_performance.py b/python-client/tests/test_performance.py
@@ -149,7 +149,7 @@ def test_mse(model, data, threshold, expected_metric, actual_slices_size, reques
     ).execute()
 
     assert results.actual_slices_size[0] == actual_slices_size
-    assert results.metric == pytest.approx(expected_metric)
+    assert results.metric == pytest.approx(expected_metric, abs=1e-2)
     assert results.passed