Skip to content

Commit

Permalink
Merge branch 'master' into patch/v0.10.4
Browse files Browse the repository at this point in the history
  • Loading branch information
ascillitoe authored Oct 21, 2022
2 parents ace0f51 + 6df9bd4 commit ce23672
Show file tree
Hide file tree
Showing 88 changed files with 3,057 additions and 686 deletions.
35 changes: 25 additions & 10 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,18 @@ on:
branches:
- master
- 'feature/*'
types: [opened, synchronize, reopened, ready_for_review]
types: [ opened, synchronize, reopened, ready_for_review ]
# Trigger workflow once per day
schedule:
- cron: '0 0 * * *'
# Trigger the workflow on manual dispatch
workflow_dispatch:
inputs:
tmate_enabled:
type: boolean
description: 'Enable tmate debugging?'
required: false
default: false


jobs:
Expand All @@ -29,13 +35,13 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
os: [ ubuntu-latest ]
python-version: [ '3.7', '3.8', '3.9', '3.10' ]
include: # Run macos and windows tests on only one python version
- os: windows-latest
python-version: '3.9' # PyTorch doesn't yet have 3.10 support on Windows (https://pytorch.org/get-started/locally/#windows-python)
- os: macos-latest
python-version: '3.10'
- os: windows-latest
python-version: '3.9' # PyTorch doesn't yet have 3.10 support on Windows (https://pytorch.org/get-started/locally/#windows-python)
- os: macos-latest
python-version: '3.10'

steps:
- name: Checkout code
Expand All @@ -51,12 +57,18 @@ jobs:
python -m pip install --upgrade pip setuptools wheel
python -m pip install --upgrade --upgrade-strategy eager -r requirements/dev.txt
python -m pip install --upgrade --upgrade-strategy eager -e .
if [ "$RUNNER_OS" != "Windows" ] && [ ${{ matrix.python }} < '3.10' ]; then # Skip Prophet tests on Windows as installation complex. Skip on Python 3.10 as not supported.
python -m pip install --upgrade --upgrade-strategy eager -e .[prophet]
if [ "$RUNNER_OS" == "Linux" ]; then # Currently, we only support KeOps on Linux.
python -m pip install --upgrade --upgrade-strategy eager -e .[keops]
fi
python -m pip install --upgrade --upgrade-strategy eager -e .[tensorflow,torch]
python -m pip install --upgrade --upgrade-strategy eager -e .[prophet,tensorflow,torch]
python -m pip freeze
- name: Setup tmate session
uses: mxschmitt/action-tmate@v3
if: ${{ github.event_name == 'workflow_dispatch' && inputs.tmate_enabled }}
with:
limit-access-to-actor: true

- name: Lint with flake8
run: |
flake8 alibi_detect
Expand All @@ -67,12 +79,15 @@ jobs:
- name: Test with pytest
run: |
if [ "$RUNNER_OS" == "macOS" ]; then # Avoid numba/OpenMP segfault in CVMDrift (https://github.com/SeldonIO/alibi-detect/issues/648)
export NUMBA_THREADING_LAYER="workqueue"
fi
pytest alibi_detect
- name: Upload coverage to Codecov
if: ${{ success() }}
run: |
codecov
codecov -F ${{ matrix.os }}-${{ matrix.python-version }}
- name: Build Python package
run: |
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/test_all_notebooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,7 @@ jobs:
python -m pip install --upgrade pip setuptools wheel
python -m pip install --upgrade --upgrade-strategy eager -r requirements/dev.txt -r testing/requirements.txt
python -m pip install --upgrade --upgrade-strategy eager -e .
if [ "$RUNNER_OS" != "Windows" ] && [ ${{ matrix.python }} < '3.10' ]; then # Skip Prophet tests on Windows as installation complex. Skip on Python 3.10 as not supported.
python -m pip install --upgrade --upgrade-strategy eager -e .[prophet]
fi
python -m pip install --upgrade --upgrade-strategy eager -e .[torch,tensorflow]
python -m pip install --upgrade --upgrade-strategy eager -e .[prophet,torch,tensorflow]
python -m pip freeze
- name: Run notebooks
Expand Down
13 changes: 5 additions & 8 deletions .github/workflows/test_changed_notebooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@ defaults:
shell: bash # To override PowerShell on Windows

on:
# Trigger the workflow on push or PR to any branch
push:
branches:
- master
paths:
- 'doc/source/examples/**/*.ipynb'
pull_request:
branches:
- master
paths:
- 'doc/source/examples/**/*.ipynb'
# don't trigger for draft PRs
types: [ opened, synchronize, reopened, ready_for_review ]
# Trigger the workflow on manual dispatch
workflow_dispatch:

jobs:
test_changed_notebooks:
Expand Down Expand Up @@ -56,10 +56,7 @@ jobs:
python -m pip install --upgrade pip setuptools wheel
python -m pip install --upgrade --upgrade-strategy eager -r requirements/dev.txt -r testing/requirements.txt
python -m pip install --upgrade --upgrade-strategy eager -e .
if [ "$RUNNER_OS" != "Windows" ] && [ ${{ matrix.python }} < '3.10' ]; then # Skip Prophet tests on Windows as installation complex. Skip on Python 3.10 as not supported.
python -m pip install --upgrade --upgrade-strategy eager -e .[prophet]
fi
python -m pip install --upgrade --upgrade-strategy eager -e .[torch,tensorflow]
python -m pip install --upgrade --upgrade-strategy eager -e .[prophet,torch,tensorflow]
python -m pip freeze
- name: Run notebooks
Expand Down
19 changes: 17 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
# Change Log

## v0.11.0dev
[Full Changelog](https://github.com/SeldonIO/alibi-detect/compare/v0.10.3...master)

### Added
- **New feature** MMD drift detector has been extended with a [KeOps](https://www.kernel-operations.io/keops/index.html) backend to scale and speed up the detector.
See the [documentation](https://docs.seldon.io/projects/alibi-detect/en/latest/cd/methods/mmddrift.html) and [example notebook](https://docs.seldon.io/projects/alibi-detect/en/latest/examples/cd_mmd_keops.html) for more info ([#548](https://github.com/SeldonIO/alibi-detect/pull/548)).
- If a `categories_per_feature` dictionary is not passed to `TabularDrift`, a warning is now raised to inform the user that all features are assumed to be numerical ([#606](https://github.com/SeldonIO/alibi-detect/pull/606)).

### Changed
- Minimum `prophet` version bumped to `1.1.0` (used by `OutlierProphet`). This upgrade removes the dependency on `pystan` as `cmdstanpy` is used instead. This version also comes with pre-built wheels for all major platforms and Python versions, making both installation and testing easier ([#627](https://github.com/SeldonIO/alibi-detect/pull/627)).
- **Breaking change** The configuration field `config_spec` has been removed. In order to load detectors serialized from previous Alibi Detect versions, the field will need to be deleted from the detector's `config.toml` file. However, in any case, serialization compatibility across Alibi Detect versions is not currently guaranteed. ([#641](https://github.com/SeldonIO/alibi-detect/pull/641)).


### Development
- UTF-8 decoding is enforced when `README.md` is opened by `setup.py`. This is to prevent pip install errors on systems with `PYTHONIOENCODING` set to use other encoders ([#605](https://github.com/SeldonIO/alibi-detect/pull/605)).
- Skip specific save/load tests that require downloading remote artefacts if the relevant URI(s) is/are down ([#607](https://github.com/SeldonIO/alibi-detect/pull/607)).

## v0.10.4
## [v0.10.4](https://github.com/SeldonIO/alibi-detect/tree/v0.10.4) (2022-10-21)
[Full Changelog](https://github.com/SeldonIO/alibi-detect/compare/v0.10.3...v0.10.4)
Expand All @@ -8,14 +25,12 @@
- Fixed an incorrect default value for the `alternative` kwarg in the `FETDrift` detector ([#661](https://github.com/SeldonIO/alibi-detect/pull/661)).
- Fixed an issue with `ClassifierDrift` returning incorrect prediction probabilities when `train_size` given ([#662](https://github.com/SeldonIO/alibi-detect/pull/662)).

## v0.10.3
## [v0.10.3](https://github.com/SeldonIO/alibi-detect/tree/v0.10.3) (2022-08-17)
[Full Changelog](https://github.com/SeldonIO/alibi-detect/compare/v0.10.2...v0.10.3)

### Fixed
- Fix to allow `config.toml` files to be loaded when the [meta] field is not present ([#591](https://github.com/SeldonIO/alibi-detect/pull/591)).

## v0.10.2
## [v0.10.2](https://github.com/SeldonIO/alibi-detect/tree/v0.10.2) (2022-08-16)
[Full Changelog](https://github.com/SeldonIO/alibi-detect/compare/v0.10.1...v0.10.2)

Expand Down
44 changes: 40 additions & 4 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,51 @@ options are defined in `setup.cfg`.
## Docstrings
We adhere to the `numpy` style docstrings (https://numpydoc.readthedocs.io/en/stable/format.html)
with the exception of omitting argument types in docstrings in favour of type hints in function
and class signatures. If you're using a `PyCharm`, you can configure this under
`File -> Settings -> Tools -> Python Integrated Tools -> Docstrings`.
and class signatures. If you use an IDE, you may be able to configure it to assist you with writing
docstrings in the correct format. For `PyCharm`, you can configure this under
`File -> Settings -> Tools -> Python Integrated Tools -> Docstrings`. For `Visual Studio Code`, you can obtain
docstring generator extensions from the [VisualStudio Marketplace](https://marketplace.visualstudio.com/).

When documenting Python classes, we adhere to the convention of including docstrings in their `__init__` method,
rather than as a class level docstring. Docstrings should only be included at the class-level if a class does
not possess an `__init__` method, for example because it is a static class.

## Building documentation
We use `sphinx` for building documentation. You can call `make build_docs` from the project root,
the docs will be built under `doc/_build/html`.

## CI
All PRs trigger a GitHub Actions build to run linting, type checking, tests, and build docs.
All PRs trigger a GitHub Actions build to run linting, type checking, tests, and build docs. The status of each
GitHub Action can be viewed on the [actions page](https://github.com/SeldonIO/alibi-detect/actions).

### Debugging via CI

For various reasons, CI runs might occasionally fail. They can often be debugged locally, but sometimes it is helpful
to debug them in the exact environment seen during CI. For this purpose, there is the facility to ssh directly into
the CI GitHub Action runner.

#### Instructions

1. Go to the "CI" workflows section on the Alibi Detect GitHub Actions page.

2. Click on "Run Workflow", and select the "Enable tmate debugging" toggle.

3. Select the workflow once it starts, and then select the build of interest (e.g. `ubuntu-latest, 3.10`).

4. Once the workflow reaches the `Setup tmate session` step, click on the toggle to expand it.

5. Copy the `ssh` command that is printed in the workflow log, e.g. `ssh [email protected]`.

6. Run the ssh command locally. Assuming your ssh keys are properly set up for GitHub, you should now be inside the GitHub Action runner.

7. The tmate session is opened after the Python and pip installs are completed, so you should be ready to run `alibi-detect` and debug as required.

#### Additional notes

- If the registered public SSH key is not your default private SSH key, you will need to specify the path manually, like so: `ssh -i <path-to-key> <tmate-connection-string>`.
- Once you have finished debugging, you can continue the workflow (i.e. let the full build CI run) by running `touch continue` whilst in the root directory (`~/work/alibi-detect/alibi-detect`). This will close the tmate session.
- This new capability is currently temperamental on the `MacOS` build due to [this issue](https://github.com/mxschmitt/action-tmate/issues/69). If the MacOS build fails, all the builds fail. If this happens, it is
recommended to retrigger only the workflow build of interest e.g. `ubuntu-latest, 3.10`, and then follow the instructions above from step 3.

## Optional Dependencies

Expand Down Expand Up @@ -104,4 +140,4 @@ replaced with an instance of the MissingDependency class. For example:
...
```
- Developers can use `make repl tox-env=<tox-env-name>` to run a python REPL with the specified optional dependency
installed. This is to allow manual testing.
installed. This is to allow manual testing.
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ The package, `alibi-detect` can be installed from:
pip install git+https://github.com/SeldonIO/alibi-detect.git
```

- To install with the tensorflow backend:
- To install with the TensorFlow backend:
```bash
pip install alibi-detect[tensorflow]
```
Expand All @@ -89,6 +89,11 @@ The package, `alibi-detect` can be installed from:
pip install alibi-detect[torch]
```

- To install with the KeOps backend:
```bash
pip install alibi-detect[keops]
```

- To use the `Prophet` time series outlier detector:

```bash
Expand Down Expand Up @@ -181,8 +186,8 @@ The following tables show the advised use cases for each algorithm. The column *

#### TensorFlow and PyTorch support

The drift detectors support TensorFlow and PyTorch backends. Alibi Detect does not install these as default. See the
[installation options](#installation-and-usage) for more details.
The drift detectors support TensorFlow, PyTorch and (where applicable) [KeOps](https://www.kernel-operations.io/keops/index.html) backends.
However, Alibi Detect does not install these by default. See the [installation options](#installation-and-usage) for more details.

```python
from alibi_detect.cd import MMDDrift
Expand All @@ -198,6 +203,13 @@ cd = MMDDrift(x_ref, backend='pytorch', p_val=.05)
preds = cd.predict(x)
```

Or in KeOps:

```python
cd = MMDDrift(x_ref, backend='keops', p_val=.05)
preds = cd.predict(x)
```

#### Built-in preprocessing steps

Alibi Detect also comes with various preprocessing steps such as randomly initialized encoders, pretrained text
Expand Down
28 changes: 17 additions & 11 deletions alibi_detect/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
from typing import Dict, Any, Optional
from typing_extensions import Protocol, runtime_checkable
from alibi_detect.version import __version__, __config_spec__
from alibi_detect.version import __version__


DEFAULT_META = {
Expand Down Expand Up @@ -119,16 +119,14 @@ def get_config(self) -> dict: # TODO - move to BaseDetector once config save/lo
if self.config is not None:
# Get config (stored in top-level self)
cfg = self.config
# Get low-level nested detector (if needed)
detector = self._detector if hasattr(self, '_detector') else self # type: ignore[attr-defined]
detector = detector._detector if hasattr(detector, '_detector') else detector # type: ignore[attr-defined]
# Add large artefacts back to config
for key in LARGE_ARTEFACTS:
if key in cfg: # self.config is validated, therefore if a key is not in cfg, it isn't valid to insert
cfg[key] = getattr(detector, key)
if key in cfg and hasattr(self._nested_detector, key):
cfg[key] = getattr(self._nested_detector, key)
# Set x_ref_preprocessed flag
preprocess_at_init = getattr(detector, 'preprocess_at_init', True) # If no preprocess_at_init, always true!
cfg['x_ref_preprocessed'] = preprocess_at_init and detector.preprocess_fn is not None
# If no preprocess_at_init, always true!
preprocess_at_init = getattr(self._nested_detector, 'preprocess_at_init', True)
cfg['x_ref_preprocessed'] = preprocess_at_init and self._nested_detector.preprocess_fn is not None
return cfg
else:
raise NotImplementedError('Getting a config (or saving via a config file) is not yet implemented for this'
Expand Down Expand Up @@ -175,7 +173,6 @@ def _set_config(self, inputs): # TODO - move to BaseDetector once config save/l
'name': name,
'meta': {
'version': __version__,
'config_spec': __config_spec__,
}
}

Expand All @@ -185,17 +182,26 @@ def _set_config(self, inputs): # TODO - move to BaseDetector once config save/l

# Overwrite any large artefacts with None to save memory. They'll be added back by get_config()
for key in LARGE_ARTEFACTS:
if key in inputs:
if key in inputs and hasattr(self._nested_detector, key):
inputs[key] = None

self.config.update(inputs)

@property
def _nested_detector(self):
"""
The low-level nested detector.
"""
detector = self._detector if hasattr(self, '_detector') else self # type: ignore[attr-defined]
detector = detector._detector if hasattr(detector, '_detector') else detector # type: ignore[attr-defined]
return detector


@runtime_checkable
class Detector(Protocol):
"""Type Protocol for all detectors.
Used for typing legacy save and load functionality in `alibi_detect.saving.tensorflow._saving.py`.
Used for typing legacy save and load functionality in `alibi_detect.saving._tensorflow.saving.py`.
Note:
This exists to distinguish between detectors with and without support for config saving and loading. Once all
Expand Down
5 changes: 0 additions & 5 deletions alibi_detect/cd/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,11 +601,6 @@ def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[np.ndarray, np.ndarray
else:
return self.x_ref, x # type: ignore[return-value]

@abstractmethod
def kernel_matrix(self, x: Union['torch.Tensor', 'tf.Tensor'], y: Union['torch.Tensor', 'tf.Tensor']) \
-> Union['torch.Tensor', 'tf.Tensor']:
pass

@abstractmethod
def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, float]:
pass
Expand Down
12 changes: 6 additions & 6 deletions alibi_detect/cd/classifier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
from typing import Callable, Dict, Optional, Union
from alibi_detect.utils.frameworks import has_pytorch, has_tensorflow, \
BackendValidator
BackendValidator, Framework
from alibi_detect.base import DriftConfigMixin


Expand Down Expand Up @@ -149,9 +149,9 @@ def __init__(

backend = backend.lower()
BackendValidator(
backend_options={'tensorflow': ['tensorflow'],
'pytorch': ['pytorch'],
'sklearn': ['sklearn']},
backend_options={Framework.TENSORFLOW: [Framework.TENSORFLOW],
Framework.PYTORCH: [Framework.PYTORCH],
Framework.SKLEARN: [Framework.SKLEARN]},
construct_name=self.__class__.__name__
).verify_backend(backend)

Expand All @@ -162,13 +162,13 @@ def __init__(
pop_kwargs += ['optimizer']
[kwargs.pop(k, None) for k in pop_kwargs]

if backend == 'tensorflow':
if backend == Framework.TENSORFLOW:
pop_kwargs = ['device', 'dataloader', 'use_calibration', 'calibration_kwargs', 'use_oob']
[kwargs.pop(k, None) for k in pop_kwargs]
if dataset is None:
kwargs.update({'dataset': TFDataset})
self._detector = ClassifierDriftTF(*args, **kwargs) # type: ignore
elif backend == 'pytorch':
elif backend == Framework.PYTORCH:
pop_kwargs = ['use_calibration', 'calibration_kwargs', 'use_oob']
[kwargs.pop(k, None) for k in pop_kwargs]
if dataset is None:
Expand Down
Loading

0 comments on commit ce23672

Please sign in to comment.