From 13c42feec28ec6de9d09f19eb8d969edcd20e49c Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Wed, 9 Nov 2022 14:35:02 +0000 Subject: [PATCH 1/9] keops save/load support --- alibi_detect/saving/loading.py | 10 ++--- alibi_detect/saving/registry.py | 10 ++++- alibi_detect/saving/saving.py | 10 +---- alibi_detect/saving/schemas.py | 2 +- alibi_detect/saving/tests/models.py | 32 ++++++++++------ alibi_detect/saving/tests/test_saving.py | 48 ++++++++++++++++-------- alibi_detect/saving/validators.py | 2 +- alibi_detect/utils/keops/kernels.py | 46 ++++++++++++++++++++--- 8 files changed, 111 insertions(+), 49 deletions(-) diff --git a/alibi_detect/saving/loading.py b/alibi_detect/saving/loading.py index 977da1ac3..0feabb7cc 100644 --- a/alibi_detect/saving/loading.py +++ b/alibi_detect/saving/loading.py @@ -15,6 +15,7 @@ load_model_tf, load_optimizer_tf, prep_model_and_emb_tf, get_tf_dtype from alibi_detect.saving._pytorch import load_embedding_pt, load_kernel_config_pt, load_model_pt, \ load_optimizer_pt, prep_model_and_emb_pt, get_pt_dtype +from alibi_detect.saving._keops import load_kernel_config_ke from alibi_detect.saving._sklearn import load_model_sk from alibi_detect.saving.validate import validate_config from alibi_detect.base import Detector, ConfigurableDetector @@ -135,11 +136,6 @@ def _load_detector_config(filepath: Union[str, os.PathLike]) -> ConfigurableDete cfg = validate_config(cfg, resolved=True) logger.info('Validated resolved config.') - # Backend - backend = cfg.get('backend') - if backend is not None and backend.lower() not in (Framework.TENSORFLOW, Framework.PYTORCH, Framework.SKLEARN): - raise NotImplementedError('Loading detectors with keops backend is not yet supported.') - # Init detector from config logger.info('Instantiating detector.') detector = _init_detector(cfg) @@ -186,8 +182,10 @@ def _load_kernel_config(cfg: dict, backend: str = Framework.TENSORFLOW) -> Calla """ if backend == Framework.TENSORFLOW: kernel = load_kernel_config_tf(cfg) - else: + elif backend == Framework.PYTORCH: kernel = load_kernel_config_pt(cfg) + else: # backend=='keops' + kernel = load_kernel_config_ke(cfg) return kernel diff --git a/alibi_detect/saving/registry.py b/alibi_detect/saving/registry.py index b1ad20303..c0393d327 100644 --- a/alibi_detect/saving/registry.py +++ b/alibi_detect/saving/registry.py @@ -35,7 +35,7 @@ def my_function(x: np.ndarray) -> np.ndarray: import catalogue -from alibi_detect.utils.frameworks import has_pytorch, has_tensorflow +from alibi_detect.utils.frameworks import has_pytorch, has_tensorflow, has_keops if has_tensorflow: from alibi_detect.cd.tensorflow import \ @@ -52,6 +52,10 @@ def my_function(x: np.ndarray) -> np.ndarray: GaussianRBF as GaussianRBF_torch, sigma_median as sigma_median_torch from alibi_detect.cd.pytorch.context_aware import _sigma_median_diag as _sigma_median_diag_torch +if has_keops: + from alibi_detect.utils.keops.kernels import \ + GaussianRBF as GaussianRBF_keops, sigma_mean as sigma_mean_keops + # Create registry registry = catalogue.create("alibi_detect", "registry") @@ -68,3 +72,7 @@ def my_function(x: np.ndarray) -> np.ndarray: registry.register('utils.pytorch.kernels.sigma_median', func=sigma_median_torch) registry.register('cd.pytorch.context_aware._sigma_median_diag', func=_sigma_median_diag_torch) registry.register('cd.pytorch.preprocess.preprocess_drift', func=preprocess_drift_torch) + +if has_keops: + registry.register('utils.keops.kernels.GaussianRBF', func=GaussianRBF_keops) + 
registry.register('utils.keops.kernels.sigma_mean', func=sigma_mean_keops)

diff --git a/alibi_detect/saving/saving.py b/alibi_detect/saving/saving.py
index 9648e404f..f0e80c1b6 100644
--- a/alibi_detect/saving/saving.py
+++ b/alibi_detect/saving/saving.py
@@ -15,7 +15,6 @@ from alibi_detect.saving.registry import registry
 from alibi_detect.utils._types import supported_models_all, supported_models_tf, supported_models_torch, \
     supported_models_sklearn
-from alibi_detect.utils.frameworks import Framework
 from alibi_detect.base import Detector, ConfigurableDetector
 from alibi_detect.saving._tensorflow import save_detector_legacy, save_model_config_tf, save_optimizer_config_tf
 from alibi_detect.saving._pytorch import save_model_config_pt
@@ -53,9 +52,6 @@ def save_detector(
     if legacy:
         warnings.warn('The `legacy` option will be removed in a future version.', DeprecationWarning)

-    if 'backend' in list(detector.meta.keys()) and detector.meta['backend'] == Framework.KEOPS:
-        raise NotImplementedError('Saving detectors with keops backend is not yet supported.')
-
     # TODO: Replace .__args__ w/ typing.get_args() once Python 3.7 dropped (and remove type ignore below)
     detector_name = detector.__class__.__name__
     if detector_name not in [detector for detector in VALID_DETECTORS]:
@@ -129,11 +125,7 @@ def _save_detector_config(detector: ConfigurableDetector, filepath: Union[str, o
     filepath
         File path to save serialized artefacts to.
     """
-    # Get backend, input_shape and detector_name
-    backend = detector.meta.get('backend')
-    if backend not in (None, Framework.TENSORFLOW, Framework.PYTORCH, Framework.SKLEARN):
-        raise NotImplementedError("Currently, saving is only supported with backend='tensorflow', 'pytorch', and "
-                                  "'sklearn'.")
+    # detector name
     detector_name = detector.__class__.__name__

     # Process file paths
diff --git a/alibi_detect/saving/schemas.py b/alibi_detect/saving/schemas.py
index 68a902929..0446ccb40 100644
--- a/alibi_detect/saving/schemas.py
+++ b/alibi_detect/saving/schemas.py
@@ -351,7 +351,7 @@ class KernelConfig(CustomBaseModelWithKwargs):
     "A string referencing a filepath to a serialized kernel in `.dill` format, or an object registry reference."

     # Below kwargs are only passed if kernel == @GaussianRBF
-    flavour: Literal['tensorflow', 'pytorch']
+    flavour: Literal['tensorflow', 'pytorch', 'keops']
     """
     Whether the kernel is a `tensorflow`, `pytorch` or `keops` kernel.
""" diff --git a/alibi_detect/saving/tests/models.py b/alibi_detect/saving/tests/models.py index 5a1b28c0e..0d4bd7bde 100644 --- a/alibi_detect/saving/tests/models.py +++ b/alibi_detect/saving/tests/models.py @@ -21,6 +21,8 @@ from alibi_detect.utils.pytorch.kernels import DeepKernel as DeepKernel_pt from alibi_detect.utils.tensorflow.kernels import GaussianRBF as GaussianRBF_tf from alibi_detect.utils.tensorflow.kernels import DeepKernel as DeepKernel_tf +from alibi_detect.utils.keops.kernels import GaussianRBF as GaussianRBF_ke +from alibi_detect.utils.keops.kernels import DeepKernel as DeepKernel_ke from alibi_detect.models.pytorch import TransformerEmbedding as TransformerEmbedding_pt from alibi_detect.models.tensorflow import TransformerEmbedding as TransformerEmbedding_tf from alibi_detect.cd.pytorch import HiddenOutput as HiddenOutput_pt @@ -46,7 +48,7 @@ def encoder_model(backend, current_cases): tf.keras.layers.Dense(LATENT_DIM, activation=None) ] ) - elif backend == 'pytorch': + elif backend in ('pytorch', 'keops'): model = nn.Sequential(nn.Linear(input_dim, 5), nn.ReLU(), nn.Linear(5, LATENT_DIM)) @@ -74,7 +76,7 @@ def encoder_dropout_model(backend, current_cases): tf.keras.layers.Dense(LATENT_DIM, activation=None) ] ) - elif backend == 'pytorch': + elif backend in ('pytorch', 'keops'): model = nn.Sequential(nn.Linear(input_dim, 5), nn.ReLU(), nn.Dropout(0.0), # 0.0 to ensure determinism @@ -115,8 +117,12 @@ def kernel(request, backend): if sigma is not None and not isinstance(sigma, torch.Tensor): sigma = torch.tensor(sigma) kernel = GaussianRBF_pt(sigma=sigma, **kernel_cfg) + elif backend == 'keops': + if sigma is not None and not isinstance(sigma, torch.Tensor): + sigma = torch.tensor(sigma) + kernel = GaussianRBF_ke(sigma=sigma, **kernel_cfg) else: - pytest.skip('`kernel` only implemented for tensorflow and pytorch.') + pytest.skip('`kernel` only implemented for tensorflow, pytorch and keops.') return kernel @@ -129,8 +135,8 @@ def optimizer(request, backend): the optimizer is a `torch.optim.Optimizer` class (NOT instantiated). 
""" optimizer = request.param # Get parametrized setting - if backend not in ('tensorflow', 'pytorch'): - pytest.skip('`optimizer` only implemented for tensorflow and pytorch.') + if backend not in ('tensorflow', 'pytorch', 'keops'): + pytest.skip('`optimizer` only implemented for tensorflow, pytorch and keops.') if isinstance(optimizer, str): module = 'tensorflow.keras.optimizers' if backend == 'tensorflow' else 'torch.optim' try: @@ -163,6 +169,10 @@ def deep_kernel(request, backend, encoder_model): kernel_a = GaussianRBF_pt(**kernel_a) if isinstance(kernel_a, dict) else kernel_a kernel_b = GaussianRBF_pt(**kernel_b) if isinstance(kernel_b, dict) else kernel_b deep_kernel = DeepKernel_pt(proj, kernel_a=kernel_a, kernel_b=kernel_b, eps=eps) + elif backend == 'keops': + kernel_a = GaussianRBF_ke(**kernel_a) if isinstance(kernel_a, dict) else kernel_a + kernel_b = GaussianRBF_ke(**kernel_b) if isinstance(kernel_b, dict) else kernel_b + deep_kernel = DeepKernel_ke(proj, kernel_a=kernel_a, kernel_b=kernel_b, eps=eps) else: pytest.skip('`deep_kernel` only implemented for tensorflow and pytorch.') return deep_kernel @@ -182,13 +192,13 @@ def classifier_model(backend, current_cases): tf.keras.layers.Dense(2, activation=tf.nn.softmax), ] ) - elif backend == 'pytorch': + elif backend in ('pytorch', 'keops'): model = nn.Sequential(nn.Linear(input_dim, 2), nn.Softmax(1)) elif backend == 'sklearn': model = RandomForestClassifier() else: - pytest.skip('`classifier_model` only implemented for tensorflow, pytorch, and sklearn.') + pytest.skip('`classifier_model` only implemented for tensorflow, pytorch, keops and sklearn.') return model @@ -259,12 +269,12 @@ def preprocess_nlp(embedding, tokenizer, max_len, backend): if backend == 'tensorflow': preprocess_fn = partial(preprocess_drift_tf, model=embedding, tokenizer=tokenizer, max_len=max_len, preprocess_batch_fn=preprocess_simple) - elif backend == 'pytorch': + elif backend in ('pytorch', 'keops'): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') preprocess_fn = partial(preprocess_drift_pt, model=embedding, tokenizer=tokenizer, max_len=max_len, preprocess_batch_fn=preprocess_simple, device=device) else: - pytest.skip('`preprocess_nlp` only implemented for tensorflow and pytorch.') + pytest.skip('`preprocess_nlp` only implemented for tensorflow, pytorch and keops.') return preprocess_fn @@ -279,10 +289,10 @@ def preprocess_hiddenoutput(classifier_model, current_cases, backend): if backend == 'tensorflow': model = HiddenOutput_tf(classifier_model, layer=-1, input_shape=(None, input_dim)) preprocess_fn = partial(preprocess_drift_tf, model=model) - elif backend == 'pytorch': + elif backend in ('pytorch', 'keops'): model = HiddenOutput_pt(classifier_model, layer=-1) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') preprocess_fn = partial(preprocess_drift_pt, model=model, device=device) else: - pytest.skip('`preprocess_hiddenoutput` only implemented for tensorflow and pytorch.') + pytest.skip('`preprocess_hiddenoutput` only implemented for tensorflow, pytorch and keops.') return preprocess_fn diff --git a/alibi_detect/saving/tests/test_saving.py b/alibi_detect/saving/tests/test_saving.py index 0ffa333c8..aa3c9facb 100644 --- a/alibi_detect/saving/tests/test_saving.py +++ b/alibi_detect/saving/tests/test_saving.py @@ -17,6 +17,7 @@ import tensorflow as tf import torch import torch.nn as nn +from pykeops.torch import LazyTensor from .datasets import BinData, CategoricalData, ContinuousData, MixedData, TextData from 
.models import (encoder_model, preprocess_custom, preprocess_hiddenoutput, preprocess_simple, # noqa: F401 @@ -45,13 +46,14 @@ from alibi_detect.saving.schemas import DeepKernelConfig, KernelConfig, ModelConfig, PreprocessConfig from alibi_detect.utils.pytorch.kernels import DeepKernel as DeepKernel_pt from alibi_detect.utils.tensorflow.kernels import DeepKernel as DeepKernel_tf +from alibi_detect.utils.keops.kernels import DeepKernel as DeepKernel_ke if version.parse(scipy.__version__) >= version.parse('1.7.0'): from alibi_detect.cd import CVMDrift # TODO: We currently parametrize encoder_model etc (in models.py) with backend, so the same flavour of # preprocessing is used as the detector backend. In the future we could decouple this in tests. -backend = param_fixture("backend", ['tensorflow', 'pytorch', 'sklearn']) +backend = param_fixture("backend", ['tensorflow', 'pytorch', 'sklearn', 'keops']) P_VAL = 0.05 ERT = 10 N_PERMUTATIONS = 10 @@ -493,8 +495,10 @@ def test_save_learnedkernel(data, deep_kernel, backend, tmp_path, seed): # noqa assert isinstance(cd_load._detector.train_kwargs, dict) if backend == 'tensorflow': assert isinstance(cd_load._detector.kernel, DeepKernel_tf) - else: + elif backend == 'pytorch': assert isinstance(cd_load._detector.kernel, DeepKernel_pt) + else: # backend == keops + assert isinstance(cd_load._detector.kernel, DeepKernel_ke) # TODO: Not yet deterministic # assert preds['data']['distance'] == pytest.approx(preds_load['data']['distance'], abs=1e-6) # assert preds['data']['p_val'] == pytest.approx(preds_load['data']['p_val'], abs=1e-6) @@ -882,16 +886,22 @@ def test_save_kernel(kernel, backend, tmp_path): # noqa: F811 kernel_loaded = resolve_config(cfg, tmp_path)['kernel'] # Call kernels - X = np.random.standard_normal((10, 1)) + if backend == 'tensorflow': + X = tf.random.normal((10, 1), dtype=tf.float32) + elif backend == 'pytorch': + X = torch.randn((10, 1), dtype=torch.float32) + else: # backend == 'keops' + X = torch.randn((10, 1), dtype=torch.float32) + X = LazyTensor(X[None, :]) kernel(X, X) kernel_loaded(X, X) # Final checks assert type(kernel_loaded) == type(kernel) - if backend == 'pytorch': - np.testing.assert_array_almost_equal(kernel_loaded.sigma.detach().numpy(), kernel.sigma.detach().numpy(), 5) - else: + if backend == 'tensorflow': np.testing.assert_array_almost_equal(np.array(kernel_loaded.sigma), np.array(kernel.sigma), 5) + else: + np.testing.assert_array_almost_equal(kernel_loaded.sigma.detach().numpy(), kernel.sigma.detach().numpy(), 5) assert kernel_loaded.trainable == kernel.trainable assert kernel_loaded.init_sigma_fn == kernel.init_sigma_fn @@ -910,7 +920,14 @@ def test_save_deepkernel(data, deep_kernel, backend, tmp_path): # noqa: F811 Kernels are saved and then loaded, with assertions to check equivalence. 
""" # Get data dim - X, _ = data + if backend == 'tensorflow': + X = tf.random.normal((10, 1), dtype=tf.float32) + elif backend == 'pytorch': + X = torch.randn((10, 1), dtype=torch.float32) + else: # backend == 'keops' + X = torch.randn((10, 1), dtype=torch.float32) + X = LazyTensor(X[None, :]) +# X, _ = data input_shape = (X.shape[1],) # Save kernel to config @@ -937,10 +954,10 @@ def test_save_deepkernel(data, deep_kernel, backend, tmp_path): # noqa: F811 # Final checks assert isinstance(kernel_loaded.proj, (torch.nn.Module, tf.keras.Model)) - if backend == 'pytorch': - assert pytest.approx(deep_kernel.eps.detach().numpy(), abs=1e-4) == kernel_loaded.eps.detach().numpy() - else: + if backend == 'tensorflow': assert pytest.approx(deep_kernel.eps.numpy(), abs=1e-4) == kernel_loaded.eps.numpy() + else: + assert pytest.approx(deep_kernel.eps.detach().numpy(), abs=1e-4) == kernel_loaded.eps.detach().numpy() assert kernel_loaded.kernel_a.sigma == deep_kernel.kernel_a.sigma assert kernel_loaded.kernel_b.sigma == deep_kernel.kernel_b.sigma @@ -955,6 +972,7 @@ def test_save_preprocess(data, preprocess_fn, tmp_path, backend): Note: _save_model_config, _save_embedding_config, _save_tokenizer_config, _load_model_config, _load_embedding_config, _load_tokenizer_config and _prep_model_and_embedding are all well covered by this test. """ + registry_str = 'tensorflow' if backend == 'tensorflow' else 'pytorch' # Save preprocess_fn to config filepath = tmp_path X_ref, X_h0 = data @@ -962,17 +980,16 @@ def test_save_preprocess(data, preprocess_fn, tmp_path, backend): cfg_preprocess = _save_preprocess_config(preprocess_fn, input_shape=input_shape, filepath=filepath) cfg_preprocess = _path2str(cfg_preprocess) cfg_preprocess = PreprocessConfig(**cfg_preprocess).dict() # pydantic validation - assert cfg_preprocess['src'] == '@cd.' + backend + '.preprocess.preprocess_drift' + assert cfg_preprocess['src'] == '@cd.' + registry_str + '.preprocess.preprocess_drift' assert cfg_preprocess['model']['src'] == 'preprocess_fn/model' # TODO - check layer details here once implemented - # Resolve and load preprocess config cfg = {'preprocess_fn': cfg_preprocess, 'backend': backend} preprocess_fn_load = resolve_config(cfg, tmp_path)['preprocess_fn'] # tests _load_preprocess_config implicitly if backend == 'tensorflow': assert preprocess_fn_load.func.__name__ == 'preprocess_drift' assert isinstance(preprocess_fn_load.keywords['model'], tf.keras.Model) - elif backend == 'pytorch': + else: # pytorch and keops backend assert preprocess_fn_load.func.__name__ == 'preprocess_drift' assert isinstance(preprocess_fn_load.keywords['model'], nn.Module) @@ -986,6 +1003,7 @@ def test_save_preprocess_nlp(data, preprocess_fn, tmp_path, backend): Note: _save_model_config, _save_embedding_config, _save_tokenizer_config, _load_model_config, _load_embedding_config, _load_tokenizer_config and _prep_model_and_embedding are all covered by this test. """ + registry_str = 'tensorflow' if backend == 'tensorflow' else 'pytorch' # Save preprocess_fn to config filepath = tmp_path cfg_preprocess = _save_preprocess_config(preprocess_fn, @@ -993,7 +1011,7 @@ def test_save_preprocess_nlp(data, preprocess_fn, tmp_path, backend): filepath=filepath) cfg_preprocess = _path2str(cfg_preprocess) cfg_preprocess = PreprocessConfig(**cfg_preprocess).dict() # pydantic validation - assert cfg_preprocess['src'] == '@cd.' + backend + '.preprocess.preprocess_drift' + assert cfg_preprocess['src'] == '@cd.' 
+ registry_str + '.preprocess.preprocess_drift'
     assert cfg_preprocess['embedding']['src'] == 'preprocess_fn/embedding'
     assert cfg_preprocess['tokenizer']['src'] == 'preprocess_fn/tokenizer'

@@ -1014,7 +1032,7 @@ def test_save_preprocess_nlp(data, preprocess_fn, tmp_path, backend):
     if backend == 'tensorflow':
         emb = preprocess_fn.keywords['model'].encoder.layers[0]
         emb_load = preprocess_fn_load.keywords['model'].encoder.layers[0]
-    elif backend == 'pytorch':
+    else:  # pytorch and keops backends
         emb = list(preprocess_fn.keywords['model'].encoder.children())[0]
         emb_load = list(preprocess_fn_load.keywords['model'].encoder.children())[0]
     assert isinstance(emb_load.model, type(emb.model))
diff --git a/alibi_detect/saving/validators.py b/alibi_detect/saving/validators.py
index eaf0983e2..37be568be 100644
--- a/alibi_detect/saving/validators.py
+++ b/alibi_detect/saving/validators.py
@@ -77,7 +77,7 @@ def coerce_2_tensor(value: Union[float, List[float]], values: dict):
         raise ValueError('`coerce_2_tensor` failed since no framework identified.')
     elif framework == Framework.TENSORFLOW and has_tensorflow:
         return tf.convert_to_tensor(value)
-    elif framework == Framework.PYTORCH and has_pytorch:
+    elif (framework == Framework.PYTORCH and has_pytorch) or (framework == Framework.KEOPS and has_keops):
         return torch.tensor(value)
     else:
         # Error should not be raised since `flavour` should have already been validated.
diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py
index 7da7a3ee9..422a67076 100644
--- a/alibi_detect/utils/keops/kernels.py
+++ b/alibi_detect/utils/keops/kernels.py
@@ -2,6 +2,7 @@
 import torch
 import torch.nn as nn
 from typing import Callable, Optional, Union
+from alibi_detect.utils.frameworks import Framework


 def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor, n_min: int = 100) -> torch.Tensor:
@@ -82,6 +83,7 @@ def __init__(
         """
         super().__init__()
         init_sigma_fn = sigma_mean if init_sigma_fn is None else init_sigma_fn
+        self.config = {'sigma': sigma, 'trainable': trainable, 'init_sigma_fn': init_sigma_fn}
         if sigma is None:
             self.log_sigma = nn.Parameter(torch.empty(1), requires_grad=trainable)
             self.init_required = True
@@ -115,13 +117,36 @@ def forward(self, x: LazyTensor, y: LazyTensor, infer_sigma: bool = False) -> La
             kernel_mat = kernel_mat.sum(-1) / len(self.sigma)
         return kernel_mat

+    def get_config(self) -> dict:
+        """
+        Returns a serializable config dict (excluding the init_sigma_fn, which is serialized in alibi_detect.saving).
+        """
+        cfg = self.config.copy()
+        if isinstance(cfg['sigma'], torch.Tensor):
+            cfg['sigma'] = cfg['sigma'].detach().cpu().numpy().tolist()
+        cfg.update({'flavour': Framework.KEOPS.value})
+        return cfg
+
+    @classmethod
+    def from_config(cls, config):
+        """
+        Instantiates a kernel from a config dictionary.
+
+        Parameters
+        ----------
+        config
+            A kernel config dictionary.
+        """
+        config.pop('flavour')
+        return cls(**config)
+

 class DeepKernel(nn.Module):
     def __init__(
         self,
         proj: nn.Module,
-        kernel_a: nn.Module = GaussianRBF(trainable=True),
-        kernel_b: Optional[nn.Module] = GaussianRBF(trainable=True),
+        kernel_a: Union[nn.Module, str] = 'rbf',
+        kernel_b: Optional[Union[nn.Module, str]] = 'rbf',
         eps: Union[float, str] = 'trainable'
     ) -> None:
         """
         Computes similarities as k(x,y) = (1-eps)*k_a(proj(x), proj(y)) + eps*k_b(x,y).
@@ -149,9 +174,13 @@ def __init__(
         either specified or set to 'trainable'. Only relevant if kernel_b is not None.
""" super().__init__() - - self.kernel_a = kernel_a - self.kernel_b = kernel_b + self.config = {'proj': proj, 'kernel_a': kernel_a, 'kernel_b': kernel_b, 'eps': eps} + if kernel_a == 'rbf': + kernel_a = GaussianRBF(trainable=True) + if kernel_b == 'rbf': + kernel_b = GaussianRBF(trainable=True) + self.kernel_a: Callable = kernel_a # type: ignore[assignment] + self.kernel_b: Callable = kernel_b # type: ignore[assignment] self.proj = proj if kernel_b is not None: self._init_eps(eps) @@ -176,3 +205,10 @@ def forward(self, x_proj: LazyTensor, y_proj: LazyTensor, x: Optional[LazyTensor if self.kernel_b is not None: similarity = (1-self.eps)*similarity + self.eps*self.kernel_b(x, y) return similarity + + def get_config(self) -> dict: + return self.config.copy() + + @classmethod + def from_config(cls, config): + return cls(**config) From 2e7daeaa36f94335325eb344e7a736a51cb2c00d Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Wed, 9 Nov 2022 14:37:35 +0000 Subject: [PATCH 2/9] Add missing saving/_keops submodule --- alibi_detect/saving/_keops/__init__.py | 9 +++++++ alibi_detect/saving/_keops/loading.py | 37 ++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 alibi_detect/saving/_keops/__init__.py create mode 100644 alibi_detect/saving/_keops/loading.py diff --git a/alibi_detect/saving/_keops/__init__.py b/alibi_detect/saving/_keops/__init__.py new file mode 100644 index 000000000..1c8b939dd --- /dev/null +++ b/alibi_detect/saving/_keops/__init__.py @@ -0,0 +1,9 @@ +from alibi_detect.utils.missing_optional_dependency import import_optional + +load_kernel_config_ke = import_optional( + 'alibi_detect.saving._keops.loading', + names=['load_kernel_config']) + +__all__ = [ + "load_kernel_config_ke", +] diff --git a/alibi_detect/saving/_keops/loading.py b/alibi_detect/saving/_keops/loading.py new file mode 100644 index 000000000..16dd85cdc --- /dev/null +++ b/alibi_detect/saving/_keops/loading.py @@ -0,0 +1,37 @@ +from typing import Callable +from alibi_detect.utils.keops.kernels import DeepKernel + + +def load_kernel_config(cfg: dict) -> Callable: + """ + Loads a kernel from a kernel config dict. + + Parameters + ---------- + cfg + A kernel config dict. (see pydantic schema's). + + Returns + ------- + The kernel. + """ + if 'src' in cfg: # Standard kernel config + kernel = cfg.pop('src') + if hasattr(kernel, 'from_config'): + kernel = kernel.from_config(cfg) + + elif 'proj' in cfg: # DeepKernel config + # Kernel a + kernel_a = cfg['kernel_a'] + kernel_b = cfg['kernel_b'] + if kernel_a != 'rbf': + cfg['kernel_a'] = load_kernel_config(kernel_a) + if kernel_b != 'rbf': + cfg['kernel_b'] = load_kernel_config(kernel_b) + # Assemble deep kernel + kernel = DeepKernel.from_config(cfg) + + else: + raise ValueError('Unable to process kernel. 
The kernel config dict must either be a `KernelConfig` with a '
+                         '`src` field, or a `DeepKernelConfig` with a `proj` field.')
+    return kernel

From c8e8ca5e0739af189859676f804d9e2382614c83 Mon Sep 17 00:00:00 2001
From: Ashley Scillitoe
Date: Wed, 23 Nov 2022 15:37:16 +0000
Subject: [PATCH 3/9] Only test keops save/load when installed (on linux)

---
 alibi_detect/saving/tests/test_saving.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/alibi_detect/saving/tests/test_saving.py b/alibi_detect/saving/tests/test_saving.py
index aa3c9facb..1232e9590 100644
--- a/alibi_detect/saving/tests/test_saving.py
+++ b/alibi_detect/saving/tests/test_saving.py
@@ -47,13 +47,17 @@ from alibi_detect.saving.schemas import DeepKernelConfig, KernelConfig, ModelConfig, PreprocessConfig
 from alibi_detect.utils.pytorch.kernels import DeepKernel as DeepKernel_pt
 from alibi_detect.utils.tensorflow.kernels import DeepKernel as DeepKernel_tf
 from alibi_detect.utils.keops.kernels import DeepKernel as DeepKernel_ke
+from alibi_detect.utils.frameworks import has_keops

 if version.parse(scipy.__version__) >= version.parse('1.7.0'):
     from alibi_detect.cd import CVMDrift

 # TODO: We currently parametrize encoder_model etc (in models.py) with backend, so the same flavour of
 #  preprocessing is used as the detector backend. In the future we could decouple this in tests.
-backend = param_fixture("backend", ['tensorflow', 'pytorch', 'sklearn', 'keops'])
+backends = ['tensorflow', 'pytorch', 'sklearn']
+if has_keops:  # keops currently only installed during linux CI
+    backends.append('keops')
+backend = param_fixture("backend", backends)
 P_VAL = 0.05
 ERT = 10
 N_PERMUTATIONS = 10

From 09308448c2cf70a5473243d1792164fb995cd4df Mon Sep 17 00:00:00 2001
From: Ashley Scillitoe
Date: Wed, 23 Nov 2022 16:03:58 +0000
Subject: [PATCH 4/9] avoid keops imports when keops not installed

---
 alibi_detect/saving/tests/models.py      | 6 ++++--
 alibi_detect/saving/tests/test_saving.py | 5 +++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/alibi_detect/saving/tests/models.py b/alibi_detect/saving/tests/models.py
index 0d4bd7bde..0797e0d9e 100644
--- a/alibi_detect/saving/tests/models.py
+++ b/alibi_detect/saving/tests/models.py
@@ -21,12 +21,14 @@
 from alibi_detect.utils.pytorch.kernels import DeepKernel as DeepKernel_pt
 from alibi_detect.utils.tensorflow.kernels import GaussianRBF as GaussianRBF_tf
 from alibi_detect.utils.tensorflow.kernels import DeepKernel as DeepKernel_tf
-from alibi_detect.utils.keops.kernels import GaussianRBF as GaussianRBF_ke
-from alibi_detect.utils.keops.kernels import DeepKernel as DeepKernel_ke
 from alibi_detect.models.pytorch import TransformerEmbedding as TransformerEmbedding_pt
 from alibi_detect.models.tensorflow import TransformerEmbedding as TransformerEmbedding_tf
 from alibi_detect.cd.pytorch import HiddenOutput as HiddenOutput_pt
 from alibi_detect.cd.tensorflow import HiddenOutput as HiddenOutput_tf
+from alibi_detect.utils.frameworks import has_keops
+if has_keops:
+    from alibi_detect.utils.keops.kernels import GaussianRBF as GaussianRBF_ke
+    from alibi_detect.utils.keops.kernels import DeepKernel as DeepKernel_ke

 LATENT_DIM = 2  # Must be less than input_dim set in ./datasets.py
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
diff --git a/alibi_detect/saving/tests/test_saving.py b/alibi_detect/saving/tests/test_saving.py
index 1232e9590..047d29f0d 100644
--- a/alibi_detect/saving/tests/test_saving.py
+++ b/alibi_detect/saving/tests/test_saving.py
@@ -17,7 +17,6 @@
 import tensorflow as tf
 import torch
 import torch.nn as nn
-from pykeops.torch import LazyTensor

 from
.datasets import BinData, CategoricalData, ContinuousData, MixedData, TextData
 from .models import (encoder_model, preprocess_custom, preprocess_hiddenoutput, preprocess_simple,  # noqa: F401
@@ -45,8 +45,10 @@ from alibi_detect.saving.schemas import DeepKernelConfig, KernelConfig, ModelConfig, PreprocessConfig
 from alibi_detect.utils.pytorch.kernels import DeepKernel as DeepKernel_pt
 from alibi_detect.utils.tensorflow.kernels import DeepKernel as DeepKernel_tf
-from alibi_detect.utils.keops.kernels import DeepKernel as DeepKernel_ke
 from alibi_detect.utils.frameworks import has_keops
+if has_keops:
+    from pykeops.torch import LazyTensor
+    from alibi_detect.utils.keops.kernels import DeepKernel as DeepKernel_ke

 if version.parse(scipy.__version__) >= version.parse('1.7.0'):
     from alibi_detect.cd import CVMDrift

From 1b1b92ade3e8be767fa6eef3e3195c9f06856808 Mon Sep 17 00:00:00 2001
From: Ashley Scillitoe
Date: Wed, 23 Nov 2022 16:19:11 +0000
Subject: [PATCH 5/9] Set sigma as float32

---
 alibi_detect/saving/tests/test_saving.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/alibi_detect/saving/tests/test_saving.py b/alibi_detect/saving/tests/test_saving.py
index 047d29f0d..94779d6b6 100644
--- a/alibi_detect/saving/tests/test_saving.py
+++ b/alibi_detect/saving/tests/test_saving.py
@@ -222,9 +222,9 @@ def test_save_mmddrift(data, kernel, preprocess_custom, backend, tmp_path, seed)
         'preprocess_at_init': True,
         'kernel': kernel,
         'configure_kernel_from_x_ref': False,
-        'sigma': np.array([0.5])
+        'sigma': np.array([0.5], dtype=np.float32)
     }
-    if backend == 'pytorch':
+    if backend in ('pytorch', 'keops'):
         kwargs['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'
     with fixed_seed(seed):
         cd = MMDDrift(X_ref, **kwargs)

From dbe24a9b1f55bcbcb7ee40eccc642f59b857d83d Mon Sep 17 00:00:00 2001
From: Ashley Scillitoe
Date: Wed, 23 Nov 2022 16:21:12 +0000
Subject: [PATCH 6/9] Update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b59d714fe..9da0ffe1b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@
 - **New feature** MMD drift detector has been extended with a [KeOps](https://www.kernel-operations.io/keops/index.html) backend to scale and speed up the detector. See the [documentation](https://docs.seldon.io/projects/alibi-detect/en/latest/cd/methods/mmddrift.html) and [example notebook](https://docs.seldon.io/projects/alibi-detect/en/latest/examples/cd_mmd_keops.html) for more info ([#548](https://github.com/SeldonIO/alibi-detect/pull/548)).
 - **New feature** Added support for serializing detectors with PyTorch backends, and detectors containing PyTorch models in their preprocessing functions ([#656](https://github.com/SeldonIO/alibi-detect/pull/656)).
+- **New feature** Added support for serializing detectors with KeOps backends ([#681](https://github.com/SeldonIO/alibi-detect/pull/681)).
 - **New feature** Added a PyTorch version of the `UAE` preprocessing utility function ([#656](https://github.com/SeldonIO/alibi-detect/pull/656)).
 - If a `categories_per_feature` dictionary is not passed to `TabularDrift`, a warning is now raised to inform the user that all features are assumed to be numerical ([#606](https://github.com/SeldonIO/alibi-detect/pull/606)).
 - For the `ClassifierDrift` and `SpotTheDiffDrift` detectors, we can also return the out-of-fold instances of the reference and test sets.
When using `train_size` for training the detector, this allows the returned prediction probabilities to be associated with the correct instances.

From 5bb73142573576d82b4f01d30c67acbd906a54d6 Mon Sep 17 00:00:00 2001
From: Ashley Scillitoe
Date: Thu, 24 Nov 2022 11:10:41 +0000
Subject: [PATCH 7/9] Add comments re has_keops

---
 alibi_detect/saving/tests/models.py      | 2 +-
 alibi_detect/saving/tests/test_saving.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/alibi_detect/saving/tests/models.py b/alibi_detect/saving/tests/models.py
index 0797e0d9e..c6bd1523c 100644
--- a/alibi_detect/saving/tests/models.py
+++ b/alibi_detect/saving/tests/models.py
@@ -26,7 +26,7 @@ from alibi_detect.cd.pytorch import HiddenOutput as HiddenOutput_pt
 from alibi_detect.cd.tensorflow import HiddenOutput as HiddenOutput_tf
 from alibi_detect.utils.frameworks import has_keops
-if has_keops:
+if has_keops:  # pykeops only installed in Linux CI
     from alibi_detect.utils.keops.kernels import GaussianRBF as GaussianRBF_ke
     from alibi_detect.utils.keops.kernels import DeepKernel as DeepKernel_ke

diff --git a/alibi_detect/saving/tests/test_saving.py b/alibi_detect/saving/tests/test_saving.py
index 94779d6b6..8d590b7f5 100644
--- a/alibi_detect/saving/tests/test_saving.py
+++ b/alibi_detect/saving/tests/test_saving.py
@@ -46,7 +46,7 @@ from alibi_detect.utils.pytorch.kernels import DeepKernel as DeepKernel_pt
 from alibi_detect.utils.tensorflow.kernels import DeepKernel as DeepKernel_tf
 from alibi_detect.utils.frameworks import has_keops
-if has_keops:
+if has_keops:  # pykeops only installed in Linux CI
     from pykeops.torch import LazyTensor
     from alibi_detect.utils.keops.kernels import DeepKernel as DeepKernel_ke

@@ -56,7 +56,7 @@
 # TODO: We currently parametrize encoder_model etc (in models.py) with backend, so the same flavour of
 #  preprocessing is used as the detector backend. In the future we could decouple this in tests.
 backends = ['tensorflow', 'pytorch', 'sklearn']
-if has_keops:  # keops currently only installed during linux CI
+if has_keops:  # pykeops only installed in Linux CI
     backends.append('keops')
 backend = param_fixture("backend", backends)
 P_VAL = 0.05

From 669a8c60bc4cdb1bdd6e6f6ead9008bfa7d2f3a2 Mon Sep 17 00:00:00 2001
From: Ashley Scillitoe
Date: Thu, 24 Nov 2022 11:46:22 +0000
Subject: [PATCH 8/9] Replace .copy() calls with deepcopy

---
 alibi_detect/utils/keops/kernels.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py
index 422a67076..a8c6b36a7 100644
--- a/alibi_detect/utils/keops/kernels.py
+++ b/alibi_detect/utils/keops/kernels.py
@@ -3,6 +3,7 @@
 import torch.nn as nn
 from typing import Callable, Optional, Union
 from alibi_detect.utils.frameworks import Framework
+from copy import deepcopy


 def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor, n_min: int = 100) -> torch.Tensor:
@@ -121,7 +122,7 @@ def get_config(self) -> dict:
         """
         Returns a serializable config dict (excluding the init_sigma_fn, which is serialized in alibi_detect.saving).
""" - cfg = self.config.copy() + cfg = deepcopy(self.config) if isinstance(cfg['sigma'], torch.Tensor): cfg['sigma'] = cfg['sigma'].detach().cpu().numpy().tolist() cfg.update({'flavour': Framework.KEOPS.value}) @@ -207,7 +208,7 @@ def forward(self, x_proj: LazyTensor, y_proj: LazyTensor, x: Optional[LazyTensor return similarity def get_config(self) -> dict: - return self.config.copy() + return deepcopy(self.config) @classmethod def from_config(cls, config): From 35e3025807519b7b3ba4085b473f2dfa9d76f389 Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Thu, 24 Nov 2022 11:54:05 +0000 Subject: [PATCH 9/9] Type str as Literal in keops/kernels.py --- alibi_detect/utils/keops/kernels.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index a8c6b36a7..9400be059 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -3,6 +3,7 @@ import torch.nn as nn from typing import Callable, Optional, Union from alibi_detect.utils.frameworks import Framework +from alibi_detect.utils._types import Literal from copy import deepcopy @@ -146,9 +147,9 @@ class DeepKernel(nn.Module): def __init__( self, proj: nn.Module, - kernel_a: Union[nn.Module, str] = 'rbf', - kernel_b: Optional[Union[nn.Module, str]] = 'rbf', - eps: Union[float, str] = 'trainable' + kernel_a: Union[nn.Module, Literal['rbf']] = 'rbf', + kernel_b: Optional[Union[nn.Module, Literal['rbf']]] = 'rbf', + eps: Union[float, Literal['trainable']] = 'trainable' ) -> None: """ Computes similarities as k(x,y) = (1-eps)*k_a(proj(x), proj(y)) + eps*k_b(x,y). @@ -186,7 +187,7 @@ def __init__( if kernel_b is not None: self._init_eps(eps) - def _init_eps(self, eps: Union[float, str]) -> None: + def _init_eps(self, eps: Union[float, Literal['trainable']]) -> None: if isinstance(eps, float): if not 0 < eps < 1: raise ValueError("eps should be in (0,1)")