From 4b21b59b6b5dc589a176940426aab2514e4ffed7 Mon Sep 17 00:00:00 2001 From: Janis Klaise Date: Fri, 17 Sep 2021 16:47:46 +0100 Subject: [PATCH 1/3] Add version to metadata --- alibi_detect/base.py | 6 +++++- alibi_detect/utils/saving.py | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/alibi_detect/base.py b/alibi_detect/base.py index f36156ca7..f9300b8ff 100644 --- a/alibi_detect/base.py +++ b/alibi_detect/base.py @@ -4,10 +4,13 @@ import numpy as np from typing import Dict +from alibi_detect.version import __version__ + DEFAULT_META = { "name": None, "detector_type": None, # online or offline - "data_type": None # tabular, image or time-series + "data_type": None, # tabular, image or time-series + "version": None, } # type: Dict @@ -55,6 +58,7 @@ class BaseDetector(ABC): def __init__(self): self.meta = copy.deepcopy(DEFAULT_META) self.meta['name'] = self.__class__.__name__ + self.meta['version'] = __version__ def __repr__(self): return self.__class__.__name__ diff --git a/alibi_detect/utils/saving.py b/alibi_detect/utils/saving.py index a9e002d4c..0158590ae 100644 --- a/alibi_detect/utils/saving.py +++ b/alibi_detect/utils/saving.py @@ -5,6 +5,7 @@ import logging import os from pathlib import Path +import warnings import tensorflow as tf from tensorflow.keras.layers import Input, InputLayer from tensorflow_probability.python.distributions.distribution import Distribution @@ -24,6 +25,7 @@ OutlierSeq2Seq, OutlierVAE, OutlierVAEGMM, SpectralResidual) from alibi_detect.od.llr import build_model from alibi_detect.utils.tensorflow.kernels import GaussianRBF +from alibi_detect.version import __version__ # do not extend pickle dispatch table so as not to change pickle behaviour dill.extend(use_dill=False) @@ -980,6 +982,11 @@ def load_detector(filepath: Union[str, os.PathLike], **kwargs) -> Data: # load metadata meta_dict = dill.load(open(filepath.joinpath('meta' + suffix), 'rb')) + # check version + if meta_dict['version'] != __version__: + warnings.warn(f'Trying to load detector from version {meta_dict["version"]} when using version {__version__}. ' + f'This may lead to breaking code or invalid results.') + if 'backend' in list(meta_dict.keys()) and meta_dict['backend'] == 'pytorch': raise NotImplementedError('Detectors with PyTorch backend are not yet supported.') From 2eff91a83250aa22b49c371b424a5bbf93b691cc Mon Sep 17 00:00:00 2001 From: Janis Klaise Date: Mon, 20 Sep 2021 10:21:21 +0100 Subject: [PATCH 2/3] Add __version__ to metadata tests --- alibi_detect/ad/tests/test_adae.py | 4 +++- alibi_detect/ad/tests/test_admd.py | 4 +++- alibi_detect/od/tests/test_ae.py | 3 ++- alibi_detect/od/tests/test_aegmm.py | 3 ++- alibi_detect/od/tests/test_iforest.py | 4 +++- alibi_detect/od/tests/test_llr.py | 3 ++- alibi_detect/od/tests/test_mahalanobis.py | 5 +++-- alibi_detect/od/tests/test_prophet.py | 4 +++- alibi_detect/od/tests/test_seq2seq.py | 10 ++++++---- alibi_detect/od/tests/test_sr.py | 4 +++- alibi_detect/od/tests/test_vae.py | 5 +++-- alibi_detect/od/tests/test_vaegmm.py | 4 +++- 12 files changed, 36 insertions(+), 17 deletions(-) diff --git a/alibi_detect/ad/tests/test_adae.py b/alibi_detect/ad/tests/test_adae.py index b69268e32..0c5a8ea13 100644 --- a/alibi_detect/ad/tests/test_adae.py +++ b/alibi_detect/ad/tests/test_adae.py @@ -6,6 +6,7 @@ from tensorflow.keras.layers import Dense, InputLayer from tensorflow.keras.utils import to_categorical from alibi_detect.ad import AdversarialAE +from alibi_detect.version import __version__ threshold = [None, 5.] w_model = [1., .5] @@ -68,7 +69,8 @@ def test_adv_vae(adv_ae_params): ) assert advae.threshold == threshold - assert advae.meta == {'name': 'AdversarialAE', 'detector_type': 'offline', 'data_type': None} + assert advae.meta == {'name': 'AdversarialAE', 'detector_type': 'offline', 'data_type': None, + 'version': __version__} for layer in advae.model.layers: assert not layer.trainable diff --git a/alibi_detect/ad/tests/test_admd.py b/alibi_detect/ad/tests/test_admd.py index c0f2dcbad..ef6d96e15 100644 --- a/alibi_detect/ad/tests/test_admd.py +++ b/alibi_detect/ad/tests/test_admd.py @@ -5,6 +5,7 @@ import tensorflow as tf from tensorflow.keras.utils import to_categorical from alibi_detect.ad import ModelDistillation +from alibi_detect.version import __version__ threshold = [None, 5.] loss_type = ['kld', 'xent'] @@ -54,7 +55,8 @@ def test_adv_md(adv_md_params): ) assert admd.threshold == threshold - assert admd.meta == {'name': 'ModelDistillation', 'detector_type': 'offline', 'data_type': None} + assert admd.meta == {'name': 'ModelDistillation', 'detector_type': 'offline', 'data_type': None, + 'version': __version__} for layer in admd.model.layers: assert not layer.trainable diff --git a/alibi_detect/od/tests/test_ae.py b/alibi_detect/od/tests/test_ae.py index 40c1fd7fd..83e7d6055 100644 --- a/alibi_detect/od/tests/test_ae.py +++ b/alibi_detect/od/tests/test_ae.py @@ -5,6 +5,7 @@ import tensorflow as tf from tensorflow.keras.layers import Dense, InputLayer from alibi_detect.od import OutlierAE +from alibi_detect.version import __version__ threshold = [None, 5.] threshold_perc = [90.] @@ -60,7 +61,7 @@ def test_ae(ae_params): ) assert ae.threshold == threshold - assert ae.meta == {'name': 'OutlierAE', 'detector_type': 'offline', 'data_type': None} + assert ae.meta == {'name': 'OutlierAE', 'detector_type': 'offline', 'data_type': None, 'version': __version__} # fit OutlierAE, infer threshold and compute scores ae.fit(X, epochs=5, verbose=False) diff --git a/alibi_detect/od/tests/test_aegmm.py b/alibi_detect/od/tests/test_aegmm.py index dd99d2e0f..ba2a34023 100644 --- a/alibi_detect/od/tests/test_aegmm.py +++ b/alibi_detect/od/tests/test_aegmm.py @@ -4,6 +4,7 @@ import tensorflow as tf from tensorflow.keras.layers import Dense, InputLayer from alibi_detect.od import OutlierAEGMM +from alibi_detect.version import __version__ threshold = [None, 5.] n_gmm = [1, 2] @@ -69,7 +70,7 @@ def test_aegmm(aegmm_params): ) assert aegmm.threshold == threshold - assert aegmm.meta == {'name': 'OutlierAEGMM', 'detector_type': 'offline', 'data_type': None} + assert aegmm.meta == {'name': 'OutlierAEGMM', 'detector_type': 'offline', 'data_type': None, 'version': __version__} # fit OutlierAEGMM, infer threshold and compute scores aegmm.fit(X, w_energy=w_energy, epochs=5, batch_size=1000, verbose=False) diff --git a/alibi_detect/od/tests/test_iforest.py b/alibi_detect/od/tests/test_iforest.py index 2287f41a0..93ae9f354 100644 --- a/alibi_detect/od/tests/test_iforest.py +++ b/alibi_detect/od/tests/test_iforest.py @@ -2,6 +2,7 @@ import pytest from sklearn.datasets import load_iris from alibi_detect.od import IForest +from alibi_detect.version import __version__ threshold = [None, 0.] threshold_perc = [75., 95.] @@ -23,7 +24,8 @@ def test_isolation_forest(iforest_params): X, y = load_iris(return_X_y=True) iforest = IForest(threshold) assert iforest.threshold == threshold - assert iforest.meta == {'name': 'IForest', 'detector_type': 'offline', 'data_type': 'tabular'} + assert iforest.meta == {'name': 'IForest', 'detector_type': 'offline', 'data_type': 'tabular', + 'version': __version__} iforest.fit(X) iforest.infer_threshold(X, threshold_perc=threshold_perc) iscore = iforest.score(X) diff --git a/alibi_detect/od/tests/test_llr.py b/alibi_detect/od/tests/test_llr.py index 926e113c7..227fb8ab9 100644 --- a/alibi_detect/od/tests/test_llr.py +++ b/alibi_detect/od/tests/test_llr.py @@ -4,6 +4,7 @@ import tensorflow as tf from tensorflow.keras.layers import Dense, Input, LSTM from alibi_detect.od import LLR +from alibi_detect.version import __version__ input_dim = 5 hidden_dim = 20 @@ -55,7 +56,7 @@ def test_llr(llr_params): od = LLR(threshold=threshold, sequential=True, model=model, log_prob=likelihood_fn) assert od.threshold == threshold - assert od.meta == {'name': 'LLR', 'detector_type': 'offline', 'data_type': None} + assert od.meta == {'name': 'LLR', 'detector_type': 'offline', 'data_type': None, 'version': __version__} od.fit( X_train, diff --git a/alibi_detect/od/tests/test_mahalanobis.py b/alibi_detect/od/tests/test_mahalanobis.py index ccee6032a..4aca5c55c 100644 --- a/alibi_detect/od/tests/test_mahalanobis.py +++ b/alibi_detect/od/tests/test_mahalanobis.py @@ -3,6 +3,7 @@ import pytest from sklearn.datasets import load_iris from alibi_detect.od import Mahalanobis +from alibi_detect.version import __version__ threshold = [None, 5.] n_components = [2, 3] @@ -25,13 +26,13 @@ def mahalanobis_params(request): @pytest.mark.parametrize('mahalanobis_params', list(range(n_tests)), indirect=True) def test_mahalanobis(mahalanobis_params): threshold, n_components, std_clip, start_clip, max_n, \ - threshold_perc, return_instance_score = mahalanobis_params + threshold_perc, return_instance_score = mahalanobis_params X, y = load_iris(return_X_y=True) mh = Mahalanobis(threshold, n_components=n_components, std_clip=std_clip, start_clip=start_clip, max_n=max_n) assert mh.threshold == threshold assert mh.n == 0 - assert mh.meta == {'name': 'Mahalanobis', 'detector_type': 'online', 'data_type': 'tabular'} + assert mh.meta == {'name': 'Mahalanobis', 'detector_type': 'online', 'data_type': 'tabular', 'version': __version__} mh.infer_threshold(X, threshold_perc=threshold_perc) assert mh.n == X.shape[0] iscore = mh.score(X) # noqa diff --git a/alibi_detect/od/tests/test_prophet.py b/alibi_detect/od/tests/test_prophet.py index 493d87740..7e6fb3553 100644 --- a/alibi_detect/od/tests/test_prophet.py +++ b/alibi_detect/od/tests/test_prophet.py @@ -5,6 +5,7 @@ import pandas as pd import pytest from alibi_detect.od import OutlierProphet +from alibi_detect.version import __version__ growth = ['linear', 'logistic'] return_instance_score = [True, False] @@ -36,7 +37,8 @@ def test_prophet(prophet_params): growth, return_instance_score, return_forecast = prophet_params od = OutlierProphet(growth=growth) assert isinstance(od.model, fbprophet.forecaster.Prophet) - assert od.meta == {'name': 'OutlierProphet', 'detector_type': 'offline', 'data_type': 'time-series'} + assert od.meta == {'name': 'OutlierProphet', 'detector_type': 'offline', 'data_type': 'time-series', + 'version': __version__} if growth == 'logistic': df_fit['cap'] = 10. df_test['cap'] = 10. diff --git a/alibi_detect/od/tests/test_seq2seq.py b/alibi_detect/od/tests/test_seq2seq.py index 74a71bc0f..0f38a19dc 100644 --- a/alibi_detect/od/tests/test_seq2seq.py +++ b/alibi_detect/od/tests/test_seq2seq.py @@ -3,6 +3,7 @@ import pytest from alibi_detect.od import OutlierSeq2Seq from alibi_detect.utils.perturbation import inject_outlier_ts +from alibi_detect.version import __version__ n_features = [1, 2] seq_len = [20, 50] @@ -29,14 +30,14 @@ def seq2seq_params(request): def test_seq2seq(seq2seq_params): # OutlierSeq2Seq parameters n_features, seq_len, threshold, threshold_perc, return_instance_score, \ - return_feature_score, outlier_perc, outlier_type = seq2seq_params + return_feature_score, outlier_perc, outlier_type = seq2seq_params # create artificial sine time series X = np.sin(np.linspace(-50, 50, 10000)).astype(np.float32).reshape((-1, n_features)) # create outliers for threshold and detection - X_threshold = inject_outlier_ts(X, perc_outlier=100-threshold_perc, perc_window=10, n_std=10., min_std=9.).data - X_outlier = inject_outlier_ts(X, perc_outlier=100-threshold_perc, perc_window=10, n_std=10., min_std=9.).data + X_threshold = inject_outlier_ts(X, perc_outlier=100 - threshold_perc, perc_window=10, n_std=10., min_std=9.).data + X_outlier = inject_outlier_ts(X, perc_outlier=100 - threshold_perc, perc_window=10, n_std=10., min_std=9.).data # define architecture od = OutlierSeq2Seq(n_features, seq_len, threshold=threshold, latent_dim=latent_dim) @@ -45,7 +46,8 @@ def test_seq2seq(seq2seq_params): assert od.threshold == 0. else: assert od.threshold == threshold - assert od.meta == {'name': 'OutlierSeq2Seq', 'detector_type': 'offline', 'data_type': 'time-series'} + assert od.meta == {'name': 'OutlierSeq2Seq', 'detector_type': 'offline', 'data_type': 'time-series', + 'version': __version__} # fit OutlierSeq2Seq od.fit(X, epochs=2, verbose=False) diff --git a/alibi_detect/od/tests/test_sr.py b/alibi_detect/od/tests/test_sr.py index a638ca07a..b511f9067 100644 --- a/alibi_detect/od/tests/test_sr.py +++ b/alibi_detect/od/tests/test_sr.py @@ -2,6 +2,7 @@ import numpy as np import pytest from alibi_detect.od import SpectralResidual +from alibi_detect.version import __version__ # create normal time series and one with perturbations t = np.linspace(0, 0.5, 1000) @@ -35,7 +36,8 @@ def test_sr(sr_params): assert od.threshold == threshold assert od.meta == {'name': 'SpectralResidual', 'detector_type': 'online', - 'data_type': 'time-series'} + 'data_type': 'time-series', + 'version': __version__} preds_in = od.predict(X, t, return_instance_score=return_instance_score) assert preds_in['data']['is_outlier'].sum() <= 2. if return_instance_score: diff --git a/alibi_detect/od/tests/test_vae.py b/alibi_detect/od/tests/test_vae.py index 54baba42e..d2c3c8a6d 100644 --- a/alibi_detect/od/tests/test_vae.py +++ b/alibi_detect/od/tests/test_vae.py @@ -6,6 +6,7 @@ from tensorflow.keras.layers import Dense, InputLayer from alibi_detect.od import OutlierVAE from alibi_detect.models.tensorflow.losses import elbo +from alibi_detect.version import __version__ threshold = [None, 5.] score_type = ['mse'] @@ -38,7 +39,7 @@ def vae_params(request): def test_vae(vae_params): # OutlierVAE parameters threshold, score_type, samples, loss_fn, threshold_perc, return_instance_score, \ - return_feature_score, outlier_perc, outlier_type = vae_params + return_feature_score, outlier_perc, outlier_type = vae_params # define encoder and decoder encoder_net = tf.keras.Sequential( @@ -68,7 +69,7 @@ def test_vae(vae_params): ) assert vae.threshold == threshold - assert vae.meta == {'name': 'OutlierVAE', 'detector_type': 'offline', 'data_type': None} + assert vae.meta == {'name': 'OutlierVAE', 'detector_type': 'offline', 'data_type': None, 'version': __version__} # fit OutlierVAE, infer threshold and compute scores vae.fit(X, loss_fn=loss_fn, epochs=5, verbose=False) diff --git a/alibi_detect/od/tests/test_vaegmm.py b/alibi_detect/od/tests/test_vaegmm.py index ebefe5d4f..cef1060db 100644 --- a/alibi_detect/od/tests/test_vaegmm.py +++ b/alibi_detect/od/tests/test_vaegmm.py @@ -4,6 +4,7 @@ import tensorflow as tf from tensorflow.keras.layers import Dense, InputLayer from alibi_detect.od import OutlierVAEGMM +from alibi_detect.version import __version__ threshold = [None, 5.] n_gmm = [1, 2] @@ -73,7 +74,8 @@ def test_vaegmm(vaegmm_params): ) assert vaegmm.threshold == threshold - assert vaegmm.meta == {'name': 'OutlierVAEGMM', 'detector_type': 'offline', 'data_type': None} + assert vaegmm.meta == {'name': 'OutlierVAEGMM', 'detector_type': 'offline', 'data_type': None, + 'version': __version__} # fit OutlierAEGMM, infer threshold and compute scores vaegmm.fit(X, w_recon=w_recon, w_energy=w_energy, epochs=5, batch_size=1000, verbose=False) From 7bd1cb6ba300591663d8ea443d5eb1d67242ae68 Mon Sep 17 00:00:00 2001 From: Janis Klaise Date: Mon, 20 Sep 2021 10:31:18 +0100 Subject: [PATCH 3/3] Fix flake8 indentation --- alibi_detect/od/tests/test_mahalanobis.py | 2 +- alibi_detect/od/tests/test_seq2seq.py | 2 +- alibi_detect/od/tests/test_vae.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/alibi_detect/od/tests/test_mahalanobis.py b/alibi_detect/od/tests/test_mahalanobis.py index 4aca5c55c..524f1970f 100644 --- a/alibi_detect/od/tests/test_mahalanobis.py +++ b/alibi_detect/od/tests/test_mahalanobis.py @@ -26,7 +26,7 @@ def mahalanobis_params(request): @pytest.mark.parametrize('mahalanobis_params', list(range(n_tests)), indirect=True) def test_mahalanobis(mahalanobis_params): threshold, n_components, std_clip, start_clip, max_n, \ - threshold_perc, return_instance_score = mahalanobis_params + threshold_perc, return_instance_score = mahalanobis_params X, y = load_iris(return_X_y=True) mh = Mahalanobis(threshold, n_components=n_components, std_clip=std_clip, start_clip=start_clip, max_n=max_n) diff --git a/alibi_detect/od/tests/test_seq2seq.py b/alibi_detect/od/tests/test_seq2seq.py index 0f38a19dc..5c33cb534 100644 --- a/alibi_detect/od/tests/test_seq2seq.py +++ b/alibi_detect/od/tests/test_seq2seq.py @@ -30,7 +30,7 @@ def seq2seq_params(request): def test_seq2seq(seq2seq_params): # OutlierSeq2Seq parameters n_features, seq_len, threshold, threshold_perc, return_instance_score, \ - return_feature_score, outlier_perc, outlier_type = seq2seq_params + return_feature_score, outlier_perc, outlier_type = seq2seq_params # create artificial sine time series X = np.sin(np.linspace(-50, 50, 10000)).astype(np.float32).reshape((-1, n_features)) diff --git a/alibi_detect/od/tests/test_vae.py b/alibi_detect/od/tests/test_vae.py index d2c3c8a6d..eb23d5f04 100644 --- a/alibi_detect/od/tests/test_vae.py +++ b/alibi_detect/od/tests/test_vae.py @@ -39,7 +39,7 @@ def vae_params(request): def test_vae(vae_params): # OutlierVAE parameters threshold, score_type, samples, loss_fn, threshold_perc, return_instance_score, \ - return_feature_score, outlier_perc, outlier_type = vae_params + return_feature_score, outlier_perc, outlier_type = vae_params # define encoder and decoder encoder_net = tf.keras.Sequential(