From ba5da6abaf8396fb22ea082fc803f1213324c93d Mon Sep 17 00:00:00 2001
From: Janis Klaise <jk@seldon.io>
Date: Wed, 22 Sep 2021 10:48:17 +0100
Subject: [PATCH] Add version to metadata (#339)

* Add version to metadata

* Add __version__ to metadata tests

* Fix flake8 indentation
---
 alibi_detect/ad/tests/test_adae.py        | 4 +++-
 alibi_detect/ad/tests/test_admd.py        | 4 +++-
 alibi_detect/base.py                      | 6 +++++-
 alibi_detect/od/tests/test_ae.py          | 3 ++-
 alibi_detect/od/tests/test_aegmm.py       | 3 ++-
 alibi_detect/od/tests/test_iforest.py     | 4 +++-
 alibi_detect/od/tests/test_llr.py         | 3 ++-
 alibi_detect/od/tests/test_mahalanobis.py | 3 ++-
 alibi_detect/od/tests/test_prophet.py     | 4 +++-
 alibi_detect/od/tests/test_seq2seq.py     | 8 +++++---
 alibi_detect/od/tests/test_sr.py          | 4 +++-
 alibi_detect/od/tests/test_vae.py         | 3 ++-
 alibi_detect/od/tests/test_vaegmm.py      | 4 +++-
 alibi_detect/utils/saving.py              | 7 +++++++
 14 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/alibi_detect/ad/tests/test_adae.py b/alibi_detect/ad/tests/test_adae.py
index b69268e32..0c5a8ea13 100644
--- a/alibi_detect/ad/tests/test_adae.py
+++ b/alibi_detect/ad/tests/test_adae.py
@@ -6,6 +6,7 @@
 from tensorflow.keras.layers import Dense, InputLayer
 from tensorflow.keras.utils import to_categorical
 from alibi_detect.ad import AdversarialAE
+from alibi_detect.version import __version__
 
 threshold = [None, 5.]
 w_model = [1., .5]
@@ -68,7 +69,8 @@ def test_adv_vae(adv_ae_params):
     )
 
     assert advae.threshold == threshold
-    assert advae.meta == {'name': 'AdversarialAE', 'detector_type': 'offline', 'data_type': None}
+    assert advae.meta == {'name': 'AdversarialAE', 'detector_type': 'offline', 'data_type': None,
+                          'version': __version__}
     for layer in advae.model.layers:
         assert not layer.trainable
 
diff --git a/alibi_detect/ad/tests/test_admd.py b/alibi_detect/ad/tests/test_admd.py
index c0f2dcbad..ef6d96e15 100644
--- a/alibi_detect/ad/tests/test_admd.py
+++ b/alibi_detect/ad/tests/test_admd.py
@@ -5,6 +5,7 @@
 import tensorflow as tf
 from tensorflow.keras.utils import to_categorical
 from alibi_detect.ad import ModelDistillation
+from alibi_detect.version import __version__
 
 threshold = [None, 5.]
 loss_type = ['kld', 'xent']
@@ -54,7 +55,8 @@ def test_adv_md(adv_md_params):
     )
 
     assert admd.threshold == threshold
-    assert admd.meta == {'name': 'ModelDistillation', 'detector_type': 'offline', 'data_type': None}
+    assert admd.meta == {'name': 'ModelDistillation', 'detector_type': 'offline', 'data_type': None,
+                         'version': __version__}
     for layer in admd.model.layers:
         assert not layer.trainable
 
diff --git a/alibi_detect/base.py b/alibi_detect/base.py
index f36156ca7..f9300b8ff 100644
--- a/alibi_detect/base.py
+++ b/alibi_detect/base.py
@@ -4,10 +4,13 @@
 import numpy as np
 from typing import Dict
 
+from alibi_detect.version import __version__
+
 DEFAULT_META = {
     "name": None,
     "detector_type": None,  # online or offline
-    "data_type": None  # tabular, image or time-series
+    "data_type": None,  # tabular, image or time-series
+    "version": None,
 }  # type: Dict
 
 
@@ -55,6 +58,7 @@ class BaseDetector(ABC):
     def __init__(self):
         self.meta = copy.deepcopy(DEFAULT_META)
         self.meta['name'] = self.__class__.__name__
+        self.meta['version'] = __version__
 
     def __repr__(self):
         return self.__class__.__name__
diff --git a/alibi_detect/od/tests/test_ae.py b/alibi_detect/od/tests/test_ae.py
index 40c1fd7fd..83e7d6055 100644
--- a/alibi_detect/od/tests/test_ae.py
+++ b/alibi_detect/od/tests/test_ae.py
@@ -5,6 +5,7 @@
 import tensorflow as tf
 from tensorflow.keras.layers import Dense, InputLayer
 from alibi_detect.od import OutlierAE
+from alibi_detect.version import __version__
 
 threshold = [None, 5.]
 threshold_perc = [90.]
@@ -60,7 +61,7 @@ def test_ae(ae_params):
     )
 
     assert ae.threshold == threshold
-    assert ae.meta == {'name': 'OutlierAE', 'detector_type': 'offline', 'data_type': None}
+    assert ae.meta == {'name': 'OutlierAE', 'detector_type': 'offline', 'data_type': None, 'version': __version__}
 
     # fit OutlierAE, infer threshold and compute scores
     ae.fit(X, epochs=5, verbose=False)
diff --git a/alibi_detect/od/tests/test_aegmm.py b/alibi_detect/od/tests/test_aegmm.py
index dd99d2e0f..ba2a34023 100644
--- a/alibi_detect/od/tests/test_aegmm.py
+++ b/alibi_detect/od/tests/test_aegmm.py
@@ -4,6 +4,7 @@
 import tensorflow as tf
 from tensorflow.keras.layers import Dense, InputLayer
 from alibi_detect.od import OutlierAEGMM
+from alibi_detect.version import __version__
 
 threshold = [None, 5.]
 n_gmm = [1, 2]
@@ -69,7 +70,7 @@ def test_aegmm(aegmm_params):
     )
 
     assert aegmm.threshold == threshold
-    assert aegmm.meta == {'name': 'OutlierAEGMM', 'detector_type': 'offline', 'data_type': None}
+    assert aegmm.meta == {'name': 'OutlierAEGMM', 'detector_type': 'offline', 'data_type': None, 'version': __version__}
 
     # fit OutlierAEGMM, infer threshold and compute scores
     aegmm.fit(X, w_energy=w_energy, epochs=5, batch_size=1000, verbose=False)
diff --git a/alibi_detect/od/tests/test_iforest.py b/alibi_detect/od/tests/test_iforest.py
index 2287f41a0..93ae9f354 100644
--- a/alibi_detect/od/tests/test_iforest.py
+++ b/alibi_detect/od/tests/test_iforest.py
@@ -2,6 +2,7 @@
 import pytest
 from sklearn.datasets import load_iris
 from alibi_detect.od import IForest
+from alibi_detect.version import __version__
 
 threshold = [None, 0.]
 threshold_perc = [75., 95.]
@@ -23,7 +24,8 @@ def test_isolation_forest(iforest_params):
     X, y = load_iris(return_X_y=True)
     iforest = IForest(threshold)
     assert iforest.threshold == threshold
-    assert iforest.meta == {'name': 'IForest', 'detector_type': 'offline', 'data_type': 'tabular'}
+    assert iforest.meta == {'name': 'IForest', 'detector_type': 'offline', 'data_type': 'tabular',
+                            'version': __version__}
     iforest.fit(X)
     iforest.infer_threshold(X, threshold_perc=threshold_perc)
     iscore = iforest.score(X)
diff --git a/alibi_detect/od/tests/test_llr.py b/alibi_detect/od/tests/test_llr.py
index 926e113c7..227fb8ab9 100644
--- a/alibi_detect/od/tests/test_llr.py
+++ b/alibi_detect/od/tests/test_llr.py
@@ -4,6 +4,7 @@
 import tensorflow as tf
 from tensorflow.keras.layers import Dense, Input, LSTM
 from alibi_detect.od import LLR
+from alibi_detect.version import __version__
 
 input_dim = 5
 hidden_dim = 20
@@ -55,7 +56,7 @@ def test_llr(llr_params):
     od = LLR(threshold=threshold, sequential=True, model=model, log_prob=likelihood_fn)
 
     assert od.threshold == threshold
-    assert od.meta == {'name': 'LLR', 'detector_type': 'offline', 'data_type': None}
+    assert od.meta == {'name': 'LLR', 'detector_type': 'offline', 'data_type': None, 'version': __version__}
 
     od.fit(
         X_train,
diff --git a/alibi_detect/od/tests/test_mahalanobis.py b/alibi_detect/od/tests/test_mahalanobis.py
index ccee6032a..524f1970f 100644
--- a/alibi_detect/od/tests/test_mahalanobis.py
+++ b/alibi_detect/od/tests/test_mahalanobis.py
@@ -3,6 +3,7 @@
 import pytest
 from sklearn.datasets import load_iris
 from alibi_detect.od import Mahalanobis
+from alibi_detect.version import __version__
 
 threshold = [None, 5.]
 n_components = [2, 3]
@@ -31,7 +32,7 @@ def test_mahalanobis(mahalanobis_params):
                      start_clip=start_clip, max_n=max_n)
     assert mh.threshold == threshold
     assert mh.n == 0
-    assert mh.meta == {'name': 'Mahalanobis', 'detector_type': 'online', 'data_type': 'tabular'}
+    assert mh.meta == {'name': 'Mahalanobis', 'detector_type': 'online', 'data_type': 'tabular', 'version': __version__}
     mh.infer_threshold(X, threshold_perc=threshold_perc)
     assert mh.n == X.shape[0]
     iscore = mh.score(X)  # noqa
diff --git a/alibi_detect/od/tests/test_prophet.py b/alibi_detect/od/tests/test_prophet.py
index 493d87740..7e6fb3553 100644
--- a/alibi_detect/od/tests/test_prophet.py
+++ b/alibi_detect/od/tests/test_prophet.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import pytest
 from alibi_detect.od import OutlierProphet
+from alibi_detect.version import __version__
 
 growth = ['linear', 'logistic']
 return_instance_score = [True, False]
@@ -36,7 +37,8 @@ def test_prophet(prophet_params):
     growth, return_instance_score, return_forecast = prophet_params
     od = OutlierProphet(growth=growth)
     assert isinstance(od.model, fbprophet.forecaster.Prophet)
-    assert od.meta == {'name': 'OutlierProphet', 'detector_type': 'offline', 'data_type': 'time-series'}
+    assert od.meta == {'name': 'OutlierProphet', 'detector_type': 'offline', 'data_type': 'time-series',
+                       'version': __version__}
     if growth == 'logistic':
         df_fit['cap'] = 10.
         df_test['cap'] = 10.
diff --git a/alibi_detect/od/tests/test_seq2seq.py b/alibi_detect/od/tests/test_seq2seq.py
index 74a71bc0f..5c33cb534 100644
--- a/alibi_detect/od/tests/test_seq2seq.py
+++ b/alibi_detect/od/tests/test_seq2seq.py
@@ -3,6 +3,7 @@
 import pytest
 from alibi_detect.od import OutlierSeq2Seq
 from alibi_detect.utils.perturbation import inject_outlier_ts
+from alibi_detect.version import __version__
 
 n_features = [1, 2]
 seq_len = [20, 50]
@@ -35,8 +36,8 @@ def test_seq2seq(seq2seq_params):
     X = np.sin(np.linspace(-50, 50, 10000)).astype(np.float32).reshape((-1, n_features))
 
     # create outliers for threshold and detection
-    X_threshold = inject_outlier_ts(X, perc_outlier=100-threshold_perc, perc_window=10, n_std=10., min_std=9.).data
-    X_outlier = inject_outlier_ts(X, perc_outlier=100-threshold_perc, perc_window=10, n_std=10., min_std=9.).data
+    X_threshold = inject_outlier_ts(X, perc_outlier=100 - threshold_perc, perc_window=10, n_std=10., min_std=9.).data
+    X_outlier = inject_outlier_ts(X, perc_outlier=100 - threshold_perc, perc_window=10, n_std=10., min_std=9.).data
 
     # define architecture
     od = OutlierSeq2Seq(n_features, seq_len, threshold=threshold, latent_dim=latent_dim)
@@ -45,7 +46,8 @@ def test_seq2seq(seq2seq_params):
         assert od.threshold == 0.
     else:
         assert od.threshold == threshold
-    assert od.meta == {'name': 'OutlierSeq2Seq', 'detector_type': 'offline', 'data_type': 'time-series'}
+    assert od.meta == {'name': 'OutlierSeq2Seq', 'detector_type': 'offline', 'data_type': 'time-series',
+                       'version': __version__}
 
     # fit OutlierSeq2Seq
     od.fit(X, epochs=2, verbose=False)
diff --git a/alibi_detect/od/tests/test_sr.py b/alibi_detect/od/tests/test_sr.py
index a638ca07a..b511f9067 100644
--- a/alibi_detect/od/tests/test_sr.py
+++ b/alibi_detect/od/tests/test_sr.py
@@ -2,6 +2,7 @@
 import numpy as np
 import pytest
 from alibi_detect.od import SpectralResidual
+from alibi_detect.version import __version__
 
 # create normal time series and one with perturbations
 t = np.linspace(0, 0.5, 1000)
@@ -35,7 +36,8 @@ def test_sr(sr_params):
     assert od.threshold == threshold
     assert od.meta == {'name': 'SpectralResidual',
                        'detector_type': 'online',
-                       'data_type': 'time-series'}
+                       'data_type': 'time-series',
+                       'version': __version__}
     preds_in = od.predict(X, t, return_instance_score=return_instance_score)
     assert preds_in['data']['is_outlier'].sum() <= 2.
     if return_instance_score:
diff --git a/alibi_detect/od/tests/test_vae.py b/alibi_detect/od/tests/test_vae.py
index 54baba42e..eb23d5f04 100644
--- a/alibi_detect/od/tests/test_vae.py
+++ b/alibi_detect/od/tests/test_vae.py
@@ -6,6 +6,7 @@
 from tensorflow.keras.layers import Dense, InputLayer
 from alibi_detect.od import OutlierVAE
 from alibi_detect.models.tensorflow.losses import elbo
+from alibi_detect.version import __version__
 
 threshold = [None, 5.]
 score_type = ['mse']
@@ -68,7 +69,7 @@ def test_vae(vae_params):
     )
 
     assert vae.threshold == threshold
-    assert vae.meta == {'name': 'OutlierVAE', 'detector_type': 'offline', 'data_type': None}
+    assert vae.meta == {'name': 'OutlierVAE', 'detector_type': 'offline', 'data_type': None, 'version': __version__}
 
     # fit OutlierVAE, infer threshold and compute scores
     vae.fit(X, loss_fn=loss_fn, epochs=5, verbose=False)
diff --git a/alibi_detect/od/tests/test_vaegmm.py b/alibi_detect/od/tests/test_vaegmm.py
index ebefe5d4f..cef1060db 100644
--- a/alibi_detect/od/tests/test_vaegmm.py
+++ b/alibi_detect/od/tests/test_vaegmm.py
@@ -4,6 +4,7 @@
 import tensorflow as tf
 from tensorflow.keras.layers import Dense, InputLayer
 from alibi_detect.od import OutlierVAEGMM
+from alibi_detect.version import __version__
 
 threshold = [None, 5.]
 n_gmm = [1, 2]
@@ -73,7 +74,8 @@ def test_vaegmm(vaegmm_params):
     )
 
     assert vaegmm.threshold == threshold
-    assert vaegmm.meta == {'name': 'OutlierVAEGMM', 'detector_type': 'offline', 'data_type': None}
+    assert vaegmm.meta == {'name': 'OutlierVAEGMM', 'detector_type': 'offline', 'data_type': None,
+                           'version': __version__}
 
     # fit OutlierAEGMM, infer threshold and compute scores
     vaegmm.fit(X, w_recon=w_recon, w_energy=w_energy, epochs=5, batch_size=1000, verbose=False)
diff --git a/alibi_detect/utils/saving.py b/alibi_detect/utils/saving.py
index a9e002d4c..0158590ae 100644
--- a/alibi_detect/utils/saving.py
+++ b/alibi_detect/utils/saving.py
@@ -5,6 +5,7 @@
 import logging
 import os
 from pathlib import Path
+import warnings
 import tensorflow as tf
 from tensorflow.keras.layers import Input, InputLayer
 from tensorflow_probability.python.distributions.distribution import Distribution
@@ -24,6 +25,7 @@
                              OutlierSeq2Seq, OutlierVAE, OutlierVAEGMM, SpectralResidual)
 from alibi_detect.od.llr import build_model
 from alibi_detect.utils.tensorflow.kernels import GaussianRBF
+from alibi_detect.version import __version__
 
 # do not extend pickle dispatch table so as not to change pickle behaviour
 dill.extend(use_dill=False)
@@ -980,6 +982,11 @@ def load_detector(filepath: Union[str, os.PathLike], **kwargs) -> Data:
     # load metadata
     meta_dict = dill.load(open(filepath.joinpath('meta' + suffix), 'rb'))
 
+    # check version
+    if meta_dict['version'] != __version__:
+        warnings.warn(f'Trying to load detector from version {meta_dict["version"]} when using version {__version__}. '
+                      f'This may lead to breaking code or invalid results.')
+
     if 'backend' in list(meta_dict.keys()) and meta_dict['backend'] == 'pytorch':
         raise NotImplementedError('Detectors with PyTorch backend are not yet supported.')