Skip to content

Commit

Permalink
Support for serializing detectors with scikit-learn backends and/or m…
Browse files Browse the repository at this point in the history
…odels (#642)
  • Loading branch information
ascillitoe authored Oct 12, 2022
1 parent b915d63 commit 1898ad2
Show file tree
Hide file tree
Showing 14 changed files with 660 additions and 403 deletions.
7 changes: 7 additions & 0 deletions alibi_detect/saving/_sklearn/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from alibi_detect.saving._sklearn.saving import save_model_config as save_model_config_sk
from alibi_detect.saving._sklearn.loading import load_model as load_model_sk

# Public API of the scikit-learn saving sub-package (aliases created by the imports above).
__all__ = ["save_model_config_sk", "load_model_sk"]
26 changes: 26 additions & 0 deletions alibi_detect/saving/_sklearn/loading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os
from pathlib import Path
from typing import Union

import joblib
from sklearn.base import BaseEstimator


def load_model(filepath: Union[str, os.PathLike],
               ) -> BaseEstimator:
    """
    Load a scikit-learn (or xgboost) model from a saved model directory.

    The model is read from the file ``model.joblib`` inside `filepath`. Models are assumed to be a
    subclass of :class:`~sklearn.base.BaseEstimator`, which includes xgboost models following the
    scikit-learn API
    (see https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn).

    Parameters
    ----------
    filepath
        Saved model directory.

    Returns
    -------
    Loaded model.
    """
    # NOTE: joblib.load is pickle-based, so only load model files from trusted sources.
    model_file = Path(filepath) / 'model.joblib'
    return joblib.load(model_file)
68 changes: 68 additions & 0 deletions alibi_detect/saving/_sklearn/saving.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import logging
import os
from pathlib import Path
from typing import Union

import joblib
from sklearn.base import BaseEstimator

logger = logging.getLogger(__name__)


def save_model_config(model: BaseEstimator,
                      base_path: Path,
                      local_path: Path = Path('.')) -> dict:
    """
    Save a scikit-learn (or xgboost) model and return its config dictionary.

    The model is written by `save_model` into the ``model`` folder under ``base_path/local_path``.
    Models are assumed to be a subclass of :class:`~sklearn.base.BaseEstimator`. This includes xgboost
    models following the scikit-learn API
    (see https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn).

    Parameters
    ----------
    model
        The model to save.
    base_path
        Base filepath to save to (the location of the `config.toml` file).
    local_path
        A local (relative) filepath to append to base_path.

    Returns
    -------
    The model config dict, with keys ``'flavour'`` and ``'src'``.
    """
    # Persist the model under <base_path>/<local_path>/model
    save_model(model, filepath=base_path.joinpath(local_path), save_dir='model')

    # `src` is built from `local_path` only, i.e. it does not include `base_path`
    return {
        'flavour': 'sklearn',
        'src': local_path.joinpath('model'),
    }


def save_model(model: BaseEstimator,
               filepath: Union[str, os.PathLike],
               save_dir: Union[str, os.PathLike] = 'model') -> None:
    """
    Save scikit-learn (and xgboost) models to ``<filepath>/<save_dir>/model.joblib``.

    Models are assumed to be a subclass of :class:`~sklearn.base.BaseEstimator`. This includes xgboost
    models following the scikit-learn API
    (see https://xgboost.readthedocs.io/en/latest/python/python_api.html#module-xgboost.sklearn).

    Parameters
    ----------
    model
        The scikit-learn (or scikit-learn API compatible xgboost) model to save.
    filepath
        Save directory.
    save_dir
        Name of folder to save to within the filepath directory.
    """
    # create folder to save model in (including any missing parent directories)
    model_dir = Path(filepath).joinpath(save_dir)
    if not model_dir.is_dir():
        # Lazy %-style arguments: the message is only formatted if the record is actually emitted
        logger.warning('Directory %s does not exist and is now created.', model_dir)
        model_dir.mkdir(parents=True, exist_ok=True)

    # save model
    joblib.dump(model, model_dir.joinpath('model.joblib'))
32 changes: 32 additions & 0 deletions alibi_detect/saving/_sklearn/tests/test_saving_sk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from pytest_cases import param_fixture, parametrize, parametrize_with_cases

from alibi_detect.saving.tests.datasets import ContinuousData
from alibi_detect.saving.tests.models import classifier_model, xgb_classifier_model

from alibi_detect.saving.loading import _load_model_config
from alibi_detect.saving.saving import _path2str, _save_model_config
from alibi_detect.saving.schemas import ModelConfig

# Parametrized `backend` fixture for this test module; its only value is 'sklearn'.
backend = param_fixture("backend", ['sklearn'])


@parametrize_with_cases("data", cases=ContinuousData.data_synthetic_nd, prefix='data_')
@parametrize('model', [classifier_model, xgb_classifier_model])
def test_save_model_sk(data, model, tmp_path):
    """
    Save/load round trip of _save_model_config and _load_model_config for scikit-learn and xgboost models.
    """
    model_dir = tmp_path / 'model'

    # Save the model, then validate the resulting config against the ModelConfig schema
    saved_cfg, _ = _save_model_config(model, base_path=tmp_path)
    validated_cfg = ModelConfig(**_path2str(saved_cfg)).dict()
    assert model_dir.is_dir()
    assert (model_dir / 'model.joblib').is_file()

    # The saved `src` is relative, so point it at the absolute location before loading
    validated_cfg['src'] = model_dir

    # Load the model back and check it has the same type as the original
    loaded = _load_model_config(validated_cfg)
    assert isinstance(loaded, type(model))
22 changes: 7 additions & 15 deletions alibi_detect/saving/_tensorflow/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def load_model(filepath: Union[str, os.PathLike],
return model


def prep_model_and_emb(model: Optional[Callable], emb: Optional[TransformerEmbedding]) -> Callable:
def prep_model_and_emb(model: Callable, emb: Optional[TransformerEmbedding]) -> Callable:
"""
Function to perform final preprocessing of model (and/or embedding) before it is passed to preprocess_drift.
Expand All @@ -78,25 +78,17 @@ def prep_model_and_emb(model: Optional[Callable], emb: Optional[TransformerEmbed
model
A compatible model.
emb
A text embedding model.
An optional text embedding model.
Returns
-------
The final model ready to be passed to preprocess_drift.
"""
# If a model exists, process it (and embedding)
if model is not None:
model = model.encoder if isinstance(model, UAE) else model # This is to avoid nesting UAE's already a UAE
if emb is not None:
model = _Encoder(emb, mlp=model)
model = UAE(encoder_net=model)
# If no model exists, store embedding as model
else:
model = emb
if model is None:
raise ValueError("A 'model' and/or `embedding` must be specified when "
"preprocess_fn='preprocess_drift'")

# Process model (and embedding)
model = model.encoder if isinstance(model, UAE) else model # This is to avoid nesting UAE's already a UAE
if emb is not None:
model = _Encoder(emb, mlp=model)
model = UAE(encoder_net=model)
return model


Expand Down
13 changes: 10 additions & 3 deletions alibi_detect/saving/_tensorflow/saving.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@

def save_model_config(model: Callable,
base_path: Path,
input_shape: tuple,
input_shape: Optional[tuple],
local_path: Path = Path('.')) -> Tuple[dict, Optional[dict]]:
"""
Save a model to a config dictionary. When a model has a text embedding model contained within it,
Save a TensorFlow model to a config dictionary. When a model has a text embedding model contained within it,
this is extracted and saved separately.
Parameters
Expand All @@ -53,6 +53,9 @@ def save_model_config(model: Callable,
cfg_embed = None # type: Optional[Dict[str, Any]]
if isinstance(model, UAE):
if isinstance(model.encoder.layers[0], TransformerEmbedding): # if UAE contains embedding and encoder
if input_shape is None:
raise ValueError('Cannot save combined embedding and model when `input_shape` is None.')

# embedding
embed = model.encoder.layers[0]
cfg_embed = save_embedding_config(embed, base_path, local_path.joinpath('embedding'))
Expand All @@ -78,7 +81,10 @@ def save_model_config(model: Callable,
if model is not None:
filepath = base_path.joinpath(local_path)
save_model(model, filepath=filepath, save_dir='model')
cfg_model = {'src': local_path.joinpath('model')}
cfg_model = {
'flavour': 'tensorflow',
'src': local_path.joinpath('model')
}
return cfg_model, cfg_embed


Expand Down Expand Up @@ -142,6 +148,7 @@ def save_embedding_config(embed: TransformerEmbedding,
cfg_embed.update({'type': embed.emb_type})
cfg_embed.update({'layers': embed.hs_emb.keywords['layers']})
cfg_embed.update({'src': local_path})
cfg_embed.update({'flavour': 'tensorflow'})

# Save embedding model
logger.info('Saving embedding model to {}.'.format(filepath))
Expand Down
64 changes: 64 additions & 0 deletions alibi_detect/saving/_tensorflow/tests/test_saving_tf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from pytest_cases import param_fixture, parametrize, parametrize_with_cases

from alibi_detect.saving.tests.datasets import ContinuousData
from alibi_detect.saving.tests.models import encoder_model

from alibi_detect.cd.tensorflow import HiddenOutput as HiddenOutput_tf
from alibi_detect.saving.loading import _load_model_config, _load_optimizer_config
from alibi_detect.saving.saving import _path2str, _save_model_config
from alibi_detect.saving.schemas import ModelConfig

# Parametrized `backend` fixture for this test module; its only value is 'tensorflow'.
backend = param_fixture("backend", ['tensorflow'])


def test_load_optimizer_tf(backend):
    """Test the tensorflow _load_optimizer_config."""
    class_name = 'Adam'
    # Hyperparameters placed in the config and expected back as attributes of the loaded optimizer
    expected = {
        'learning_rate': 0.01,
        'epsilon': 1e-7,
        'amsgrad': False,
    }

    # Load
    cfg_opt = {
        'class_name': class_name,
        'config': {'name': class_name, **expected},
    }
    optimizer = _load_optimizer_config(cfg_opt, backend=backend)

    assert type(optimizer).__name__ == class_name
    for attr, value in expected.items():
        assert getattr(optimizer, attr) == value


@parametrize_with_cases("data", cases=ContinuousData.data_synthetic_nd, prefix='data_')
@parametrize('model', [encoder_model])
@parametrize('layer', [None, -1])
def test_save_model_tf(data, model, layer, tmp_path):
    """
    Save/load round trip of _save_model_config and _load_model_config for a tensorflow model.
    """
    model_dir = tmp_path / 'model'

    # Save the model, then validate the resulting config against the ModelConfig schema
    shape = (data[0].shape[1],)
    saved_cfg, _ = _save_model_config(model, base_path=tmp_path, input_shape=shape)
    validated_cfg = ModelConfig(**_path2str(saved_cfg)).dict()
    assert model_dir.is_dir()
    assert (model_dir / 'model.h5').is_file()

    # The saved `src` is relative, so point it at the absolute location before loading
    validated_cfg['src'] = model_dir
    if layer is not None:
        validated_cfg['layer'] = layer

    # Load the model; when a `layer` is set, a HiddenOutput wrapper is expected instead of the model type
    loaded = _load_model_config(validated_cfg)
    expected_type = type(model) if layer is None else HiddenOutput_tf
    assert isinstance(loaded, expected_type)
Loading

0 comments on commit 1898ad2

Please sign in to comment.