Merge patch/v0.10.5 into master #733

Merged 5 commits into master from patch/v0.10.5 on Jan 26, 2023
11 changes: 10 additions & 1 deletion CHANGELOG.md
@@ -1,7 +1,7 @@
# Change Log

## v0.11.0dev
[Full Changelog](https://github.com/SeldonIO/alibi-detect/compare/v0.10.3...master)
[Full Changelog](https://github.com/SeldonIO/alibi-detect/compare/v0.10.5...master)

### Added
- **New feature** MMD drift detector has been extended with a [KeOps](https://www.kernel-operations.io/keops/index.html) backend to scale and speed up the detector.
@@ -26,6 +26,15 @@ See the [documentation](https://docs.seldon.io/projects/alibi-detect/en/latest/c
- UTF-8 decoding is enforced when `README.md` is opened by `setup.py`. This is to prevent pip install errors on systems with `PYTHONIOENCODING` set to use other encoders ([#605](https://github.com/SeldonIO/alibi-detect/pull/605)).
- Skip specific save/load tests that require downloading remote artefacts if the relevant URI(s) is/are down ([#607](https://github.com/SeldonIO/alibi-detect/pull/607)).

## v0.10.5
## [v0.10.5](https://github.com/SeldonIO/alibi-detect/tree/v0.10.5) (2023-01-26)
[Full Changelog](https://github.com/SeldonIO/alibi-detect/compare/v0.10.4...v0.10.5)

### Fixed
- Fixed two bugs preventing backward compatibility when loading detectors saved with `<v0.10.0`
([#729](https://github.com/SeldonIO/alibi-detect/pull/729) and [#732](https://github.com/SeldonIO/alibi-detect/pull/732)). These bugs also meant that detectors
saved with `save_detector(..., legacy=True)` in `>=v0.10.0` did not properly obey the legacy file format. The `config.toml` file format used by default in `>=v0.10.0` is unaffected.

## v0.10.4
## [v0.10.4](https://github.com/SeldonIO/alibi-detect/tree/v0.10.4) (2022-10-21)
[Full Changelog](https://github.com/SeldonIO/alibi-detect/compare/v0.10.3...v0.10.4)
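
To make the changelog entry above concrete, here is a minimal sketch of the save/load round trip that the backward-compatibility fix repairs. The detector choice, paths and data are illustrative; `save_detector`, `load_detector` and the `legacy=True` flag are the public API referenced in the entry.

```python
import numpy as np
from alibi_detect.cd import MMDDrift
from alibi_detect.saving import save_detector, load_detector

# A small drift detector on random reference data (any detector with legacy support works).
x_ref = np.random.randn(100, 10).astype(np.float32)
cd = MMDDrift(x_ref, backend='tensorflow', p_val=0.05)

# Legacy .dill format, as written by <v0.10.0 or by save_detector(..., legacy=True) in >=v0.10.0.
save_detector(cd, './my_detector', legacy=True)

# With this patch, loading restores the x_ref preprocessing flags correctly.
cd_loaded = load_detector('./my_detector')
print(cd_loaded.predict(np.random.randn(50, 10).astype(np.float32))['data']['is_drift'])
```
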
4 changes: 2 additions & 2 deletions CITATION.cff
@@ -19,6 +19,6 @@ authors:
- family-names: "Athorne"
given-names: "Alex"
title: "Alibi Detect: Algorithms for outlier, adversarial and drift detection"
version: 0.10.4
date-released: 2022-10-21
version: 0.10.5
date-released: 2023-01-26
url: "https://github.com/SeldonIO/alibi-detect"
4 changes: 2 additions & 2 deletions README.md
@@ -407,8 +407,8 @@ BibTeX entry:
title = {Alibi Detect: Algorithms for outlier, adversarial and drift detection},
author = {Van Looveren, Arnaud and Klaise, Janis and Vacanti, Giovanni and Cobb, Oliver and Scillitoe, Ashley and Samoilescu, Robert and Athorne, Alex},
url = {https://github.com/SeldonIO/alibi-detect},
version = {0.10.4},
date = {2022-10-21},
version = {0.10.5},
date = {2023-01-26},
year = {2019}
}
```
36 changes: 24 additions & 12 deletions alibi_detect/saving/_tensorflow/loading.py
@@ -36,7 +36,7 @@


def load_model(filepath: Union[str, os.PathLike],
load_dir: str = 'model',
filename: str = 'model',
custom_objects: dict = None,
layer: Optional[int] = None,
) -> tf.keras.Model:
@@ -47,8 +47,8 @@ def load_model(filepath: Union[str, os.PathLike],
----------
filepath
Saved model directory.
load_dir
Name of saved model folder within the filepath directory.
filename
Name of saved model within the filepath directory.
custom_objects
Optional custom objects when loading the TensorFlow model.
layer
@@ -60,11 +60,12 @@
Loaded model.
"""
# TODO - update this to accept tf format - later PR.
model_dir = Path(filepath).joinpath(load_dir)
model_dir = Path(filepath)
model_name = filename + '.h5'
# Check if model exists
if 'model.h5' not in [f.name for f in model_dir.glob('[!.]*.h5')]:
raise FileNotFoundError(f'No .h5 file found in {model_dir}.')
model = tf.keras.models.load_model(model_dir.joinpath('model.h5'), custom_objects=custom_objects)
if model_name not in [f.name for f in model_dir.glob('[!.]*.h5')]:
raise FileNotFoundError(f'{model_name} not found in {model_dir.resolve()}.')
model = tf.keras.models.load_model(model_dir.joinpath(model_name), custom_objects=custom_objects)
# Optionally extract hidden layer
if isinstance(layer, int):
model = HiddenOutput(model, layer=layer)
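
A hypothetical before/after sketch of the new calling convention for this internal helper (the directory and filename stems below are made up):

```python
from alibi_detect.saving._tensorflow.loading import load_model

# Before this PR: pass a parent directory plus a `load_dir` subfolder; the file itself
# was always expected to be named model.h5:
#     model = load_model('my_detector', load_dir='encoder')      # -> my_detector/encoder/model.h5
# After this PR: pass the model directory plus a `filename` stem:
model = load_model('my_detector/model', filename='encoder')      # -> my_detector/model/encoder.h5
```
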
@@ -233,7 +234,17 @@ def load_detector_legacy(filepath: Union[str, os.PathLike], suffix: str, **kwarg
# load outlier detector specific parameters
state_dict = dill.load(open(filepath.joinpath(detector_name + suffix), 'rb'))

# Update the drift detector preprocess kwargs if state_dict is from an old alibi-detect version (<v0.10).
# See https://github.com/SeldonIO/alibi-detect/pull/732
if 'kwargs' in state_dict and 'other' in state_dict: # A drift detector if both of these exist
if 'x_ref_preprocessed' not in state_dict['kwargs']: # if already exists then must have been saved w/ >=v0.10
# Set x_ref_preprocessed to True
state_dict['kwargs']['x_ref_preprocessed'] = True
# Move `preprocess_x_ref` from `other` to `kwargs`
state_dict['kwargs']['preprocess_x_ref'] = state_dict['other']['preprocess_x_ref']

# initialize detector
model_dir = filepath.joinpath('model')
detector: Optional[Detector] = None # to avoid mypy errors
if detector_name == 'OutlierAE':
ae = load_tf_ae(filepath)
@@ -254,13 +265,13 @@ def load_detector_legacy(filepath: Union[str, os.PathLike], suffix: str, **kwarg
elif detector_name == 'AdversarialAE':
ae = load_tf_ae(filepath)
custom_objects = kwargs['custom_objects'] if 'custom_objects' in k else None
model = load_model(filepath, custom_objects=custom_objects)
model = load_model(model_dir, custom_objects=custom_objects)
model_hl = load_tf_hl(filepath, model, state_dict)
detector = init_ad_ae(state_dict, ae, model, model_hl)
elif detector_name == 'ModelDistillation':
md = load_model(filepath, load_dir='distilled_model')
md = load_model(model_dir, filename='distilled_model')
custom_objects = kwargs['custom_objects'] if 'custom_objects' in k else None
model = load_model(filepath, custom_objects=custom_objects)
model = load_model(model_dir, custom_objects=custom_objects)
detector = init_ad_md(state_dict, md, model)
elif detector_name == 'OutlierProphet':
detector = init_od_prophet(state_dict) # type: ignore[assignment]
@@ -274,8 +285,9 @@ def load_detector_legacy(filepath: Union[str, os.PathLike], suffix: str, **kwarg
if state_dict['other']['load_text_embedding']:
emb, tokenizer = load_text_embed(filepath)
try: # legacy load_model behaviour was to return None if not found. Now it raises error, hence need try-except.
model = load_model(filepath, load_dir='encoder')
model = load_model(model_dir, filename='encoder')
except FileNotFoundError:
logger.warning('No model found in {}, setting `model` to `None`.'.format(model_dir))
model = None
if detector_name == 'KSDrift':
load_fn = init_cd_ksdrift # type: ignore[assignment]
@@ -287,7 +299,7 @@ def load_detector_legacy(filepath: Union[str, os.PathLike], suffix: str, **kwarg
load_fn = init_cd_tabulardrift # type: ignore[assignment]
elif detector_name == 'ClassifierDriftTF':
# Don't need try-except here since model is not optional for ClassifierDrift
clf_drift = load_model(filepath, load_dir='clf_drift')
clf_drift = load_model(model_dir, filename='clf_drift')
load_fn = partial(init_cd_classifierdrift, clf_drift) # type: ignore[assignment]
else:
raise NotImplementedError
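
The new block near the top of `load_detector_legacy` upgrades drift-detector state dicts pickled with `<v0.10.0` in place. A small stand-alone illustration, using made-up values (only the key names come from the diff):

```python
# state_dict as written by alibi-detect <v0.10.0: no 'x_ref_preprocessed' under 'kwargs',
# and 'preprocess_x_ref' stored under 'other'.
state_dict = {
    'kwargs': {'p_val': 0.05},
    'other': {'preprocess_x_ref': True, 'n_features': 10},
}

if 'kwargs' in state_dict and 'other' in state_dict:          # drift detectors have both
    if 'x_ref_preprocessed' not in state_dict['kwargs']:      # i.e. saved with <v0.10.0
        state_dict['kwargs']['x_ref_preprocessed'] = True
        state_dict['kwargs']['preprocess_x_ref'] = state_dict['other']['preprocess_x_ref']

# state_dict['kwargs'] now contains the kwargs expected by >=v0.10.0 detector constructors.
```
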
23 changes: 12 additions & 11 deletions alibi_detect/saving/_tensorflow/saving.py
@@ -81,7 +81,7 @@ def save_model_config(model: Callable,

if model is not None:
filepath = base_path.joinpath(local_path)
save_model(model, filepath=filepath, save_dir='model')
save_model(model, filepath=filepath.joinpath('model'))
cfg_model = {
'flavour': Framework.TENSORFLOW.value,
'src': local_path.joinpath('model')
@@ -91,7 +91,7 @@ def save_model(model: tf.keras.Model,

def save_model(model: tf.keras.Model,
filepath: Union[str, os.PathLike],
save_dir: Union[str, os.PathLike] = 'model',
filename: str = 'model',
save_format: Literal['tf', 'h5'] = 'h5') -> None: # TODO - change to tf, later PR
"""
Save TensorFlow model.
@@ -102,20 +102,20 @@ def save_model(model: tf.keras.Model,
The tf.keras.Model to save.
filepath
Save directory.
save_dir
Name of folder to save to within the filepath directory.
filename
Name of file to save to within the filepath directory.
save_format
The format to save to. 'tf' to save to the newer SavedModel format, 'h5' to save to the lighter-weight
legacy hdf5 format.
"""
# create folder to save model in
model_path = Path(filepath).joinpath(save_dir)
model_path = Path(filepath)
if not model_path.is_dir():
logger.warning('Directory {} does not exist and is now created.'.format(model_path))
model_path.mkdir(parents=True, exist_ok=True)

# save model
model_path = model_path.joinpath('model.h5') if save_format == 'h5' else model_path
model_path = model_path.joinpath(filename + '.h5') if save_format == 'h5' else model_path

if isinstance(model, tf.keras.Model):
model.save(model_path, save_format=save_format)
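
Mirroring the loading side, a hypothetical sketch of the revised save call (the model and paths are illustrative):

```python
import tensorflow as tf
from alibi_detect.saving._tensorflow.saving import save_model

clf = tf.keras.Sequential([tf.keras.layers.Dense(2, activation='softmax', input_shape=(4,))])

# Before this PR: save_model(clf, 'my_detector', save_dir='clf_drift') -> my_detector/clf_drift/model.h5
# After this PR: pass the target directory and a filename stem instead.
save_model(clf, 'my_detector/model', filename='clf_drift')       # -> my_detector/model/clf_drift.h5
```
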
@@ -254,30 +254,31 @@ def save_detector_legacy(detector, filepath):
dill.dump(state_dict, f)

# save detector specific TensorFlow models
model_dir = filepath.joinpath('model')
if isinstance(detector, OutlierAE):
save_tf_ae(detector, filepath)
elif isinstance(detector, OutlierVAE):
save_tf_vae(detector, filepath)
elif isinstance(detector, (ChiSquareDrift, ClassifierDrift, KSDrift, MMDDrift, TabularDrift)):
if model is not None:
save_model(model, filepath, save_dir='encoder')
save_model(model, model_dir, filename='encoder')
if embed is not None:
save_embedding_legacy(embed, embed_args, filepath)
if tokenizer is not None:
tokenizer.save_pretrained(filepath.joinpath('model'))
if detector_name == 'ClassifierDriftTF':
save_model(clf_drift, filepath, save_dir='clf_drift')
save_model(clf_drift, model_dir, filename='clf_drift')
elif isinstance(detector, OutlierAEGMM):
save_tf_aegmm(detector, filepath)
elif isinstance(detector, OutlierVAEGMM):
save_tf_vaegmm(detector, filepath)
elif isinstance(detector, AdversarialAE):
save_tf_ae(detector, filepath)
save_model(detector.model, filepath)
save_model(detector.model, model_dir)
save_tf_hl(detector.model_hl, filepath)
elif isinstance(detector, ModelDistillation):
save_model(detector.distilled_model, filepath, save_dir='distilled_model')
save_model(detector.model, filepath, save_dir='model')
save_model(detector.distilled_model, model_dir, filename='distilled_model')
save_model(detector.model, model_dir, filename='model')
elif isinstance(detector, OutlierSeq2Seq):
save_tf_s2s(detector, filepath)
elif isinstance(detector, LLR):
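
Taken together with the loading changes, every TensorFlow artefact of a legacy-saved detector now lands in a single `model/` subfolder with a distinct filename, rather than one `model.h5` per subfolder. A hypothetical listing for a text drift detector (the detector name, directory and exact file set are assumptions, not exhaustive):

```python
from pathlib import Path

# List what save_detector(..., legacy=True) wrote for a hypothetical detector directory.
for p in sorted(Path('my_detector').rglob('*')):
    print(p.relative_to('my_detector'))

# Roughly expected entries, based on the save/load calls in this PR:
#   KSDrift.dill          # detector state_dict (detector_name + suffix, assuming suffix='.dill')
#   model/encoder.h5      # preprocessing model, via save_model(..., filename='encoder')
#   model/...             # tokenizer files from tokenizer.save_pretrained(filepath / 'model')
```
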
2 changes: 1 addition & 1 deletion alibi_detect/saving/loading.py
@@ -281,7 +281,7 @@ def _load_model_config(cfg: dict) -> Callable:
"a compatible model.")

if flavour == Framework.TENSORFLOW:
model = load_model_tf(src, load_dir='.', custom_objects=custom_obj, layer=layer)
model = load_model_tf(src, custom_objects=custom_obj, layer=layer)
elif flavour == Framework.PYTORCH:
model = load_model_pt(src, layer=layer)
elif flavour == Framework.SKLEARN:
2 changes: 1 addition & 1 deletion alibi_detect/utils/tests/test_saving_legacy.py
@@ -1,6 +1,6 @@
"""
Tests for saving/loading of detectors with legacy .dill state_dict. As legacy save/load functionality becomes
deprecated, these tests will be removed, and more tests will be added to test_savin.py.
deprecated, these tests will be removed, and more tests will be added to test_saving.py.
"""
from alibi_detect.utils.missing_optional_dependency import MissingDependency
from functools import partial