diff --git a/gtime/explainability/explainer.py b/gtime/explainability/explainer.py index f105f3a..92ce3d5 100644 --- a/gtime/explainability/explainer.py +++ b/gtime/explainability/explainer.py @@ -60,6 +60,7 @@ class _LimeExplainer(_RegressorExplainer): >>> explainer.explanations_[0] {'d': -0.10406889434277307, 'c': 0.07973507022816899, 'b': 0.02312395991550859, 'a': 0.006403509251399996, 'e': 0.006272607738125953} """ + def fit( self, model: RegressorMixin, X: np.ndarray, feature_names: List[str] = None ): diff --git a/gtime/forecasting/gar.py b/gtime/forecasting/gar.py index e8e037d..c2ae26d 100644 --- a/gtime/forecasting/gar.py +++ b/gtime/forecasting/gar.py @@ -13,7 +13,9 @@ from gtime.regressors.multi_output import MultiFeatureMultiOutputRegressor -def initialize_estimator(estimator: RegressorMixin, explainer_type: Optional[str]) -> RegressorMixin: +def initialize_estimator( + estimator: RegressorMixin, explainer_type: Optional[str] +) -> RegressorMixin: if explainer_type is None: return estimator else: diff --git a/gtime/forecasting/tests/test_gar.py b/gtime/forecasting/tests/test_gar.py index 8f5e73c..4c6b1c5 100644 --- a/gtime/forecasting/tests/test_gar.py +++ b/gtime/forecasting/tests/test_gar.py @@ -1,3 +1,4 @@ +import itertools import random from typing import List @@ -47,6 +48,34 @@ ) +forecasters = [GAR, GARFF, MultiFeatureGAR] +explainers = [ + "shap", +] # "lime"] for speed reason + + +@pytest.mark.parametrize( + "forecaster,explainer", itertools.product(forecasters, explainers) +) +@given( + X_y=X_y_matrices( + horizon=4, + df_transformer=df_transformer, + min_length=10, + allow_nan_infinity=False, + ) +) +def test_predict_has_explainers(forecaster, explainer, X_y): + X, y = X_y + X_train, y_train, X_test, y_test = FeatureSplitter().transform(X, y) + model = forecaster(LinearRegression(), explainer_type=explainer) + model.fit(X_train, y_train) + model.predict(X_test.iloc[:1, :]) + assert len(model.estimators_) == y_test.shape[1] + for estimator in model.estimators_: + assert len(estimator.explainer_.explanations_) == 1 + + @pytest.fixture def time_series(): testing.N, testing.K = 200, 1 @@ -131,7 +160,7 @@ def test_initialize_estimator(estimator): @given(models()) def test_initialize_estimator_explainable(estimator): - explainable_estimator = initialize_estimator(estimator, explainer_type='shap') + explainable_estimator = initialize_estimator(estimator, explainer_type="shap") assert isinstance(explainable_estimator, ExplainableRegressor) assert isinstance(explainable_estimator.explainer, _ShapExplainer) diff --git a/gtime/regressors/explainable.py b/gtime/regressors/explainable.py index 24e4c5d..9e0b295 100644 --- a/gtime/regressors/explainable.py +++ b/gtime/regressors/explainable.py @@ -53,8 +53,8 @@ def __init__(self, estimator: RegressorMixin, explainer_type: str): self.explainer = self._initialize_explainer() def _check_estimator(self, estimator: RegressorMixin) -> RegressorMixin: - if not hasattr(estimator, 'fit') or not hasattr(estimator, 'predict'): - raise TypeError(f'Estimator not compatible: {estimator}') + if not hasattr(estimator, "fit") or not hasattr(estimator, "predict"): + raise TypeError(f"Estimator not compatible: {estimator}") return estimator def _initialize_explainer(self) -> Union[_LimeExplainer, _ShapExplainer]: @@ -82,7 +82,9 @@ def fit(self, X: np.ndarray, y: np.ndarray, feature_names: List[str] = None): Fitted `ExplainableRegressor` """ self.estimator_ = self.estimator.fit(X, y) - self.explainer_ = self.explainer.fit(self.estimator_, X, feature_names=feature_names) + self.explainer_ = self.explainer.fit( + self.estimator_, X, feature_names=feature_names + ) return self def predict(self, X: np.ndarray): diff --git a/gtime/regressors/tests/test_explainable.py b/gtime/regressors/tests/test_explainable.py index 0bccad4..d4f6379 100644 --- a/gtime/regressors/tests/test_explainable.py +++ b/gtime/regressors/tests/test_explainable.py @@ -39,7 +39,7 @@ def test_constructor(self, estimator, explainer_type): @given(estimator=regressors()) def test_constructor_bad_explainer(self, estimator): with pytest.raises(ValueError): - ExplainableRegressor(estimator, 'bad') + ExplainableRegressor(estimator, "bad") @pytest.mark.parametrize("explainer_type", ["lime", "shap"]) @given(bad_estimator=bad_regressors()) @@ -84,7 +84,7 @@ def test_fit_values(self, estimator, explainer_type, X_y): ) def test_predict_values(self, estimator, explainer_type, X_y): X, y = X_y - X_test = X[:2, :] + X_test = X[:1, :] regressor = ExplainableRegressor(estimator, explainer_type) regressor_predictions = regressor.fit(X, y).predict(X_test) @@ -92,4 +92,3 @@ def test_predict_values(self, estimator, explainer_type, X_y): estimator_predictions = cloned_estimator.fit(X, y).predict(X_test) assert regressor_predictions.shape == estimator_predictions.shape - diff --git a/gtime/regressors/tests/test_multi_output.py b/gtime/regressors/tests/test_multi_output.py index f069c8e..c18b9f3 100644 --- a/gtime/regressors/tests/test_multi_output.py +++ b/gtime/regressors/tests/test_multi_output.py @@ -68,7 +68,11 @@ def test_constructor(self, estimator): @given( data=data(), - X_y=numpy_X_y_matrices(X_y_shapes=shape_X_y_matrices(y_as_vector=False), min_value=-10000, max_value=10000), + X_y=numpy_X_y_matrices( + X_y_shapes=shape_X_y_matrices(y_as_vector=False), + min_value=-10000, + max_value=10000, + ), ) def test_fit_bad_y(self, data, estimator, X_y): X, y = X_y @@ -84,7 +88,13 @@ def test_fit_bad_y(self, data, estimator, X_y): X, y, target_to_features_dict=target_to_feature_dict ) - @given(X_y=numpy_X_y_matrices(X_y_shapes=shape_X_y_matrices(y_as_vector=False), min_value=-10000, max_value=10000)) + @given( + X_y=numpy_X_y_matrices( + X_y_shapes=shape_X_y_matrices(y_as_vector=False), + min_value=-10000, + max_value=10000, + ) + ) def test_fit_as_multi_output_regressor_if_target_to_feature_none( self, estimator, X_y ): @@ -110,7 +120,11 @@ def test_error_predict_with_no_fit(self, estimator, X): @given( data=data(), - X_y=numpy_X_y_matrices(X_y_shapes=shape_X_y_matrices(y_as_vector=False), min_value=-10000, max_value=10000), + X_y=numpy_X_y_matrices( + X_y_shapes=shape_X_y_matrices(y_as_vector=False), + min_value=-10000, + max_value=10000, + ), ) def test_fit_target_to_feature_dict_working(self, data, X_y, estimator): X, y = X_y @@ -126,7 +140,11 @@ def test_fit_target_to_feature_dict_working(self, data, X_y, estimator): @given( data=data(), - X_y=numpy_X_y_matrices(X_y_shapes=shape_X_y_matrices(y_as_vector=False), min_value=-10000, max_value=10000), + X_y=numpy_X_y_matrices( + X_y_shapes=shape_X_y_matrices(y_as_vector=False), + min_value=-10000, + max_value=10000, + ), ) def test_fit_target_to_feature_dict_consistent(self, data, X_y, estimator): X, y = X_y @@ -147,7 +165,11 @@ def test_fit_target_to_feature_dict_consistent(self, data, X_y, estimator): @given( data=data(), - X_y=numpy_X_y_matrices(X_y_shapes=shape_X_y_matrices(y_as_vector=False), min_value=-10000, max_value=10000), + X_y=numpy_X_y_matrices( + X_y_shapes=shape_X_y_matrices(y_as_vector=False), + min_value=-10000, + max_value=10000, + ), ) def test_predict_target_to_feature_dict(self, data, X_y, estimator): X, y = X_y @@ -165,7 +187,11 @@ def test_predict_target_to_feature_dict(self, data, X_y, estimator): @given( data=data(), - X_y=numpy_X_y_matrices(X_y_shapes=shape_X_y_matrices(y_as_vector=False), min_value=-10000, max_value=10000), + X_y=numpy_X_y_matrices( + X_y_shapes=shape_X_y_matrices(y_as_vector=False), + min_value=-10000, + max_value=10000, + ), ) def test_error_predict_target_to_feature_dict_wrong_X_shape( self, data, X_y, estimator diff --git a/gtime/utils/fixtures.py b/gtime/utils/fixtures.py index f1201ab..cad0f84 100644 --- a/gtime/utils/fixtures.py +++ b/gtime/utils/fixtures.py @@ -69,6 +69,7 @@ def _single_element_lazy_fixtures(*args): def lazy_fixtures(*args): if isinstance(args[0], tuple): - return [tuple([pytest.lazy_fixture(arg[0].__name__), *arg[1:]]) for arg in args] + raise NotImplementedError + # return [tuple([pytest.lazy_fixture(arg[0].__name__), *arg[1:]]) for arg in args] else: return _single_element_lazy_fixtures(*args) diff --git a/gtime/utils/hypothesis/general_strategies.py b/gtime/utils/hypothesis/general_strategies.py index 2af4c8f..7e96591 100644 --- a/gtime/utils/hypothesis/general_strategies.py +++ b/gtime/utils/hypothesis/general_strategies.py @@ -21,12 +21,6 @@ def ordered_pair(min_value: int, max_value: int): ) -def shape_vector(min_shape=30, max_shape=200): - return tuples( - integers(min_shape, max_shape) - ) - - def shape_matrix(min_shape_0=30, max_shape_0=200, min_shape_1=5, max_shape_1=10): return tuples( integers(min_shape_0, max_shape_0), integers(min_shape_1, max_shape_1) diff --git a/requirements.txt b/requirements.txt index 38a0b25..3ab2413 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ -lime pandas>=0.25.3 workalendar>=7.1.1 scipy>=0.17.0 scikit-learn>=0.22.0 -matplotlib>=3.1.0 \ No newline at end of file +matplotlib>=3.1.0 +lime>=0.2.0.0 +shap>=0.35