diff --git a/doc/whats_new/v0.12.rst b/doc/whats_new/v0.12.rst index 40bd1056e..6585c3b58 100644 --- a/doc/whats_new/v0.12.rst +++ b/doc/whats_new/v0.12.rst @@ -1,5 +1,20 @@ .. _changes_0_12: +Version 0.12.1 +============== + +**In progress** + +Changelog +--------- + +Bug fixes +......... + +- Fix a bug in :class:`~imblearn.under_sampling.InstanceHardnessThreshold` where + `estimator` could not be a :class:`~sklearn.pipeline.Pipeline` object. + :pr:`1049` by :user:`Gonenc Mogol <gmogol>`. + Version 0.12.0 ============== diff --git a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py index 52d9280b6..dac3f3c33 100644 --- a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py +++ b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py @@ -10,7 +10,7 @@ from collections import Counter import numpy as np -from sklearn.base import ClassifierMixin, clone +from sklearn.base import clone, is_classifier from sklearn.ensemble import RandomForestClassifier from sklearn.ensemble._base import _set_random_states from sklearn.model_selection import StratifiedKFold, cross_val_predict @@ -140,7 +140,7 @@ def _validate_estimator(self, random_state): if ( self.estimator is not None - and isinstance(self.estimator, ClassifierMixin) + and is_classifier(self.estimator) and hasattr(self.estimator, "predict_proba") ): self.estimator_ = clone(self.estimator) diff --git a/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py index 5d7008747..a63bb45a0 100644 --- a/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py +++ b/imblearn/under_sampling/_prototype_selection/tests/test_instance_hardness_threshold.py @@ -6,6 +6,7 @@ import numpy as np from sklearn.ensemble import 
GradientBoostingClassifier, RandomForestClassifier from sklearn.naive_bayes import GaussianNB as NB +from sklearn.pipeline import make_pipeline from sklearn.utils._testing import assert_array_equal from imblearn.under_sampling import InstanceHardnessThreshold @@ -93,3 +94,19 @@ def test_iht_fit_resample_default_estimator(): assert isinstance(iht.estimator_, RandomForestClassifier) assert X_resampled.shape == (12, 2) assert y_resampled.shape == (12,) + + +def test_iht_estimator_pipeline(): + """Check that we can pass a pipeline containing a classifier. + + Checking if we have a classifier should not be based on inheriting from + `ClassifierMixin`. + + Non-regression test for: + https://github.com/scikit-learn-contrib/imbalanced-learn/pull/1049 + """ + model = make_pipeline(GradientBoostingClassifier(random_state=RND_SEED)) + iht = InstanceHardnessThreshold(estimator=model, random_state=RND_SEED) + X_resampled, y_resampled = iht.fit_resample(X, Y) + assert X_resampled.shape == (12, 2) + assert y_resampled.shape == (12,)