diff --git a/doc/whats_new/v0.11.rst b/doc/whats_new/v0.11.rst index 666be580c..8b3b54d73 100644 --- a/doc/whats_new/v0.11.rst +++ b/doc/whats_new/v0.11.rst @@ -14,6 +14,10 @@ Bug fixes they are plugged into an Euclidean distance computation. :pr:`1014` by :user:`Guillaume Lemaitre <glemaitre>`. +- Raise an informative error message when all support vectors are tagged as noise in + :class:`~imblearn.over_sampling.SVMSMOTE`. + :pr:`1016` by :user:`Guillaume Lemaitre <glemaitre>`. + - Fix a bug in :class:`~imblearn.over_sampling.SMOTENC` where the median of standard deviation of the continuous features was only computed on the minority class. Now, we are computing this statistic for each class that is up-sampled. diff --git a/imblearn/over_sampling/_smote/filter.py b/imblearn/over_sampling/_smote/filter.py index 4966b211b..b02fc7be2 100644 --- a/imblearn/over_sampling/_smote/filter.py +++ b/imblearn/over_sampling/_smote/filter.py @@ -495,6 +495,11 @@ def _fit_resample(self, X, y): support_vector = _safe_indexing( support_vector, np.flatnonzero(np.logical_not(noise_bool)) ) + if support_vector.shape[0] == 0: + raise ValueError( + "All support vectors are considered as noise. SVM-SMOTE is not " + "adapted to your dataset. Try another SMOTE variant." + ) danger_bool = self._in_danger_noise( self.nn_m_, support_vector, class_sample, y, kind="danger" ) diff --git a/imblearn/over_sampling/_smote/tests/test_svm_smote.py b/imblearn/over_sampling/_smote/tests/test_svm_smote.py index cf753b275..49e01f6b9 100644 --- a/imblearn/over_sampling/_smote/tests/test_svm_smote.py +++ b/imblearn/over_sampling/_smote/tests/test_svm_smote.py @@ -1,5 +1,6 @@ import numpy as np import pytest +from sklearn.datasets import make_classification from sklearn.linear_model import LogisticRegression from sklearn.neighbors import NearestNeighbors from sklearn.svm import SVC @@ -61,3 +62,27 @@ def test_svm_smote_not_svm(data): err_msg = "`svm_estimator` is required to exposed a `support_` fitted attribute." 
with pytest.raises(RuntimeError, match=err_msg): SVMSMOTE(svm_estimator=LogisticRegression()).fit_resample(*data) + + +def test_svm_smote_all_noise(data): + """Check that we raise a proper error message when all support vectors are + detected as noise and there is nothing that we can do. + + Non-regression test for: + https://github.com/scikit-learn-contrib/imbalanced-learn/issues/742 + """ + X, y = make_classification( + n_classes=3, + class_sep=0.001, + weights=[0.004, 0.451, 0.545], + n_informative=3, + n_redundant=0, + flip_y=0, + n_features=3, + n_clusters_per_class=2, + n_samples=1000, + random_state=10, + ) + + with pytest.raises(ValueError, match="SVM-SMOTE is not adapted to your dataset"): + SVMSMOTE(k_neighbors=4, random_state=42).fit_resample(X, y)