Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DOC add test for numpydoc validation and documented param/attributes #869

Merged
merged 10 commits into from
Oct 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build_tools/azure/install.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ IF "%PYTHON_ARCH%"=="64" (
call deactivate
@rem Clean up any left-over from a previous build
conda remove --all -q -y -n %VIRTUALENV%
conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython wheel joblib git
conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython wheel joblib git -c conda-forge

call activate %VIRTUALENV%

Expand Down
4 changes: 2 additions & 2 deletions build_tools/azure/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ UNAMESTR=`uname`

make_conda() {
TO_INSTALL="$@"
conda create -n $VIRTUALENV --yes $TO_INSTALL
conda create -n $VIRTUALENV --yes $TO_INSTALL -c conda-forge
source activate $VIRTUALENV
}

Expand Down Expand Up @@ -65,7 +65,7 @@ if [[ "$DISTRIB" == "conda" ]]; then
fi

if [[ -n "$TO_INSTALL" ]]; then
conda install --yes $TO_INSTALL
conda install --yes $TO_INSTALL -c conda-forge
fi

if [[ -n "$KERAS_VERSION" ]]; then
Expand Down
31 changes: 30 additions & 1 deletion imblearn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,24 @@ def _identity(X, y):
return X, y


def is_sampler(estimator):
    """Return True if the given estimator is a sampler, False otherwise.

    Parameters
    ----------
    estimator : object
        Estimator to test. Objects that do not declare an
        ``_estimator_type`` attribute are reported as not being samplers.

    Returns
    -------
    is_sampler : bool
        True if estimator is a sampler, otherwise False.
    """
    # Look the tag up defensively: an object without `_estimator_type` is
    # simply not a sampler, so the check must not raise AttributeError.
    return getattr(estimator, "_estimator_type", None) == "sampler"


class FunctionSampler(BaseSampler):
"""Construct a sampler from calling an arbitrary callable.

Expand All @@ -166,9 +184,20 @@ class FunctionSampler(BaseSampler):

.. versionadded:: 0.6

Attributes
----------
sampling_strategy_ : dict
Dictionary containing the information to sample the dataset. The keys
correspond to the class labels from which to sample and the values
are the number of samples to sample.

n_features_in_ : int
Number of features in the input dataset.

.. versionadded:: 0.9

See Also
--------

sklearn.preprocessing.FunctionTransformer : Stateless transformer.

Notes
Expand Down
19 changes: 19 additions & 0 deletions imblearn/combine/_smote_enn.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,25 @@ class SMOTEENN(BaseSampler):

{n_jobs}

Attributes
----------
sampling_strategy_ : dict
Dictionary containing the information to sample the dataset. The keys
correspond to the class labels from which to sample and the values
are the number of samples to sample.

smote_ : sampler object
The validated :class:`~imblearn.over_sampling.SMOTE` instance.

enn_ : sampler object
The validated :class:`~imblearn.under_sampling.EditedNearestNeighbours`
instance.

n_features_in_ : int
Number of features in the input dataset.

.. versionadded:: 0.9

See Also
--------
SMOTETomek : Over-sample using SMOTE followed by under-sampling removing
Expand Down
18 changes: 18 additions & 0 deletions imblearn/combine/_smote_tomek.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,24 @@ class SMOTETomek(BaseSampler):

{n_jobs}

Attributes
----------
sampling_strategy_ : dict
Dictionary containing the information to sample the dataset. The keys
correspond to the class labels from which to sample and the values
are the number of samples to sample.

smote_ : sampler object
The validated :class:`~imblearn.over_sampling.SMOTE` instance.

tomek_ : sampler object
The validated :class:`~imblearn.under_sampling.TomekLinks` instance.

n_features_in_ : int
Number of features in the input dataset.

.. versionadded:: 0.9

See Also
--------
SMOTEENN : Over-sample using SMOTE followed by under-sampling using Edited
Expand Down
7 changes: 3 additions & 4 deletions imblearn/datasets/_imbalance.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
def make_imbalance(
X, y, *, sampling_strategy=None, random_state=None, verbose=False, **kwargs
):
"""Turns a dataset into an imbalanced dataset with a specific sampling
strategy.
"""Turn a dataset into an imbalanced dataset with a specific sampling strategy.

A simple toy dataset to visualize clustering and classification
algorithms.
Expand Down Expand Up @@ -52,7 +51,7 @@ def make_imbalance(
verbose : bool, default=False
Show information regarding the sampling.

kwargs : dict
**kwargs : dict
Dictionary of additional keyword arguments to pass to
``sampling_strategy``.

Expand All @@ -62,7 +61,7 @@ def make_imbalance(
The array containing the imbalanced data.

y_resampled : ndarray of shape (n_samples_new)
The corresponding label of `X_resampled`
The corresponding label of `X_resampled`.

Notes
-----
Expand Down
9 changes: 4 additions & 5 deletions imblearn/datasets/_zenodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
.. [1] Ding, Zejin, "Diversified Ensemble Classifiers for Highly
Imbalanced Data Learning and their Application in Bioinformatics."
Dissertation, Georgia State University, (2011).

"""

# Author: Guillaume Lemaitre
Expand Down Expand Up @@ -147,12 +146,12 @@ def fetch_datasets(
The order is defined by ``filter_data``. Each Bunch object ---
referred to as dataset --- has the following attributes:

dataset.data : ndarray of shape (n_samples, n_features)
dataset.data : ndarray of shape (n_samples, n_features)

dataset.target : ndarray of shape (n_samples,)
dataset.target : ndarray of shape (n_samples,)

dataset.DESCR : str
Description of the each dataset.
dataset.DESCR : str
Description of each dataset.

Notes
-----
Expand Down
8 changes: 8 additions & 0 deletions imblearn/ensemble/_bagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ class BalancedBaggingClassifier(BaggingClassifier):
estimators_ : list of estimators
The collection of fitted base estimators.

sampler_ : sampler object
The validated sampler created from the `sampler` parameter.

estimators_samples_ : list of ndarray
The subset of drawn samples (i.e., the in-bag samples) for each base
estimator. Each subset is defined by a boolean mask.
Expand All @@ -133,6 +136,11 @@ class BalancedBaggingClassifier(BaggingClassifier):
was never left out during the bootstrap. In this case,
``oob_decision_function_`` might contain NaN.

n_features_in_ : int
Number of features in the input dataset.

.. versionadded:: 0.9

See Also
--------
BalancedRandomForestClassifier : Random forest applying random-under
Expand Down
11 changes: 11 additions & 0 deletions imblearn/ensemble/_easy_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,23 @@ class EasyEnsembleClassifier(BaggingClassifier):
estimators_ : list of estimators
The collection of fitted base estimators.

estimators_samples_ : list of arrays
The subset of drawn samples for each base estimator.

estimators_features_ : list of arrays
The subset of drawn features for each base estimator.

classes_ : array, shape (n_classes,)
The classes labels.

n_classes_ : int or list
The number of classes.

n_features_in_ : int
Number of features in the input dataset.

.. versionadded:: 0.9

See Also
--------
BalancedBaggingClassifier : Bagging classifier for which each base
Expand Down
18 changes: 15 additions & 3 deletions imblearn/ensemble/_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,17 @@ class BalancedRandomForestClassifier(RandomForestClassifier):

Attributes
----------
estimators_ : list of DecisionTreeClassifier
base_estimator_ : :class:`~sklearn.tree.DecisionTreeClassifier` instance
The child estimator template used to create the collection of fitted
sub-estimators.

estimators_ : list of :class:`~sklearn.tree.DecisionTreeClassifier`
The collection of fitted sub-estimators.

samplers_ : list of RandomUnderSampler
base_sampler_ : :class:`~imblearn.under_sampling.RandomUnderSampler`
The base sampler used to construct the subsequent list of samplers.

samplers_ : list of :class:`~imblearn.under_sampling.RandomUnderSampler`
The collection of fitted samplers.

pipelines_ : list of Pipeline.
Expand All @@ -250,6 +257,11 @@ class labels (multi-output problem).
n_features_ : int
The number of features when ``fit`` is performed.

n_features_in_ : int
Number of features in the input dataset.

.. versionadded:: 0.9

n_outputs_ : int
The number of outputs when ``fit`` is performed.

Expand Down Expand Up @@ -628,7 +640,7 @@ def _set_oob_score(self, X, y):
@property
def n_features_(self):
"""Number of features when fitting the estimator."""
return getattr(self.n_features_in_, self._n_features)
return getattr(self.n_features_in_, "n_features_", self._n_features)

def _more_tags(self):
return {
Expand Down
10 changes: 9 additions & 1 deletion imblearn/ensemble/_weight_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ class RUSBoostClassifier(AdaBoostClassifier):
estimators_ : list of classifiers
The collection of fitted sub-estimators.

samplers_ : list of RandomUnderSampler
base_sampler_ : :class:`~imblearn.under_sampling.RandomUnderSampler`
The base sampler used to generate the subsequent samplers.

samplers_ : list of :class:`~imblearn.under_sampling.RandomUnderSampler`
The collection of fitted samplers.

pipelines_ : list of Pipeline
Expand All @@ -90,6 +93,11 @@ class RUSBoostClassifier(AdaBoostClassifier):
feature_importances_ : ndarray of shape (n_features,)
The feature importances if supported by the ``base_estimator``.

n_features_in_ : int
Number of features in the input dataset.

.. versionadded:: 0.9

See Also
--------
BalancedBaggingClassifier : Bagging classifier for which each base
Expand Down
18 changes: 18 additions & 0 deletions imblearn/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,24 @@


def raise_isinstance_error(variable_name, possible_type, variable):
"""Raise consistent error message for isinstance() function.

Parameters
----------
variable_name : str
The name of the variable.

possible_type : type
The possible type of the variable.

variable : object
The variable to check.

Raises
------
ValueError
If the instance is not of the possible type.
"""
raise ValueError(
f"{variable_name} has to be one of {possible_type}. "
f"Got {type(variable)} instead."
Expand Down
9 changes: 6 additions & 3 deletions imblearn/keras/_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ def import_keras():

def import_from_keras():
try:
import keras
import keras # noqa

return (keras.utils.Sequence,), True
return (keras.utils.data_utils.Sequence,), True
except ImportError:
return tuple(), False

Expand All @@ -33,7 +33,10 @@ def import_from_tensforflow():
ParentClassTensorflow, has_keras_tf = import_from_tensforflow()
has_keras = has_keras_k or has_keras_tf
if has_keras:
ParentClass = ParentClassKeras + ParentClassTensorflow
if has_keras_tf:
ParentClass = ParentClassTensorflow
else:
ParentClass = ParentClassKeras
else:
ParentClass = (object,)
return ParentClass, has_keras
Expand Down
Loading