Rename parallel_clustering_{n_jobs, prefer}
To n_jobs and parallel_backend_prefer respectively. Also update documentation and mapper_quickstart.ipynb accordingly.
Umberto committed Jan 20, 2020
1 parent fb7b75b · commit be5b6c7
Showing 3 changed files with 28 additions and 27 deletions.
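For quick reference, the rename at a typical call site (an illustrative before/after sketch; the surrounding arguments are the ones used in the quickstart notebook below):

    # before this commit
    pipe = make_mapper_pipeline(clusterer=DBSCAN(),
                                parallel_clustering_n_jobs=2,
                                parallel_clustering_prefer='threads')

    # after this commit
    pipe = make_mapper_pipeline(clusterer=DBSCAN(),
                                n_jobs=2,
                                parallel_backend_prefer='threads')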
examples/mapper_quickstart.ipynb (8 changes: 4 additions & 4 deletions)
@@ -137,15 +137,15 @@
 "clusterer = DBSCAN()\n",
 "\n",
 "# configure parallelism of clustering step\n",
-"parallel_clustering_n_jobs = 1\n",
+"n_jobs = 1\n",
 "\n",
 "# initialise pipeline\n",
 "pipe = make_mapper_pipeline(\n",
 "    filter_func=filter_func,\n",
 "    cover=cover,\n",
 "    clusterer=clusterer,\n",
 "    verbose=False,\n",
-"    parallel_clustering_n_jobs=parallel_clustering_n_jobs,\n",
+"    n_jobs=n_jobs,\n",
 ")"
 ]
},
@@ -430,7 +430,7 @@
 "    cover=cover,\n",
 "    clusterer=clusterer,\n",
 "    verbose=True,\n",
-"    parallel_clustering_n_jobs=parallel_clustering_n_jobs,\n",
+"    n_jobs=n_jobs,\n",
 ")"
 ]
},
@@ -473,7 +473,7 @@
 "    cover=cover,\n",
 "    clusterer=clusterer,\n",
 "    verbose=True,\n",
-"    parallel_clustering_n_jobs=parallel_clustering_n_jobs,\n",
+"    n_jobs=n_jobs,\n",
 ")"
 ]
},
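Put together, the updated quickstart cell amounts to the following minimal, runnable sketch. The import path `giotto.mapper` is assumed from this repository's layout, and `filter_func`/`cover` are left at their defaults for brevity:

    import numpy as np
    from sklearn.cluster import DBSCAN
    from giotto.mapper import make_mapper_pipeline  # import path assumed

    # configure parallelism of clustering step (renamed parameter)
    n_jobs = 1

    # initialise pipeline
    pipe = make_mapper_pipeline(
        clusterer=DBSCAN(),
        verbose=False,
        n_jobs=n_jobs,  # was: parallel_clustering_n_jobs
    )

    X = np.random.random((1000, 4))
    graph = pipe.fit_transform(X)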
giotto/mapper/cluster.py (16 changes: 8 additions & 8 deletions)
@@ -36,12 +36,12 @@ class ParallelClustering(BaseEstimator):
         :class:`sklearn.base.ClusterMixin`. ``None`` means that the default
         :class:`sklearn.cluster.DBSCAN` is used.
-    parallel_clustering_n_jobs : int or None, optional, default: ``None``
+    n_jobs : int or None, optional, default: ``None``
         The number of jobs to use for the computation. ``None`` means 1
         unless in a :obj:`joblib.parallel_backend` context. ``-1`` means
         using all processors.
-    parallel_clustering_prefer : ``'processes'`` | ``'threads'``, optional, \
+    parallel_backend_prefer : ``'processes'`` | ``'threads'``, optional, \
         default: ``'threads'``
         Selects the default joblib backend. The default process-based backend
         is 'loky' and the default thread-based backend is 'threading'.
@@ -64,11 +64,11 @@ class ParallelClustering(BaseEstimator):
     """

     def __init__(self, clusterer=None,
-                 parallel_clustering_n_jobs=None,
-                 parallel_clustering_prefer='threads'):
+                 n_jobs=None,
+                 parallel_backend_prefer='threads'):
         self.clusterer = clusterer
-        self.parallel_clustering_n_jobs = parallel_clustering_n_jobs
-        self.parallel_clustering_prefer = parallel_clustering_prefer
+        self.n_jobs = n_jobs
+        self.parallel_backend_prefer = parallel_backend_prefer

     def _validate_clusterer(self, default=DBSCAN()):
         """Set :attr:`clusterer_` depending on the value of `clusterer`.
@@ -136,8 +136,8 @@ def fit(self, X, y=None, sample_weight=None):
         else:
             single_fitter = self._fit_single_abs_labels

-        self.clusterers_ = Parallel(n_jobs=self.parallel_clustering_n_jobs,
-                                    prefer=self.parallel_clustering_prefer)(
+        self.clusterers_ = Parallel(n_jobs=self.n_jobs,
+                                    prefer=self.parallel_backend_prefer)(
             delayed(single_fitter)(
                 X_tot, np.flatnonzero(mask),
                 mask_num, sample_weight=sample_weights[mask_num])
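Because `__init__` stores the renamed arguments unmodified as attributes (the scikit-learn estimator convention, visible in the hunk above), the new names also become the keys seen by `get_params`/`set_params`. A small sketch, assuming `ParallelClustering` is importable from `giotto.mapper.cluster` as in this diff:

    from sklearn.cluster import DBSCAN
    from giotto.mapper.cluster import ParallelClustering  # path assumed from this diff

    pc = ParallelClustering(clusterer=DBSCAN(), n_jobs=2,
                            parallel_backend_prefer='threads')
    print(pc.get_params())    # now includes 'n_jobs' and 'parallel_backend_prefer'
    pc.set_params(n_jobs=-1)  # -1 means "use all processors", per the docstring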
giotto/mapper/pipeline.py (31 changes: 16 additions & 15 deletions)
@@ -11,8 +11,8 @@
 global_pipeline_params = ('memory', 'verbose')
 nodes_params = ('scaler', 'filter_func', 'cover')
 clust_prepr_params = ('clustering_preprocessing',)
-clust_params = ('clusterer', 'parallel_clustering_n_jobs',
-                'parallel_clustering_prefer')
+clust_params = ('clusterer', 'n_jobs',
+                'parallel_backend_prefer')
 nerve_params = ('min_intersection',)
 clust_prepr_params_prefix = 'pullback_cover__'
 nodes_params_prefix = 'pullback_cover__map_and_cover__'
@@ -145,8 +145,8 @@ def make_mapper_pipeline(scaler=None,
                          cover=None,
                          clustering_preprocessing=None,
                          clusterer=None,
-                         parallel_clustering_n_jobs=None,
-                         parallel_clustering_prefer='threads',
+                         n_jobs=None,
+                         parallel_backend_prefer='threads',
                          graph_step=True,
                          min_intersection=1,
                          memory=None,
@@ -186,18 +186,19 @@ def make_mapper_pipeline(scaler=None,
         Clustering object. ``None`` means using DBSCAN
         (:meth:`sklearn.cluster.DBSCAN`) with its default parameters.
-    parallel_clustering_n_jobs : int or None, optional, default: ``None``
+    n_jobs : int or None, optional, default: ``None``
         The number of jobs to use in a joblib-parallel application of the
-        clustering step across pullback cover sets. ``None`` means 1 unless
+        clustering step across pullback cover sets. To be used in
+        conjunction with `parallel_backend_prefer`. ``None`` means 1 unless
         in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
         processors.
-    parallel_clustering_prefer : ``'processes'`` | ``'threads'``, optional, \
+    parallel_backend_prefer : ``'processes'`` | ``'threads'``, optional, \
         default: ``'threads'``
-        Selects the default joblib backend to use in a joblib-parallel
-        application of the clustering step across pullback cover sets.
-        The default process-based backend is 'loky' and the default
-        thread-based backend is 'threading'. See [2]_.
+        Soft hint for the default joblib backend to use in a joblib-parallel
+        application of the clustering step across pullback cover sets. To be
+        used in conjunction with `n_jobs`. The default process-based backend is
+        'loky' and the default thread-based backend is 'threading'. See [2]_.
     graph_step : bool, optional, default: ``True``
         Whether the resulting pipeline should stop at the calculation of the
@@ -287,7 +288,7 @@ def make_mapper_pipeline(scaler=None,
     >>> # clustering across the pullback cover sets can be beneficial
     >>> from sklearn.cluster import DBSCAN
     >>> mapper = make_mapper_pipeline(clusterer=DBSCAN(),
-    ...                               parallel_clustering_n_jobs=6,
+    ...                               n_jobs=6,
     ...                               memory=mkdtemp(),
     ...                               verbose=True)
     >>> X = np.random.random((100000, 4))
@@ -298,7 +299,7 @@ def make_mapper_pipeline(scaler=None,
     [Pipeline] .... (step 1 of 3) Processing pullback_cover, total= 0.7s
     [Pipeline] ........ (step 2 of 3) Processing clustering, total= 1.9s
     [Pipeline] ............. (step 3 of 3) Processing nerve, total= 0.3s
-    >>> mapper.set_params(parallel_clustering_n_jobs=1)
+    >>> mapper.set_params(n_jobs=1)
     >>> mapper.fit_transform(X)
     [Pipeline] ........ (step 2 of 3) Processing clustering, total= 5.3s
     [Pipeline] ............. (step 3 of 3) Processing nerve, total= 0.3s
@@ -366,8 +367,8 @@ def make_mapper_pipeline(scaler=None,
                 ('map_and_cover', map_and_cover)])),
         ('clustering', ParallelClustering(
             clusterer=_clusterer,
-            parallel_clustering_n_jobs=parallel_clustering_n_jobs,
-            parallel_clustering_prefer=parallel_clustering_prefer))
+            n_jobs=n_jobs,
+            parallel_backend_prefer=parallel_backend_prefer))
     ]

     if graph_step:
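Since `n_jobs=None` defers to an enclosing joblib context and `parallel_backend_prefer` is only a soft hint, the backend can still be chosen externally. A sketch under the same assumed import path:

    import numpy as np
    from joblib import parallel_backend
    from sklearn.cluster import DBSCAN
    from giotto.mapper import make_mapper_pipeline  # import path assumed

    mapper = make_mapper_pipeline(clusterer=DBSCAN(), n_jobs=None)
    X = np.random.random((10000, 4))

    # an explicit joblib context supplies both worker count and backend,
    # overriding the 'threads' soft hint for the clustering step
    with parallel_backend('threading', n_jobs=4):
        graph = mapper.fit_transform(X)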
