From 199c7d44a7edf2445f393fcbcb0808d1f283b99c Mon Sep 17 00:00:00 2001 From: William Hicks Date: Mon, 9 Jan 2023 16:09:58 -0500 Subject: [PATCH] Make all CPU/GPU only imports "safe" for respective package (#5117) Authors: - William Hicks (https://github.com/wphicks) Approvers: - Dante Gama Dessavre (https://github.com/dantegd) URL: https://github.com/rapidsai/cuml/pull/5117 --- cpp/examples/kmeans/prepare_input.py | 7 +- cpp/examples/symreg/prepare_input.py | 33 +- .../preprocessing/_column_transformer.py | 42 +- .../sklearn/preprocessing/_data.py | 62 +- .../sklearn/preprocessing/_discretization.py | 32 +- .../sklearn/preprocessing/_imputation.py | 36 +- .../cuml/_thirdparty/sklearn/utils/extmath.py | 8 +- .../_thirdparty/sklearn/utils/sparsefuncs.py | 13 +- .../_thirdparty/sklearn/utils/validation.py | 15 +- python/cuml/benchmark/algorithms.py | 46 +- .../cuml/benchmark/automated/utils/utils.py | 7 +- python/cuml/benchmark/bench_helper_funcs.py | 19 +- python/cuml/benchmark/ci_benchmark.py | 5 +- python/cuml/benchmark/datagen.py | 28 +- python/cuml/benchmark/run_benchmarks.py | 5 +- python/cuml/benchmark/runners.py | 12 +- python/cuml/cluster/agglomerative.pyx | 3 +- python/cuml/cluster/dbscan.pyx | 6 +- python/cuml/cluster/hdbscan/hdbscan.pyx | 6 +- python/cuml/cluster/hdbscan/prediction.pyx | 6 +- python/cuml/cluster/kmeans.pyx | 6 +- python/cuml/cluster/kmeans_mg.pyx | 6 +- python/cuml/common/kernel_utils.py | 12 +- python/cuml/common/numba_utils.py | 3 +- python/cuml/common/opg_data_utils_mg.pyx | 3 +- python/cuml/common/sparse_utils.py | 3 +- python/cuml/common/sparsefuncs.py | 24 +- python/cuml/dask/cluster/dbscan.py | 21 +- python/cuml/dask/cluster/kmeans.py | 26 +- python/cuml/dask/common/base.py | 40 +- python/cuml/dask/common/dask_arr_utils.py | 26 +- python/cuml/dask/common/input_utils.py | 53 +- python/cuml/dask/common/part_utils.py | 27 +- python/cuml/dask/common/utils.py | 33 +- python/cuml/dask/datasets/classification.py | 8 +- python/cuml/dask/datasets/regression.py | 19 +- python/cuml/dask/datasets/utils.py | 3 +- python/cuml/dask/ensemble/base.py | 804 +++++++++--------- .../dask/ensemble/randomforestclassifier.py | 23 +- .../linear_model/logistic_regression.py | 10 +- python/cuml/dask/metrics/confusion_matrix.py | 18 +- python/cuml/dask/metrics/utils.py | 3 +- python/cuml/dask/naive_bayes/naive_bayes.py | 36 +- .../dask/neighbors/kneighbors_classifier.py | 13 +- .../cuml/dask/preprocessing/LabelEncoder.py | 10 +- python/cuml/dask/preprocessing/encoders.py | 5 +- python/cuml/dask/preprocessing/label.py | 8 +- python/cuml/datasets/arima.pyx | 3 +- python/cuml/datasets/blobs.py | 20 +- python/cuml/datasets/classification.py | 18 +- python/cuml/datasets/regression.pyx | 3 +- python/cuml/datasets/utils.py | 3 +- python/cuml/decomposition/base_mg.pyx | 6 +- python/cuml/decomposition/incremental_pca.py | 21 +- python/cuml/decomposition/pca.pyx | 12 +- python/cuml/decomposition/pca_mg.pyx | 6 +- python/cuml/decomposition/tsvd.pyx | 6 +- python/cuml/decomposition/tsvd_mg.pyx | 6 +- python/cuml/ensemble/randomforest_common.pyx | 6 +- python/cuml/ensemble/randomforest_shared.pyx | 3 +- .../cuml/ensemble/randomforestclassifier.pyx | 9 +- .../cuml/ensemble/randomforestregressor.pyx | 9 +- .../hyperopt_utils/plotting_utils.py | 7 +- .../cuml/experimental/linear_model/lars.pyx | 6 +- python/cuml/explainer/base.pyx | 11 +- python/cuml/explainer/common.py | 8 +- python/cuml/explainer/kernel_shap.pyx | 6 +- python/cuml/explainer/permutation_shap.pyx | 14 +- 
python/cuml/explainer/sampling.py | 13 +- python/cuml/explainer/tree_shap.pyx | 3 +- python/cuml/feature_extraction/_tfidf.py | 13 +- .../feature_extraction/_tfidf_vectorizer.py | 4 +- .../cuml/feature_extraction/_vectorizers.py | 27 +- python/cuml/fil/fil.pyx | 8 +- python/cuml/internals/base.pyx | 3 +- python/cuml/internals/type_utils.py | 3 +- python/cuml/kernel_ridge/kernel_ridge.pyx | 10 +- python/cuml/linear_model/base.pyx | 9 +- python/cuml/linear_model/base_mg.pyx | 6 +- .../cuml/linear_model/linear_regression.pyx | 9 +- .../linear_model/linear_regression_mg.pyx | 6 +- .../cuml/linear_model/logistic_regression.pyx | 6 +- python/cuml/linear_model/ridge.pyx | 6 +- python/cuml/linear_model/ridge_mg.pyx | 6 +- python/cuml/manifold/simpl_set.pyx | 6 +- python/cuml/manifold/t_sne.pyx | 10 +- python/cuml/manifold/umap.pyx | 20 +- python/cuml/manifold/umap_utils.pyx | 6 +- python/cuml/metrics/_classification.py | 8 +- python/cuml/metrics/_ranking.py | 14 +- python/cuml/metrics/accuracy.pyx | 3 +- .../metrics/cluster/adjusted_rand_index.pyx | 3 +- python/cuml/metrics/cluster/entropy.pyx | 6 +- .../cuml/metrics/cluster/silhouette_score.pyx | 6 +- python/cuml/metrics/cluster/utils.pyx | 3 +- python/cuml/metrics/confusion_matrix.py | 21 +- python/cuml/metrics/hinge_loss.pyx | 6 +- python/cuml/metrics/kl_divergence.pyx | 6 +- python/cuml/metrics/pairwise_distances.pyx | 14 +- python/cuml/metrics/pairwise_kernels.py | 15 +- python/cuml/metrics/regression.pyx | 6 +- python/cuml/metrics/trustworthiness.pyx | 6 +- python/cuml/metrics/utils.py | 3 +- python/cuml/model_selection/_split.py | 19 +- python/cuml/naive_bayes/naive_bayes.py | 42 +- python/cuml/neighbors/kernel_density.py | 21 +- .../cuml/neighbors/kneighbors_classifier.pyx | 11 +- .../cuml/neighbors/kneighbors_regressor.pyx | 9 +- python/cuml/neighbors/nearest_neighbors.pyx | 13 +- .../cuml/neighbors/nearest_neighbors_mg.pyx | 3 +- python/cuml/preprocessing/LabelEncoder.py | 14 +- python/cuml/preprocessing/TargetEncoder.py | 13 +- python/cuml/preprocessing/encoders.py | 22 +- python/cuml/preprocessing/label.py | 16 +- python/cuml/preprocessing/onehotencoder_mg.py | 8 +- .../preprocessing/text/stem/porter_stemmer.py | 37 +- .../porter_stemmer_rules.py | 3 +- .../stem/porter_stemmer_utils/suffix_utils.py | 11 +- python/cuml/prims/array.py | 6 +- python/cuml/prims/label/classlabels.py | 15 +- python/cuml/prims/stats/covariance.py | 9 +- .../random_projection/random_projection.pyx | 3 +- python/cuml/solvers/cd.pyx | 6 +- python/cuml/solvers/cd_mg.pyx | 6 +- python/cuml/solvers/qn.pyx | 6 +- python/cuml/solvers/sgd.pyx | 9 +- python/cuml/svm/linear.pyx | 3 +- python/cuml/svm/svc.pyx | 14 +- python/cuml/svm/svm_base.pyx | 9 +- python/cuml/svm/svr.pyx | 9 +- python/cuml/testing/dask/utils.py | 13 +- python/cuml/testing/strategies.py | 18 +- python/cuml/testing/test_preproc_utils.py | 30 +- python/cuml/testing/utils.py | 48 +- python/cuml/tests/conftest.py | 23 +- python/cuml/tests/dask/test_dask_arr_utils.py | 17 +- python/cuml/tests/dask/test_dask_base.py | 31 +- .../dask/test_dask_coordinate_descent.py | 3 +- python/cuml/tests/dask/test_dask_datasets.py | 21 +- python/cuml/tests/dask/test_dask_dbscan.py | 27 +- python/cuml/tests/dask/test_dask_doctest.py | 10 +- .../cuml/tests/dask/test_dask_input_utils.py | 7 +- python/cuml/tests/dask/test_dask_kmeans.py | 21 +- .../dask/test_dask_kneighbors_classifier.py | 13 +- .../dask/test_dask_kneighbors_regressor.py | 13 +- .../tests/dask/test_dask_label_binarizer.py | 6 +- 
.../tests/dask/test_dask_label_encoder.py | 48 +- .../tests/dask/test_dask_linear_regression.py | 10 +- .../dask/test_dask_logistic_regression.py | 12 +- python/cuml/tests/dask/test_dask_metrics.py | 16 +- .../cuml/tests/dask/test_dask_naive_bayes.py | 15 +- .../tests/dask/test_dask_nearest_neighbors.py | 25 +- .../tests/dask/test_dask_one_hot_encoder.py | 22 +- python/cuml/tests/dask/test_dask_pca.py | 9 +- .../tests/dask/test_dask_random_forest.py | 40 +- .../tests/dask/test_dask_ridge_regression.py | 10 +- .../tests/dask/test_dask_serialization.py | 13 +- python/cuml/tests/dask/test_dask_sql.py | 18 +- python/cuml/tests/dask/test_dask_tfidf.py | 21 +- python/cuml/tests/dask/test_dask_tsvd.py | 11 +- python/cuml/tests/dask/test_dask_umap.py | 18 +- .../tests/explainer/test_explainer_base.py | 16 +- .../tests/explainer/test_explainer_common.py | 36 +- .../explainer/test_explainer_kernel_shap.py | 29 +- .../test_explainer_permutation_shap.py | 17 +- .../cuml/tests/explainer/test_gpu_treeshap.py | 28 +- python/cuml/tests/explainer/test_sampling.py | 16 +- .../tests/stemmer_tests/test_len_utils.py | 6 +- .../test_porter_stemmer_rules.py | 6 +- .../cuml/tests/stemmer_tests/test_stemmer.py | 8 +- python/cuml/tests/stemmer_tests/test_steps.py | 3 +- .../tests/stemmer_tests/test_suffix_utils.py | 3 +- python/cuml/tests/test_adapters.py | 39 +- python/cuml/tests/test_agglomerative.py | 3 +- python/cuml/tests/test_allocator.py | 14 +- python/cuml/tests/test_api.py | 15 +- python/cuml/tests/test_arima.py | 27 +- python/cuml/tests/test_array_sparse.py | 22 +- python/cuml/tests/test_auto_arima.py | 8 +- python/cuml/tests/test_base.py | 3 +- python/cuml/tests/test_batched_lbfgs.py | 4 +- python/cuml/tests/test_benchmark.py | 20 +- python/cuml/tests/test_compose.py | 49 +- python/cuml/tests/test_coordinate_descent.py | 19 +- python/cuml/tests/test_cuml_descr_decor.py | 18 +- .../tests/test_dataset_generator_types.py | 15 +- python/cuml/tests/test_dbscan.py | 34 +- python/cuml/tests/test_device_selection.py | 140 +-- python/cuml/tests/test_doctest.py | 8 +- python/cuml/tests/test_fil.py | 32 +- python/cuml/tests/test_hdbscan.py | 34 +- python/cuml/tests/test_holtwinters.py | 10 +- python/cuml/tests/test_incremental_pca.py | 20 +- python/cuml/tests/test_input_utils.py | 26 +- python/cuml/tests/test_kernel_density.py | 13 +- python/cuml/tests/test_kernel_ridge.py | 29 +- python/cuml/tests/test_kmeans.py | 22 +- .../cuml/tests/test_kneighbors_classifier.py | 20 +- .../cuml/tests/test_kneighbors_regressor.py | 23 +- python/cuml/tests/test_label_binarizer.py | 6 +- python/cuml/tests/test_label_encoder.py | 65 +- python/cuml/tests/test_lars.py | 17 +- python/cuml/tests/test_linear_model.py | 72 +- python/cuml/tests/test_linear_svm.py | 47 +- python/cuml/tests/test_make_blobs.py | 3 +- python/cuml/tests/test_make_classification.py | 11 +- python/cuml/tests/test_mbsgd_classifier.py | 20 +- python/cuml/tests/test_mbsgd_regressor.py | 20 +- python/cuml/tests/test_meta_estimators.py | 23 +- python/cuml/tests/test_metrics.py | 117 +-- python/cuml/tests/test_module_config.py | 15 +- python/cuml/tests/test_multiclass.py | 9 +- python/cuml/tests/test_naive_bayes.py | 41 +- python/cuml/tests/test_nearest_neighbors.py | 36 +- python/cuml/tests/test_one_hot_encoder.py | 21 +- python/cuml/tests/test_pca.py | 24 +- python/cuml/tests/test_pickle.py | 21 +- python/cuml/tests/test_preprocessing.py | 10 +- python/cuml/tests/test_prims.py | 6 +- python/cuml/tests/test_qn.py | 35 +- python/cuml/tests/test_random_forest.py | 52 
+- python/cuml/tests/test_random_projection.py | 22 +- python/cuml/tests/test_serialize.py | 11 +- python/cuml/tests/test_sgd.py | 14 +- python/cuml/tests/test_simpl_set.py | 25 +- python/cuml/tests/test_sparsefuncs.py | 10 +- python/cuml/tests/test_stationarity.py | 10 +- python/cuml/tests/test_stats.py | 10 +- python/cuml/tests/test_strategies.py | 16 +- python/cuml/tests/test_stratified_kfold.py | 8 +- python/cuml/tests/test_svm.py | 36 +- python/cuml/tests/test_target_encoder.py | 22 +- .../tests/test_text_feature_extraction.py | 25 +- python/cuml/tests/test_tfidf.py | 12 +- python/cuml/tests/test_thirdparty.py | 55 +- python/cuml/tests/test_train_test_split.py | 15 +- python/cuml/tests/test_trustworthiness.py | 6 +- python/cuml/tests/test_tsne.py | 31 +- python/cuml/tests/test_tsvd.py | 17 +- python/cuml/tests/test_umap.py | 49 +- python/cuml/tests/test_utils.py | 13 +- python/cuml/thirdparty_adapters/adapters.py | 32 +- .../thirdparty_adapters/sparsefuncs_fast.py | 8 +- python/cuml/tsa/arima.pyx | 3 +- python/cuml/tsa/auto_arima.pyx | 6 +- python/cuml/tsa/batched_lbfgs.py | 8 +- python/cuml/tsa/holtwinters.pyx | 9 +- python/cuml/tsa/seasonality.pyx | 3 +- python/cuml/tsa/stationarity.pyx | 3 +- 249 files changed, 2667 insertions(+), 2355 deletions(-) diff --git a/cpp/examples/kmeans/prepare_input.py b/cpp/examples/kmeans/prepare_input.py index 1a2b65972d..aac78813ba 100755 --- a/cpp/examples/kmeans/prepare_input.py +++ b/cpp/examples/kmeans/prepare_input.py @@ -18,10 +18,11 @@ # Based on # https://github.com/h2oai/h2o4gpu/blob/master/examples/py/demos/H2O4GPU_KMeans_Homesite.ipynb # as received on December 6th 2018 -import pandas as pd -import numpy as np -import sys import os +import sys +from cuml.internals.safe_imports import cpu_only_import +pd = cpu_only_import('pandas') +np = cpu_only_import('numpy') # read the data train_file = "train.csv" diff --git a/cpp/examples/symreg/prepare_input.py b/cpp/examples/symreg/prepare_input.py index 4e53e131e9..8c673b7f43 100644 --- a/cpp/examples/symreg/prepare_input.py +++ b/cpp/examples/symreg/prepare_input.py @@ -13,8 +13,9 @@ # limitations under the License. 
# -import numpy as np from sklearn.model_selection import train_test_split +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') rng = np.random.RandomState(seed=2021) @@ -26,21 +27,21 @@ X_test = rng.uniform(-1, 1, 100).reshape(50, 2) y_test = X_test[:, 0]**2 - X_test[:, 1]**2 + X_test[:, 1] - 1 -print("Training set has n_rows=%d n_cols=%d" %(X_train.shape)) -print("Test set has n_rows=%d n_cols=%d" %(X_test.shape)) +print("Training set has n_rows=%d n_cols=%d" % (X_train.shape)) +print("Test set has n_rows=%d n_cols=%d" % (X_test.shape)) -train_data = "train_data.txt" -test_data = "test_data.txt" -train_labels = "train_labels.txt" -test_labels = "test_labels.txt" +train_data = "train_data.txt" +test_data = "test_data.txt" +train_labels = "train_labels.txt" +test_labels = "test_labels.txt" # Save all datasets in col-major format -np.savetxt(train_data, X_train.T,fmt='%.7f') -np.savetxt(test_data, X_test.T,fmt='%.7f') -np.savetxt(train_labels, y_train,fmt='%.7f') -np.savetxt(test_labels, y_test,fmt='%.7f') - -print("Wrote %d values to %s"%(X_train.size,train_data)) -print("Wrote %d values to %s"%(X_test.size,test_data)) -print("Wrote %d values to %s"%(y_train.size,train_labels)) -print("Wrote %d values to %s"%(y_test.size,test_labels)) +np.savetxt(train_data, X_train.T, fmt='%.7f') +np.savetxt(test_data, X_test.T, fmt='%.7f') +np.savetxt(train_labels, y_train, fmt='%.7f') +np.savetxt(test_labels, y_test, fmt='%.7f') + +print("Wrote %d values to %s" % (X_train.size, train_data)) +print("Wrote %d values to %s" % (X_test.size, test_data)) +print("Wrote %d values to %s" % (y_train.size, train_labels)) +print("Wrote %d values to %s" % (y_test.size, test_labels)) diff --git a/python/cuml/_thirdparty/sklearn/preprocessing/_column_transformer.py b/python/cuml/_thirdparty/sklearn/preprocessing/_column_transformer.py index 573e3cad35..ded00ed619 100644 --- a/python/cuml/_thirdparty/sklearn/preprocessing/_column_transformer.py +++ b/python/cuml/_thirdparty/sklearn/preprocessing/_column_transformer.py @@ -9,6 +9,15 @@ # Authors mentioned above do not endorse or promote this production. 
+from ..preprocessing import FunctionTransformer +from ....thirdparty_adapters import check_array +from ..utils.validation import check_is_fitted +from ..utils.skl_dependencies import TransformerMixin, BaseComposition, \ + BaseEstimator +from cuml.internals import _deprecate_pos_args +from cuml.internals.array_sparse import SparseCumlArray +from cuml.internals.global_settings import _global_settings_data +import cuml from itertools import chain from itertools import compress from joblib import Parallel @@ -24,25 +33,18 @@ from collections import defaultdict import warnings -from scipy import sparse as sp_sparse -from cupyx.scipy import sparse as cu_sparse -import numpy as cpu_np -import cupy as np -import numba +from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import -import pandas as pd -import cudf - -import cuml -from cuml.internals.global_settings import _global_settings_data -from cuml.internals.array_sparse import SparseCumlArray -from cuml.internals.import_utils import has_sklearn -from cuml.internals import _deprecate_pos_args -from ..utils.skl_dependencies import TransformerMixin, BaseComposition, \ - BaseEstimator -from ..utils.validation import check_is_fitted -from ....thirdparty_adapters import check_array -from ..preprocessing import FunctionTransformer +cpu_np = cpu_only_import('numpy') +cu_sparse = gpu_only_import_from('cupyx.scipy', 'sparse') +np = gpu_only_import('cupy') +numba = gpu_only_import('numba') +pd = cpu_only_import('pandas') +sp_sparse = cpu_only_import_from('scipy', 'sparse') +cudf = gpu_only_import('cudf') _ERR_MSG_1DCOLUMN = ("1D data passed to a transformer that expects 2D data. " @@ -361,6 +363,7 @@ def delayed_function(*args, **kwargs): class _FuncWrapper: """"Load the global configuration before calling the function.""" + def __init__(self, function): self.function = function self.config = _global_settings_data.shared_state @@ -825,7 +828,7 @@ def _fit_transform(self, X, y, func, fitted=False): message_clsname='ColumnTransformer', message=self._log_message(name, idx, len(transformers))) for idx, (name, trans, column, weight) in enumerate( - self._iter(fitted=fitted, replace_strings=True), 1)) + self._iter(fitted=fitted, replace_strings=True), 1)) except ValueError as e: if "Expected 2D array, got 1D array instead" in str(e): raise ValueError(_ERR_MSG_1DCOLUMN) from e @@ -1161,6 +1164,7 @@ class make_column_selector: [-0.30151134, 0. , 1. , 0. ], [ 0.90453403, 0. , 0. , 1. ]]) """ + def __init__(self, pattern=None, *, dtype_include=None, dtype_exclude=None): self.pattern = pattern diff --git a/python/cuml/_thirdparty/sklearn/preprocessing/_data.py b/python/cuml/_thirdparty/sklearn/preprocessing/_data.py index 606332df82..ac4dd23b4e 100644 --- a/python/cuml/_thirdparty/sklearn/preprocessing/_data.py +++ b/python/cuml/_thirdparty/sklearn/preprocessing/_data.py @@ -15,42 +15,42 @@ # Authors mentioned above do not endorse or promote this production. 
+from ....internals.memory_utils import using_output_type +from ....internals import _deprecate_pos_args +from ....internals import api_return_generic +from ....common.array_descriptor import CumlArrayDescriptor +from ....internals.array_sparse import SparseCumlArray +from ....internals.array import CumlArray +from ....thirdparty_adapters.sparsefuncs_fast import \ + (inplace_csr_row_normalize_l1, inplace_csr_row_normalize_l2, + csr_polynomial_expansion) +from ..utils.sparsefuncs import (inplace_column_scale, + min_max_axis, + mean_variance_axis) +from ..utils.validation import (check_is_fitted, FLOAT_DTYPES, + check_random_state) +from ..utils.extmath import _incremental_mean_and_var +from ..utils.extmath import row_norms +from ....thirdparty_adapters import check_array +from cuml.internals.mixins import AllowNaNTagMixin, SparseInputTagMixin, \ + StatelessTagMixin +from ..utils.skl_dependencies import BaseEstimator, TransformerMixin +from scipy.special import boxcox +from scipy import optimize +from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import from itertools import chain, combinations import numbers import warnings from itertools import combinations_with_replacement as combinations_w_r -import numpy as cpu_np -import cupy as np -from cupyx.scipy import sparse -from scipy import stats -from scipy import optimize -from scipy.special import boxcox - -from ..utils.skl_dependencies import BaseEstimator, TransformerMixin -from cuml.internals.mixins import AllowNaNTagMixin, SparseInputTagMixin, \ - StatelessTagMixin -from ....thirdparty_adapters import check_array -from ..utils.extmath import row_norms -from ..utils.extmath import _incremental_mean_and_var -from ..utils.validation import (check_is_fitted, FLOAT_DTYPES, - check_random_state) - -from ..utils.sparsefuncs import (inplace_column_scale, - min_max_axis, - mean_variance_axis) +from cuml.internals.safe_imports import cpu_only_import +cpu_np = cpu_only_import('numpy') +np = gpu_only_import('cupy') +sparse = gpu_only_import_from('cupyx.scipy', 'sparse') +stats = cpu_only_import_from('scipy', 'stats') -from ....thirdparty_adapters.sparsefuncs_fast import \ - (inplace_csr_row_normalize_l1, inplace_csr_row_normalize_l2, - csr_polynomial_expansion) - -from ....internals.array import CumlArray -from ....internals.array_sparse import SparseCumlArray -from ....common.array_descriptor import CumlArrayDescriptor -from ....internals import api_return_generic -from ....internals import _deprecate_pos_args - -from ....internals.memory_utils import using_output_type BOUNDS_THRESHOLD = 1e-7 @@ -723,7 +723,7 @@ def partial_fit(self, X, y=None) -> "StandardScaler": if not hasattr(self, 'n_samples_seen_'): self.n_samples_seen_ = ( - X.shape[0] - counts_nan).astype(np.int64, copy=False) + X.shape[0] - counts_nan).astype(np.int64, copy=False) if self.with_std: # First pass diff --git a/python/cuml/_thirdparty/sklearn/preprocessing/_discretization.py b/python/cuml/_thirdparty/sklearn/preprocessing/_discretization.py index 6e99b9a740..ed85c5262f 100644 --- a/python/cuml/_thirdparty/sklearn/preprocessing/_discretization.py +++ b/python/cuml/_thirdparty/sklearn/preprocessing/_discretization.py @@ -11,23 +11,23 @@ # Authors mentioned above do not endorse or promote this production. 
-import numbers -import cupy as np -import numpy as cpu_np -import warnings - -from cuml.preprocessing import OneHotEncoder -from cuml.cluster import KMeans - -from ..utils.skl_dependencies import BaseEstimator, TransformerMixin -from cuml.internals.mixins import SparseInputTagMixin -from ..utils.validation import check_is_fitted -from ..utils.validation import FLOAT_DTYPES -from ....thirdparty_adapters import check_array -from ....internals.array_sparse import SparseCumlArray -from ....common.array_descriptor import CumlArrayDescriptor -from ....internals.memory_utils import using_output_type from ....internals import _deprecate_pos_args +from ....internals.memory_utils import using_output_type +from ....common.array_descriptor import CumlArrayDescriptor +from ....internals.array_sparse import SparseCumlArray +from ....thirdparty_adapters import check_array +from ..utils.validation import FLOAT_DTYPES +from ..utils.validation import check_is_fitted +from cuml.internals.mixins import SparseInputTagMixin +from ..utils.skl_dependencies import BaseEstimator, TransformerMixin +from cuml.cluster import KMeans +from cuml.preprocessing import OneHotEncoder +import warnings +from cuml.internals.safe_imports import cpu_only_import +import numbers +from cuml.internals.safe_imports import gpu_only_import +np = gpu_only_import('cupy') +cpu_np = cpu_only_import('numpy') def digitize(x, bins): diff --git a/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py b/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py index bbba20d96f..3ab9f0ddce 100644 --- a/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py +++ b/python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py @@ -10,26 +10,28 @@ # Authors mentioned above do not endorse or promote this production. -import numbers -import warnings - -import numpy -import cupy as np -import cuml -from cupyx.scipy import sparse - +from ....internals import _deprecate_pos_args +from ....common.array_descriptor import CumlArrayDescriptor +from ....internals.array_sparse import SparseCumlArray +from ..utils.validation import FLOAT_DTYPES +from ..utils.validation import check_is_fitted +from cuml.internals.mixins import AllowNaNTagMixin, SparseInputTagMixin, \ + StringInputTagMixin +from ..utils.skl_dependencies import BaseEstimator, TransformerMixin from ....thirdparty_adapters import (_get_mask, _masked_column_median, _masked_column_mean, _masked_column_mode) -from ..utils.skl_dependencies import BaseEstimator, TransformerMixin -from cuml.internals.mixins import AllowNaNTagMixin, SparseInputTagMixin, \ - StringInputTagMixin -from ..utils.validation import check_is_fitted -from ..utils.validation import FLOAT_DTYPES -from ....internals.array_sparse import SparseCumlArray -from ....common.array_descriptor import CumlArrayDescriptor -from ....internals import _deprecate_pos_args +from cuml.internals.safe_imports import gpu_only_import_from +import cuml +from cuml.internals.safe_imports import gpu_only_import +import numbers +import warnings + +from cuml.internals.safe_imports import cpu_only_import +numpy = cpu_only_import('numpy') +np = gpu_only_import('cupy') +sparse = gpu_only_import_from('cupyx.scipy', 'sparse') def is_scalar_nan(x): @@ -136,7 +138,7 @@ def _concatenate_indicator(self, X_imputed, X_indicator): "Data from the missing indicator are not provided. Call " "_fit_indicator and _transform_indicator in the imputer " "implementation." 
- ) + ) return hstack((X_imputed, X_indicator)) diff --git a/python/cuml/_thirdparty/sklearn/utils/extmath.py b/python/cuml/_thirdparty/sklearn/utils/extmath.py index 7203eca6e6..bf1aa761fe 100644 --- a/python/cuml/_thirdparty/sklearn/utils/extmath.py +++ b/python/cuml/_thirdparty/sklearn/utils/extmath.py @@ -16,9 +16,11 @@ # Authors mentioned above do not endorse or promote this production. -import cupy as np -import cupyx -from cupyx.scipy import sparse +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import +np = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') +sparse = gpu_only_import_from('cupyx.scipy', 'sparse') def row_norms(X, squared=False): diff --git a/python/cuml/_thirdparty/sklearn/utils/sparsefuncs.py b/python/cuml/_thirdparty/sklearn/utils/sparsefuncs.py index bf7d81aaa3..e0697b98ce 100644 --- a/python/cuml/_thirdparty/sklearn/utils/sparsefuncs.py +++ b/python/cuml/_thirdparty/sklearn/utils/sparsefuncs.py @@ -12,14 +12,17 @@ # Authors mentioned above do not endorse or promote this production. -from scipy import sparse as cpu_sp -from cupyx.scipy import sparse as gpu_sp -import cupy as np -import numpy as cpu_np - from ....thirdparty_adapters.sparsefuncs_fast import ( csr_mean_variance_axis0 as _csr_mean_var_axis0, csc_mean_variance_axis0 as _csc_mean_var_axis0) +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import cpu_only_import_from +cpu_sp = cpu_only_import_from('scipy', 'sparse') +gpu_sp = gpu_only_import_from('cupyx.scipy', 'sparse') +np = gpu_only_import('cupy') +cpu_np = cpu_only_import('numpy') def iscsr(X): diff --git a/python/cuml/_thirdparty/sklearn/utils/validation.py b/python/cuml/_thirdparty/sklearn/utils/validation.py index 88b1cba485..ddb84319fa 100644 --- a/python/cuml/_thirdparty/sklearn/utils/validation.py +++ b/python/cuml/_thirdparty/sklearn/utils/validation.py @@ -15,14 +15,15 @@ # Authors mentioned above do not endorse or promote this production. -import numbers -import numpy as np -import cupy as cp -import cupyx.scipy.sparse as sp -from inspect import isclass - -from ....common.exceptions import NotFittedError from ....thirdparty_adapters import check_array +from ....common.exceptions import NotFittedError +from inspect import isclass +from cuml.internals.safe_imports import gpu_only_import +import numbers +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +sp = gpu_only_import('cupyx.scipy.sparse') FLOAT_DTYPES = (np.float64, np.float32, np.float16) diff --git a/python/cuml/benchmark/algorithms.py b/python/cuml/benchmark/algorithms.py index 48411c5793..a733975804 100644 --- a/python/cuml/benchmark/algorithms.py +++ b/python/cuml/benchmark/algorithms.py @@ -13,6 +13,27 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import treelite_runtime +import treelite +from cuml.benchmark.bench_helper_funcs import ( + fit, + transform, + predict, + fit_transform, + fit_predict, + fit_kneighbors, + _build_cpu_skl_classifier, + _build_fil_skl_classifier, + _build_fil_classifier, + _build_treelite_classifier, + _treelite_fil_accuracy_score, + _build_mnmg_umap +) +from cuml.preprocessing import StandardScaler, MinMaxScaler, \ + MaxAbsScaler, Normalizer, \ + SimpleImputer, RobustScaler, \ + PolynomialFeatures +import tempfile import cuml import sklearn @@ -29,30 +50,9 @@ from cuml.dask import neighbors, cluster, manifold, \ decomposition, linear_model # noqa: F401 from cuml.internals.import_utils import has_umap -import numpy as np -import tempfile +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from cuml.preprocessing import StandardScaler, MinMaxScaler, \ - MaxAbsScaler, Normalizer, \ - SimpleImputer, RobustScaler, \ - PolynomialFeatures - -from cuml.benchmark.bench_helper_funcs import ( - fit, - transform, - predict, - fit_transform, - fit_predict, - fit_kneighbors, - _build_cpu_skl_classifier, - _build_fil_skl_classifier, - _build_fil_classifier, - _build_treelite_classifier, - _treelite_fil_accuracy_score, - _build_mnmg_umap -) -import treelite -import treelite_runtime if has_umap(): import umap diff --git a/python/cuml/benchmark/automated/utils/utils.py b/python/cuml/benchmark/automated/utils/utils.py index 340f5f60bb..53ec71f5e2 100644 --- a/python/cuml/benchmark/automated/utils/utils.py +++ b/python/cuml/benchmark/automated/utils/utils.py @@ -34,9 +34,7 @@ def setFixtureParamNames(*args, **kwargs): import math import itertools as it import warnings -import numpy as np -import cupy as cp -import cudf +from cuml.internals.safe_imports import cpu_only_import, gpu_only_import import pytest from cuml.benchmark import datagen, algorithms @@ -50,6 +48,9 @@ def setFixtureParamNames(*args, **kwargs): transform, kneighbors, \ fit_predict, fit_transform, \ fit_kneighbors +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +cudf = gpu_only_import('cudf') def distribute(client, data): diff --git a/python/cuml/benchmark/bench_helper_funcs.py b/python/cuml/benchmark/bench_helper_funcs.py index 03342cb51f..10629c9100 100644 --- a/python/cuml/benchmark/bench_helper_funcs.py +++ b/python/cuml/benchmark/bench_helper_funcs.py @@ -13,17 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from cuml.manifold import UMAP +from cuml.benchmark import datagen +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import +import sklearn.ensemble as skl_ensemble +import pickle as pickle import os import cuml from cuml.internals import input_utils -import numpy as np -import pandas as pd -import pickle as pickle -import sklearn.ensemble as skl_ensemble -import cudf -from numba import cuda -from cuml.benchmark import datagen -from cuml.manifold import UMAP +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') +cudf = gpu_only_import('cudf') +cuda = gpu_only_import_from('numba', 'cuda') def call(m, func_name, X, y=None): diff --git a/python/cuml/benchmark/ci_benchmark.py b/python/cuml/benchmark/ci_benchmark.py index 666fb39a7c..1b66171617 100644 --- a/python/cuml/benchmark/ci_benchmark.py +++ b/python/cuml/benchmark/ci_benchmark.py @@ -20,8 +20,9 @@ """ from cuml.benchmark.runners import run_variations from cuml.benchmark import algorithms -import numpy as np -import pandas as pd +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') def log_range(start, end, n): diff --git a/python/cuml/benchmark/datagen.py b/python/cuml/benchmark/datagen.py index f1b6664e47..bf3818e7cf 100644 --- a/python/cuml/benchmark/datagen.py +++ b/python/cuml/benchmark/datagen.py @@ -34,22 +34,24 @@ """ -import cudf -import gzip -import functools -import os -import numpy as np -import cupy as cp -import pandas as pd - -import cuml.datasets +from cuml.internals.import_utils import has_scipy +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals import input_utils +from urllib.request import urlretrieve import sklearn.model_selection +import cuml.datasets +from cuml.internals.safe_imports import cpu_only_import +import os +import functools +import gzip +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +pd = cpu_only_import('pandas') -from urllib.request import urlretrieve -from cuml.internals import input_utils -from numba import cuda -from cuml.internals.import_utils import has_scipy +cuda = gpu_only_import_from('numba', 'cuda') def _gen_data_regression(n_samples, n_features, random_state=42, diff --git a/python/cuml/benchmark/run_benchmarks.py b/python/cuml/benchmark/run_benchmarks.py index e93afe80ab..118b770102 100644 --- a/python/cuml/benchmark/run_benchmarks.py +++ b/python/cuml/benchmark/run_benchmarks.py @@ -15,9 +15,10 @@ # """Command-line ML benchmark runner""" -from cuml.benchmark import algorithms, datagen, runners -import numpy as np import json +from cuml.benchmark import algorithms, datagen, runners +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') PrecisionMap = { diff --git a/python/cuml/benchmark/runners.py b/python/cuml/benchmark/runners.py index 46b6dbcc36..68278ffbb2 100644 --- a/python/cuml/benchmark/runners.py +++ b/python/cuml/benchmark/runners.py @@ -15,14 +15,16 @@ # """Wrappers to run ML benchmarks""" +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.benchmark import datagen +import warnings import time import itertools -import numpy as np -import pandas as pd -import warnings +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') -from 
cuml.benchmark import datagen -from cudf import Series +Series = gpu_only_import_from('cudf', 'Series') class BenchmarkTimer: diff --git a/python/cuml/cluster/agglomerative.pyx b/python/cuml/cluster/agglomerative.pyx index 575273c9da..84cc579201 100644 --- a/python/cuml/cluster/agglomerative.pyx +++ b/python/cuml/cluster/agglomerative.pyx @@ -18,7 +18,8 @@ from libc.stdint cimport uintptr_t -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from cuml.internals.array import CumlArray from cuml.internals.base import Base diff --git a/python/cuml/cluster/dbscan.pyx b/python/cuml/cluster/dbscan.pyx index f86e726fb5..0871f5b1b0 100644 --- a/python/cuml/cluster/dbscan.pyx +++ b/python/cuml/cluster/dbscan.pyx @@ -17,8 +17,10 @@ # distutils: language = c++ import ctypes -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') from libcpp cimport bool from libc.stdint cimport uintptr_t, int64_t diff --git a/python/cuml/cluster/hdbscan/hdbscan.pyx b/python/cuml/cluster/hdbscan/hdbscan.pyx index 496fe114aa..228a5ffb14 100644 --- a/python/cuml/cluster/hdbscan/hdbscan.pyx +++ b/python/cuml/cluster/hdbscan/hdbscan.pyx @@ -21,8 +21,10 @@ from libc.stdlib cimport free from cython.operator cimport dereference as deref -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') from warnings import warn from cuml.internals.array import CumlArray diff --git a/python/cuml/cluster/hdbscan/prediction.pyx b/python/cuml/cluster/hdbscan/prediction.pyx index 9521edc239..d5e829e2ad 100644 --- a/python/cuml/cluster/hdbscan/prediction.pyx +++ b/python/cuml/cluster/hdbscan/prediction.pyx @@ -19,8 +19,10 @@ from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') from cuml.internals.array import CumlArray from cuml.internals.base import Base diff --git a/python/cuml/cluster/kmeans.pyx b/python/cuml/cluster/kmeans.pyx index d605dcfa71..b92f9e6ec0 100644 --- a/python/cuml/cluster/kmeans.pyx +++ b/python/cuml/cluster/kmeans.pyx @@ -17,8 +17,10 @@ # distutils: language = c++ import ctypes -import numpy as np -import rmm +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') import warnings import typing diff --git a/python/cuml/cluster/kmeans_mg.pyx b/python/cuml/cluster/kmeans_mg.pyx index 931a454e30..7a5787d93a 100644 --- a/python/cuml/cluster/kmeans_mg.pyx +++ b/python/cuml/cluster/kmeans_mg.pyx @@ -17,10 +17,12 @@ # distutils: language = c++ import ctypes -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import warnings -import rmm +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') from cython.operator cimport dereference as deref from libcpp cimport bool diff --git a/python/cuml/common/kernel_utils.py b/python/cuml/common/kernel_utils.py index fce5e25733..606bf0fbd6 100644 --- 
a/python/cuml/common/kernel_utils.py +++ b/python/cuml/common/kernel_utils.py @@ -14,14 +14,14 @@ # limitations under the License. # -import cupy as cp -import numpy as np - -import functools - +import cuml.internals.logger as logger from uuid import uuid1 +import functools +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') -import cuml.internals.logger as logger # Mapping of common PyData dtypes to their corresponding C-primitive dtype_str_map = {cp.dtype("float32"): "float", diff --git a/python/cuml/common/numba_utils.py b/python/cuml/common/numba_utils.py index f2869ebc70..5177b1c418 100644 --- a/python/cuml/common/numba_utils.py +++ b/python/cuml/common/numba_utils.py @@ -17,8 +17,9 @@ # DEPRECATED: to be removed once full migration to CumlArray is done # remaining usages: blobs.pyx, regression.pyx -from numba import cuda from numba.cuda.cudadrv.driver import driver +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') @cuda.jit diff --git a/python/cuml/common/opg_data_utils_mg.pyx b/python/cuml/common/opg_data_utils_mg.pyx index 28c5b2c4b2..0a600a3dc2 100644 --- a/python/cuml/common/opg_data_utils_mg.pyx +++ b/python/cuml/common/opg_data_utils_mg.pyx @@ -14,7 +14,8 @@ # limitations under the License. # -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from cuml.common.opg_data_utils_mg cimport * from libc.stdlib cimport malloc, free diff --git a/python/cuml/common/sparse_utils.py b/python/cuml/common/sparse_utils.py index 0055267b1d..52b05e2448 100644 --- a/python/cuml/common/sparse_utils.py +++ b/python/cuml/common/sparse_utils.py @@ -15,7 +15,8 @@ # from cuml.internals.import_utils import has_scipy -import cupyx +from cuml.internals.safe_imports import gpu_only_import +cupyx = gpu_only_import('cupyx') if has_scipy(): import scipy.sparse diff --git a/python/cuml/common/sparsefuncs.py b/python/cuml/common/sparsefuncs.py index 6fb11854b2..dd0e7bbaba 100644 --- a/python/cuml/common/sparsefuncs.py +++ b/python/cuml/common/sparsefuncs.py @@ -13,17 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import math -import numpy as np -import cupy as cp -import cupyx -from cuml.internals.input_utils import input_to_cuml_array -from cuml.internals.memory_utils import with_cupy_rmm -from cuml.internals.import_utils import has_scipy -import cuml.internals +from cuml.internals.safe_imports import gpu_only_import_from from cuml.common.kernel_utils import cuda_kernel_factory -from cupyx.scipy.sparse import csr_matrix as cp_csr_matrix,\ - coo_matrix as cp_coo_matrix, csc_matrix as cp_csc_matrix +import cuml.internals +from cuml.internals.import_utils import has_scipy +from cuml.internals.memory_utils import with_cupy_rmm +from cuml.internals.input_utils import input_to_cuml_array +from cuml.internals.safe_imports import gpu_only_import +import math +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') +cp_csr_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'csr_matrix') +cp_coo_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'coo_matrix') +cp_csc_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'csc_matrix') def _map_l1_norm_kernel(dtype): diff --git a/python/cuml/dask/cluster/dbscan.py b/python/cuml/dask/cluster/dbscan.py index d23b1c04a9..c7e6a0c2d2 100644 --- a/python/cuml/dask/cluster/dbscan.py +++ b/python/cuml/dask/cluster/dbscan.py @@ -13,19 +13,16 @@ # limitations under the License. # -import numpy as np - -from cuml.dask.common.base import BaseEstimator -from cuml.dask.common.base import DelayedPredictionMixin -from cuml.dask.common.base import DelayedTransformMixin -from cuml.dask.common.base import mnmg_import - -from raft_dask.common.comms import Comms -from raft_dask.common.comms import get_raft_comm_state - -from cuml.dask.common.utils import wait_and_raise_from_futures - from cuml.internals.memory_utils import with_cupy_rmm +from cuml.dask.common.utils import wait_and_raise_from_futures +from raft_dask.common.comms import get_raft_comm_state +from raft_dask.common.comms import Comms +from cuml.dask.common.base import mnmg_import +from cuml.dask.common.base import DelayedTransformMixin +from cuml.dask.common.base import DelayedPredictionMixin +from cuml.dask.common.base import BaseEstimator +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') class DBSCAN(BaseEstimator, DelayedPredictionMixin, DelayedTransformMixin): diff --git a/python/cuml/dask/cluster/kmeans.py b/python/cuml/dask/cluster/kmeans.py index 4fda844c6a..4242b5d203 100644 --- a/python/cuml/dask/cluster/kmeans.py +++ b/python/cuml/dask/cluster/kmeans.py @@ -13,22 +13,18 @@ # limitations under the License. 
# -import cupy as cp - -from cuml.dask.common.base import BaseEstimator -from cuml.dask.common.base import DelayedPredictionMixin -from cuml.dask.common.base import DelayedTransformMixin -from cuml.dask.common.base import mnmg_import - -from cuml.dask.common.input_utils import concatenate -from cuml.dask.common.input_utils import DistributedDataHandler - -from raft_dask.common.comms import Comms -from raft_dask.common.comms import get_raft_comm_state - -from cuml.dask.common.utils import wait_and_raise_from_futures - from cuml.internals.memory_utils import with_cupy_rmm +from cuml.dask.common.utils import wait_and_raise_from_futures +from raft_dask.common.comms import get_raft_comm_state +from raft_dask.common.comms import Comms +from cuml.dask.common.input_utils import DistributedDataHandler +from cuml.dask.common.input_utils import concatenate +from cuml.dask.common.base import mnmg_import +from cuml.dask.common.base import DelayedTransformMixin +from cuml.dask.common.base import DelayedPredictionMixin +from cuml.dask.common.base import BaseEstimator +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') class KMeans(BaseEstimator, DelayedPredictionMixin, DelayedTransformMixin): diff --git a/python/cuml/dask/common/base.py b/python/cuml/dask/common/base.py index 60a5cf7da3..d47fa5d867 100644 --- a/python/cuml/dask/common/base.py +++ b/python/cuml/dask/common/base.py @@ -14,28 +14,29 @@ # limitations under the License. # -import cudf.comm.serialize # noqa: F401 -import cupy as cp -import dask -import numpy as np -from toolz import first -from collections.abc import Iterable - -from cuml.dask.common.utils import get_client - -from cuml.internals.array import CumlArray -from cuml.dask.common.utils import wait_and_raise_from_futures -from raft_dask.common.comms import Comms -from cuml.dask.common.input_utils import DistributedDataHandler -from cuml.dask.common import parts_to_ranks -from cuml.internals import BaseMetaClass +from distributed.client import Future +from functools import wraps +from dask_cudf.core import Series as dcSeries +from cuml.internals.safe_imports import gpu_only_import_from from cuml.internals.base import Base +from cuml.internals import BaseMetaClass +from cuml.dask.common import parts_to_ranks +from cuml.dask.common.input_utils import DistributedDataHandler +from raft_dask.common.comms import Comms +from cuml.dask.common.utils import wait_and_raise_from_futures +from cuml.internals.array import CumlArray +from cuml.dask.common.utils import get_client +from collections.abc import Iterable +from toolz import first +from cuml.internals.safe_imports import cpu_only_import +import dask +import cudf.comm.serialize # noqa: F401 +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') -from dask_cudf.core import DataFrame as dcDataFrame -from dask_cudf.core import Series as dcSeries -from functools import wraps -from distributed.client import Future +dcDataFrame = gpu_only_import_from('dask_cudf.core', 'DataFrame') class BaseEstimator(object, metaclass=BaseMetaClass): @@ -80,7 +81,6 @@ def get_combined_model(self): return internal_model def _set_internal_model(self, model): - """ Assigns model (a Future or list of futures containins a single-GPU model) to be an internal model. 
diff --git a/python/cuml/dask/common/dask_arr_utils.py b/python/cuml/dask/common/dask_arr_utils.py index 0e55d83452..9726a11c4b 100644 --- a/python/cuml/dask/common/dask_arr_utils.py +++ b/python/cuml/dask/common/dask_arr_utils.py @@ -14,21 +14,19 @@ # -import numpy as np -import cupy as cp -import cupyx -import cudf -import dask -import dask.dataframe as dd - -from cuml.internals.memory_utils import with_cupy_rmm - -from cuml.dask.common.dask_df_utils import to_dask_cudf as df_to_dask_cudf -from dask.distributed import default_client - -from cuml.dask.common.part_utils import _extract_partitions - from cuml.common import rmm_cupy_ary, has_scipy +from cuml.dask.common.part_utils import _extract_partitions +from dask.distributed import default_client +from cuml.dask.common.dask_df_utils import to_dask_cudf as df_to_dask_cudf +from cuml.internals.memory_utils import with_cupy_rmm +import dask.dataframe as dd +import dask +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') +cudf = gpu_only_import('cudf') def validate_dask_array(darray, client=None): diff --git a/python/cuml/dask/common/input_utils.py b/python/cuml/dask/common/input_utils.py index 74f07a255d..cef064ce98 100644 --- a/python/cuml/dask/common/input_utils.py +++ b/python/cuml/dask/common/input_utils.py @@ -15,36 +15,34 @@ # -import cudf -import cuml.internals.logger as logger -import cupy as cp -import numpy as np -import dask.array as da - -from collections.abc import Sequence - -from cuml.internals.memory_utils import with_cupy_rmm - -from collections import OrderedDict -from cudf import DataFrame -from cudf import Series -from dask.dataframe import DataFrame as daskDataFrame -from dask.dataframe import Series as daskSeries -from dask_cudf.core import DataFrame as dcDataFrame -from dask_cudf.core import Series as dcSeries - -from cuml.dask.common.utils import get_client -from cuml.dask.common.dask_df_utils import to_dask_cudf -from cuml.dask.common.dask_arr_utils import validate_dask_array -from cuml.dask.common.part_utils import _extract_partitions - -from dask.distributed import wait -from dask.distributed import default_client +import dask.dataframe as dd +from functools import reduce from toolz import first +from dask.distributed import default_client +from dask.distributed import wait +from cuml.dask.common.part_utils import _extract_partitions +from cuml.dask.common.dask_arr_utils import validate_dask_array +from cuml.dask.common.dask_df_utils import to_dask_cudf +from cuml.dask.common.utils import get_client +from dask_cudf.core import Series as dcSeries +from dask.dataframe import Series as daskSeries +from dask.dataframe import DataFrame as daskDataFrame +from cudf import Series +from cuml.internals.safe_imports import gpu_only_import_from +from collections import OrderedDict +from cuml.internals.memory_utils import with_cupy_rmm +from collections.abc import Sequence +import dask.array as da +from cuml.internals.safe_imports import cpu_only_import +import cuml.internals.logger as logger +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') -from functools import reduce -import dask.dataframe as dd +DataFrame = gpu_only_import_from('cudf', 'DataFrame') +dcDataFrame = gpu_only_import_from('dask_cudf.core', 'DataFrame') class DistributedDataHandler: @@ -148,7 +146,6 @@ def 
calculate_parts_to_sizes(self, comms=None, ranks=None): def _get_datatype_from_inputs(data): - """ Gets the datatype from a distributed data input. diff --git a/python/cuml/dask/common/part_utils.py b/python/cuml/dask/common/part_utils.py index 6a0fda7216..24b06535f0 100644 --- a/python/cuml/dask/common/part_utils.py +++ b/python/cuml/dask/common/part_utils.py @@ -13,22 +13,23 @@ # limitations under the License. # -import numpy as np -from collections import OrderedDict - -from functools import reduce -from tornado import gen -from collections.abc import Sequence -from dask.distributed import futures_of, default_client, wait +from cuml.dask.common.utils import parse_host_port +from dask_cudf.core import Series as dcSeries +from cuml.internals.safe_imports import gpu_only_import_from +from dask.dataframe import Series as daskSeries +from dask.dataframe import DataFrame as daskDataFrame +from dask.array.core import Array as daskArray from toolz import first +from dask.distributed import futures_of, default_client, wait +from collections.abc import Sequence +from tornado import gen +from functools import reduce +from collections import OrderedDict +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from dask.array.core import Array as daskArray -from dask.dataframe import DataFrame as daskDataFrame -from dask.dataframe import Series as daskSeries -from dask_cudf.core import DataFrame as dcDataFrame -from dask_cudf.core import Series as dcSeries -from cuml.dask.common.utils import parse_host_port +dcDataFrame = gpu_only_import_from('dask_cudf.core', 'DataFrame') def hosts_to_parts(futures): diff --git a/python/cuml/dask/common/utils.py b/python/cuml/dask/common/utils.py index c6f13c7ae8..46f77bfdb7 100644 --- a/python/cuml/dask/common/utils.py +++ b/python/cuml/dask/common/utils.py @@ -13,21 +13,18 @@ # limitations under the License. # +from threading import Lock +from asyncio import InvalidStateError +from cuml.internals.import_utils import check_min_dask_version +from cuml.common import device_of_gpu_matrix +from dask.distributed import default_client, wait +import time +import random import dask import logging import os -import numba.cuda -import random -import time - -from dask.distributed import default_client, wait - -from cuml.common import device_of_gpu_matrix -from cuml.internals.import_utils import check_min_dask_version - -from asyncio import InvalidStateError - -from threading import Lock +from cuml.internals.safe_imports import gpu_only_import +numba_cuda = gpu_only_import('numba.cuda') def get_visible_devices(): @@ -73,14 +70,14 @@ def select_device(dev, close=True): :param dev: int device to select :param close: bool close the cuda context and create new one? 
""" - if numba.cuda.get_current_device().id != dev: + if numba_cuda.get_current_device().id != dev: logging.warning("Selecting device " + str(dev)) if close: - numba.cuda.close() - numba.cuda.select_device(dev) - if dev != numba.cuda.get_current_device().id: + numba_cuda.close() + numba_cuda.select_device(dev) + if dev != numba_cuda.get_current_device().id: logging.warning("Current device " + - str(numba.cuda.get_current_device()) + + str(numba_cuda.get_current_device()) + " does not match expected " + str(dev)) @@ -150,7 +147,7 @@ def raise_exception_from_futures(futures): if errs: raise RuntimeError("%d of %d worker jobs failed: %s" % ( len(errs), len(futures), ", ".join(map(str, errs)) - )) + )) def wait_and_raise_from_futures(futures): diff --git a/python/cuml/dask/datasets/classification.py b/python/cuml/dask/datasets/classification.py index 28977f0842..719f7b2a66 100644 --- a/python/cuml/dask/datasets/classification.py +++ b/python/cuml/dask/datasets/classification.py @@ -13,9 +13,10 @@ # limitations under the License. # +from cuml.internals.safe_imports import cpu_only_import from cuml.datasets.classification import _generate_hypercube from cuml.datasets.classification import make_classification \ - as sg_make_classification + as sg_make_classification from cuml.datasets.utils import _create_rs_generator from cuml.dask.datasets.utils import _get_X from cuml.dask.datasets.utils import _get_labels @@ -25,8 +26,9 @@ import dask.array as da -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') def _create_covariance(dims, seed, dtype='float32'): diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 3259df69fa..9fdb11d68a 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -14,16 +14,17 @@ # limitations under the License. # -import dask.array as da -import numpy as np -import cupy as cp - -from cuml.dask.datasets.utils import _get_X -from cuml.dask.datasets.utils import _get_labels -from cuml.dask.datasets.utils import _create_delayed -from cuml.common import with_cupy_rmm -from cuml.dask.common.input_utils import DistributedDataHandler from cuml.dask.common.utils import get_client +from cuml.dask.common.input_utils import DistributedDataHandler +from cuml.common import with_cupy_rmm +from cuml.dask.datasets.utils import _create_delayed +from cuml.dask.datasets.utils import _get_labels +from cuml.dask.datasets.utils import _get_X +from cuml.internals.safe_imports import gpu_only_import +import dask.array as da +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') def _create_rs_generator(random_state): diff --git a/python/cuml/dask/datasets/utils.py b/python/cuml/dask/datasets/utils.py index b2377bb4e9..cbf9c217f1 100644 --- a/python/cuml/dask/datasets/utils.py +++ b/python/cuml/dask/datasets/utils.py @@ -16,7 +16,8 @@ import dask.array as da import dask.delayed -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') def _get_X(t): diff --git a/python/cuml/dask/ensemble/base.py b/python/cuml/dask/ensemble/base.py index 761d2e6204..970869dc5d 100644 --- a/python/cuml/dask/ensemble/base.py +++ b/python/cuml/dask/ensemble/base.py @@ -1,402 +1,402 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import dask -import json -import math -import numpy as np -import cupy as cp -import warnings - -from cuml import using_output_type -from collections.abc import Iterable -from dask.distributed import Future - -from cuml.dask.common.input_utils import DistributedDataHandler, \ - concatenate -from cuml.dask.common.utils import get_client, wait_and_raise_from_futures -from cuml.fil.fil import TreeliteModel - - -class BaseRandomForestModel(object): - """ - BaseRandomForestModel defines functions used in both Random Forest - Classifier and Regressor for Multi Node and Multi GPU models. The common - functions are defined here and called from the main Random Forest Multi - Node Multi GPU APIs. The functions defined here are not meant to be used - as a part of the public API. - """ - - def _create_model(self, model_func, - client, - workers, - n_estimators, - base_seed, - ignore_empty_partitions, - **kwargs): - - self.client = get_client(client) - if workers is None: - # Default to all workers - workers = list(self.client.scheduler_info()['workers'].keys()) - self.workers = workers - self._set_internal_model(None) - self.active_workers = list() - self.ignore_empty_partitions = ignore_empty_partitions - self.n_estimators = n_estimators - - self.n_estimators_per_worker = \ - self._estimators_per_worker(n_estimators) - if base_seed is None: - base_seed = 0 - seeds = [base_seed] - for i in range(1, len(self.n_estimators_per_worker)): - sd = self.n_estimators_per_worker[i-1] + seeds[i-1] - seeds.append(sd) - - self.rfs = { - worker: self.client.submit( - model_func, - n_estimators=self.n_estimators_per_worker[n], - random_state=seeds[n], - **kwargs, - pure=False, - workers=[worker], - ) - for n, worker in enumerate(self.workers) - } - - wait_and_raise_from_futures(list(self.rfs.values())) - - def _estimators_per_worker(self, n_estimators): - n_workers = len(self.workers) - if n_estimators < n_workers: - raise ValueError( - "n_estimators cannot be lower than number of dask workers." 
- ) - - n_est_per_worker = math.floor(n_estimators / n_workers) - n_estimators_per_worker = \ - [n_est_per_worker for i in range(n_workers)] - remaining_est = n_estimators - (n_est_per_worker * n_workers) - for i in range(remaining_est): - n_estimators_per_worker[i] = ( - n_estimators_per_worker[i] + 1 - ) - return n_estimators_per_worker - - def _fit(self, model, dataset, convert_dtype, broadcast_data): - data = DistributedDataHandler.create(dataset, client=self.client) - self.active_workers = data.workers - self.datatype = data.datatype - if self.datatype == 'cudf': - has_float64 = (dataset[0].dtypes == np.float64).any() - else: - has_float64 = (dataset[0].dtype == np.float64) - if has_float64: - raise TypeError("To use Dask RF data should have dtype float32.") - - labels = self.client.persist(dataset[1]) - if self.datatype == 'cudf': - self.num_classes = len(labels.unique()) - else: - self.num_classes = \ - len(dask.array.unique(labels).compute()) - - combined_data = list(map(lambda x: x[1], data.gpu_futures)) \ - if broadcast_data else None - - futures = list() - for idx, (worker, worker_data) in \ - enumerate(data.worker_to_parts.items()): - futures.append( - self.client.submit( - _func_fit, - model[worker], - combined_data if broadcast_data else worker_data, - convert_dtype, - workers=[worker], - pure=False) - ) - - self.n_active_estimators_per_worker = [] - for worker in data.worker_to_parts.keys(): - n = self.workers.index(worker) - n_est = self.n_estimators_per_worker[n] - self.n_active_estimators_per_worker.append(n_est) - - if len(self.workers) > len(self.active_workers): - if self.ignore_empty_partitions: - curent_estimators = self.n_estimators / \ - len(self.workers) * \ - len(self.active_workers) - warn_text = ( - f"Data was not split among all workers " - f"using only {self.active_workers} workers to fit." - f"This will only train {curent_estimators}" - f" estimators instead of the requested " - f"{self.n_estimators}" - ) - warnings.warn(warn_text) - else: - raise ValueError("Data was not split among all workers. " - "Re-run the code or " - "use ignore_empty_partitions=True" - " while creating model") - wait_and_raise_from_futures(futures) - return self - - def _concat_treelite_models(self): - """ - Convert the cuML Random Forest model present in different workers to - the treelite format and then concatenate the different treelite models - to create a single model. The concatenated model is then converted to - bytes format. 
- """ - model_serialized_futures = list() - for w in self.active_workers: - model_serialized_futures.append( - dask.delayed(_get_serialized_model) - (self.rfs[w])) - mod_bytes = self.client.compute(model_serialized_futures, sync=True) - last_worker = w - model = self.rfs[last_worker].result() - all_tl_mod_handles = [ - model._tl_handle_from_bytes(indiv_worker_model_bytes) - for indiv_worker_model_bytes in mod_bytes - ] - - model._concatenate_treelite_handle(all_tl_mod_handles) - for tl_handle in all_tl_mod_handles: - TreeliteModel.free_treelite_model(tl_handle) - return model - - def _partial_inference(self, X, op_type, delayed, **kwargs): - data = DistributedDataHandler.create(X, client=self.client) - combined_data = list(map(lambda x: x[1], data.gpu_futures)) - - func = _func_predict_partial if op_type == 'regression' \ - else _func_predict_proba_partial - - partial_infs = list() - for worker in self.active_workers: - partial_infs.append( - self.client.submit( - func, - self.rfs[worker], - combined_data, - **kwargs, - workers=[worker], - pure=False) - ) - partial_infs = dask.delayed(dask.array.concatenate)( - partial_infs, axis=1, allow_unknown_chunksizes=True) - return partial_infs - - def _predict_using_fil(self, X, delayed, **kwargs): - if self._get_internal_model() is None: - self._set_internal_model(self._concat_treelite_models()) - data = DistributedDataHandler.create(X, client=self.client) - if self._get_internal_model() is None: - self._set_internal_model(self._concat_treelite_models()) - return self._predict(X, delayed=delayed, - output_collection_type=data.datatype, - **kwargs) - - def _get_params(self, deep): - model_params = list() - for idx, worker in enumerate(self.workers): - model_params.append( - self.client.submit( - _func_get_params, - self.rfs[worker], - deep, - workers=[worker] - ) - ) - params_of_each_model = self.client.gather(model_params, errors="raise") - return params_of_each_model - - def _set_params(self, **params): - model_params = list() - for idx, worker in enumerate(self.workers): - model_params.append( - self.client.submit( - _func_set_params, - self.rfs[worker], - **params, - workers=[worker] - ) - ) - wait_and_raise_from_futures(model_params) - return self - - def _get_summary_text(self): - """ - Obtain the summary of the forest as text - """ - futures = list() - for n, w in enumerate(self.workers): - futures.append( - self.client.submit( - _get_summary_text_func, - self.rfs[w], - workers=[w], - ) - ) - all_dump = self.client.gather(futures, errors='raise') - return '\n'.join(all_dump) - - def _get_detailed_text(self): - """ - Obtain the detailed information of the forest as text - """ - futures = list() - for n, w in enumerate(self.workers): - futures.append( - self.client.submit( - _get_detailed_text_func, - self.rfs[w], - workers=[w], - ) - ) - all_dump = self.client.gather(futures, errors='raise') - return '\n'.join(all_dump) - - def _get_json(self): - """ - Export the Random Forest model as a JSON string - """ - dump = list() - for n, w in enumerate(self.workers): - dump.append( - self.client.submit( - _get_json_func, - self.rfs[w], - workers=[w], - ) - ) - all_dump = self.client.gather(dump, errors='raise') - combined_dump = [] - for e in all_dump: - obj = json.loads(e) - combined_dump.extend(obj) - return json.dumps(combined_dump) - - def get_combined_model(self): - """ - Return single-GPU model for serialization. - - Returns - ------- - - model : Trained single-GPU model or None if the model has not - yet been trained. 
- """ - - # set internal model if it hasn't been accessed before - if self._get_internal_model() is None: - self._set_internal_model(self._concat_treelite_models()) - - internal_model = self._check_internal_model(self._get_internal_model()) - - if isinstance(self.internal_model, Iterable): - # This function needs to return a single instance of cuml.Base, - # even if the class is just a composite. - raise ValueError("Expected a single instance of cuml.Base " - "but got %s instead." % type(self.internal_model)) - - elif isinstance(self.internal_model, Future): - internal_model = self.internal_model.result() - - return internal_model - - def apply_reduction(self, reduce, partial_infs, datatype, delayed): - """ - Reduces the partial inferences to obtain the final result. The workers - didn't have the same number of trees to form their predictions. To - correct for this worker's predictions are weighted differently during - reduction. - """ - workers_weights = np.array(self.n_active_estimators_per_worker) - workers_weights = workers_weights[workers_weights != 0] - workers_weights = workers_weights / workers_weights.sum() - workers_weights = cp.array(workers_weights) - unique_classes = None if not hasattr(self, 'unique_classes') \ - else self.unique_classes - delayed_local_array = dask.delayed(reduce)(partial_infs, - workers_weights, - unique_classes) - delayed_res = dask.array.from_delayed(delayed_local_array, - shape=(np.nan, np.nan), - dtype=np.float32) - if delayed: - return delayed_res - else: - return delayed_res.persist() - - -def _func_fit(model, input_data, convert_dtype): - X = concatenate([item[0] for item in input_data]) - y = concatenate([item[1] for item in input_data]) - return model.fit(X, y, convert_dtype) - - -def _func_predict_partial(model, input_data, **kwargs): - """ - Whole dataset inference with part of the model (trees at disposal locally). - Transfer dataset instead of model. Interesting when model is larger - than dataset. - """ - X = concatenate(input_data) - with using_output_type('cupy'): - prediction = model.predict(X, **kwargs) - return cp.expand_dims(prediction, axis=1) - - -def _func_predict_proba_partial(model, input_data, **kwargs): - """ - Whole dataset inference with part of the model (trees at disposal locally). - Transfer dataset instead of model. Interesting when model is larger - than dataset. - """ - X = concatenate(input_data) - with using_output_type('cupy'): - prediction = model.predict_proba(X, **kwargs) - return cp.expand_dims(prediction, axis=1) - - -def _get_summary_text_func(model): - return model.get_summary_text() - - -def _get_detailed_text_func(model): - return model.get_detailed_text() - - -def _get_json_func(model): - return model.get_json() - - -def _func_get_params(model, deep): - return model.get_params(deep) - - -def _func_set_params(model, **params): - return model.set_params(**params) - - -def _get_serialized_model(model): - return model._get_serialized_model() +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from cuml.fil.fil import TreeliteModel +from cuml.dask.common.utils import get_client, wait_and_raise_from_futures +from cuml.dask.common.input_utils import DistributedDataHandler, \ + concatenate +from dask.distributed import Future +from collections.abc import Iterable +from cuml import using_output_type +import warnings +from cuml.internals.safe_imports import gpu_only_import +import dask +import json +import math +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') + + +class BaseRandomForestModel(object): + """ + BaseRandomForestModel defines functions used in both Random Forest + Classifier and Regressor for Multi Node and Multi GPU models. The common + functions are defined here and called from the main Random Forest Multi + Node Multi GPU APIs. The functions defined here are not meant to be used + as a part of the public API. + """ + + def _create_model(self, model_func, + client, + workers, + n_estimators, + base_seed, + ignore_empty_partitions, + **kwargs): + + self.client = get_client(client) + if workers is None: + # Default to all workers + workers = list(self.client.scheduler_info()['workers'].keys()) + self.workers = workers + self._set_internal_model(None) + self.active_workers = list() + self.ignore_empty_partitions = ignore_empty_partitions + self.n_estimators = n_estimators + + self.n_estimators_per_worker = \ + self._estimators_per_worker(n_estimators) + if base_seed is None: + base_seed = 0 + seeds = [base_seed] + for i in range(1, len(self.n_estimators_per_worker)): + sd = self.n_estimators_per_worker[i-1] + seeds[i-1] + seeds.append(sd) + + self.rfs = { + worker: self.client.submit( + model_func, + n_estimators=self.n_estimators_per_worker[n], + random_state=seeds[n], + **kwargs, + pure=False, + workers=[worker], + ) + for n, worker in enumerate(self.workers) + } + + wait_and_raise_from_futures(list(self.rfs.values())) + + def _estimators_per_worker(self, n_estimators): + n_workers = len(self.workers) + if n_estimators < n_workers: + raise ValueError( + "n_estimators cannot be lower than number of dask workers." 
+ ) + + n_est_per_worker = math.floor(n_estimators / n_workers) + n_estimators_per_worker = \ + [n_est_per_worker for i in range(n_workers)] + remaining_est = n_estimators - (n_est_per_worker * n_workers) + for i in range(remaining_est): + n_estimators_per_worker[i] = ( + n_estimators_per_worker[i] + 1 + ) + return n_estimators_per_worker + + def _fit(self, model, dataset, convert_dtype, broadcast_data): + data = DistributedDataHandler.create(dataset, client=self.client) + self.active_workers = data.workers + self.datatype = data.datatype + if self.datatype == 'cudf': + has_float64 = (dataset[0].dtypes == np.float64).any() + else: + has_float64 = (dataset[0].dtype == np.float64) + if has_float64: + raise TypeError("To use Dask RF data should have dtype float32.") + + labels = self.client.persist(dataset[1]) + if self.datatype == 'cudf': + self.num_classes = len(labels.unique()) + else: + self.num_classes = \ + len(dask.array.unique(labels).compute()) + + combined_data = list(map(lambda x: x[1], data.gpu_futures)) \ + if broadcast_data else None + + futures = list() + for idx, (worker, worker_data) in \ + enumerate(data.worker_to_parts.items()): + futures.append( + self.client.submit( + _func_fit, + model[worker], + combined_data if broadcast_data else worker_data, + convert_dtype, + workers=[worker], + pure=False) + ) + + self.n_active_estimators_per_worker = [] + for worker in data.worker_to_parts.keys(): + n = self.workers.index(worker) + n_est = self.n_estimators_per_worker[n] + self.n_active_estimators_per_worker.append(n_est) + + if len(self.workers) > len(self.active_workers): + if self.ignore_empty_partitions: + curent_estimators = self.n_estimators / \ + len(self.workers) * \ + len(self.active_workers) + warn_text = ( + f"Data was not split among all workers " + f"using only {self.active_workers} workers to fit." + f"This will only train {curent_estimators}" + f" estimators instead of the requested " + f"{self.n_estimators}" + ) + warnings.warn(warn_text) + else: + raise ValueError("Data was not split among all workers. " + "Re-run the code or " + "use ignore_empty_partitions=True" + " while creating model") + wait_and_raise_from_futures(futures) + return self + + def _concat_treelite_models(self): + """ + Convert the cuML Random Forest model present in different workers to + the treelite format and then concatenate the different treelite models + to create a single model. The concatenated model is then converted to + bytes format. 
+ """ + model_serialized_futures = list() + for w in self.active_workers: + model_serialized_futures.append( + dask.delayed(_get_serialized_model) + (self.rfs[w])) + mod_bytes = self.client.compute(model_serialized_futures, sync=True) + last_worker = w + model = self.rfs[last_worker].result() + all_tl_mod_handles = [ + model._tl_handle_from_bytes(indiv_worker_model_bytes) + for indiv_worker_model_bytes in mod_bytes + ] + + model._concatenate_treelite_handle(all_tl_mod_handles) + for tl_handle in all_tl_mod_handles: + TreeliteModel.free_treelite_model(tl_handle) + return model + + def _partial_inference(self, X, op_type, delayed, **kwargs): + data = DistributedDataHandler.create(X, client=self.client) + combined_data = list(map(lambda x: x[1], data.gpu_futures)) + + func = _func_predict_partial if op_type == 'regression' \ + else _func_predict_proba_partial + + partial_infs = list() + for worker in self.active_workers: + partial_infs.append( + self.client.submit( + func, + self.rfs[worker], + combined_data, + **kwargs, + workers=[worker], + pure=False) + ) + partial_infs = dask.delayed(dask.array.concatenate)( + partial_infs, axis=1, allow_unknown_chunksizes=True) + return partial_infs + + def _predict_using_fil(self, X, delayed, **kwargs): + if self._get_internal_model() is None: + self._set_internal_model(self._concat_treelite_models()) + data = DistributedDataHandler.create(X, client=self.client) + if self._get_internal_model() is None: + self._set_internal_model(self._concat_treelite_models()) + return self._predict(X, delayed=delayed, + output_collection_type=data.datatype, + **kwargs) + + def _get_params(self, deep): + model_params = list() + for idx, worker in enumerate(self.workers): + model_params.append( + self.client.submit( + _func_get_params, + self.rfs[worker], + deep, + workers=[worker] + ) + ) + params_of_each_model = self.client.gather(model_params, errors="raise") + return params_of_each_model + + def _set_params(self, **params): + model_params = list() + for idx, worker in enumerate(self.workers): + model_params.append( + self.client.submit( + _func_set_params, + self.rfs[worker], + **params, + workers=[worker] + ) + ) + wait_and_raise_from_futures(model_params) + return self + + def _get_summary_text(self): + """ + Obtain the summary of the forest as text + """ + futures = list() + for n, w in enumerate(self.workers): + futures.append( + self.client.submit( + _get_summary_text_func, + self.rfs[w], + workers=[w], + ) + ) + all_dump = self.client.gather(futures, errors='raise') + return '\n'.join(all_dump) + + def _get_detailed_text(self): + """ + Obtain the detailed information of the forest as text + """ + futures = list() + for n, w in enumerate(self.workers): + futures.append( + self.client.submit( + _get_detailed_text_func, + self.rfs[w], + workers=[w], + ) + ) + all_dump = self.client.gather(futures, errors='raise') + return '\n'.join(all_dump) + + def _get_json(self): + """ + Export the Random Forest model as a JSON string + """ + dump = list() + for n, w in enumerate(self.workers): + dump.append( + self.client.submit( + _get_json_func, + self.rfs[w], + workers=[w], + ) + ) + all_dump = self.client.gather(dump, errors='raise') + combined_dump = [] + for e in all_dump: + obj = json.loads(e) + combined_dump.extend(obj) + return json.dumps(combined_dump) + + def get_combined_model(self): + """ + Return single-GPU model for serialization. + + Returns + ------- + + model : Trained single-GPU model or None if the model has not + yet been trained. 
+ """ + + # set internal model if it hasn't been accessed before + if self._get_internal_model() is None: + self._set_internal_model(self._concat_treelite_models()) + + internal_model = self._check_internal_model(self._get_internal_model()) + + if isinstance(self.internal_model, Iterable): + # This function needs to return a single instance of cuml.Base, + # even if the class is just a composite. + raise ValueError("Expected a single instance of cuml.Base " + "but got %s instead." % type(self.internal_model)) + + elif isinstance(self.internal_model, Future): + internal_model = self.internal_model.result() + + return internal_model + + def apply_reduction(self, reduce, partial_infs, datatype, delayed): + """ + Reduces the partial inferences to obtain the final result. The workers + didn't have the same number of trees to form their predictions. To + correct for this worker's predictions are weighted differently during + reduction. + """ + workers_weights = np.array(self.n_active_estimators_per_worker) + workers_weights = workers_weights[workers_weights != 0] + workers_weights = workers_weights / workers_weights.sum() + workers_weights = cp.array(workers_weights) + unique_classes = None if not hasattr(self, 'unique_classes') \ + else self.unique_classes + delayed_local_array = dask.delayed(reduce)(partial_infs, + workers_weights, + unique_classes) + delayed_res = dask.array.from_delayed(delayed_local_array, + shape=(np.nan, np.nan), + dtype=np.float32) + if delayed: + return delayed_res + else: + return delayed_res.persist() + + +def _func_fit(model, input_data, convert_dtype): + X = concatenate([item[0] for item in input_data]) + y = concatenate([item[1] for item in input_data]) + return model.fit(X, y, convert_dtype) + + +def _func_predict_partial(model, input_data, **kwargs): + """ + Whole dataset inference with part of the model (trees at disposal locally). + Transfer dataset instead of model. Interesting when model is larger + than dataset. + """ + X = concatenate(input_data) + with using_output_type('cupy'): + prediction = model.predict(X, **kwargs) + return cp.expand_dims(prediction, axis=1) + + +def _func_predict_proba_partial(model, input_data, **kwargs): + """ + Whole dataset inference with part of the model (trees at disposal locally). + Transfer dataset instead of model. Interesting when model is larger + than dataset. + """ + X = concatenate(input_data) + with using_output_type('cupy'): + prediction = model.predict_proba(X, **kwargs) + return cp.expand_dims(prediction, axis=1) + + +def _get_summary_text_func(model): + return model.get_summary_text() + + +def _get_detailed_text_func(model): + return model.get_detailed_text() + + +def _get_json_func(model): + return model.get_json() + + +def _func_get_params(model, deep): + return model.get_params(deep) + + +def _func_set_params(model, **params): + return model.set_params(**params) + + +def _get_serialized_model(model): + return model._get_serialized_model() diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index 80e52fde12..c1d3c8ab4e 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -14,19 +14,19 @@ # limitations under the License. 
# -import numpy as np -import cupy as cp - -from cuml.dask.common.base import BaseEstimator -from cuml.ensemble import RandomForestClassifier as cuRFC -from cuml.dask.common.input_utils import DistributedDataHandler -from cuml.dask.common.base import DelayedPredictionMixin, \ - DelayedPredictionProbaMixin +import dask +from dask.distributed import default_client from cuml.dask.ensemble.base import \ BaseRandomForestModel -from dask.distributed import default_client - -import dask +from cuml.dask.common.base import DelayedPredictionMixin, \ + DelayedPredictionProbaMixin +from cuml.dask.common.input_utils import DistributedDataHandler +from cuml.ensemble import RandomForestClassifier as cuRFC +from cuml.dask.common.base import BaseEstimator +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') class RandomForestClassifier(BaseRandomForestModel, DelayedPredictionMixin, @@ -409,6 +409,7 @@ def predict_using_fil(self, X, delayed, **kwargs): TODO : Update function names used for CPU predict. Cuml issue #1854 has been created to track this. """ + def predict_model_on_cpu(self, X, convert_dtype=True): """ Predicts the labels for X. diff --git a/python/cuml/dask/extended/linear_model/logistic_regression.py b/python/cuml/dask/extended/linear_model/logistic_regression.py index 2ea5e20d3e..4b95c12d31 100644 --- a/python/cuml/dask/extended/linear_model/logistic_regression.py +++ b/python/cuml/dask/extended/linear_model/logistic_regression.py @@ -13,14 +13,16 @@ # limitations under the License. # +from dask.utils import is_dataframe_like, is_series_like, is_arraylike +from cuml.internals.safe_imports import cpu_only_import from cuml.dask.common.base import BaseEstimator from cuml.common import with_cupy_rmm from cuml.internals.import_utils import has_daskglm -import cupy as cp -import numpy as np -from dask.utils import is_dataframe_like, is_series_like, is_arraylike -import cudf +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') +cudf = gpu_only_import('cudf') class LogisticRegression(BaseEstimator): diff --git a/python/cuml/dask/metrics/confusion_matrix.py b/python/cuml/dask/metrics/confusion_matrix.py index 6ccb8729be..871e7a8e24 100644 --- a/python/cuml/dask/metrics/confusion_matrix.py +++ b/python/cuml/dask/metrics/confusion_matrix.py @@ -14,16 +14,16 @@ # limitations under the License. # -import numpy as np -import cupy as cp -import cupyx -from cuml.dask.common.input_utils import DistributedDataHandler - -from cuml.dask.common.utils import get_client - -from cuml.internals.memory_utils import with_cupy_rmm -from cuml.dask.metrics.utils import sorted_unique_labels from cuml.prims.label import make_monotonic +from cuml.dask.metrics.utils import sorted_unique_labels +from cuml.internals.memory_utils import with_cupy_rmm +from cuml.dask.common.utils import get_client +from cuml.dask.common.input_utils import DistributedDataHandler +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') @with_cupy_rmm diff --git a/python/cuml/dask/metrics/utils.py b/python/cuml/dask/metrics/utils.py index 011b3d8e5c..548f88e848 100644 --- a/python/cuml/dask/metrics/utils.py +++ b/python/cuml/dask/metrics/utils.py @@ -14,7 +14,8 @@ # limitations under the License. 
# -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') def sorted_unique_labels(*ys): diff --git a/python/cuml/dask/naive_bayes/naive_bayes.py b/python/cuml/dask/naive_bayes/naive_bayes.py index 7e9b8810b8..83720ca9a8 100644 --- a/python/cuml/dask/naive_bayes/naive_bayes.py +++ b/python/cuml/dask/naive_bayes/naive_bayes.py @@ -14,27 +14,20 @@ # limitations under the License. # -import cupy as cp -import dask -from toolz import first - -import dask.array - - -from cuml.common import with_cupy_rmm - -from cuml.dask.common.base import BaseEstimator -from cuml.dask.common.base import DelayedPredictionMixin - -from cuml.dask.common.utils import wait_and_raise_from_futures - -from cuml.dask.common.func import reduce -from cuml.dask.common.func import tree_reduce - -from cuml.dask.common.input_utils import DistributedDataHandler -from cuml.common import rmm_cupy_ary - from cuml.naive_bayes import MultinomialNB as MNB +from cuml.common import rmm_cupy_ary +from cuml.dask.common.input_utils import DistributedDataHandler +from cuml.dask.common.func import tree_reduce +from cuml.dask.common.func import reduce +from cuml.dask.common.utils import wait_and_raise_from_futures +from cuml.dask.common.base import DelayedPredictionMixin +from cuml.dask.common.base import BaseEstimator +from cuml.common import with_cupy_rmm +import dask.array +from toolz import first +import dask +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') class MultinomialNB(BaseEstimator, @@ -88,8 +81,8 @@ class MultinomialNB(BaseEstimator, >>> cluster.close() """ - def __init__(self, *, client=None, verbose=False, **kwargs): + def __init__(self, *, client=None, verbose=False, **kwargs): """ Create new multinomial distributed Naive Bayes classifier instance @@ -138,7 +131,6 @@ def _update_log_probs(model): @with_cupy_rmm def fit(self, X, y, classes=None): - """ Fit distributed Naive Bayes classifier model diff --git a/python/cuml/dask/neighbors/kneighbors_classifier.py b/python/cuml/dask/neighbors/kneighbors_classifier.py index f704908c08..2038497bfe 100644 --- a/python/cuml/dask/neighbors/kneighbors_classifier.py +++ b/python/cuml/dask/neighbors/kneighbors_classifier.py @@ -14,6 +14,7 @@ # limitations under the License. # +from cuml.internals.safe_imports import gpu_only_import from cuml.dask.common.input_utils import DistributedDataHandler from cuml.dask.common.input_utils import to_output from cuml.dask.common import parts_to_ranks @@ -25,9 +26,10 @@ from dask.dataframe import Series as DaskSeries import dask.array as da from uuid import uuid1 -import numpy as np -import pandas as pd -import cudf +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') +cudf = gpu_only_import('cudf') class KNeighborsClassifier(NearestNeighbors): @@ -59,6 +61,7 @@ class KNeighborsClassifier(NearestNeighbors): Sets logging level. It must be one of `cuml.common.logger.level_*`. See :ref:`verbosity-levels` for more info. 
""" + def __init__(self, *, client=None, streams_per_handle=0, verbose=False, **kwargs): super().__init__(client=client, @@ -357,8 +360,8 @@ def predict_proba(self, X, convert_dtype=True): True, key="%s-%s" % (key, idx), workers=[worker])) - for idx, worker in enumerate(comms.worker_addresses) - ]) + for idx, worker in enumerate(comms.worker_addresses) + ]) wait_and_raise_from_futures(list(knn_prob_res.values())) diff --git a/python/cuml/dask/preprocessing/LabelEncoder.py b/python/cuml/dask/preprocessing/LabelEncoder.py index de0b1a1fdf..05befe6763 100644 --- a/python/cuml/dask/preprocessing/LabelEncoder.py +++ b/python/cuml/dask/preprocessing/LabelEncoder.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from cuml.preprocessing import LabelEncoder as LE +from cuml.common.exceptions import NotFittedError +from dask_cudf.core import Series as daskSeries from cuml.dask.common.base import BaseEstimator from cuml.dask.common.base import DelayedTransformMixin from cuml.dask.common.base import DelayedInverseTransformMixin @@ -19,10 +22,8 @@ from toolz import first from collections.abc import Sequence -from dask_cudf.core import DataFrame as dcDataFrame -from dask_cudf.core import Series as daskSeries -from cuml.common.exceptions import NotFittedError -from cuml.preprocessing import LabelEncoder as LE +from cuml.internals.safe_imports import gpu_only_import_from +dcDataFrame = gpu_only_import_from('dask_cudf.core', 'DataFrame') class LabelEncoder(BaseEstimator, @@ -119,6 +120,7 @@ class LabelEncoder(BaseEstimator, >>> cluster.close() """ + def __init__(self, *, client=None, verbose=False, **kwargs): super().__init__(client=client, verbose=verbose, diff --git a/python/cuml/dask/preprocessing/encoders.py b/python/cuml/dask/preprocessing/encoders.py index 6fdbad1825..cc55cf4711 100644 --- a/python/cuml/dask/preprocessing/encoders.py +++ b/python/cuml/dask/preprocessing/encoders.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from dask_cudf.core import Series as daskSeries from cuml.common import with_cupy_rmm from cuml.dask.common.base import BaseEstimator @@ -21,8 +22,8 @@ from toolz import first from collections.abc import Sequence -from dask_cudf.core import DataFrame as dcDataFrame -from dask_cudf.core import Series as daskSeries +from cuml.internals.safe_imports import gpu_only_import_from +dcDataFrame = gpu_only_import_from('dask_cudf.core', 'DataFrame') class OneHotEncoder(BaseEstimator, DelayedTransformMixin, diff --git a/python/cuml/dask/preprocessing/label.py b/python/cuml/dask/preprocessing/label.py index c592b6ddb3..ab51259479 100644 --- a/python/cuml/dask/preprocessing/label.py +++ b/python/cuml/dask/preprocessing/label.py @@ -20,8 +20,9 @@ from cuml.common import rmm_cupy_ary import dask -import cupy as cp -import cupyx +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') class LabelBinarizer(BaseEstimator): @@ -74,6 +75,7 @@ class LabelBinarizer(BaseEstimator): >>> cluster.close() """ + def __init__(self, *, client=None, **kwargs): super().__init__(client=client, **kwargs) @@ -217,7 +219,7 @@ def inverse_transform(self, y, threshold=None): f = [dask.array.from_delayed( inv_func(internal_model, part, threshold), dtype=dtype, shape=(cp.nan,), meta=meta) - for w, part in parts] + for w, part in parts] arr = dask.array.concatenate(f, axis=0, allow_unknown_chunksizes=True) diff --git a/python/cuml/datasets/arima.pyx b/python/cuml/datasets/arima.pyx index e773d75dd2..099e66153f 100644 --- a/python/cuml/datasets/arima.pyx +++ b/python/cuml/datasets/arima.pyx @@ -18,7 +18,8 @@ import warnings -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from cuml.internals.array import CumlArray as cumlArray import cuml.internals diff --git a/python/cuml/datasets/blobs.py b/python/cuml/datasets/blobs.py index 74141196db..a951be481f 100644 --- a/python/cuml/datasets/blobs.py +++ b/python/cuml/datasets/blobs.py @@ -15,13 +15,15 @@ # +from cuml.datasets.utils import _create_rs_generator +import cuml.internals +from cuml.internals.safe_imports import cpu_only_import import nvtx import numbers from collections.abc import Iterable -import cupy as cp -import numpy as np -import cuml.internals -from cuml.datasets.utils import _create_rs_generator +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') def _get_centers(rs, centers, center_box, n_samples, n_features, dtype): @@ -178,11 +180,11 @@ def make_blobs(n_samples=100, n_features=2, centers=None, cluster_std=1.0, np_seed = int(generator.randint(n_samples, size=1)) np.random.seed(np_seed) shuffled_sample_indices = cp.array(np.random.choice( - n_centers, - n_samples, - replace=True, - p=proba_samples_per_center - )) + n_centers, + n_samples, + replace=True, + p=proba_samples_per_center + )) for i, (n, std) in enumerate(zip(n_samples_per_center, cluster_std)): center_indices = cp.where(shuffled_sample_indices == i) diff --git a/python/cuml/datasets/classification.py b/python/cuml/datasets/classification.py index ab6907292c..7e5108f748 100644 --- a/python/cuml/datasets/classification.py +++ b/python/cuml/datasets/classification.py @@ -13,13 +13,15 @@ # limitations under the License. 
# +import nvtx +from cuml.internals.safe_imports import cpu_only_import import cuml.internals from cuml.internals.import_utils import has_sklearn from cuml.datasets.utils import _create_rs_generator -import cupy as cp -import numpy as np -import nvtx +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') def _generate_hypercube(samples, dimensions, rng): @@ -268,11 +270,11 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, proba_samples_per_cluster = np.array(n_samples_per_cluster) / np.sum( n_samples_per_cluster) shuffled_sample_indices = cp.array(np.random.choice( - n_clusters, - n_samples, - replace=True, - p=proba_samples_per_cluster - )) + n_clusters, + n_samples, + replace=True, + p=proba_samples_per_cluster + )) for k, centroid in enumerate(centroids): centroid_indices = cp.where(shuffled_sample_indices == k) y[centroid_indices[0]] = k % n_classes diff --git a/python/cuml/datasets/regression.pyx b/python/cuml/datasets/regression.pyx index c881597164..0076bbe3a0 100644 --- a/python/cuml/datasets/regression.pyx +++ b/python/cuml/datasets/regression.pyx @@ -19,7 +19,8 @@ import typing import nvtx -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import cuml.internals from cuml.internals.array import CumlArray diff --git a/python/cuml/datasets/utils.py b/python/cuml/datasets/utils.py index d6e0c3c16d..4af7639d0c 100644 --- a/python/cuml/datasets/utils.py +++ b/python/cuml/datasets/utils.py @@ -13,7 +13,8 @@ # limitations under the License. # -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') def _create_rs_generator(random_state): diff --git a/python/cuml/decomposition/base_mg.pyx b/python/cuml/decomposition/base_mg.pyx index 7d70b162b6..309dc892b4 100644 --- a/python/cuml/decomposition/base_mg.pyx +++ b/python/cuml/decomposition/base_mg.pyx @@ -17,9 +17,11 @@ import ctypes -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -import rmm +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') from libc.stdlib cimport malloc, free diff --git a/python/cuml/decomposition/incremental_pca.py b/python/cuml/decomposition/incremental_pca.py index 4f6c39ea1a..db4f252111 100644 --- a/python/cuml/decomposition/incremental_pca.py +++ b/python/cuml/decomposition/incremental_pca.py @@ -14,17 +14,19 @@ # limitations under the License. 
# +from cuml.decomposition import PCA +from cuml.internals.array import CumlArray +import cuml.internals +from cuml.internals.input_utils import input_to_cupy_array +from cuml.common import input_to_cuml_array +from cuml import Base +from cuml.internals.safe_imports import cpu_only_import import numbers -import cupy as cp -import cupyx -import scipy -from cuml import Base -from cuml.common import input_to_cuml_array -from cuml.internals.input_utils import input_to_cupy_array -import cuml.internals -from cuml.internals.array import CumlArray -from cuml.decomposition import PCA +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') +scipy = cpu_only_import('scipy') class IncrementalPCA(PCA): @@ -191,6 +193,7 @@ class IncrementalPCA(PCA): >>> ipca.noise_variance_.item() # doctest: +SKIP 0.0037122774558343763 """ + def __init__(self, *, handle=None, n_components=None, whiten=False, copy=True, batch_size=None, verbose=False, output_type=None): diff --git a/python/cuml/decomposition/pca.pyx b/python/cuml/decomposition/pca.pyx index 64c2c2c392..308423da05 100644 --- a/python/cuml/decomposition/pca.pyx +++ b/python/cuml/decomposition/pca.pyx @@ -17,14 +17,16 @@ # distutils: language = c++ import ctypes -import numpy as np -import cupy as cp -import cupyx -import scipy +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') +scipy = cpu_only_import('scipy') from enum import IntEnum -import rmm +rmm = gpu_only_import('rmm') from libcpp cimport bool from libc.stdint cimport uintptr_t diff --git a/python/cuml/decomposition/pca_mg.pyx b/python/cuml/decomposition/pca_mg.pyx index c76750e312..ba40171efc 100644 --- a/python/cuml/decomposition/pca_mg.pyx +++ b/python/cuml/decomposition/pca_mg.pyx @@ -17,10 +17,12 @@ import ctypes -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from enum import IntEnum -import rmm +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') from libc.stdlib cimport malloc, free diff --git a/python/cuml/decomposition/tsvd.pyx b/python/cuml/decomposition/tsvd.pyx index 45c6fe5018..53b0a45a80 100644 --- a/python/cuml/decomposition/tsvd.pyx +++ b/python/cuml/decomposition/tsvd.pyx @@ -17,11 +17,13 @@ # distutils: language = c++ import ctypes -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from enum import IntEnum -import rmm +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') from libcpp cimport bool from libc.stdint cimport uintptr_t diff --git a/python/cuml/decomposition/tsvd_mg.pyx b/python/cuml/decomposition/tsvd_mg.pyx index 5dc10f807a..10be272821 100644 --- a/python/cuml/decomposition/tsvd_mg.pyx +++ b/python/cuml/decomposition/tsvd_mg.pyx @@ -16,9 +16,11 @@ # distutils: language = c++ import ctypes -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -import rmm +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') from libc.stdlib cimport malloc, free diff --git a/python/cuml/ensemble/randomforest_common.pyx b/python/cuml/ensemble/randomforest_common.pyx index 8714ed7c80..77fe304d92 100644 --- a/python/cuml/ensemble/randomforest_common.pyx +++ b/python/cuml/ensemble/randomforest_common.pyx 
@@ -14,13 +14,15 @@ # limitations under the License. # import ctypes -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') import math import warnings import typing from inspect import signature -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from cuml import ForestInference from cuml.fil.fil import TreeliteModel from pylibraft.common.handle import Handle diff --git a/python/cuml/ensemble/randomforest_shared.pyx b/python/cuml/ensemble/randomforest_shared.pyx index 166289d0af..5e4fe49649 100644 --- a/python/cuml/ensemble/randomforest_shared.pyx +++ b/python/cuml/ensemble/randomforest_shared.pyx @@ -22,7 +22,8 @@ from cpython.object cimport PyObject from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr from typing import Tuple, Dict, List, Union -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') cdef extern from "treelite/tree.h" namespace "treelite": cdef struct PyBufferFrame: diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index 033648ff1b..608b25b439 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -16,9 +16,11 @@ # # distutils: language = c++ -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import nvtx -import rmm +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') import warnings import cuml.internals.logger as logger @@ -45,7 +47,8 @@ from libcpp.vector cimport vector from libc.stdint cimport uintptr_t, uint64_t from libc.stdlib cimport calloc, malloc, free -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') from cuml.prims.label.classlabels import check_labels, invert_labels from pylibraft.common.handle cimport handle_t diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index 247aed3401..fc34099b7e 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -16,9 +16,11 @@ # distutils: language = c++ -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import nvtx -import rmm +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') import warnings import cuml.internals.logger as logger @@ -46,7 +48,8 @@ from libcpp.vector cimport vector from libc.stdint cimport uintptr_t, uint64_t from libc.stdlib cimport calloc, malloc, free -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') from pylibraft.common.handle cimport handle_t cimport cuml.common.cuda diff --git a/python/cuml/experimental/hyperopt_utils/plotting_utils.py b/python/cuml/experimental/hyperopt_utils/plotting_utils.py index 887717ceb3..45782a0682 100644 --- a/python/cuml/experimental/hyperopt_utils/plotting_utils.py +++ b/python/cuml/experimental/hyperopt_utils/plotting_utils.py @@ -14,10 +14,11 @@ # limitations under the License. 
# -import numpy as np -import matplotlib.pyplot as plt -import pandas as pd import seaborn as sns +import matplotlib.pyplot as plt +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') def plot_heatmap(df, col1, col2): diff --git a/python/cuml/experimental/linear_model/lars.pyx b/python/cuml/experimental/linear_model/lars.pyx index 5c1b1e0451..d609ac6b51 100644 --- a/python/cuml/experimental/linear_model/lars.pyx +++ b/python/cuml/experimental/linear_model/lars.pyx @@ -20,8 +20,10 @@ # cython: language_level = 3 import ctypes -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') import warnings import cuml.internals.logger as logger import cuml.internals diff --git a/python/cuml/explainer/base.pyx b/python/cuml/explainer/base.pyx index 3c62520dbb..7ad1b54afa 100644 --- a/python/cuml/explainer/base.pyx +++ b/python/cuml/explainer/base.pyx @@ -14,11 +14,14 @@ # limitations under the License. # -import cudf +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') import cuml.internals -import cupy as cp -import numpy as np -import pandas +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pandas = cpu_only_import('pandas') import cuml.internals.logger as logger from cuml.internals.import_utils import has_shap diff --git a/python/cuml/explainer/common.py b/python/cuml/explainer/common.py index 026d89bbe5..f87b9819df 100644 --- a/python/cuml/explainer/common.py +++ b/python/cuml/explainer/common.py @@ -14,11 +14,11 @@ # limitations under the License. 
# -import cupy as cp -from pylibraft.common.handle import Handle - -from cuml.internals.input_utils import input_to_cupy_array from cuml.internals.base import Base +from cuml.internals.input_utils import input_to_cupy_array +from pylibraft.common.handle import Handle +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') def get_tag_from_model_func(func, tag, default=None): diff --git a/python/cuml/explainer/kernel_shap.pyx b/python/cuml/explainer/kernel_shap.pyx index ecc8299594..fb3b49c216 100644 --- a/python/cuml/explainer/kernel_shap.pyx +++ b/python/cuml/explainer/kernel_shap.pyx @@ -15,8 +15,10 @@ # import cuml.internals.logger as logger -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import time from cuml.internals.import_utils import has_sklearn diff --git a/python/cuml/explainer/permutation_shap.pyx b/python/cuml/explainer/permutation_shap.pyx index e4b851347c..63506c4c82 100644 --- a/python/cuml/explainer/permutation_shap.pyx +++ b/python/cuml/explainer/permutation_shap.pyx @@ -15,11 +15,14 @@ # import cuml -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import time -from cudf import DataFrame as cu_df +from cuml.internals.safe_imports import gpu_only_import_from +cu_df = gpu_only_import_from('cudf', 'DataFrame') from cuml.internals.array import CumlArray from cuml.internals.input_utils import input_to_cupy_array from cuml.explainer.base import SHAPBase @@ -27,8 +30,9 @@ from cuml.explainer.common import get_cai_ptr from cuml.explainer.common import get_dtype_from_model_func from cuml.explainer.common import get_tag_from_model_func from cuml.explainer.common import model_func_call -from numba import cuda -from pandas import DataFrame as pd_df +cuda = gpu_only_import_from('numba', 'cuda') +from cuml.internals.safe_imports import cpu_only_import_from +pd_df = cpu_only_import_from('pandas', 'DataFrame') from pylibraft.common.handle cimport handle_t from libcpp cimport bool diff --git a/python/cuml/explainer/sampling.py b/python/cuml/explainer/sampling.py index d19bda2783..90bb5608f7 100644 --- a/python/cuml/explainer/sampling.py +++ b/python/cuml/explainer/sampling.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import cupy as cp -from scipy.sparse import issparse - -import cuml -from cuml import KMeans +from cuml.preprocessing import SimpleImputer from cuml.internals.input_utils import ( determine_array_type, get_supported_input_type ) -from cuml.preprocessing import SimpleImputer +from cuml import KMeans +import cuml +from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +issparse = cpu_only_import_from('scipy.sparse', 'issparse') @cuml.internals.api_return_generic() diff --git a/python/cuml/explainer/tree_shap.pyx b/python/cuml/explainer/tree_shap.pyx index b16cbd1261..16c611ff36 100644 --- a/python/cuml/explainer/tree_shap.pyx +++ b/python/cuml/explainer/tree_shap.pyx @@ -24,7 +24,8 @@ from cuml.ensemble import RandomForestClassifier as curfc from libc.stdint cimport uintptr_t import re -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import treelite if has_sklearn(): diff --git a/python/cuml/feature_extraction/_tfidf.py b/python/cuml/feature_extraction/_tfidf.py index 5056737c0f..13c8220309 100644 --- a/python/cuml/feature_extraction/_tfidf.py +++ b/python/cuml/feature_extraction/_tfidf.py @@ -13,14 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from cuml import Base +from cuml.internals.array import CumlArray +from cuml.common.sparsefuncs import csr_diag_mul +from cuml.common.sparsefuncs import csr_row_normalize_l1, csr_row_normalize_l2 import cuml.internals from cuml.common.exceptions import NotFittedError -import cupy as cp -import cupyx -from cuml.common.sparsefuncs import csr_row_normalize_l1, csr_row_normalize_l2 -from cuml.common.sparsefuncs import csr_diag_mul -from cuml.internals.array import CumlArray -from cuml import Base +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') def _sparse_document_frequency(X): diff --git a/python/cuml/feature_extraction/_tfidf_vectorizer.py b/python/cuml/feature_extraction/_tfidf_vectorizer.py index 6a34ba45e2..2e1197d6c6 100644 --- a/python/cuml/feature_extraction/_tfidf_vectorizer.py +++ b/python/cuml/feature_extraction/_tfidf_vectorizer.py @@ -29,7 +29,8 @@ from cuml.feature_extraction._vectorizers import CountVectorizer from cuml.feature_extraction._tfidf import TfidfTransformer -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') class TfidfVectorizer(CountVectorizer): @@ -132,6 +133,7 @@ class TfidfVectorizer(CountVectorizer): This class is largely based on scikit-learn 0.23.1's TfIdfVectorizer code, which is provided under the BSD-3 license. """ + def __init__(self, input=None, encoding=None, decode_error=None, strip_accents=None, lowercase=True, preprocessor=None, tokenizer=None, stop_words=None, token_pattern=None, diff --git a/python/cuml/feature_extraction/_vectorizers.py b/python/cuml/feature_extraction/_vectorizers.py index ca36e1dceb..db8ad63b53 100644 --- a/python/cuml/feature_extraction/_vectorizers.py +++ b/python/cuml/feature_extraction/_vectorizers.py @@ -12,20 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -from cudf import Series -from cuml.common.exceptions import NotFittedError -from cuml.feature_extraction._stop_words import ENGLISH_STOP_WORDS -from cuml.common.sparsefuncs import csr_row_normalize_l1, csr_row_normalize_l2 +from cuml.internals.safe_imports import cpu_only_import +import cuml.internals.logger as logger +from cudf.utils.dtypes import min_signed_type +from cuml.internals.type_utils import CUPY_SPARSE_DTYPES +import numbers +from cuml.internals.safe_imports import gpu_only_import +from functools import partial from cuml.common.sparsefuncs import create_csr_matrix_from_count_df +from cuml.common.sparsefuncs import csr_row_normalize_l1, csr_row_normalize_l2 +from cuml.feature_extraction._stop_words import ENGLISH_STOP_WORDS +from cuml.common.exceptions import NotFittedError +from cuml.internals.safe_imports import gpu_only_import_from +Series = gpu_only_import_from('cudf', 'Series') -from functools import partial -import cupy as cp -import numbers -import cudf -from cuml.internals.type_utils import CUPY_SPARSE_DTYPES -from cudf.utils.dtypes import min_signed_type -import cuml.internals.logger as logger -import pandas as pd +cp = gpu_only_import('cupy') +cudf = gpu_only_import('cudf') +pd = cpu_only_import('pandas') def _preprocess(doc, lower=False, remove_non_alphanumeric=False, delimiter=" ", diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index b1bc5e92f6..3d1b5f66b8 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -19,12 +19,14 @@ import copy import ctypes import math -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import warnings -import pandas as pd +pd = cpu_only_import('pandas') from inspect import getdoc -import rmm +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') from libcpp cimport bool from libc.stdint cimport uintptr_t diff --git a/python/cuml/internals/base.pyx b/python/cuml/internals/base.pyx index d978f344a0..6533b46a1e 100644 --- a/python/cuml/internals/base.pyx +++ b/python/cuml/internals/base.pyx @@ -19,7 +19,8 @@ import os import inspect from importlib import import_module -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import nvtx import typing diff --git a/python/cuml/internals/type_utils.py b/python/cuml/internals/type_utils.py index 9e9bfd7ffe..95e7441e33 100644 --- a/python/cuml/internals/type_utils.py +++ b/python/cuml/internals/type_utils.py @@ -16,7 +16,8 @@ import functools import typing -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') # Those are the only data types supported by cupyx.scipy.sparse matrices. 
CUPY_SPARSE_DTYPES = [cp.float32, cp.float64, cp.complex64, cp.complex128] diff --git a/python/cuml/kernel_ridge/kernel_ridge.pyx b/python/cuml/kernel_ridge/kernel_ridge.pyx index 4d0ef3a885..0eaf0e53de 100644 --- a/python/cuml/kernel_ridge/kernel_ridge.pyx +++ b/python/cuml/kernel_ridge/kernel_ridge.pyx @@ -16,10 +16,10 @@ # distutils: language = c++ -import numpy as np +from cuml.internals.safe_imports import cpu_only_import import warnings -from cupy import linalg -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import from cupyx import lapack, geterr, seterr from cuml.common.array_descriptor import CumlArrayDescriptor from cuml.internals.base import Base @@ -29,6 +29,10 @@ from cuml.common import input_to_cuml_array from cuml.metrics import pairwise_kernels +cp = gpu_only_import('cupy') +linalg = gpu_only_import_from('cupy', 'linalg') +np = cpu_only_import('numpy') + # cholesky solve with fallback to least squares for singular problems def _safe_solve(K, y): diff --git a/python/cuml/linear_model/base.pyx b/python/cuml/linear_model/base.pyx index 960df51807..99aef27977 100644 --- a/python/cuml/linear_model/base.pyx +++ b/python/cuml/linear_model/base.pyx @@ -18,11 +18,14 @@ import ctypes import cuml.internals -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') import warnings -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') from collections import defaultdict from libcpp cimport bool diff --git a/python/cuml/linear_model/base_mg.pyx b/python/cuml/linear_model/base_mg.pyx index 3efcd0c629..172fd18081 100644 --- a/python/cuml/linear_model/base_mg.pyx +++ b/python/cuml/linear_model/base_mg.pyx @@ -18,8 +18,10 @@ import ctypes import cuml.common.opg_data_utils_mg as opg -import numpy as np -import rmm +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref diff --git a/python/cuml/linear_model/linear_regression.pyx b/python/cuml/linear_model/linear_regression.pyx index 269e4a4226..e82355d164 100644 --- a/python/cuml/linear_model/linear_regression.pyx +++ b/python/cuml/linear_model/linear_regression.pyx @@ -17,11 +17,14 @@ # distutils: language = c++ import ctypes -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') import warnings -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') from collections import defaultdict from libcpp cimport bool diff --git a/python/cuml/linear_model/linear_regression_mg.pyx b/python/cuml/linear_model/linear_regression_mg.pyx index 3c0161bfd9..40a23229f2 100644 --- a/python/cuml/linear_model/linear_regression_mg.pyx +++ b/python/cuml/linear_model/linear_regression_mg.pyx @@ -17,8 +17,10 @@ import ctypes import cuml.common.opg_data_utils_mg as opg -import numpy as np -import rmm +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import 
gpu_only_import +rmm = gpu_only_import('rmm') from libc.stdlib cimport malloc, free diff --git a/python/cuml/linear_model/logistic_regression.pyx b/python/cuml/linear_model/logistic_regression.pyx index 3daf8814c0..b5a1282e48 100644 --- a/python/cuml/linear_model/logistic_regression.pyx +++ b/python/cuml/linear_model/logistic_regression.pyx @@ -16,8 +16,8 @@ # distutils: language = c++ -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import import pprint import cuml.internals @@ -32,6 +32,8 @@ from cuml.common import input_to_cuml_array from cuml.common import using_output_type from cuml.internals.api_decorators import device_interop_preparation from cuml.internals.api_decorators import enable_device_interop +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') supported_penalties = ["l1", "l2", "none", "elasticnet"] diff --git a/python/cuml/linear_model/ridge.pyx b/python/cuml/linear_model/ridge.pyx index 55482462c8..c0217b0fa7 100644 --- a/python/cuml/linear_model/ridge.pyx +++ b/python/cuml/linear_model/ridge.pyx @@ -17,9 +17,11 @@ # distutils: language = c++ import ctypes -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from collections import defaultdict -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') import warnings from libcpp cimport bool diff --git a/python/cuml/linear_model/ridge_mg.pyx b/python/cuml/linear_model/ridge_mg.pyx index b8ff83b214..91df1639ca 100644 --- a/python/cuml/linear_model/ridge_mg.pyx +++ b/python/cuml/linear_model/ridge_mg.pyx @@ -16,9 +16,11 @@ # distutils: language = c++ import ctypes -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -import rmm +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') from libc.stdlib cimport malloc, free diff --git a/python/cuml/manifold/simpl_set.pyx b/python/cuml/manifold/simpl_set.pyx index ca5fe75e02..729da3792b 100644 --- a/python/cuml/manifold/simpl_set.pyx +++ b/python/cuml/manifold/simpl_set.pyx @@ -16,8 +16,10 @@ # distutils: language = c++ -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') from cuml.manifold.umap_utils cimport * from cuml.manifold.umap_utils import GraphHolder, find_ab_params diff --git a/python/cuml/manifold/t_sne.pyx b/python/cuml/manifold/t_sne.pyx index 6514ae196f..d8e2aefb1f 100644 --- a/python/cuml/manifold/t_sne.pyx +++ b/python/cuml/manifold/t_sne.pyx @@ -19,11 +19,13 @@ # cython: wraparound = False import ctypes -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import inspect -import pandas as pd +pd = cpu_only_import('pandas') import warnings -import cupy +from cuml.internals.safe_imports import gpu_only_import +cupy = gpu_only_import('cupy') import cuml.internals from cuml.common.array_descriptor import CumlArrayDescriptor @@ -39,7 +41,7 @@ from cuml.common import input_to_cuml_array from cuml.internals.mixins import CMajorInputTagMixin from cuml.common.sparsefuncs import extract_knn_graph from cuml.metrics.distance_type cimport DistanceType -import rmm +rmm = gpu_only_import('rmm') from libcpp cimport bool from libc.stdint cimport 
uintptr_t diff --git a/python/cuml/manifold/umap.pyx b/python/cuml/manifold/umap.pyx index 88d8ce52c2..843c003c43 100644 --- a/python/cuml/manifold/umap.pyx +++ b/python/cuml/manifold/umap.pyx @@ -18,23 +18,27 @@ import typing import ctypes -import numpy as np -import pandas as pd +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') import warnings import joblib -import cupy -import cupyx +from cuml.internals.safe_imports import gpu_only_import +cupy = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') -import numba.cuda as cuda +cuda = gpu_only_import('numba.cuda') from cuml.manifold.umap_utils cimport * from cuml.manifold.umap_utils import GraphHolder, find_ab_params from cuml.common.sparsefuncs import extract_knn_graph -from cupyx.scipy.sparse import csr_matrix as cp_csr_matrix,\ - coo_matrix as cp_coo_matrix, csc_matrix as cp_csc_matrix +from cuml.internals.safe_imports import gpu_only_import_from +cp_csr_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'csr_matrix') +cp_coo_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'coo_matrix') +cp_csc_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'csc_matrix') import cuml.internals from cuml.common import using_output_type @@ -61,7 +65,7 @@ from cuml.common.array_descriptor import CumlArrayDescriptor from cuml.internals.api_decorators import device_interop_preparation from cuml.internals.api_decorators import enable_device_interop -import rmm +rmm = gpu_only_import('rmm') from libc.stdint cimport uintptr_t from libc.stdlib cimport free diff --git a/python/cuml/manifold/umap_utils.pyx b/python/cuml/manifold/umap_utils.pyx index 54b814ffb8..bf9b725754 100644 --- a/python/cuml/manifold/umap_utils.pyx +++ b/python/cuml/manifold/umap_utils.pyx @@ -20,8 +20,10 @@ from rmm._lib.memory_resource cimport get_current_device_resource from pylibraft.common.handle cimport handle_t from cuml.manifold.umap_utils cimport * from libcpp.utility cimport move -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') cdef class GraphHolder: diff --git a/python/cuml/metrics/_classification.py b/python/cuml/metrics/_classification.py index 3490588946..8e51dccb4f 100644 --- a/python/cuml/metrics/_classification.py +++ b/python/cuml/metrics/_classification.py @@ -14,10 +14,12 @@ # limitations under the License. # -import cupy as cp -import numpy as np -import cuml.internals from cuml.internals.input_utils import input_to_cupy_array +import cuml.internals +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') @cuml.internals.api_return_any() diff --git a/python/cuml/metrics/_ranking.py b/python/cuml/metrics/_ranking.py index 41369ace2d..ff7535daed 100644 --- a/python/cuml/metrics/_ranking.py +++ b/python/cuml/metrics/_ranking.py @@ -14,13 +14,15 @@ # limitations under the License. 
# -import typing -import cupy as cp -import numpy as np -import cuml.internals -from cuml.internals.array import CumlArray -from cuml.internals.input_utils import input_to_cupy_array import math +from cuml.internals.input_utils import input_to_cupy_array +from cuml.internals.array import CumlArray +import cuml.internals +from cuml.internals.safe_imports import cpu_only_import +import typing +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') @cuml.internals.api_return_generic(get_output_type=True) diff --git a/python/cuml/metrics/accuracy.pyx b/python/cuml/metrics/accuracy.pyx index 9022c62a90..9d1ea47b96 100644 --- a/python/cuml/metrics/accuracy.pyx +++ b/python/cuml/metrics/accuracy.pyx @@ -16,7 +16,8 @@ # distutils: language = c++ -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from libc.stdint cimport uintptr_t diff --git a/python/cuml/metrics/cluster/adjusted_rand_index.pyx b/python/cuml/metrics/cluster/adjusted_rand_index.pyx index 4081240fd8..9858710bad 100644 --- a/python/cuml/metrics/cluster/adjusted_rand_index.pyx +++ b/python/cuml/metrics/cluster/adjusted_rand_index.pyx @@ -16,7 +16,8 @@ # distutils: language = c++ -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') import warnings from libc.stdint cimport uintptr_t diff --git a/python/cuml/metrics/cluster/entropy.pyx b/python/cuml/metrics/cluster/entropy.pyx index f81da455b7..c30eeb3b6e 100644 --- a/python/cuml/metrics/cluster/entropy.pyx +++ b/python/cuml/metrics/cluster/entropy.pyx @@ -18,8 +18,10 @@ import math import typing -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') from libc.stdint cimport uintptr_t diff --git a/python/cuml/metrics/cluster/silhouette_score.pyx b/python/cuml/metrics/cluster/silhouette_score.pyx index b91fe80f7a..a4b0d38c59 100644 --- a/python/cuml/metrics/cluster/silhouette_score.pyx +++ b/python/cuml/metrics/cluster/silhouette_score.pyx @@ -14,8 +14,10 @@ # limitations under the License. # -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from libc.stdint cimport uintptr_t diff --git a/python/cuml/metrics/cluster/utils.pyx b/python/cuml/metrics/cluster/utils.pyx index 27fff60d6b..9e7fe09fb9 100644 --- a/python/cuml/metrics/cluster/utils.pyx +++ b/python/cuml/metrics/cluster/utils.pyx @@ -16,12 +16,13 @@ # distutils: language = c++ -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import import cuml.internals from cuml.metrics.utils import sorted_unique_labels from cuml.prims.label import make_monotonic from cuml.common import input_to_cuml_array +cp = gpu_only_import('cupy') @cuml.internals.api_return_generic(get_output_type=True) diff --git a/python/cuml/metrics/confusion_matrix.py b/python/cuml/metrics/confusion_matrix.py index 86456031ad..935a2211f1 100644 --- a/python/cuml/metrics/confusion_matrix.py +++ b/python/cuml/metrics/confusion_matrix.py @@ -14,17 +14,18 @@ # limitations under the License. 
# -import numpy as np -import cupy as cp -import cupyx - -import cuml.internals -from cuml.common import input_to_cuml_array -from cuml.common import using_output_type -from cuml.internals.array import CumlArray -from cuml.internals.input_utils import input_to_cupy_array -from cuml.metrics.utils import sorted_unique_labels from cuml.prims.label import make_monotonic +from cuml.metrics.utils import sorted_unique_labels +from cuml.internals.input_utils import input_to_cupy_array +from cuml.internals.array import CumlArray +from cuml.common import using_output_type +from cuml.common import input_to_cuml_array +import cuml.internals +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') @cuml.internals.api_return_any() diff --git a/python/cuml/metrics/hinge_loss.pyx b/python/cuml/metrics/hinge_loss.pyx index c5279c0553..bb800f836f 100644 --- a/python/cuml/metrics/hinge_loss.pyx +++ b/python/cuml/metrics/hinge_loss.pyx @@ -14,12 +14,14 @@ # limitations under the License. # -import cudf -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import import cuml.internals from cuml.internals.input_utils import determine_array_type from cuml.preprocessing import LabelEncoder, LabelBinarizer +cp = gpu_only_import('cupy') +cudf = gpu_only_import('cudf') + @cuml.internals.api_return_any() def hinge_loss(y_true, diff --git a/python/cuml/metrics/kl_divergence.pyx b/python/cuml/metrics/kl_divergence.pyx index a59af84ff2..f9d7ba80e3 100644 --- a/python/cuml/metrics/kl_divergence.pyx +++ b/python/cuml/metrics/kl_divergence.pyx @@ -14,8 +14,10 @@ # limitations under the License. # -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import cuml.internals from cuml.internals.input_utils import determine_array_type from cuml.common import (input_to_cuml_array, CumlArray) diff --git a/python/cuml/metrics/pairwise_distances.pyx b/python/cuml/metrics/pairwise_distances.pyx index faf3652aaa..d5a63dc188 100644 --- a/python/cuml/metrics/pairwise_distances.pyx +++ b/python/cuml/metrics/pairwise_distances.pyx @@ -22,12 +22,14 @@ from libcpp cimport bool from libc.stdint cimport uintptr_t from pylibraft.common.handle cimport handle_t from pylibraft.common.handle import Handle -import cupy as cp -import numpy as np -import pandas as pd -import cudf -import scipy -import cupyx +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') +cudf = gpu_only_import('cudf') +scipy = cpu_only_import('scipy') +cupyx = gpu_only_import('cupyx') import cuml.internals from cuml.internals.base import _determine_stateless_output_type from cuml.common import (input_to_cuml_array, CumlArray) diff --git a/python/cuml/metrics/pairwise_kernels.py b/python/cuml/metrics/pairwise_kernels.py index 3bdb1eed9a..9ce81408f2 100644 --- a/python/cuml/metrics/pairwise_kernels.py +++ b/python/cuml/metrics/pairwise_kernels.py @@ -13,13 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import inspect -from numba import cuda -import cupy as cp -import numpy as np -import cuml.internals -from cuml.metrics import pairwise_distances from cuml.internals.input_utils import input_to_cupy_array +from cuml.metrics import pairwise_distances +import cuml.internals +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +import inspect +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') def linear_kernel(X, Y): diff --git a/python/cuml/metrics/regression.pyx b/python/cuml/metrics/regression.pyx index a7eabe560b..2a6d27564a 100644 --- a/python/cuml/metrics/regression.pyx +++ b/python/cuml/metrics/regression.pyx @@ -16,8 +16,10 @@ # distutils: language = c++ -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') from libc.stdint cimport uintptr_t diff --git a/python/cuml/metrics/trustworthiness.pyx b/python/cuml/metrics/trustworthiness.pyx index a7c29de573..2386da87cd 100644 --- a/python/cuml/metrics/trustworthiness.pyx +++ b/python/cuml/metrics/trustworthiness.pyx @@ -16,10 +16,12 @@ # distutils: language = c++ -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import warnings -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') from libc.stdint cimport uintptr_t import cuml.internals diff --git a/python/cuml/metrics/utils.py b/python/cuml/metrics/utils.py index efef1eb832..4c6733fde2 100644 --- a/python/cuml/metrics/utils.py +++ b/python/cuml/metrics/utils.py @@ -14,7 +14,8 @@ # limitations under the License. # -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') def sorted_unique_labels(*ys): diff --git a/python/cuml/model_selection/_split.py b/python/cuml/model_selection/_split.py index 947c63bdec..452d10906b 100644 --- a/python/cuml/model_selection/_split.py +++ b/python/cuml/model_selection/_split.py @@ -13,15 +13,18 @@ # limitations under the License. # -import cudf -import cupy as cp -import cupyx -import numpy as np - -from cuml.internals.array import array_to_memory_order -from cuml.common import input_to_cuml_array -from numba import cuda from typing import Union +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.common import input_to_cuml_array +from cuml.internals.array import array_to_memory_order +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') +np = cpu_only_import('numpy') + +cuda = gpu_only_import_from('numba', 'cuda') def _stratify_split(X, stratify, labels, n_train, n_test, x_numba, y_numba, diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py index 85d35356f4..74142144dc 100644 --- a/python/cuml/naive_bayes/naive_bayes.py +++ b/python/cuml/naive_bayes/naive_bayes.py @@ -13,25 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
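# ===========================================================================
# [Editor's illustrative aside -- not part of the patch above or below.]
# Every hunk in this patch applies the same substitution, shown here on a
# standalone sketch. The names cpu_only_import, gpu_only_import and
# gpu_only_import_from are taken from the patch itself; the assumption
# (hedged) is that each returns the real module or attribute when it is
# installed and a safe placeholder on CPU-only or GPU-only installs.
from cuml.internals.safe_imports import (
    cpu_only_import,
    gpu_only_import,
    gpu_only_import_from,
)

np = cpu_only_import('numpy')                 # host-side dependency
cp = gpu_only_import('cupy')                  # device-side dependency
cuda = gpu_only_import_from('numba', 'cuda')  # one attribute of a GPU package


def as_host_array(x):
    # CuPy arrays expose .get() to copy data back to the host; NumPy input
    # passes through np.asarray unchanged.
    return x.get() if hasattr(x, 'get') else np.asarray(x)
# ===========================================================================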
# +from cuml.common.kernel_utils import cuda_kernel_factory +from cuml.internals.input_utils import input_to_cuml_array, input_to_cupy_array +from cuml.prims.array import binarize +from cuml.prims.label import invert_labels +from cuml.prims.label import check_labels +from cuml.prims.label import make_monotonic +from cuml.internals.import_utils import has_scipy +from cuml.common.doc_utils import generate_docstring +from cuml.internals.mixins import ClassifierMixin +from cuml.internals.base import Base +from cuml.common.array_descriptor import CumlArrayDescriptor +from cuml.common import CumlArray import math import warnings import nvtx -import cupy as cp -import cupyx -from cuml.common import CumlArray -from cuml.common.array_descriptor import CumlArrayDescriptor -from cuml.internals.base import Base -from cuml.internals.mixins import ClassifierMixin -from cuml.common.doc_utils import generate_docstring -from cuml.internals.import_utils import has_scipy -from cuml.prims.label import make_monotonic -from cuml.prims.label import check_labels -from cuml.prims.label import invert_labels -from cuml.prims.array import binarize - -from cuml.internals.input_utils import input_to_cuml_array, input_to_cupy_array -from cuml.common.kernel_utils import cuda_kernel_factory +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') def count_features_coo_kernel(float_dtype, int_dtype): @@ -659,7 +659,7 @@ def get_param_names(self): [ "priors", "var_smoothing" - ] + ] class _BaseDiscreteNB(_BaseNB): @@ -966,7 +966,7 @@ def get_param_names(self): "alpha", "fit_prior", "class_prior" - ] + ] class MultinomialNB(_BaseDiscreteNB): @@ -1069,6 +1069,7 @@ class MultinomialNB(_BaseDiscreteNB): 0.9245... """ + def __init__(self, *, alpha=1.0, fit_prior=True, @@ -1194,6 +1195,7 @@ class BernoulliNB(_BaseDiscreteNB): V. Metsis, I. Androutsopoulos and G. Paliouras (2006). Spam filtering with naive Bayes -- Which naive Bayes? 3rd Conf. on Email and Anti-Spam (CEAS). """ + def __init__(self, *, alpha=1.0, binarize=.0, fit_prior=True, class_prior=None, output_type=None, handle=None, verbose=False): @@ -1259,7 +1261,7 @@ def get_param_names(self): return super().get_param_names() + \ [ "binarize" - ] + ] class ComplementNB(_BaseDiscreteNB): @@ -1342,6 +1344,7 @@ class ComplementNB(_BaseDiscreteNB): (Vol. 3, pp. 616-623). https://people.csail.mit.edu/jrennie/papers/icml03-nb.pdf """ + def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None, norm=False, output_type=None, handle=None, verbose=False): @@ -1412,7 +1415,7 @@ def get_param_names(self): return super().get_param_names() + \ [ "norm" - ] + ] class CategoricalNB(_BaseDiscreteNB): @@ -1488,6 +1491,7 @@ class and category of the specific feature. >>> print(clf.predict(X[2:3])) [3] """ + def __init__(self, *, alpha=1.0, fit_prior=True, class_prior=None, output_type=None, handle=None, verbose=False): super(CategoricalNB, self).__init__(alpha=alpha, diff --git a/python/cuml/neighbors/kernel_density.py b/python/cuml/neighbors/kernel_density.py index bd76c24e06..a3170de0be 100644 --- a/python/cuml/neighbors/kernel_density.py +++ b/python/cuml/neighbors/kernel_density.py @@ -14,16 +14,19 @@ # limitations under the License. 
# -import cupy as cp -import numpy as np -import math -from numba import cuda -from cuml.internals.input_utils import input_to_cupy_array -from cuml.internals.input_utils import input_to_cuml_array -from cuml.internals.base import Base -from cuml.metrics import pairwise_distances -from cuml.internals.import_utils import has_scipy from cuml.common.exceptions import NotFittedError +from cuml.internals.import_utils import has_scipy +from cuml.metrics import pairwise_distances +from cuml.internals.base import Base +from cuml.internals.input_utils import input_to_cuml_array +from cuml.internals.input_utils import input_to_cupy_array +from cuml.internals.safe_imports import gpu_only_import_from +import math +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') +cuda = gpu_only_import_from('numba', 'cuda') if has_scipy(): from scipy.special import gammainc diff --git a/python/cuml/neighbors/kneighbors_classifier.pyx b/python/cuml/neighbors/kneighbors_classifier.pyx index c3266654d0..e080ed018d 100644 --- a/python/cuml/neighbors/kneighbors_classifier.pyx +++ b/python/cuml/neighbors/kneighbors_classifier.pyx @@ -28,8 +28,10 @@ from cuml.internals.mixins import ClassifierMixin from cuml.common.doc_utils import generate_docstring from cuml.internals.mixins import FMajorInputTagMixin -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') from cython.operator cimport dereference as deref @@ -40,13 +42,14 @@ from libcpp.vector cimport vector from libcpp cimport bool from libcpp.memory cimport shared_ptr -import rmm +rmm = gpu_only_import('rmm') from libc.stdlib cimport malloc, free from libc.stdint cimport uintptr_t, int64_t from libc.stdlib cimport calloc, malloc, free -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') import rmm cimport cuml.common.cuda diff --git a/python/cuml/neighbors/kneighbors_regressor.pyx b/python/cuml/neighbors/kneighbors_regressor.pyx index 1ab278ac7a..950cbda893 100644 --- a/python/cuml/neighbors/kneighbors_regressor.pyx +++ b/python/cuml/neighbors/kneighbors_regressor.pyx @@ -26,7 +26,8 @@ from cuml.internals.mixins import RegressorMixin from cuml.common.doc_utils import generate_docstring from cuml.internals.mixins import FMajorInputTagMixin -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from cython.operator cimport dereference as deref @@ -38,13 +39,15 @@ from pylibraft.common.handle cimport handle_t from libcpp cimport bool from libcpp.memory cimport shared_ptr -import rmm +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') from libc.stdlib cimport malloc, free from libc.stdint cimport uintptr_t, int64_t from libc.stdlib cimport calloc, malloc, free -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') import rmm cimport cuml.common.cuda diff --git a/python/cuml/neighbors/nearest_neighbors.pyx b/python/cuml/neighbors/nearest_neighbors.pyx index db57131434..8b0ac5926a 100644 --- a/python/cuml/neighbors/nearest_neighbors.pyx +++ b/python/cuml/neighbors/nearest_neighbors.pyx @@ -18,9 +18,11 @@ import typing -import numpy as np -import cupy as cp 
-import cupyx +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') import ctypes import warnings import math @@ -55,8 +57,9 @@ from libc.stdlib cimport calloc, malloc, free from libcpp.vector cimport vector -from numba import cuda -import rmm +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') +rmm = gpu_only_import('rmm') cimport cuml.common.cuda diff --git a/python/cuml/neighbors/nearest_neighbors_mg.pyx b/python/cuml/neighbors/nearest_neighbors_mg.pyx index c89ce63978..09e5cf4bfd 100644 --- a/python/cuml/neighbors/nearest_neighbors_mg.pyx +++ b/python/cuml/neighbors/nearest_neighbors_mg.pyx @@ -17,7 +17,8 @@ # distutils: language = c++ import typing -from cudf import DataFrame as cudfDataFrame +from cuml.internals.safe_imports import gpu_only_import_from +cudfDataFrame = gpu_only_import_from('cudf', 'DataFrame') from cuml.internals.array import CumlArray from cuml.common import input_to_cuml_array from cuml.internals import api_base_return_generic_skipall diff --git a/python/cuml/preprocessing/LabelEncoder.py b/python/cuml/preprocessing/LabelEncoder.py index d24be5cc37..6d44e4abf2 100644 --- a/python/cuml/preprocessing/LabelEncoder.py +++ b/python/cuml/preprocessing/LabelEncoder.py @@ -14,13 +14,15 @@ # limitations under the License. # -import cudf -import cupy as cp -import numpy as np -from cuml import Base -from pandas import Series as pdSeries - from cuml.common.exceptions import NotFittedError +from cuml.internals.safe_imports import cpu_only_import_from +from cuml import Base +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') +pdSeries = cpu_only_import_from('pandas', 'Series') class LabelEncoder(Base): diff --git a/python/cuml/preprocessing/TargetEncoder.py b/python/cuml/preprocessing/TargetEncoder.py index 15ad0cff2f..5124c6ea9b 100644 --- a/python/cuml/preprocessing/TargetEncoder.py +++ b/python/cuml/preprocessing/TargetEncoder.py @@ -14,12 +14,14 @@ # limitations under the License. # -import cudf -import pandas -import cupy as cp -import numpy as np -from cuml.common.exceptions import NotFittedError import warnings +from cuml.common.exceptions import NotFittedError +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +pandas = cpu_only_import('pandas') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') def get_stat_func(stat): @@ -88,6 +90,7 @@ class TargetEncoder: >>> print(test_encoded) [1. 0.75 0.5 1. ] """ + def __init__(self, n_folds=4, smooth=0, seed=42, split_method='interleaved', output_type='auto', stat='mean'): diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 582dfa6e13..65abdfe77f 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -12,18 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import numpy as np -import cupy as cp -import cupyx -from cuml.common.exceptions import NotFittedError - -from cuml import Base -from cuml.preprocessing import LabelEncoder -from cudf import DataFrame, Series -from cudf import GenericIndex +import warnings import cuml.internals.logger as logger +from cuml.internals.safe_imports import gpu_only_import_from +from cudf import DataFrame, Series +from cuml.preprocessing import LabelEncoder +from cuml import Base +from cuml.common.exceptions import NotFittedError +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') -import warnings +GenericIndex = gpu_only_import_from('cudf', 'GenericIndex') class OneHotEncoder(Base): diff --git a/python/cuml/preprocessing/label.py b/python/cuml/preprocessing/label.py index f0d5bb79e9..42e43a10c3 100644 --- a/python/cuml/preprocessing/label.py +++ b/python/cuml/preprocessing/label.py @@ -13,15 +13,15 @@ # limitations under the License. # -import cupy as cp -import cupyx - -from cuml import Base -import cuml.internals -from cuml.common import CumlArray, has_scipy -from cuml.common.array_descriptor import CumlArrayDescriptor -from cuml.internals.array_sparse import SparseCumlArray from cuml.prims.label import check_labels, invert_labels, make_monotonic +from cuml.internals.array_sparse import SparseCumlArray +from cuml.common.array_descriptor import CumlArrayDescriptor +from cuml.common import CumlArray, has_scipy +import cuml.internals +from cuml import Base +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') @cuml.internals.api_return_sparse_array() diff --git a/python/cuml/preprocessing/onehotencoder_mg.py b/python/cuml/preprocessing/onehotencoder_mg.py index 7891a9ea74..05aeae1272 100644 --- a/python/cuml/preprocessing/onehotencoder_mg.py +++ b/python/cuml/preprocessing/onehotencoder_mg.py @@ -14,11 +14,13 @@ # limitations under the License. # +from cuml.dask.common.dask_arr_utils import to_dask_cudf +from cuml.internals.safe_imports import gpu_only_import_from from cuml.preprocessing.encoders import OneHotEncoder import dask -import cupy as cp -from cudf import DataFrame -from cuml.dask.common.dask_arr_utils import to_dask_cudf +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +DataFrame = gpu_only_import_from('cudf', 'DataFrame') class OneHotEncoderMG(OneHotEncoder): diff --git a/python/cuml/preprocessing/text/stem/porter_stemmer.py b/python/cuml/preprocessing/text/stem/porter_stemmer.py index a146e92118..30f1a223ec 100644 --- a/python/cuml/preprocessing/text/stem/porter_stemmer.py +++ b/python/cuml/preprocessing/text/stem/porter_stemmer.py @@ -14,12 +14,18 @@ # limitations under the License. 
# -import cudf -import cupy as cp -from .porter_stemmer_utils.suffix_utils import ( - get_stem_series, - get_str_replacement_series, - replace_suffix, +from .porter_stemmer_utils.measure_utils import ( + has_positive_measure, + measure_gt_n, + measure_eq_n, +) +from .porter_stemmer_utils.len_flags_utils import ( + len_eq_n, + len_gt_n, +) +from .porter_stemmer_utils.consonant_vowel_utils import ( + contains_vowel, + is_consonant, ) from .porter_stemmer_utils.porter_stemmer_rules import ( ends_with_suffix, @@ -28,19 +34,14 @@ last_char_in, ends_cvc, ) -from .porter_stemmer_utils.consonant_vowel_utils import ( - contains_vowel, - is_consonant, -) -from .porter_stemmer_utils.len_flags_utils import ( - len_eq_n, - len_gt_n, -) -from .porter_stemmer_utils.measure_utils import ( - has_positive_measure, - measure_gt_n, - measure_eq_n, +from .porter_stemmer_utils.suffix_utils import ( + get_stem_series, + get_str_replacement_series, + replace_suffix, ) +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') # Implementation based on nltk//stem/porter.html diff --git a/python/cuml/preprocessing/text/stem/porter_stemmer_utils/porter_stemmer_rules.py b/python/cuml/preprocessing/text/stem/porter_stemmer_utils/porter_stemmer_rules.py index db901c5e69..e0868d930d 100644 --- a/python/cuml/preprocessing/text/stem/porter_stemmer_utils/porter_stemmer_rules.py +++ b/python/cuml/preprocessing/text/stem/porter_stemmer_utils/porter_stemmer_rules.py @@ -16,7 +16,8 @@ from .consonant_vowel_utils import is_vowel, is_consonant from .len_flags_utils import len_gt_n, len_eq_n -import cudf +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') def ends_with_double_constant(string_ser): diff --git a/python/cuml/preprocessing/text/stem/porter_stemmer_utils/suffix_utils.py b/python/cuml/preprocessing/text/stem/porter_stemmer_utils/suffix_utils.py index 66a53ea2c3..8beee6b412 100644 --- a/python/cuml/preprocessing/text/stem/porter_stemmer_utils/suffix_utils.py +++ b/python/cuml/preprocessing/text/stem/porter_stemmer_utils/suffix_utils.py @@ -14,10 +14,13 @@ # limitations under the License. # -from numba import cuda -import cudf -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') +cudf = gpu_only_import('cudf') +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') def get_str_replacement_series(replacement, bool_mask): diff --git a/python/cuml/prims/array.py b/python/cuml/prims/array.py index db74253751..b369218919 100644 --- a/python/cuml/prims/array.py +++ b/python/cuml/prims/array.py @@ -14,10 +14,10 @@ # limitations under the License. # -import cupy as cp -import math - from cuml.common.kernel_utils import cuda_kernel_factory +import math +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') def _binarize_kernel(x_dtype): diff --git a/python/cuml/prims/label/classlabels.py b/python/cuml/prims/label/classlabels.py index 92158c43ca..ae6ee1099e 100644 --- a/python/cuml/prims/label/classlabels.py +++ b/python/cuml/prims/label/classlabels.py @@ -14,15 +14,14 @@ # limitations under the License. 
# -import typing -import cupy as cp -import math -from cuml.internals.input_utils import input_to_cupy_array - -import cuml.internals -from cuml.internals.array import CumlArray - from cuml.common.kernel_utils import cuda_kernel_factory +from cuml.internals.array import CumlArray +import cuml.internals +from cuml.internals.input_utils import input_to_cupy_array +import math +import typing +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') map_kernel_str = r''' diff --git a/python/cuml/prims/stats/covariance.py b/python/cuml/prims/stats/covariance.py index 10b53d5456..84cec03255 100644 --- a/python/cuml/prims/stats/covariance.py +++ b/python/cuml/prims/stats/covariance.py @@ -14,12 +14,13 @@ # limitations under the License. # -import cupy as cp -import cupyx +from cuml.common.kernel_utils import cuda_kernel_factory +import cuml.internals import math +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') -import cuml.internals -from cuml.common.kernel_utils import cuda_kernel_factory cov_kernel_str = r''' ({0} *cov_values, {0} *gram_matrix, {0} *mean_x, {0} *mean_y, int n_cols) { diff --git a/python/cuml/random_projection/random_projection.pyx b/python/cuml/random_projection/random_projection.pyx index d43889705a..7a67e22b1c 100644 --- a/python/cuml/random_projection/random_projection.pyx +++ b/python/cuml/random_projection/random_projection.pyx @@ -16,7 +16,8 @@ # distutils: language = c++ -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from libc.stdint cimport uintptr_t from libcpp cimport bool diff --git a/python/cuml/solvers/cd.pyx b/python/cuml/solvers/cd.pyx index 88de331134..1567a61c82 100644 --- a/python/cuml/solvers/cd.pyx +++ b/python/cuml/solvers/cd.pyx @@ -16,9 +16,11 @@ # distutils: language = c++ import ctypes -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') from libcpp cimport bool from libc.stdint cimport uintptr_t diff --git a/python/cuml/solvers/cd_mg.pyx b/python/cuml/solvers/cd_mg.pyx index 8342e0a610..e8c20f94cd 100644 --- a/python/cuml/solvers/cd_mg.pyx +++ b/python/cuml/solvers/cd_mg.pyx @@ -16,8 +16,10 @@ # distutils: language = c++ import ctypes -import numpy as np -import rmm +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +rmm = gpu_only_import('rmm') from libcpp cimport bool from libc.stdint cimport uintptr_t, uint32_t, uint64_t diff --git a/python/cuml/solvers/qn.pyx b/python/cuml/solvers/qn.pyx index 478db03883..8b334fdc29 100644 --- a/python/cuml/solvers/qn.pyx +++ b/python/cuml/solvers/qn.pyx @@ -15,8 +15,10 @@ # distutils: language = c++ -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from libcpp cimport bool from libc.stdint cimport uintptr_t diff --git a/python/cuml/solvers/sgd.pyx b/python/cuml/solvers/sgd.pyx index b7e084eb8e..7092447852 100644 --- a/python/cuml/solvers/sgd.pyx +++ b/python/cuml/solvers/sgd.pyx @@ -18,10 +18,13 @@ import typing import ctypes -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import 
cpu_only_import +np = cpu_only_import('numpy') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') from libcpp cimport bool from libc.stdint cimport uintptr_t diff --git a/python/cuml/svm/linear.pyx b/python/cuml/svm/linear.pyx index d23d4b9271..0907660974 100644 --- a/python/cuml/svm/linear.pyx +++ b/python/cuml/svm/linear.pyx @@ -18,7 +18,8 @@ import re import inspect import typing -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import cuml from rmm._lib.cuda_stream_view cimport cuda_stream_view diff --git a/python/cuml/svm/svc.pyx b/python/cuml/svm/svc.pyx index 638160642b..172670f2d2 100644 --- a/python/cuml/svm/svc.pyx +++ b/python/cuml/svm/svc.pyx @@ -18,11 +18,15 @@ import typing import ctypes -import cudf -import cupy as cp -import numpy as np - -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') + +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') from cython.operator cimport dereference as deref from libc.stdint cimport uintptr_t diff --git a/python/cuml/svm/svm_base.pyx b/python/cuml/svm/svm_base.pyx index 5c21fd5824..dc3564deb0 100644 --- a/python/cuml/svm/svm_base.pyx +++ b/python/cuml/svm/svm_base.pyx @@ -16,10 +16,13 @@ # distutils: language = c++ import ctypes -import cupy -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cupy = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') from cython.operator cimport dereference as deref from libc.stdint cimport uintptr_t diff --git a/python/cuml/svm/svr.pyx b/python/cuml/svm/svr.pyx index 4dd9d18603..02af929807 100644 --- a/python/cuml/svm/svr.pyx +++ b/python/cuml/svm/svr.pyx @@ -16,10 +16,13 @@ # distutils: language = c++ import ctypes -import cupy -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cupy = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from numba import cuda +from cuml.internals.safe_imports import gpu_only_import_from +cuda = gpu_only_import_from('numba', 'cuda') from cython.operator cimport dereference as deref from libc.stdint cimport uintptr_t diff --git a/python/cuml/testing/dask/utils.py b/python/cuml/testing/dask/utils.py index e4e1c37cec..ab115733ab 100644 --- a/python/cuml/testing/dask/utils.py +++ b/python/cuml/testing/dask/utils.py @@ -1,14 +1,11 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. 
-import cupy as cp - -import dask - -from sklearn.feature_extraction.text import HashingVectorizer - -from cuml.dask.common import to_sparse_dask_array - from sklearn.datasets import fetch_20newsgroups +from cuml.dask.common import to_sparse_dask_array +from sklearn.feature_extraction.text import HashingVectorizer +import dask +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') def load_text_corpus(client): diff --git a/python/cuml/testing/strategies.py b/python/cuml/testing/strategies.py index c3c745a1d5..aece829174 100644 --- a/python/cuml/testing/strategies.py +++ b/python/cuml/testing/strategies.py @@ -12,11 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import cudf -import cupy as cp -import numpy as np -from hypothesis import assume -from hypothesis.extra.numpy import array_shapes, arrays, floating_dtypes +from cuml.internals.array import CumlArray +from sklearn.model_selection import train_test_split +from sklearn.datasets import make_regression from hypothesis.strategies import ( composite, integers, @@ -25,10 +23,14 @@ one_of, sampled_from, ) -from sklearn.datasets import make_regression -from sklearn.model_selection import train_test_split +from hypothesis.extra.numpy import array_shapes, arrays, floating_dtypes +from hypothesis import assume +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') -from cuml.internals.array import CumlArray _CUML_ARRAY_INPUT_TYPES = ["numpy", "cupy", "series"] diff --git a/python/cuml/testing/test_preproc_utils.py b/python/cuml/testing/test_preproc_utils.py index 7757bb1360..31aa3de6d8 100644 --- a/python/cuml/testing/test_preproc_utils.py +++ b/python/cuml/testing/test_preproc_utils.py @@ -13,22 +13,26 @@ # limitations under the License. 
# +from cuml.common import input_to_cuml_array +from scipy.sparse import coo_matrix as cpu_coo_matrix +from scipy.sparse import csc_matrix as cpu_csc_matrix +from cupyx.scipy.sparse import coo_matrix as gpu_coo_matrix +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import cpu_only_import import pytest from cuml.datasets import make_classification, make_blobs -from numpy.testing import assert_allclose as np_assert_allclose - -import numpy as np -import cupy as cp -import cupyx.scipy.sparse as gpu_sparse -import scipy.sparse as cpu_sparse -from cupyx.scipy.sparse import csr_matrix as gpu_csr_matrix -from cupyx.scipy.sparse import csc_matrix as gpu_csc_matrix -from cupyx.scipy.sparse import coo_matrix as gpu_coo_matrix -from scipy.sparse import csr_matrix as cpu_csr_matrix -from scipy.sparse import csc_matrix as cpu_csc_matrix -from scipy.sparse import coo_matrix as cpu_coo_matrix -from cuml.common import input_to_cuml_array +from cuml.internals.safe_imports import cpu_only_import_from +np_assert_allclose = cpu_only_import_from('numpy.testing', 'assert_allclose') + +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +gpu_sparse = gpu_only_import('cupyx.scipy.sparse') +cpu_sparse = cpu_only_import('scipy.sparse') +gpu_csr_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'csr_matrix') +gpu_csc_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'csc_matrix') +cpu_csr_matrix = cpu_only_import_from('scipy.sparse', 'csr_matrix') def to_output_type(array, output_type, order='F'): diff --git a/python/cuml/testing/utils.py b/python/cuml/testing/utils.py index a9265a1546..1b91f7aca2 100644 --- a/python/cuml/testing/utils.py +++ b/python/cuml/testing/utils.py @@ -11,34 +11,35 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import pytest +from cuml.internals.mem_type import MemoryType +from cuml.internals.input_utils import input_to_cuml_array, is_array_like +from cuml.internals.base import Base +import cuml +from sklearn.model_selection import train_test_split +from sklearn.metrics import mean_squared_error, brier_score_loss +from sklearn.datasets import make_classification, make_regression +from sklearn import datasets +from pylibraft.common.cuda import Stream +from sklearn.datasets import make_regression as skl_make_reg +from numba.cuda.cudadrv.devicearray import DeviceNDArray +from numbers import Number +from cuml.internals.safe_imports import gpu_only_import_from +from itertools import dropwhile +from copy import deepcopy +from cuml.internals.safe_imports import cpu_only_import import inspect from textwrap import dedent, indent -import cupy as cp -import numpy as np -import pandas as pd -from copy import deepcopy -from itertools import dropwhile +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') -from numba import cuda -from numbers import Number -from numba.cuda.cudadrv.devicearray import DeviceNDArray - -from sklearn.datasets import make_regression as skl_make_reg +cuda = gpu_only_import_from('numba', 'cuda') -from pylibraft.common.cuda import Stream - -from sklearn import datasets -from sklearn.datasets import make_classification, make_regression -from sklearn.metrics import mean_squared_error, brier_score_loss -from sklearn.model_selection import train_test_split -import cudf -import cuml -from cuml.internals.base import Base -from cuml.internals.input_utils import input_to_cuml_array, is_array_like -from cuml.internals.mem_type import MemoryType -import pytest +cudf = gpu_only_import('cudf') def array_difference(a, b, with_sign=True): @@ -104,7 +105,7 @@ def _repr(self, threshold=None): f"total_tol={self.total_tol} ", f"with_sign={self.with_sign}", ">" - ] + ] def __repr__(self): return "".join(self._repr(threshold=5)) @@ -353,6 +354,7 @@ class ClassEnumerator: Instructs the class to recursively search submodules when True, otherwise only classes in the specified model will be enumerated """ + def __init__(self, module, exclude_classes=None, diff --git a/python/cuml/tests/conftest.py b/python/cuml/tests/conftest.py index c5087dffee..123971c02c 100644 --- a/python/cuml/tests/conftest.py +++ b/python/cuml/tests/conftest.py @@ -14,21 +14,22 @@ # limitations under the License. 
# +from cuml.testing.utils import create_synthetic_dataset +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.datasets import make_regression as skl_make_reg +from sklearn.datasets import make_classification as skl_make_clas +from sklearn.datasets import fetch_california_housing +from sklearn.datasets import fetch_20newsgroups +from math import ceil +import hypothesis +from cuml.internals.safe_imports import gpu_only_import import pytest import os import subprocess -import numpy as np -import cupy as cp -import hypothesis - -from math import ceil -from sklearn.datasets import fetch_20newsgroups -from sklearn.datasets import fetch_california_housing -from sklearn.datasets import make_classification as skl_make_clas -from sklearn.datasets import make_regression as skl_make_reg -from sklearn.feature_extraction.text import CountVectorizer -from cuml.testing.utils import create_synthetic_dataset +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') # Add the import here for any plugins that should be loaded EVERY TIME diff --git a/python/cuml/tests/dask/test_dask_arr_utils.py b/python/cuml/tests/dask/test_dask_arr_utils.py index df189aaf7e..325c059974 100644 --- a/python/cuml/tests/dask/test_dask_arr_utils.py +++ b/python/cuml/tests/dask/test_dask_arr_utils.py @@ -13,19 +13,18 @@ # limitations under the License. # +from cuml.dask.common.part_utils import _extract_partitions +import dask +from cuml.dask.common.dask_arr_utils import validate_dask_array import pytest from cuml.testing.utils import array_equal -import dask_cudf -import cudf -import cupy as cp -import cupyx - - -from cuml.dask.common.dask_arr_utils import validate_dask_array -import dask -from cuml.dask.common.part_utils import _extract_partitions +from cuml.internals.safe_imports import gpu_only_import +dask_cudf = gpu_only_import('dask_cudf') +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') @pytest.mark.parametrize("input_type", ["dask_array", diff --git a/python/cuml/tests/dask/test_dask_base.py b/python/cuml/tests/dask/test_dask_base.py index b37541711f..d98918d368 100644 --- a/python/cuml/tests/dask/test_dask_base.py +++ b/python/cuml/tests/dask/test_dask_base.py @@ -13,26 +13,25 @@ # limitations under the License. 
# -import cupy - -from dask_ml.wrappers import ParallelPostFit - -from cuml.dask.cluster import KMeans -from cuml.dask.naive_bayes.naive_bayes import MultinomialNB -from cuml.testing.dask.utils import load_text_corpus - -from cuml.dask.datasets import make_blobs - -import cuml -import numpy as np +from sklearn.model_selection import train_test_split +from cuml.dask.datasets import make_regression +from cuml.dask.linear_model import LinearRegression +from cuml.internals.safe_imports import cpu_only_import_from import pytest +from cuml.internals.safe_imports import cpu_only_import +import cuml +from cuml.dask.datasets import make_blobs +from cuml.testing.dask.utils import load_text_corpus +from cuml.dask.naive_bayes.naive_bayes import MultinomialNB +from cuml.dask.cluster import KMeans +from dask_ml.wrappers import ParallelPostFit +from cuml.internals.safe_imports import gpu_only_import +cupy = gpu_only_import('cupy') -from numpy.testing import assert_equal -from cuml.dask.linear_model import LinearRegression +np = cpu_only_import('numpy') -from cuml.dask.datasets import make_regression -from sklearn.model_selection import train_test_split +assert_equal = cpu_only_import_from('numpy.testing', 'assert_equal') def make_dataset(datatype, nrows, ncols, n_info): diff --git a/python/cuml/tests/dask/test_dask_coordinate_descent.py b/python/cuml/tests/dask/test_dask_coordinate_descent.py index c9f24ef2d2..bd97a9f135 100644 --- a/python/cuml/tests/dask/test_dask_coordinate_descent.py +++ b/python/cuml/tests/dask/test_dask_coordinate_descent.py @@ -21,7 +21,8 @@ from cuml.metrics import r2_score from cuml.testing.utils import unit_param, quality_param, stress_param -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') @pytest.mark.mg diff --git a/python/cuml/tests/dask/test_dask_datasets.py b/python/cuml/tests/dask/test_dask_datasets.py index fe73948b03..af7520e5b6 100644 --- a/python/cuml/tests/dask/test_dask_datasets.py +++ b/python/cuml/tests/dask/test_dask_datasets.py @@ -14,18 +14,17 @@ # limitations under the License. 
# +from cuml.dask.common.part_utils import _extract_partitions +from cuml.testing.utils import unit_param, quality_param, stress_param +from cuml.dask.common.input_utils import DistributedDataHandler +from cuml.dask.datasets.blobs import make_blobs +from cuml.internals.safe_imports import gpu_only_import import pytest import dask.array as da -import numpy as np -import cupy as cp - -from cuml.dask.datasets.blobs import make_blobs -from cuml.dask.common.input_utils import DistributedDataHandler - -from cuml.testing.utils import unit_param, quality_param, stress_param - -from cuml.dask.common.part_utils import _extract_partitions +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') @pytest.mark.parametrize('nrows', [unit_param(1e3), quality_param(1e5), @@ -118,7 +117,7 @@ def test_make_regression(n_samples, n_features, n_informative, if n_targets > 1: assert values.shape == (n_samples, n_targets), \ - "values shape mismatch" + "values shape mismatch" else: assert values.shape == (n_samples,), "values shape mismatch" @@ -128,7 +127,7 @@ def test_make_regression(n_samples, n_features, n_informative, if coef: if n_targets > 1: assert coefs.shape == (n_features, n_targets), \ - "coefs shape mismatch" + "coefs shape mismatch" assert len(coefs.chunks[1]) == 1 else: assert coefs.shape == (n_features,), "coefs shape mismatch" diff --git a/python/cuml/tests/dask/test_dask_dbscan.py b/python/cuml/tests/dask/test_dask_dbscan.py index 9a3c30ec4a..db82d3869f 100644 --- a/python/cuml/tests/dask/test_dask_dbscan.py +++ b/python/cuml/tests/dask/test_dask_dbscan.py @@ -13,16 +13,15 @@ # limitations under the License. # -import numpy as np -import pytest - +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import pairwise_distances +from sklearn.datasets import make_blobs +from sklearn.cluster import DBSCAN as skDBSCAN from cuml.testing.utils import get_pattern, unit_param, \ quality_param, stress_param, array_equal, assert_dbscan_equal - -from sklearn.cluster import DBSCAN as skDBSCAN -from sklearn.datasets import make_blobs -from sklearn.metrics import pairwise_distances -from sklearn.preprocessing import StandardScaler +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') @pytest.mark.mg @@ -112,9 +111,9 @@ def test_dbscan_precomputed(datatype, nrows, max_mbytes_per_batch, out_dtype, @pytest.mark.mg @pytest.mark.parametrize("name", [ - 'noisy_moons', - 'blobs', - 'no_structure']) + 'noisy_moons', + 'blobs', + 'no_structure']) @pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), stress_param(500000)]) # Vary the eps to get a range of core point counts @@ -159,9 +158,9 @@ def test_dbscan_sklearn_comparison(name, nrows, eps, client): @pytest.mark.mg @pytest.mark.parametrize("name", [ - 'noisy_moons', - 'blobs', - 'no_structure']) + 'noisy_moons', + 'blobs', + 'no_structure']) def test_dbscan_default(name, client): from cuml.dask.cluster.dbscan import DBSCAN as cuDBSCAN diff --git a/python/cuml/tests/dask/test_dask_doctest.py b/python/cuml/tests/dask/test_dask_doctest.py index 456e405842..b8552b2c0d 100644 --- a/python/cuml/tests/dask/test_dask_doctest.py +++ b/python/cuml/tests/dask/test_dask_doctest.py @@ -13,6 +13,8 @@ # limitations under the License. 
# +import pytest +from cuml.internals.safe_imports import cpu_only_import import contextlib import doctest import inspect @@ -20,10 +22,10 @@ import cuml import cuml.dask -import dask_cudf -import numpy as np -import pytest -import cudf +from cuml.internals.safe_imports import gpu_only_import +dask_cudf = gpu_only_import('dask_cudf') +np = cpu_only_import('numpy') +cudf = gpu_only_import('cudf') def _name_in_all(parent, name): diff --git a/python/cuml/tests/dask/test_dask_input_utils.py b/python/cuml/tests/dask/test_dask_input_utils.py index c8742c661e..3ba3081cdf 100644 --- a/python/cuml/tests/dask/test_dask_input_utils.py +++ b/python/cuml/tests/dask/test_dask_input_utils.py @@ -14,12 +14,13 @@ # limitations under the License. # +from cuml.dask.common.dask_arr_utils import to_dask_cudf import pytest from cuml.dask.datasets.blobs import make_blobs from cuml.dask.common.input_utils import DistributedDataHandler import dask.array as da -import cupy as cp -from cuml.dask.common.dask_arr_utils import to_dask_cudf +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') @pytest.mark.mg @@ -87,7 +88,7 @@ def test_extract_partitions_shape(nrows, ncols, n_parts, input_type, parts = [part.result() for worker, part in ddh.gpu_futures] for i in range(len(parts)): assert (parts[i][0].shape[0] == X_len_parts[i]) and ( - parts[i][1].shape[0] == y_len_parts[i]) + parts[i][1].shape[0] == y_len_parts[i]) else: ddh = DistributedDataHandler.create(X, client) parts = [part.result() for worker, part in ddh.gpu_futures] diff --git a/python/cuml/tests/dask/test_dask_kmeans.py b/python/cuml/tests/dask/test_dask_kmeans.py index 521da77321..42919829d3 100644 --- a/python/cuml/tests/dask/test_dask_kmeans.py +++ b/python/cuml/tests/dask/test_dask_kmeans.py @@ -13,19 +13,16 @@ # limitations under the License. # -import cupy as cp -import pytest - -from cuml.testing.utils import unit_param -from cuml.testing.utils import quality_param -from cuml.testing.utils import stress_param - -import dask.array as da - -from cuml.metrics import adjusted_rand_score -from sklearn.metrics import adjusted_rand_score as sk_adjusted_rand_score - from cuml.dask.common.dask_arr_utils import to_dask_cudf +from sklearn.metrics import adjusted_rand_score as sk_adjusted_rand_score +from cuml.metrics import adjusted_rand_score +import dask.array as da +from cuml.testing.utils import stress_param +from cuml.testing.utils import quality_param +from cuml.testing.utils import unit_param +import pytest +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') @pytest.mark.mg diff --git a/python/cuml/tests/dask/test_dask_kneighbors_classifier.py b/python/cuml/tests/dask/test_dask_kneighbors_classifier.py index 62d68b8e8d..f240e6eac3 100644 --- a/python/cuml/tests/dask/test_dask_kneighbors_classifier.py +++ b/python/cuml/tests/dask/test_dask_kneighbors_classifier.py @@ -14,10 +14,12 @@ # limitations under the License. 
# +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import cpu_only_import import pytest from cuml.testing.utils import unit_param, \ - quality_param, \ - stress_param + quality_param, \ + stress_param from cuml.neighbors import KNeighborsClassifier as lKNNClf from cuml.dask.neighbors import KNeighborsClassifier as dKNNClf @@ -29,9 +31,10 @@ import dask.array as da import dask.dataframe as dd from cuml.dask.common.dask_arr_utils import to_dask_cudf -from cudf import DataFrame -import numpy as np -import cudf +from cuml.internals.safe_imports import gpu_only_import_from +DataFrame = gpu_only_import_from('cudf', 'DataFrame') +np = cpu_only_import('numpy') +cudf = gpu_only_import('cudf') def generate_dask_array(np_array, n_parts): diff --git a/python/cuml/tests/dask/test_dask_kneighbors_regressor.py b/python/cuml/tests/dask/test_dask_kneighbors_regressor.py index 5c76c6b2c1..9aa41fe3fa 100644 --- a/python/cuml/tests/dask/test_dask_kneighbors_regressor.py +++ b/python/cuml/tests/dask/test_dask_kneighbors_regressor.py @@ -14,10 +14,12 @@ # limitations under the License. # +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import cpu_only_import import pytest from cuml.testing.utils import unit_param, \ - quality_param, \ - stress_param + quality_param, \ + stress_param from cuml.neighbors import KNeighborsRegressor as lKNNReg from cuml.dask.neighbors import KNeighborsRegressor as dKNNReg @@ -30,9 +32,10 @@ import dask.array as da import dask.dataframe as dd from cuml.dask.common.dask_arr_utils import to_dask_cudf -from cudf.core.dataframe import DataFrame -import numpy as np -import cudf +from cuml.internals.safe_imports import gpu_only_import_from +DataFrame = gpu_only_import_from('cudf.core.dataframe', 'DataFrame') +np = cpu_only_import('numpy') +cudf = gpu_only_import('cudf') def generate_dask_array(np_array, n_parts): diff --git a/python/cuml/tests/dask/test_dask_label_binarizer.py b/python/cuml/tests/dask/test_dask_label_binarizer.py index 59251ea16d..6bd87fc29a 100644 --- a/python/cuml/tests/dask/test_dask_label_binarizer.py +++ b/python/cuml/tests/dask/test_dask_label_binarizer.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +from cuml.internals.safe_imports import gpu_only_import import pytest from cuml.dask.preprocessing import LabelBinarizer from cuml.testing.utils import array_equal import dask -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') @pytest.mark.parametrize( diff --git a/python/cuml/tests/dask/test_dask_label_encoder.py b/python/cuml/tests/dask/test_dask_label_encoder.py index 7689dc897d..c1145a2d58 100644 --- a/python/cuml/tests/dask/test_dask_label_encoder.py +++ b/python/cuml/tests/dask/test_dask_label_encoder.py @@ -11,14 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from cuml.common.exceptions import NotFittedError +import pytest +from cuml.internals.safe_imports import cpu_only_import import cuml from cuml.dask.preprocessing.LabelEncoder import LabelEncoder -import cudf -import numpy as np -import dask_cudf -import pytest -from cuml.common.exceptions import NotFittedError -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +np = cpu_only_import('numpy') +dask_cudf = gpu_only_import('dask_cudf') +cp = gpu_only_import('cupy') def _arr_to_similarity_mat(arr): @@ -89,23 +91,23 @@ def test_labelencoder_unfitted(client): @pytest.mark.parametrize("use_fit_transform", [False, True]) @pytest.mark.parametrize( - "orig_label, ord_label, expected_reverted, bad_ord_label", - [(cudf.Series(['a', 'b', 'c']), - cudf.Series([2, 1, 2, 0]), - cudf.Series(['c', 'b', 'c', 'a']), - cudf.Series([-1, 1, 2, 0])), - (cudf.Series(['Tokyo', 'Paris', 'Austin']), - cudf.Series([0, 2, 0]), - cudf.Series(['Austin', 'Tokyo', 'Austin']), - cudf.Series([0, 1, 2, 3])), - (cudf.Series(['a', 'b', 'c1']), - cudf.Series([2, 1]), - cudf.Series(['c1', 'b']), - cudf.Series([0, 1, 2, 3])), - (cudf.Series(['1.09', '0.09', '.09', '09']), - cudf.Series([0, 1, 2, 3]), - cudf.Series(['.09', '0.09', '09', '1.09']), - cudf.Series([0, 1, 2, 3, 4]))]) + "orig_label, ord_label, expected_reverted, bad_ord_label", + [(cudf.Series(['a', 'b', 'c']), + cudf.Series([2, 1, 2, 0]), + cudf.Series(['c', 'b', 'c', 'a']), + cudf.Series([-1, 1, 2, 0])), + (cudf.Series(['Tokyo', 'Paris', 'Austin']), + cudf.Series([0, 2, 0]), + cudf.Series(['Austin', 'Tokyo', 'Austin']), + cudf.Series([0, 1, 2, 3])), + (cudf.Series(['a', 'b', 'c1']), + cudf.Series([2, 1]), + cudf.Series(['c1', 'b']), + cudf.Series([0, 1, 2, 3])), + (cudf.Series(['1.09', '0.09', '.09', '09']), + cudf.Series([0, 1, 2, 3]), + cudf.Series(['.09', '0.09', '09', '1.09']), + cudf.Series([0, 1, 2, 3, 4]))]) def test_inverse_transform(orig_label, ord_label, expected_reverted, bad_ord_label, use_fit_transform, client): diff --git a/python/cuml/tests/dask/test_dask_linear_regression.py b/python/cuml/tests/dask/test_dask_linear_regression.py index 880e1da4d4..c44b2726a9 100644 --- a/python/cuml/tests/dask/test_dask_linear_regression.py +++ b/python/cuml/tests/dask/test_dask_linear_regression.py @@ -13,14 +13,16 @@ # limitations under the License. # +from cuml.internals.safe_imports import gpu_only_import import pytest from cuml.dask.common import utils as dask_utils from sklearn.metrics import mean_squared_error from sklearn.datasets import make_regression -import pandas as pd -import numpy as np -import dask_cudf -import cudf +from cuml.internals.safe_imports import cpu_only_import +pd = cpu_only_import('pandas') +np = cpu_only_import('numpy') +dask_cudf = gpu_only_import('dask_cudf') +cudf = gpu_only_import('cudf') pytestmark = pytest.mark.mg diff --git a/python/cuml/tests/dask/test_dask_logistic_regression.py b/python/cuml/tests/dask/test_dask_logistic_regression.py index d317ab6b90..f6c123630c 100644 --- a/python/cuml/tests/dask/test_dask_logistic_regression.py +++ b/python/cuml/tests/dask/test_dask_logistic_regression.py @@ -13,16 +13,18 @@ # limitations under the License. 
# +from cuml.internals.safe_imports import gpu_only_import import pytest from cuml.dask.common import utils as dask_utils from sklearn.metrics import accuracy_score from sklearn.datasets import make_classification from sklearn.linear_model import LogisticRegression as skLR -import pandas as pd -import numpy as np -import cupy as cp -import dask_cudf -import cudf +from cuml.internals.safe_imports import cpu_only_import +pd = cpu_only_import('pandas') +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +dask_cudf = gpu_only_import('dask_cudf') +cudf = gpu_only_import('cudf') pytestmark = pytest.mark.mg diff --git a/python/cuml/tests/dask/test_dask_metrics.py b/python/cuml/tests/dask/test_dask_metrics.py index 1b385496c8..ef6e5a6cc3 100644 --- a/python/cuml/tests/dask/test_dask_metrics.py +++ b/python/cuml/tests/dask/test_dask_metrics.py @@ -13,15 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import dask.array as da +from cuml.dask.metrics import confusion_matrix +from cuml.testing.utils import stress_param, generate_random_labels +from sklearn.metrics import confusion_matrix as sk_confusion_matrix +import pytest +from cuml.internals.safe_imports import gpu_only_import from itertools import chain, permutations -import numpy as np -import cupy as cp -import pytest -from sklearn.metrics import confusion_matrix as sk_confusion_matrix -from cuml.testing.utils import stress_param, generate_random_labels -from cuml.dask.metrics import confusion_matrix -import dask.array as da +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') @pytest.mark.mg diff --git a/python/cuml/tests/dask/test_dask_naive_bayes.py b/python/cuml/tests/dask/test_dask_naive_bayes.py index 6d117f0af8..836bf4de72 100644 --- a/python/cuml/tests/dask/test_dask_naive_bayes.py +++ b/python/cuml/tests/dask/test_dask_naive_bayes.py @@ -14,15 +14,14 @@ # limitations under the License. # -import cupy as cp -import dask.array - -import pytest - -from cuml.dask.naive_bayes import MultinomialNB -from cuml.naive_bayes.naive_bayes import MultinomialNB as SGNB -from cuml.testing.dask.utils import load_text_corpus from sklearn.metrics import accuracy_score +from cuml.testing.dask.utils import load_text_corpus +from cuml.naive_bayes.naive_bayes import MultinomialNB as SGNB +from cuml.dask.naive_bayes import MultinomialNB +import pytest +import dask.array +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') def test_basic_fit_predict(client): diff --git a/python/cuml/tests/dask/test_dask_nearest_neighbors.py b/python/cuml/tests/dask/test_dask_nearest_neighbors.py index 538d0326c7..ed10d2868d 100644 --- a/python/cuml/tests/dask/test_dask_nearest_neighbors.py +++ b/python/cuml/tests/dask/test_dask_nearest_neighbors.py @@ -13,24 +13,21 @@ # limitations under the License. 
# -import pytest - -import cudf -import dask_cudf - -import pandas as pd - -import numpy as np - -from cuml.common import has_scipy - +from cuml.testing.utils import array_equal +from sklearn.neighbors import KNeighborsClassifier +from cuml.testing.utils import unit_param, quality_param, stress_param from cuml.dask.common import utils as dask_utils +from cuml.common import has_scipy +from cuml.internals.safe_imports import cpu_only_import +import pytest -from cuml.testing.utils import unit_param, quality_param, stress_param +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +dask_cudf = gpu_only_import('dask_cudf') -from sklearn.neighbors import KNeighborsClassifier +pd = cpu_only_import('pandas') -from cuml.testing.utils import array_equal +np = cpu_only_import('numpy') def predict(neigh_ind, _y, n_neighbors): diff --git a/python/cuml/tests/dask/test_dask_one_hot_encoder.py b/python/cuml/tests/dask/test_dask_one_hot_encoder.py index 4ed0d392b7..b31e4d6c1e 100644 --- a/python/cuml/tests/dask/test_dask_one_hot_encoder.py +++ b/python/cuml/tests/dask/test_dask_one_hot_encoder.py @@ -12,18 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import dask_cudf -import pytest -from cudf import DataFrame, Series -import cupy as cp -import numpy as np -import dask.array as da -from cuml.dask.preprocessing import OneHotEncoder +from cuml.internals.safe_imports import cpu_only_import_from +from sklearn.preprocessing import OneHotEncoder as SkOneHotEncoder from cuml.testing.utils import \ stress_param, \ generate_inputs_from_categories, assert_inverse_equal, from_df_to_numpy -from sklearn.preprocessing import OneHotEncoder as SkOneHotEncoder -from pandas.testing import assert_frame_equal +from cuml.dask.preprocessing import OneHotEncoder +import dask.array as da +from cuml.internals.safe_imports import cpu_only_import +from cudf import DataFrame, Series +import pytest +from cuml.internals.safe_imports import gpu_only_import +dask_cudf = gpu_only_import('dask_cudf') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') +assert_frame_equal = cpu_only_import_from( + 'pandas.testing', 'assert_frame_equal') @pytest.mark.mg diff --git a/python/cuml/tests/dask/test_dask_pca.py b/python/cuml/tests/dask/test_dask_pca.py index 63c6ca968d..31634e407a 100644 --- a/python/cuml/tests/dask/test_dask_pca.py +++ b/python/cuml/tests/dask/test_dask_pca.py @@ -13,11 +13,12 @@ # limitations under the License. # -import pytest -import numpy as np -import cupy as cp - from cuml.dask.common.dask_arr_utils import to_dask_cudf +from cuml.internals.safe_imports import gpu_only_import +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') @pytest.mark.mg diff --git a/python/cuml/tests/dask/test_dask_random_forest.py b/python/cuml/tests/dask/test_dask_random_forest.py index df7ed87424..9446adf545 100644 --- a/python/cuml/tests/dask/test_dask_random_forest.py +++ b/python/cuml/tests/dask/test_dask_random_forest.py @@ -30,29 +30,27 @@ # limitations under the License. 
# -import cudf -import cupy as cp -import dask_cudf -import pytest -import json - -import numpy as np -import pandas as pd - -from cuml.dask.ensemble import RandomForestClassifier as cuRFC_mg -from cuml.dask.ensemble import RandomForestRegressor as cuRFR_mg -from cuml.dask.common import utils as dask_utils - -from cuml.ensemble import RandomForestClassifier as cuRFC_sg -from cuml.ensemble import RandomForestRegressor as cuRFR_sg - -from dask.array import from_array -from sklearn.datasets import make_regression, make_classification -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, r2_score, mean_squared_error +from dask.distributed import Client from sklearn.ensemble import RandomForestClassifier as skrfc +from sklearn.metrics import accuracy_score, r2_score, mean_squared_error +from sklearn.model_selection import train_test_split +from sklearn.datasets import make_regression, make_classification +from dask.array import from_array +from cuml.ensemble import RandomForestRegressor as cuRFR_sg +from cuml.ensemble import RandomForestClassifier as cuRFC_sg +from cuml.dask.common import utils as dask_utils +from cuml.dask.ensemble import RandomForestRegressor as cuRFR_mg +from cuml.dask.ensemble import RandomForestClassifier as cuRFC_mg +from cuml.internals.safe_imports import cpu_only_import +import json +import pytest +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +dask_cudf = gpu_only_import('dask_cudf') -from dask.distributed import Client +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') def _prep_training_data(c, X_train, y_train, partitions_per_worker): diff --git a/python/cuml/tests/dask/test_dask_ridge_regression.py b/python/cuml/tests/dask/test_dask_ridge_regression.py index 55305ab0c6..c37f48ddbf 100644 --- a/python/cuml/tests/dask/test_dask_ridge_regression.py +++ b/python/cuml/tests/dask/test_dask_ridge_regression.py @@ -13,14 +13,16 @@ # limitations under the License. # +from cuml.internals.safe_imports import gpu_only_import import pytest from cuml.dask.common import utils as dask_utils from sklearn.metrics import mean_squared_error from sklearn.datasets import make_regression -import pandas as pd -import numpy as np -import dask_cudf -import cudf +from cuml.internals.safe_imports import cpu_only_import +pd = cpu_only_import('pandas') +np = cpu_only_import('numpy') +dask_cudf = gpu_only_import('dask_cudf') +cudf = gpu_only_import('cudf') pytestmark = pytest.mark.mg diff --git a/python/cuml/tests/dask/test_dask_serialization.py b/python/cuml/tests/dask/test_dask_serialization.py index 55ee58e289..24524a437c 100644 --- a/python/cuml/tests/dask/test_dask_serialization.py +++ b/python/cuml/tests/dask/test_dask_serialization.py @@ -13,18 +13,15 @@ # limitations under the License. # -import cupy as cp -import cupyx - -from cuml.internals.array_sparse import SparseCumlArray - -from cuml.naive_bayes.naive_bayes import MultinomialNB - from distributed.protocol.serialize import serialize +from cuml.naive_bayes.naive_bayes import MultinomialNB +from cuml.internals.array_sparse import SparseCumlArray +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') def test_register_naive_bayes_serialization(): - """ Assuming here that the Dask serializers are well-tested. 
This test-case is only validating that register_serialization diff --git a/python/cuml/tests/dask/test_dask_sql.py b/python/cuml/tests/dask/test_dask_sql.py index d6524740b7..dfeeaa478b 100644 --- a/python/cuml/tests/dask/test_dask_sql.py +++ b/python/cuml/tests/dask/test_dask_sql.py @@ -13,16 +13,18 @@ # limitations under the License. # -import cuml -import cudf -import dask_cudf -import pytest -import numpy as np +from sklearn.model_selection import train_test_split +from sklearn.datasets import make_classification +from sklearn.linear_model import LogisticRegression from cuml.internals.import_utils import has_dask_sql +from cuml.internals.safe_imports import cpu_only_import +import pytest +import cuml +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +dask_cudf = gpu_only_import('dask_cudf') +np = cpu_only_import('numpy') -from sklearn.linear_model import LogisticRegression -from sklearn.datasets import make_classification -from sklearn.model_selection import train_test_split if has_dask_sql(): from dask_sql import Context diff --git a/python/cuml/tests/dask/test_dask_tfidf.py b/python/cuml/tests/dask/test_dask_tfidf.py index df0a45c827..5f690eae65 100644 --- a/python/cuml/tests/dask/test_dask_tfidf.py +++ b/python/cuml/tests/dask/test_dask_tfidf.py @@ -13,18 +13,21 @@ # limitations under the License. # -import pytest -import numpy as np -import cupy as cp -from scipy.sparse import csr_matrix as scipy_csr_matrix -from cupyx.scipy.sparse import csr_matrix as cp_csr_matrix -import dask.array as da -import dask - -from cuml.dask.feature_extraction.text import TfidfTransformer from sklearn.feature_extraction.text import ( TfidfTransformer as SkTfidfTransformer, ) +from cuml.dask.feature_extraction.text import TfidfTransformer +import dask +import dask.array as da +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +scipy_csr_matrix = cpu_only_import_from('scipy.sparse', 'csr_matrix') +cp_csr_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'csr_matrix') # Testing Util Functions diff --git a/python/cuml/tests/dask/test_dask_tsvd.py b/python/cuml/tests/dask/test_dask_tsvd.py index e775db1eb6..0110e10995 100644 --- a/python/cuml/tests/dask/test_dask_tsvd.py +++ b/python/cuml/tests/dask/test_dask_tsvd.py @@ -13,14 +13,15 @@ # limitations under the License. # -import pytest - -import numpy as np +from cuml.dask.common.dask_arr_utils import to_dask_cudf +from cuml.internals.safe_imports import gpu_only_import from cuml.testing.utils import array_equal, \ unit_param, stress_param -import cupy as cp +import pytest -from cuml.dask.common.dask_arr_utils import to_dask_cudf +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') @pytest.mark.mg diff --git a/python/cuml/tests/dask/test_dask_umap.py b/python/cuml/tests/dask/test_dask_umap.py index 3457076e88..425c5c0f15 100644 --- a/python/cuml/tests/dask/test_dask_umap.py +++ b/python/cuml/tests/dask/test_dask_umap.py @@ -13,17 +13,17 @@ # limitations under the License. 
# -import pytest - -import cupy as cp -import numpy as np -from cuml.internals import logger -from cuml.metrics import trustworthiness - +from sklearn.datasets import load_iris +from sklearn.datasets import load_digits import math +from cuml.metrics import trustworthiness +from cuml.internals import logger +from cuml.internals.safe_imports import cpu_only_import +import pytest -from sklearn.datasets import load_digits -from sklearn.datasets import load_iris +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') def _load_dataset(dataset, n_rows): diff --git a/python/cuml/tests/explainer/test_explainer_base.py b/python/cuml/tests/explainer/test_explainer_base.py index 7acdf84edc..9b707ba598 100644 --- a/python/cuml/tests/explainer/test_explainer_base.py +++ b/python/cuml/tests/explainer/test_explainer_base.py @@ -14,15 +14,15 @@ # limitations under the License. # -import cudf -import cupy as cp -import numpy as np -import pytest - -from pylibraft.common.handle import Handle - -from cuml.explainer.base import SHAPBase from cuml import LinearRegression as cuLR +from cuml.explainer.base import SHAPBase +from pylibraft.common.handle import Handle +import pytest +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') @pytest.mark.parametrize("handle", [True, False]) diff --git a/python/cuml/tests/explainer/test_explainer_common.py b/python/cuml/tests/explainer/test_explainer_common.py index 28c807f1ac..5bf986047e 100644 --- a/python/cuml/tests/explainer/test_explainer_common.py +++ b/python/cuml/tests/explainer/test_explainer_common.py @@ -14,25 +14,25 @@ # limitations under the License. 
# -import cuml -import cupy as cp -import numpy as np -import pytest - -from cuml import LinearRegression as reg -from cuml import PCA -from cuml.explainer.common import get_cai_ptr -from cuml.explainer.common import get_dtype_from_model_func -from cuml.explainer.common import get_handle_from_cuml_model_func -from cuml.explainer.common import get_link_fn_from_str_or_fn -from cuml.explainer.common import get_tag_from_model_func -from cuml.explainer.common import link_dict -from cuml.explainer.common import model_func_call -from cuml.testing.utils import ClassEnumerator -from cuml.datasets import make_regression -from sklearn.linear_model import LinearRegression as skreg - from pylibraft.common.handle import Handle +from sklearn.linear_model import LinearRegression as skreg +from cuml.datasets import make_regression +from cuml.testing.utils import ClassEnumerator +from cuml.explainer.common import model_func_call +from cuml.explainer.common import link_dict +from cuml.explainer.common import get_tag_from_model_func +from cuml.explainer.common import get_link_fn_from_str_or_fn +from cuml.explainer.common import get_handle_from_cuml_model_func +from cuml.explainer.common import get_dtype_from_model_func +from cuml.explainer.common import get_cai_ptr +from cuml import PCA +from cuml import LinearRegression as reg +import pytest +from cuml.internals.safe_imports import cpu_only_import +import cuml +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') models_config = ClassEnumerator(module=cuml) diff --git a/python/cuml/tests/explainer/test_explainer_kernel_shap.py b/python/cuml/tests/explainer/test_explainer_kernel_shap.py index ec85625c9d..18b1356a73 100644 --- a/python/cuml/tests/explainer/test_explainer_kernel_shap.py +++ b/python/cuml/tests/explainer/test_explainer_kernel_shap.py @@ -14,23 +14,22 @@ # limitations under the License. 
# -import cuml -import cupy as cp -import numpy as np -import math -import pytest -import sklearn.neighbors - -from cuml import Lasso -from cuml import KernelExplainer -from cuml.internals.import_utils import has_scipy -from cuml.internals.import_utils import has_shap -from cuml.datasets import make_regression - +from sklearn.model_selection import train_test_split from cuml.testing.utils import create_synthetic_dataset, ClassEnumerator, \ get_shap_values - -from sklearn.model_selection import train_test_split +from cuml.datasets import make_regression +from cuml.internals.import_utils import has_shap +from cuml.internals.import_utils import has_scipy +from cuml import KernelExplainer +from cuml import Lasso +import sklearn.neighbors +import pytest +import math +from cuml.internals.safe_imports import cpu_only_import +import cuml +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') models_config = ClassEnumerator(module=cuml) diff --git a/python/cuml/tests/explainer/test_explainer_permutation_shap.py b/python/cuml/tests/explainer/test_explainer_permutation_shap.py index de378a4376..66909da5da 100644 --- a/python/cuml/tests/explainer/test_explainer_permutation_shap.py +++ b/python/cuml/tests/explainer/test_explainer_permutation_shap.py @@ -15,16 +15,17 @@ # -import cuml -import cupy as cp -import numpy as np -import pytest -import sklearn.neighbors - -from cuml import PermutationExplainer - from cuml.testing.utils import ClassEnumerator, get_shap_values, \ create_synthetic_dataset +from cuml import PermutationExplainer +import sklearn.neighbors +import pytest +from cuml.internals.safe_imports import cpu_only_import +import cuml +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') + models_config = ClassEnumerator(module=cuml) models = models_config.get_models() diff --git a/python/cuml/tests/explainer/test_gpu_treeshap.py b/python/cuml/tests/explainer/test_gpu_treeshap.py index 4f450f68fe..e423144302 100644 --- a/python/cuml/tests/explainer/test_gpu_treeshap.py +++ b/python/cuml/tests/explainer/test_gpu_treeshap.py @@ -14,22 +14,24 @@ # limitations under the License. 
# +from cuml.testing.utils import as_type +import cuml +from cuml.ensemble import RandomForestClassifier as curfc +from cuml.ensemble import RandomForestRegressor as curfr +from cuml.common.exceptions import NotFittedError +from cuml.internals.import_utils import has_sklearn +from cuml.internals.import_utils import has_lightgbm, has_shap +from cuml.explainer.tree_shap import TreeExplainer +from hypothesis import given, settings, assume, HealthCheck, strategies as st +from cuml.internals.safe_imports import gpu_only_import import json import pytest import treelite -import numpy as np -import pandas as pd -import cupy as cp -import cudf -from hypothesis import given, settings, assume, HealthCheck, strategies as st -from cuml.explainer.tree_shap import TreeExplainer -from cuml.internals.import_utils import has_lightgbm, has_shap -from cuml.internals.import_utils import has_sklearn -from cuml.common.exceptions import NotFittedError -from cuml.ensemble import RandomForestRegressor as curfr -from cuml.ensemble import RandomForestClassifier as curfc -import cuml -from cuml.testing.utils import as_type +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') +cp = gpu_only_import('cupy') +cudf = gpu_only_import('cudf') pytestmark = pytest.mark.skip diff --git a/python/cuml/tests/explainer/test_sampling.py b/python/cuml/tests/explainer/test_sampling.py index 123d52104c..d05c3d4a59 100644 --- a/python/cuml/tests/explainer/test_sampling.py +++ b/python/cuml/tests/explainer/test_sampling.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import cudf -import cupy as cp -import numpy as np -import pandas as pd -import pytest -from numba import cuda - from cuml.explainer.sampling import kmeans_sampling +from cuml.internals.safe_imports import gpu_only_import_from +import pytest +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') +cuda = gpu_only_import_from('numba', 'cuda') @pytest.mark.parametrize('input_type', ["cudf-df", diff --git a/python/cuml/tests/stemmer_tests/test_len_utils.py b/python/cuml/tests/stemmer_tests/test_len_utils.py index 86c58af533..357aa4cb7e 100644 --- a/python/cuml/tests/stemmer_tests/test_len_utils.py +++ b/python/cuml/tests/stemmer_tests/test_len_utils.py @@ -14,12 +14,14 @@ # limitations under the License. # -import cudf -import numpy as np from cuml.preprocessing.text.stem.porter_stemmer_utils.len_flags_utils import ( len_eq_n, len_gt_n, ) +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +np = cpu_only_import('numpy') def test_len_gt_n(): diff --git a/python/cuml/tests/stemmer_tests/test_porter_stemmer_rules.py b/python/cuml/tests/stemmer_tests/test_porter_stemmer_rules.py index 8f1631574b..bf1354226f 100644 --- a/python/cuml/tests/stemmer_tests/test_porter_stemmer_rules.py +++ b/python/cuml/tests/stemmer_tests/test_porter_stemmer_rules.py @@ -13,11 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import cudf -import numpy as np from cuml.preprocessing.text.stem.porter_stemmer_utils import ( porter_stemmer_rules, ) +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +np = cpu_only_import('numpy') def test_ends_with_suffix(): diff --git a/python/cuml/tests/stemmer_tests/test_stemmer.py b/python/cuml/tests/stemmer_tests/test_stemmer.py index 048870895f..7560a7e229 100644 --- a/python/cuml/tests/stemmer_tests/test_stemmer.py +++ b/python/cuml/tests/stemmer_tests/test_stemmer.py @@ -14,10 +14,10 @@ # limitations under the License. # -import cudf - -from cuml.preprocessing.text import stem as rapids_stem from nltk import stem as nltk_stem +from cuml.preprocessing.text import stem as rapids_stem +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') def get_words(): @@ -51,4 +51,4 @@ def test_same_results(): cuml_stemmed = cuml_stemmer.stem(word_ser) assert all([a == b for a, b in - zip(nltk_stemmed, cuml_stemmed.to_pandas().values)]) + zip(nltk_stemmed, cuml_stemmed.to_pandas().values)]) diff --git a/python/cuml/tests/stemmer_tests/test_steps.py b/python/cuml/tests/stemmer_tests/test_steps.py index a40839475f..143d4b6166 100644 --- a/python/cuml/tests/stemmer_tests/test_steps.py +++ b/python/cuml/tests/stemmer_tests/test_steps.py @@ -14,8 +14,9 @@ # limitations under the License. # -import cudf from cuml.preprocessing.text.stem.porter_stemmer import PorterStemmer +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') def test_step1a(): diff --git a/python/cuml/tests/stemmer_tests/test_suffix_utils.py b/python/cuml/tests/stemmer_tests/test_suffix_utils.py index 3ba3ca4d8e..0f97dfe559 100644 --- a/python/cuml/tests/stemmer_tests/test_suffix_utils.py +++ b/python/cuml/tests/stemmer_tests/test_suffix_utils.py @@ -14,11 +14,12 @@ # limitations under the License. # -import cudf from cuml.preprocessing.text.stem.porter_stemmer_utils.suffix_utils import ( get_stem_series, replace_suffix, ) +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') def test_get_stem_series(): diff --git a/python/cuml/tests/test_adapters.py b/python/cuml/tests/test_adapters.py index 67403c86ae..bd0d3b70f4 100644 --- a/python/cuml/tests/test_adapters.py +++ b/python/cuml/tests/test_adapters.py @@ -14,33 +14,34 @@ # limitations under the License. 
# -import pytest - -import cupy as cp -import cupyx as cpx -import numpy as np -from cupyx.scipy.sparse import coo_matrix -from scipy import stats - -from cuml.thirdparty_adapters.adapters import check_array, \ - _get_mask as cu_get_mask, \ - _masked_column_median, \ - _masked_column_mean, \ - _masked_column_mode - -from sklearn.utils._mask import _get_mask as sk_get_mask - +import platform +from sklearn.preprocessing import normalize as sk_normalize +from cuml.testing.test_preproc_utils import assert_allclose from cuml.thirdparty_adapters.sparsefuncs_fast import \ csr_mean_variance_axis0, \ csc_mean_variance_axis0, \ _csc_mean_variance_axis0, \ inplace_csr_row_normalize_l1, \ inplace_csr_row_normalize_l2 +from sklearn.utils._mask import _get_mask as sk_get_mask +from cuml.thirdparty_adapters.adapters import check_array, \ + _get_mask as cu_get_mask, \ + _masked_column_median, \ + _masked_column_mean, \ + _masked_column_mode +from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import cpu_only_import +import pytest + +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cpx = gpu_only_import('cupyx') +np = cpu_only_import('numpy') +coo_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'coo_matrix') +stats = cpu_only_import_from('scipy', 'stats') -from cuml.testing.test_preproc_utils import assert_allclose -from sklearn.preprocessing import normalize as sk_normalize -import platform IS_ARM = platform.processor() == "aarch64" diff --git a/python/cuml/tests/test_agglomerative.py b/python/cuml/tests/test_agglomerative.py index 99c134ebf3..561d3af7e3 100644 --- a/python/cuml/tests/test_agglomerative.py +++ b/python/cuml/tests/test_agglomerative.py @@ -22,7 +22,8 @@ from sklearn import cluster -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') @pytest.mark.parametrize('connectivity', ['knn', 'pairwise']) diff --git a/python/cuml/tests/test_allocator.py b/python/cuml/tests/test_allocator.py index 8f0bd07b22..896a7759d0 100644 --- a/python/cuml/tests/test_allocator.py +++ b/python/cuml/tests/test_allocator.py @@ -14,15 +14,17 @@ # limitations under the License. # +from cuml.internals.input_utils import sparse_scipy_to_cp +from cuml.testing.utils import small_classification_dataset +from cuml.naive_bayes import MultinomialNB +from cuml import LogisticRegression +from cuml.internals.safe_imports import cpu_only_import import pytest -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') -from cuml import LogisticRegression -from cuml.naive_bayes import MultinomialNB -from cuml.testing.utils import small_classification_dataset -from cuml.internals.input_utils import sparse_scipy_to_cp try: from cupy.cuda import using_allocator as cupy_using_allocator diff --git a/python/cuml/tests/test_api.py b/python/cuml/tests/test_api.py index 479fc989e7..2a150a9c5b 100644 --- a/python/cuml/tests/test_api.py +++ b/python/cuml/tests/test_api.py @@ -14,16 +14,17 @@ # limitations under the License. 
# +from sklearn.datasets import make_classification +from cuml.testing.utils import ClassEnumerator +from cuml.internals.base import Base +from cuml.internals.safe_imports import cpu_only_import +import inspect import pytest import cuml import cuml.internals.mixins as cumix -import cupy as cp -import inspect -import numpy as np - -from cuml.internals.base import Base -from cuml.testing.utils import ClassEnumerator -from sklearn.datasets import make_classification +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') ############################################################################### diff --git a/python/cuml/tests/test_arima.py b/python/cuml/tests/test_arima.py index a4b3b98f8e..7bb13fead5 100644 --- a/python/cuml/tests/test_arima.py +++ b/python/cuml/tests/test_arima.py @@ -30,22 +30,25 @@ # Note that there are significant differences between our implementation and # the reference, and perfect parity cannot be expected for integration tests. -import pytest - -import numpy as np -import os +from cuml.testing.utils import stress_param +from cuml.internals.input_utils import input_to_host_array +import cuml.tsa.arima as arima +from cuml.internals.safe_imports import gpu_only_import +import statsmodels.api as sm +from sklearn.model_selection import train_test_split +from cuml.internals.safe_imports import cpu_only_import_from import warnings +import os +import pytest -import pandas as pd -from scipy.optimize.optimize import approx_fprime -from sklearn.model_selection import train_test_split -import statsmodels.api as sm +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -import cudf -import cuml.tsa.arima as arima -from cuml.internals.input_utils import input_to_host_array +pd = cpu_only_import('pandas') +approx_fprime = cpu_only_import_from( + 'scipy.optimize.optimize', 'approx_fprime') -from cuml.testing.utils import stress_param +cudf = gpu_only_import('cudf') ############################################################################### diff --git a/python/cuml/tests/test_array_sparse.py b/python/cuml/tests/test_array_sparse.py index 4f7e7f9417..c0bb7a0864 100644 --- a/python/cuml/tests/test_array_sparse.py +++ b/python/cuml/tests/test_array_sparse.py @@ -14,14 +14,16 @@ # limitations under the License. 
# +from cuml.internals.safe_imports import gpu_only_import import pytest from cuml.internals.array import CumlArray from cuml.internals.array_sparse import SparseCumlArray -import scipy.sparse -import cupy as cp -import cupyx +from cuml.internals.safe_imports import cpu_only_import +scipy_sparse = cpu_only_import('scipy.sparse') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') test_input_types = [ 'cupy', 'scipy' @@ -34,7 +36,7 @@ @pytest.mark.parametrize('convert_format', [True, False]) def test_input(input_type, sparse_format, dtype, convert_format): - rand_func = cupyx.scipy.sparse if input_type == 'cupy' else scipy.sparse + rand_func = cupyx.scipy.sparse if input_type == 'cupy' else scipy_sparse X = rand_func.random(100, 100, format=sparse_format, density=0.5, @@ -72,7 +74,7 @@ def test_nonsparse_input_fails(): @pytest.mark.parametrize('input_type', test_input_types) def test_convert_to_dtype(input_type): - rand_func = cupyx.scipy.sparse if input_type == 'cupy' else scipy.sparse + rand_func = cupyx.scipy.sparse if input_type == 'cupy' else scipy_sparse X = rand_func.random(100, 100, format='csr', density=0.5, dtype=cp.float64) @@ -92,7 +94,7 @@ def test_convert_to_dtype(input_type): @pytest.mark.parametrize('input_type', test_input_types) def test_convert_index(input_type): - rand_func = cupyx.scipy.sparse if input_type == 'cupy' else scipy.sparse + rand_func = cupyx.scipy.sparse if input_type == 'cupy' else scipy_sparse X = rand_func.random(100, 100, format='csr', density=0.5, dtype=cp.float64) @@ -117,7 +119,7 @@ def test_convert_index(input_type): @pytest.mark.parametrize('output_format', [None, 'coo', 'csc']) def test_output(input_type, output_type, dtype, output_format): - rand_func = cupyx.scipy.sparse if input_type == 'cupy' else scipy.sparse + rand_func = cupyx.scipy.sparse if input_type == 'cupy' else scipy_sparse X = rand_func.random(100, 100, format='csr', density=0.5, dtype=dtype) @@ -127,11 +129,11 @@ def test_output(input_type, output_type, dtype, output_format): if output_type == 'scipy': if output_format is None: - assert isinstance(output, scipy.sparse.csr_matrix) + assert isinstance(output, scipy_sparse.csr_matrix) elif output_format == 'coo': - assert isinstance(output, scipy.sparse.coo_matrix) + assert isinstance(output, scipy_sparse.coo_matrix) elif output_format == 'csc': - assert isinstance(output, scipy.sparse.csc_matrix) + assert isinstance(output, scipy_sparse.csc_matrix) else: pytest.fail("unecpected output format") else: diff --git a/python/cuml/tests/test_auto_arima.py b/python/cuml/tests/test_auto_arima.py index 8862a93076..5825b71434 100644 --- a/python/cuml/tests/test_auto_arima.py +++ b/python/cuml/tests/test_auto_arima.py @@ -14,12 +14,12 @@ # limitations under the License. 
# +from cuml.internals.input_utils import input_to_cuml_array +from cuml.tsa import auto_arima import pytest -import numpy as np - -from cuml.tsa import auto_arima -from cuml.internals.input_utils import input_to_cuml_array +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') ############################################################################### diff --git a/python/cuml/tests/test_base.py b/python/cuml/tests/test_base.py index e8d3823489..d22dec18e5 100644 --- a/python/cuml/tests/test_base.py +++ b/python/cuml/tests/test_base.py @@ -25,7 +25,7 @@ get_classes_from_package, \ small_classification_dataset from cuml._thirdparty.sklearn.utils.skl_dependencies import BaseEstimator \ - as sklBaseEstimator + as sklBaseEstimator all_base_children = get_classes_from_package(cuml, import_sub_packages=True) @@ -153,7 +153,6 @@ def get_param_doc(param_doc_obj, name: str): # ignore ColumnTransformer init warning @pytest.mark.filterwarnings("ignore:Transformers are required") def test_base_children_get_param_names(child_class: str): - """ This test ensures that the arguments in `Base.__init__` are available in all derived classes `get_param_names` diff --git a/python/cuml/tests/test_batched_lbfgs.py b/python/cuml/tests/test_batched_lbfgs.py index d271fd2930..f43349275c 100644 --- a/python/cuml/tests/test_batched_lbfgs.py +++ b/python/cuml/tests/test_batched_lbfgs.py @@ -14,9 +14,9 @@ # limitations under the License. # -import numpy as np - from cuml.tsa.batched_lbfgs import batched_fmin_lbfgs_b +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') def rosenbrock(x, a=1, b=100): diff --git a/python/cuml/tests/test_benchmark.py b/python/cuml/tests/test_benchmark.py index f775735e64..3cc67469f3 100644 --- a/python/cuml/tests/test_benchmark.py +++ b/python/cuml/tests/test_benchmark.py @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from cuml.benchmark.bench_helper_funcs import fit, fit_predict +import time +from sklearn import metrics +from cuml.internals.safe_imports import gpu_only_import_from +import pytest +from cuml.internals.safe_imports import gpu_only_import from cuml.benchmark import datagen, algorithms from cuml.benchmark.bench_helper_funcs import _training_data_to_numpy from cuml.benchmark.runners import AccuracyComparisonRunner, \ @@ -19,16 +25,12 @@ from cuml.internals.import_utils import has_umap from cuml.internals.import_utils import has_xgboost -import numpy as np -import cudf -import pytest -from numba import cuda -from sklearn import metrics -import pandas as pd - -import time +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cudf = gpu_only_import('cudf') +cuda = gpu_only_import_from('numba', 'cuda') +pd = cpu_only_import('pandas') -from cuml.benchmark.bench_helper_funcs import fit, fit_predict pytestmark = pytest.mark.skip diff --git a/python/cuml/tests/test_compose.py b/python/cuml/tests/test_compose.py index 442875ce3a..c08cd7c43e 100644 --- a/python/cuml/tests/test_compose.py +++ b/python/cuml/tests/test_compose.py @@ -13,39 +13,38 @@ # limitations under the License. 
# -import pytest - -import cudf -import numpy as np -from pandas import DataFrame as pdDataFrame -from cudf import DataFrame as cuDataFrame - +from cuml.testing.test_preproc_utils import assert_allclose +from sklearn.preprocessing import \ + StandardScaler as skStandardScaler, \ + Normalizer as skNormalizer, \ + PolynomialFeatures as skPolynomialFeatures, \ + OneHotEncoder as skOneHotEncoder +from cuml.preprocessing import \ + StandardScaler as cuStandardScaler, \ + Normalizer as cuNormalizer, \ + PolynomialFeatures as cuPolynomialFeatures, \ + OneHotEncoder as cuOneHotEncoder +from sklearn.compose import \ + ColumnTransformer as skColumnTransformer, \ + make_column_transformer as sk_make_column_transformer, \ + make_column_selector as sk_make_column_selector from cuml.compose import \ ColumnTransformer as cuColumnTransformer, \ make_column_transformer as cu_make_column_transformer, \ make_column_selector as cu_make_column_selector +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import cpu_only_import +import pytest -from sklearn.compose import \ - ColumnTransformer as skColumnTransformer, \ - make_column_transformer as sk_make_column_transformer, \ - make_column_selector as sk_make_column_selector +from cuml.internals.safe_imports import gpu_only_import from cuml.testing.test_preproc_utils import clf_dataset, \ sparse_clf_dataset # noqa: F401 - -from cuml.preprocessing import \ - StandardScaler as cuStandardScaler, \ - Normalizer as cuNormalizer, \ - PolynomialFeatures as cuPolynomialFeatures, \ - OneHotEncoder as cuOneHotEncoder - -from sklearn.preprocessing import \ - StandardScaler as skStandardScaler, \ - Normalizer as skNormalizer, \ - PolynomialFeatures as skPolynomialFeatures, \ - OneHotEncoder as skOneHotEncoder - -from cuml.testing.test_preproc_utils import assert_allclose +cudf = gpu_only_import('cudf') +np = cpu_only_import('numpy') +pdDataFrame = cpu_only_import_from('pandas', 'DataFrame') +cuDataFrame = gpu_only_import_from('cudf', 'DataFrame') @pytest.mark.parametrize('remainder', ['drop', 'passthrough']) diff --git a/python/cuml/tests/test_coordinate_descent.py b/python/cuml/tests/test_coordinate_descent.py index 7b8fa5ca5f..84eed2dd1c 100644 --- a/python/cuml/tests/test_coordinate_descent.py +++ b/python/cuml/tests/test_coordinate_descent.py @@ -13,17 +13,16 @@ # limitations under the License. 
# -import pytest -import numpy as np - -from cuml import Lasso as cuLasso -from cuml.linear_model import ElasticNet as cuElasticNet -from cuml.metrics import r2_score -from cuml.testing.utils import unit_param, quality_param, stress_param - -from sklearn.linear_model import Lasso, ElasticNet -from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split +from sklearn.datasets import make_regression +from sklearn.linear_model import Lasso, ElasticNet +from cuml.testing.utils import unit_param, quality_param, stress_param +from cuml.metrics import r2_score +from cuml.linear_model import ElasticNet as cuElasticNet +from cuml import Lasso as cuLasso +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') @pytest.mark.parametrize('datatype', [np.float32, np.float64]) diff --git a/python/cuml/tests/test_cuml_descr_decor.py b/python/cuml/tests/test_cuml_descr_decor.py index de0edb31e2..63f8c738a2 100644 --- a/python/cuml/tests/test_cuml_descr_decor.py +++ b/python/cuml/tests/test_cuml_descr_decor.py @@ -13,18 +13,20 @@ # limitations under the License. # +from cuml.internals.input_utils import input_to_cuml_array +from cuml.internals.input_utils import determine_array_type +from cuml.internals.input_utils import determine_array_dtype +from cuml.common.array_descriptor import CumlArrayDescriptor +from cuml.internals.array import CumlArray +import pytest +from cuml.internals.safe_imports import cpu_only_import import pickle import cuml import cuml.internals -import cupy as cp -import numpy as np -import pytest -from cuml.internals.array import CumlArray -from cuml.common.array_descriptor import CumlArrayDescriptor -from cuml.internals.input_utils import determine_array_dtype -from cuml.internals.input_utils import determine_array_type -from cuml.internals.input_utils import input_to_cuml_array +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') test_input_types = ['numpy', 'numba', 'cupy', 'cudf'] diff --git a/python/cuml/tests/test_dataset_generator_types.py b/python/cuml/tests/test_dataset_generator_types.py index cd330213fb..3ac0dcf6a8 100644 --- a/python/cuml/tests/test_dataset_generator_types.py +++ b/python/cuml/tests/test_dataset_generator_types.py @@ -14,19 +14,20 @@ # limitations under the License. # -import cudf -import cupy as cp -import numba -import numpy as np -import pytest - -import cuml from cuml.datasets import ( make_arima, make_blobs, make_classification, make_regression ) +import cuml +import pytest +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +numba = gpu_only_import('numba') +np = cpu_only_import('numpy') TEST_OUTPUT_TYPES = ( diff --git a/python/cuml/tests/test_dbscan.py b/python/cuml/tests/test_dbscan.py index e2cb8f27b2..6dac2a2c19 100644 --- a/python/cuml/tests/test_dbscan.py +++ b/python/cuml/tests/test_dbscan.py @@ -13,19 +13,19 @@ # limitations under the License. 
# -import numpy as np -from numpy.testing import assert_raises -import pytest - -from cuml.testing.utils import get_handle -from cuml import DBSCAN as cuDBSCAN +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import pairwise_distances +from sklearn.datasets import make_blobs +from sklearn.cluster import DBSCAN as skDBSCAN from cuml.testing.utils import get_pattern, unit_param, \ quality_param, stress_param, array_equal, assert_dbscan_equal - -from sklearn.cluster import DBSCAN as skDBSCAN -from sklearn.datasets import make_blobs -from sklearn.metrics import pairwise_distances -from sklearn.preprocessing import StandardScaler +from cuml import DBSCAN as cuDBSCAN +from cuml.testing.utils import get_handle +import pytest +from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +assert_raises = cpu_only_import_from('numpy.testing', 'assert_raises') @pytest.mark.parametrize('max_mbytes_per_batch', [1e3, None]) @@ -149,9 +149,9 @@ def test_dbscan_cosine(nrows, max_mbytes_per_batch, out_dtype): @pytest.mark.parametrize("name", [ - 'noisy_moons', - 'blobs', - 'no_structure']) + 'noisy_moons', + 'blobs', + 'no_structure']) @pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), stress_param(500000)]) # Vary the eps to get a range of core point counts @@ -196,9 +196,9 @@ def test_dbscan_sklearn_comparison(name, nrows, eps): @pytest.mark.parametrize("name", [ - 'noisy_moons', - 'blobs', - 'no_structure']) + 'noisy_moons', + 'blobs', + 'no_structure']) def test_dbscan_default(name): default_base = {'quantile': .3, 'eps': .5, diff --git a/python/cuml/tests/test_device_selection.py b/python/cuml/tests/test_device_selection.py index e2d79413c3..69b87a2c87 100644 --- a/python/cuml/tests/test_device_selection.py +++ b/python/cuml/tests/test_device_selection.py @@ -14,30 +14,10 @@ # -import itertools as it -import pytest -import cuml -import numpy as np -import pandas as pd -import cudf -import pickle -import inspect -from importlib import import_module -from pytest_cases import fixture_union, pytest_fixture_plus -from sklearn.datasets import make_regression, make_blobs -from sklearn.decomposition import TruncatedSVD as skTruncatedSVD -from sklearn.decomposition import PCA as skPCA -from sklearn.linear_model import LinearRegression as skLinearRegression -from sklearn.linear_model import LogisticRegression as skLogisticRegression -from sklearn.linear_model import Lasso as skLasso -from sklearn.linear_model import ElasticNet as skElasticNet -from sklearn.linear_model import Ridge as skRidge -from sklearn.neighbors import NearestNeighbors as skNearestNeighbors -from umap import UMAP as refUMAP -from cuml.common.device_selection import DeviceType, using_device_type -from cuml.decomposition import PCA, TruncatedSVD -from cuml.internals.mem_type import MemoryType -from cuml.internals.memory_utils import using_memory_type +from cuml.testing.test_preproc_utils import to_output_type +from cuml.neighbors import NearestNeighbors +from cuml.metrics import trustworthiness +from cuml.manifold import UMAP from cuml.linear_model import ( ElasticNet, Lasso, @@ -45,10 +25,32 @@ LogisticRegression, Ridge ) -from cuml.manifold import UMAP -from cuml.metrics import trustworthiness -from cuml.neighbors import NearestNeighbors -from cuml.testing.test_preproc_utils import to_output_type +from cuml.internals.memory_utils import using_memory_type +from cuml.internals.mem_type import MemoryType +from 
cuml.decomposition import PCA, TruncatedSVD +from cuml.common.device_selection import DeviceType, using_device_type +from umap import UMAP as refUMAP +from sklearn.neighbors import NearestNeighbors as skNearestNeighbors +from sklearn.linear_model import Ridge as skRidge +from sklearn.linear_model import ElasticNet as skElasticNet +from sklearn.linear_model import Lasso as skLasso +from sklearn.linear_model import LogisticRegression as skLogisticRegression +from sklearn.linear_model import LinearRegression as skLinearRegression +from sklearn.decomposition import PCA as skPCA +from sklearn.decomposition import TruncatedSVD as skTruncatedSVD +from sklearn.datasets import make_regression, make_blobs +from pytest_cases import fixture_union, pytest_fixture_plus +from importlib import import_module +import inspect +import pickle +from cuml.internals.safe_imports import gpu_only_import +import itertools as it +import pytest +import cuml +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') +cudf = gpu_only_import('cudf') @pytest.mark.parametrize('input', [('cpu', DeviceType.host), @@ -152,11 +154,11 @@ def fixture_generation_helper(params): @pytest_fixture_plus(**fixture_generation_helper({ - 'input_type': ['numpy', 'dataframe', 'cupy', - 'cudf', 'numba'], - 'fit_intercept': [False, True], - 'normalize': [False, True] - })) + 'input_type': ['numpy', 'dataframe', 'cupy', + 'cudf', 'numba'], + 'fit_intercept': [False, True], + 'normalize': [False, True] +})) def linreg_test_data(request): kwargs = { 'fit_intercept': request.param['fit_intercept'], @@ -188,11 +190,11 @@ def linreg_test_data(request): @pytest_fixture_plus(**fixture_generation_helper({ - 'input_type': ['numpy', 'dataframe', 'cupy', - 'cudf', 'numba'], - 'penalty': ['none', 'l2'], - 'fit_intercept': [False, True] - })) + 'input_type': ['numpy', 'dataframe', 'cupy', + 'cudf', 'numba'], + 'penalty': ['none', 'l2'], + 'fit_intercept': [False, True] +})) def logreg_test_data(request): kwargs = { 'penalty': request.param['penalty'], @@ -227,11 +229,11 @@ def logreg_test_data(request): @pytest_fixture_plus(**fixture_generation_helper({ - 'input_type': ['numpy', 'dataframe', 'cupy', - 'cudf', 'numba'], - 'fit_intercept': [False, True], - 'selection': ['cyclic', 'random'] - })) + 'input_type': ['numpy', 'dataframe', 'cupy', + 'cudf', 'numba'], + 'fit_intercept': [False, True], + 'selection': ['cyclic', 'random'] +})) def lasso_test_data(request): kwargs = { 'fit_intercept': request.param['fit_intercept'], @@ -264,11 +266,11 @@ def lasso_test_data(request): @pytest_fixture_plus(**fixture_generation_helper({ - 'input_type': ['numpy', 'dataframe', 'cupy', - 'cudf', 'numba'], - 'fit_intercept': [False, True], - 'selection': ['cyclic', 'random'] - })) + 'input_type': ['numpy', 'dataframe', 'cupy', + 'cudf', 'numba'], + 'fit_intercept': [False, True], + 'selection': ['cyclic', 'random'] +})) def elasticnet_test_data(request): kwargs = { 'fit_intercept': request.param['fit_intercept'], @@ -301,10 +303,10 @@ def elasticnet_test_data(request): @pytest_fixture_plus(**fixture_generation_helper({ - 'input_type': ['numpy', 'dataframe', 'cupy', - 'cudf', 'numba'], - 'fit_intercept': [False, True] - })) + 'input_type': ['numpy', 'dataframe', 'cupy', + 'cudf', 'numba'], + 'fit_intercept': [False, True] +})) def ridge_test_data(request): kwargs = { 'fit_intercept': request.param['fit_intercept'], @@ -336,10 +338,10 @@ def ridge_test_data(request): 
@pytest_fixture_plus(**fixture_generation_helper({ - 'input_type': ['cupy'], - 'n_components': [2, 16], - 'init': ['spectral', 'random'] - })) + 'input_type': ['cupy'], + 'n_components': [2, 16], + 'init': ['spectral', 'random'] +})) def umap_test_data(request): kwargs = { 'n_neighbors': 12, @@ -375,10 +377,10 @@ def umap_test_data(request): @pytest_fixture_plus(**fixture_generation_helper({ - 'input_type': ['numpy', 'dataframe', 'cupy', - 'cudf', 'numba'], - 'n_components': [2, 8] - })) + 'input_type': ['numpy', 'dataframe', 'cupy', + 'cudf', 'numba'], + 'n_components': [2, 8] +})) def pca_test_data(request): kwargs = { 'n_components': request.param['n_components'], @@ -412,10 +414,10 @@ def pca_test_data(request): @pytest_fixture_plus(**fixture_generation_helper({ - 'input_type': ['numpy', 'dataframe', 'cupy', - 'cudf', 'numba'], - 'n_components': [2, 8] - })) + 'input_type': ['numpy', 'dataframe', 'cupy', + 'cudf', 'numba'], + 'n_components': [2, 8] +})) def tsvd_test_data(request): kwargs = { 'n_components': request.param['n_components'], @@ -448,11 +450,11 @@ def tsvd_test_data(request): @pytest_fixture_plus(**fixture_generation_helper({ - 'input_type': ['numpy', 'dataframe', 'cupy', - 'cudf', 'numba'], - 'metric': ['euclidean', 'cosine'], - 'n_neighbors': [3, 8] - })) + 'input_type': ['numpy', 'dataframe', 'cupy', + 'cudf', 'numba'], + 'metric': ['euclidean', 'cosine'], + 'n_neighbors': [3, 8] +})) def nn_test_data(request): kwargs = { 'metric': request.param['metric'], diff --git a/python/cuml/tests/test_doctest.py b/python/cuml/tests/test_doctest.py index 09ed342842..32c0049872 100644 --- a/python/cuml/tests/test_doctest.py +++ b/python/cuml/tests/test_doctest.py @@ -13,15 +13,17 @@ # limitations under the License. # +from cuml.internals.safe_imports import gpu_only_import +import pytest import contextlib import doctest import inspect import io import cuml -import numpy as np -import pytest -import cudf +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cudf = gpu_only_import('cudf') def _name_in_all(parent, name): diff --git a/python/cuml/tests/test_fil.py b/python/cuml/tests/test_fil.py index 99f62990f0..edcaa7e462 100644 --- a/python/cuml/tests/test_fil.py +++ b/python/cuml/tests/test_fil.py @@ -13,24 +13,24 @@ # limitations under the License. 
# -import numpy as np -import pytest -import os -import pandas as pd -from math import ceil - -from cuml import ForestInference -from cuml.testing.utils import array_equal, unit_param, \ - quality_param, stress_param -from cuml.internals.import_utils import has_xgboost -# from cuml.internals.import_utils import has_lightgbm - -from sklearn.datasets import make_classification, make_regression +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score, mean_squared_error from sklearn.ensemble import GradientBoostingClassifier, \ GradientBoostingRegressor, RandomForestClassifier, RandomForestRegressor, \ ExtraTreesClassifier, ExtraTreesRegressor -from sklearn.metrics import accuracy_score, mean_squared_error -from sklearn.model_selection import train_test_split +from sklearn.datasets import make_classification, make_regression +from cuml.internals.import_utils import has_xgboost +from cuml.testing.utils import array_equal, unit_param, \ + quality_param, stress_param +from cuml import ForestInference +from math import ceil +import os +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') + +# from cuml.internals.import_utils import has_lightgbm if has_xgboost(): @@ -366,7 +366,7 @@ def small_classifier_and_preds(tmpdir_factory, request): ext = 'json' if request.param == 'json' else 'model' model_type = 'xgboost_json' if request.param == 'json' else 'xgboost' model_path = str(tmpdir_factory.mktemp("models").join( - f"small_class.{ext}")) + f"small_class.{ext}")) bst = _build_and_save_xgboost(model_path, X, y) # just do within-sample since it's not an accuracy test dtrain = xgb.DMatrix(X, label=y) diff --git a/python/cuml/tests/test_hdbscan.py b/python/cuml/tests/test_hdbscan.py index 73ca7c2d8e..7af63175fb 100644 --- a/python/cuml/tests/test_hdbscan.py +++ b/python/cuml/tests/test_hdbscan.py @@ -13,6 +13,12 @@ # limitations under the License. 
# +from cuml.internals.safe_imports import gpu_only_import +from sklearn.model_selection import train_test_split +from sklearn import datasets +from hdbscan.plots import CondensedTree +import hdbscan +from cuml.internals import logger import pytest @@ -24,22 +30,17 @@ from cuml.metrics import adjusted_rand_score from cuml.testing.utils import get_pattern, array_equal -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from cuml.internals import logger -import hdbscan -from hdbscan.plots import CondensedTree - -from sklearn import datasets -from sklearn.model_selection import train_test_split -import cupy as cp +cp = gpu_only_import('cupy') test_datasets = { - "digits": datasets.load_digits(), - "boston": datasets.load_boston(), - "diabetes": datasets.load_diabetes(), - "cancer": datasets.load_breast_cancer(), + "digits": datasets.load_digits(), + "boston": datasets.load_boston(), + "diabetes": datasets.load_diabetes(), + "cancer": datasets.load_breast_cancer(), } dataset_names = ['noisy_circles', 'noisy_moons', 'varied'] @@ -201,7 +202,8 @@ def test_hdbscan_blobs(nrows, ncols, nclusters, assert(len(np.unique(sk_agg.labels_)) == len(cp.unique(cuml_agg.labels_))) assert np.allclose(np.sort(sk_agg.cluster_persistence_), - np.sort(cuml_agg.cluster_persistence_), rtol=0.01, atol=0.01) + np.sort(cuml_agg.cluster_persistence_), + rtol=0.01, atol=0.01) @pytest.mark.skipif(cp.cuda.driver.get_build_version() <= 11020, @@ -255,7 +257,8 @@ def test_hdbscan_sklearn_datasets(dataset, assert(adjusted_rand_score(cuml_agg.labels_, sk_agg.labels_) > 0.85) assert np.allclose(np.sort(sk_agg.cluster_persistence_), - np.sort(cuml_agg.cluster_persistence_), rtol=0.1, atol=0.1) + np.sort(cuml_agg.cluster_persistence_), + rtol=0.1, atol=0.1) @pytest.mark.parametrize('dataset', test_datasets.values()) @@ -355,7 +358,8 @@ def test_hdbscan_cluster_patterns(dataset, nrows, assert(adjusted_rand_score(cuml_agg.labels_, sk_agg.labels_) > 0.95) assert np.allclose(np.sort(sk_agg.cluster_persistence_), - np.sort(cuml_agg.cluster_persistence_), rtol=0.1, atol=0.1) + np.sort(cuml_agg.cluster_persistence_), + rtol=0.1, atol=0.1) @pytest.mark.parametrize('nrows', [1000]) diff --git a/python/cuml/tests/test_holtwinters.py b/python/cuml/tests/test_holtwinters.py index 006cdb7da6..4e8a8afa35 100644 --- a/python/cuml/tests/test_holtwinters.py +++ b/python/cuml/tests/test_holtwinters.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import pytest - +from sklearn.metrics import r2_score +from statsmodels.tsa.holtwinters import ExponentialSmoothing as sm_ES from cuml.tsa.holtwinters import ExponentialSmoothing as cuml_ES +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from statsmodels.tsa.holtwinters import ExponentialSmoothing as sm_ES -from sklearn.metrics import r2_score airpassengers = [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118, 115, 126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140, diff --git a/python/cuml/tests/test_incremental_pca.py b/python/cuml/tests/test_incremental_pca.py index 3793ace944..64e3432736 100644 --- a/python/cuml/tests/test_incremental_pca.py +++ b/python/cuml/tests/test_incremental_pca.py @@ -14,18 +14,16 @@ # limitations under the License. 
# -import pytest -import cupy as cp -import cupyx - -from sklearn.decomposition import IncrementalPCA as skIPCA - -from cuml.datasets import make_blobs -from cuml.decomposition import IncrementalPCA as cuIPCA -from cuml.decomposition.incremental_pca import _svd_flip - -from cuml.testing.utils import array_equal from cuml.common.exceptions import NotFittedError +from cuml.testing.utils import array_equal +from cuml.decomposition.incremental_pca import _svd_flip +from cuml.decomposition import IncrementalPCA as cuIPCA +from cuml.datasets import make_blobs +from sklearn.decomposition import IncrementalPCA as skIPCA +import pytest +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') @pytest.mark.parametrize( diff --git a/python/cuml/tests/test_input_utils.py b/python/cuml/tests/test_input_utils.py index 0c556da1fd..a143ff5882 100644 --- a/python/cuml/tests/test_input_utils.py +++ b/python/cuml/tests/test_input_utils.py @@ -14,20 +14,24 @@ # limitations under the License. # +from pandas import Series as pdSeries +from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.input_utils import convert_dtype +from cuml.common import has_cupy +from cuml.internals.input_utils import input_to_cupy_array +from cuml.common import input_to_host_array +from cuml.common import input_to_cuml_array, CumlArray +from cuml.internals.safe_imports import cpu_only_import import pytest -import cudf -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') -from cuml.common import input_to_cuml_array, CumlArray -from cuml.common import input_to_host_array -from cuml.internals.input_utils import input_to_cupy_array -from cuml.common import has_cupy -from cuml.internals.input_utils import convert_dtype -from numba import cuda as nbcuda -from pandas import DataFrame as pdDF -from pandas import Series as pdSeries +nbcuda = gpu_only_import_from('numba', 'cuda') +pdDF = cpu_only_import_from('pandas', 'DataFrame') ############################################################################### diff --git a/python/cuml/tests/test_kernel_density.py b/python/cuml/tests/test_kernel_density.py index eade81c55b..9bad00c7b4 100644 --- a/python/cuml/tests/test_kernel_density.py +++ b/python/cuml/tests/test_kernel_density.py @@ -14,16 +14,17 @@ # limitations under the License. 
# +from cuml.testing.utils import as_type +from sklearn.model_selection import GridSearchCV +import pytest +from hypothesis.extra.numpy import arrays +from hypothesis import given, settings, assume, strategies as st from cuml.neighbors import KernelDensity, VALID_KERNELS, logsumexp_kernel from cuml.common.exceptions import NotFittedError from sklearn.metrics import pairwise_distances as skl_pairwise_distances from sklearn.neighbors._ball_tree import kernel_norm -import numpy as np -from hypothesis import given, settings, assume, strategies as st -from hypothesis.extra.numpy import arrays -import pytest -from sklearn.model_selection import GridSearchCV -from cuml.testing.utils import as_type +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') # not in log probability space diff --git a/python/cuml/tests/test_kernel_ridge.py b/python/cuml/tests/test_kernel_ridge.py index 062fdb2d6f..aab62ca9c2 100644 --- a/python/cuml/tests/test_kernel_ridge.py +++ b/python/cuml/tests/test_kernel_ridge.py @@ -12,20 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import cupy as cp -from cupy import linalg -import numpy as np -from numba import cuda -from cuml import KernelRidge as cuKernelRidge -from cuml.metrics import pairwise_kernels, PAIRWISE_KERNEL_FUNCTIONS -from sklearn.metrics.pairwise import pairwise_kernels as skl_pairwise_kernels -import pytest -import math -import inspect -from sklearn.kernel_ridge import KernelRidge as sklKernelRidge -from hypothesis import given, settings, assume, strategies as st -from hypothesis.extra.numpy import arrays from cuml.testing.utils import as_type +from hypothesis.extra.numpy import arrays +from hypothesis import given, settings, assume, strategies as st +from sklearn.kernel_ridge import KernelRidge as sklKernelRidge +import inspect +import math +import pytest +from sklearn.metrics.pairwise import pairwise_kernels as skl_pairwise_kernels +from cuml.metrics import pairwise_kernels, PAIRWISE_KERNEL_FUNCTIONS +from cuml import KernelRidge as cuKernelRidge +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +linalg = gpu_only_import_from('cupy', 'linalg') +np = cpu_only_import('numpy') +cuda = gpu_only_import_from('numba', 'cuda') def gradient_norm(model, X, y, K, sw=None): diff --git a/python/cuml/tests/test_kmeans.py b/python/cuml/tests/test_kmeans.py index 01a26165b1..833a92bf3f 100644 --- a/python/cuml/tests/test_kmeans.py +++ b/python/cuml/tests/test_kmeans.py @@ -13,23 +13,23 @@ # limitations under the License. 
# +from cuml.internals.safe_imports import gpu_only_import +from sklearn.preprocessing import StandardScaler +from sklearn.metrics import adjusted_rand_score +from sklearn import cluster +from cuml.testing.utils import get_pattern, unit_param, \ + quality_param, stress_param, array_equal +from cuml.datasets import make_blobs +import pytest import random import cuml import cuml.internals.logger as logger -import numpy as np -import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from cuml.datasets import make_blobs - -from cuml.testing.utils import get_pattern, unit_param, \ - quality_param, stress_param, array_equal - -from sklearn import cluster -from sklearn.metrics import adjusted_rand_score -from sklearn.preprocessing import StandardScaler -import cupy as cp +cp = gpu_only_import('cupy') dataset_names = ['blobs', 'noisy_circles', 'noisy_moons', 'varied', 'aniso'] diff --git a/python/cuml/tests/test_kneighbors_classifier.py b/python/cuml/tests/test_kneighbors_classifier.py index 8627139fc9..453c72c3bd 100644 --- a/python/cuml/tests/test_kneighbors_classifier.py +++ b/python/cuml/tests/test_kneighbors_classifier.py @@ -14,20 +14,22 @@ # limitations under the License. # +from cuml.testing.utils import array_equal +from cuml.internals.safe_imports import cpu_only_import +from sklearn.datasets import make_blobs +from sklearn.neighbors import KNeighborsClassifier as skKNN +from cuml.neighbors import KNeighborsClassifier as cuKNN +import cuml import pytest -import cudf +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') -import cuml -from cuml.neighbors import KNeighborsClassifier as cuKNN -from sklearn.neighbors import KNeighborsClassifier as skKNN -from sklearn.datasets import make_blobs -import numpy as np -from cuml.testing.utils import array_equal +np = cpu_only_import('numpy') -import pandas as pd -import cupy as cp +pd = cpu_only_import('pandas') +cp = gpu_only_import('cupy') def _build_train_test_data(X, y, datatype, train_ratio=0.9): diff --git a/python/cuml/tests/test_kneighbors_regressor.py b/python/cuml/tests/test_kneighbors_regressor.py index 0f04764414..346758383c 100644 --- a/python/cuml/tests/test_kneighbors_regressor.py +++ b/python/cuml/tests/test_kneighbors_regressor.py @@ -14,23 +14,26 @@ # limitations under the License. 
# +from cuml.testing.utils import array_equal +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import cpu_only_import_from +from sklearn.model_selection import train_test_split +from sklearn.utils.validation import check_random_state +from sklearn.datasets import make_blobs +from cuml.neighbors import KNeighborsRegressor as cuKNN import pytest -import cudf +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') -from cuml.neighbors import KNeighborsRegressor as cuKNN -from sklearn.datasets import make_blobs +assert_array_almost_equal = cpu_only_import_from( + 'numpy.testing', 'assert_array_almost_equal') -from sklearn.utils.validation import check_random_state -from sklearn.model_selection import train_test_split -from numpy.testing import assert_array_almost_equal +np = cpu_only_import('numpy') -import numpy as np - -from cuml.testing.utils import array_equal -import cupy as cp +cp = gpu_only_import('cupy') def test_kneighbors_regressor(n_samples=40, diff --git a/python/cuml/tests/test_label_binarizer.py b/python/cuml/tests/test_label_binarizer.py index 522e677859..a38890ff66 100644 --- a/python/cuml/tests/test_label_binarizer.py +++ b/python/cuml/tests/test_label_binarizer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from cuml.internals.safe_imports import gpu_only_import import pytest from cuml.preprocessing import LabelBinarizer from cuml.testing.utils import array_equal @@ -19,8 +20,9 @@ from sklearn.preprocessing import LabelBinarizer as skLB -import numpy as np -import cupy as cp +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') @pytest.mark.parametrize( diff --git a/python/cuml/tests/test_label_encoder.py b/python/cuml/tests/test_label_encoder.py index 6662e9d1f7..82fff7aaae 100644 --- a/python/cuml/tests/test_label_encoder.py +++ b/python/cuml/tests/test_label_encoder.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from cuml.preprocessing.LabelEncoder import LabelEncoder -import cudf -import numpy as np -import cupy as cp - -import pytest from cuml.common.exceptions import NotFittedError +import pytest +from cuml.internals.safe_imports import cpu_only_import +from cuml.preprocessing.LabelEncoder import LabelEncoder +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') def _df_to_similarity_mat(df): @@ -82,23 +83,23 @@ def test_labelencoder_unfitted(): @pytest.mark.parametrize("use_fit_transform", [False, True]) @pytest.mark.parametrize( - "orig_label, ord_label, expected_reverted, bad_ord_label", - [(cudf.Series(['a', 'b', 'c']), - cudf.Series([2, 1, 2, 0]), - cudf.Series(['c', 'b', 'c', 'a']), - cudf.Series([-1, 1, 2, 0])), - (cudf.Series(['Tokyo', 'Paris', 'Austin']), - cudf.Series([0, 2, 0]), - cudf.Series(['Austin', 'Tokyo', 'Austin']), - cudf.Series([0, 1, 2, 3])), - (cudf.Series(['a', 'b', 'c1']), - cudf.Series([2, 1]), - cudf.Series(['c1', 'b']), - cudf.Series([0, 1, 2, 3])), - (cudf.Series(['1.09', '0.09', '.09', '09']), - cudf.Series([0, 1, 2, 3]), - cudf.Series(['.09', '0.09', '09', '1.09']), - cudf.Series([0, 1, 2, 3, 4]))]) + "orig_label, ord_label, expected_reverted, bad_ord_label", + [(cudf.Series(['a', 'b', 'c']), + cudf.Series([2, 1, 2, 0]), + cudf.Series(['c', 'b', 'c', 'a']), + cudf.Series([-1, 1, 2, 0])), + (cudf.Series(['Tokyo', 'Paris', 'Austin']), + cudf.Series([0, 2, 0]), + cudf.Series(['Austin', 'Tokyo', 'Austin']), + cudf.Series([0, 1, 2, 3])), + (cudf.Series(['a', 'b', 'c1']), + cudf.Series([2, 1]), + cudf.Series(['c1', 'b']), + cudf.Series([0, 1, 2, 3])), + (cudf.Series(['1.09', '0.09', '.09', '09']), + cudf.Series([0, 1, 2, 3]), + cudf.Series(['.09', '0.09', '09', '1.09']), + cudf.Series([0, 1, 2, 3, 4]))]) def test_inverse_transform(orig_label, ord_label, expected_reverted, bad_ord_label, use_fit_transform): @@ -195,15 +196,15 @@ def test_labelencoder_fit_transform_cupy_numpy(length, cardinality, dtype): @pytest.mark.parametrize("use_fit_transform", [False, True]) @pytest.mark.parametrize( - "orig_label, ord_label, expected_reverted, bad_ord_label", - [(cp.array([7, 5, 3, 1]), - cp.array([2, 1, 2, 3, 0]), - cp.array([5, 3, 5, 7, 1]), - cp.array([0, 1, 2, 3, 4])), - (np.array([1.09, .09, .09, .09]), - np.array([1, 1, 0, 0, 1]), - cp.array([1.09, 1.09, .09, .09, 1.09]), - np.array([0, 1, 1, 1, 2]))]) + "orig_label, ord_label, expected_reverted, bad_ord_label", + [(cp.array([7, 5, 3, 1]), + cp.array([2, 1, 2, 3, 0]), + cp.array([5, 3, 5, 7, 1]), + cp.array([0, 1, 2, 3, 4])), + (np.array([1.09, .09, .09, .09]), + np.array([1, 1, 0, 0, 1]), + cp.array([1.09, 1.09, .09, .09, 1.09]), + np.array([0, 1, 1, 1, 2]))]) def test_inverse_transform_cupy_numpy(orig_label, ord_label, expected_reverted, bad_ord_label, diff --git a/python/cuml/tests/test_lars.py b/python/cuml/tests/test_lars.py index 011fef07bf..d72846b38e 100644 --- a/python/cuml/tests/test_lars.py +++ b/python/cuml/tests/test_lars.py @@ -12,21 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import cupy as cp -import numpy as np -import pytest -import sys - -from cuml.experimental.linear_model import Lars as cuLars +from sklearn.linear_model import Lars as skLars +from sklearn.datasets import load_boston from cuml.testing.utils import ( array_equal, unit_param, quality_param, stress_param ) +from cuml.experimental.linear_model import Lars as cuLars +import sys +import pytest +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') -from sklearn.datasets import load_boston -from sklearn.linear_model import Lars as skLars # As tests directory is not a module, we need to add it to the path sys.path.insert(0, '.') diff --git a/python/cuml/tests/test_linear_model.py b/python/cuml/tests/test_linear_model.py index 695b7e10e9..b50177a5a1 100644 --- a/python/cuml/tests/test_linear_model.py +++ b/python/cuml/tests/test_linear_model.py @@ -12,31 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from functools import lru_cache -import cupy as cp -import numpy as np -import pytest -from hypothesis import ( - assume, - example, - given, - settings, - strategies as st, - target -) -from hypothesis.extra.numpy import floating_dtypes -from distutils.version import LooseVersion -import cudf -from cuml import ElasticNet as cuElasticNet -from cuml import LinearRegression as cuLinearRegression -from cuml import LogisticRegression as cuLog -from cuml import Ridge as cuRidge -from cuml.internals.array import elements_in_representable_range -from cuml.testing.strategies import ( - regression_datasets, - split_datasets, - standard_regression_datasets, -) +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LogisticRegression as skLog +from sklearn.linear_model import Ridge as skRidge +from sklearn.linear_model import LinearRegression as skLinearRegression +from sklearn.datasets import load_breast_cancer +from sklearn.datasets import make_regression, make_classification, load_digits +import sklearn +from cuml.internals.safe_imports import cpu_only_import_from from cuml.testing.utils import ( array_difference, array_equal, @@ -46,17 +29,36 @@ quality_param, stress_param, ) -import rmm - -from scipy.sparse import csr_matrix +from cuml.testing.strategies import ( + regression_datasets, + split_datasets, + standard_regression_datasets, +) +from cuml.internals.array import elements_in_representable_range +from cuml import Ridge as cuRidge +from cuml import LogisticRegression as cuLog +from cuml import LinearRegression as cuLinearRegression +from cuml import ElasticNet as cuElasticNet +from distutils.version import LooseVersion +from hypothesis.extra.numpy import floating_dtypes +from hypothesis import ( + assume, + example, + given, + settings, + strategies as st, + target +) +import pytest +from cuml.internals.safe_imports import cpu_only_import +from functools import lru_cache +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') +cudf = gpu_only_import('cudf') +rmm = gpu_only_import('rmm') -import sklearn -from sklearn.datasets import make_regression, make_classification, load_digits -from sklearn.datasets import load_breast_cancer -from sklearn.linear_model import LinearRegression as skLinearRegression -from sklearn.linear_model import Ridge as skRidge -from sklearn.linear_model import LogisticRegression as skLog -from 
sklearn.model_selection import train_test_split +csr_matrix = cpu_only_import_from('scipy.sparse', 'csr_matrix') pytestmark = pytest.mark.filterwarnings("ignore: Regressors in active " diff --git a/python/cuml/tests/test_linear_svm.py b/python/cuml/tests/test_linear_svm.py index 221789d895..ff516c68b6 100644 --- a/python/cuml/tests/test_linear_svm.py +++ b/python/cuml/tests/test_linear_svm.py @@ -13,22 +13,23 @@ # limitations under the License. # +import cuml.internals.logger as logger +import cuml +import cuml.svm as cu +import sklearn.svm as sk +from cuml.testing.utils import unit_param, quality_param, stress_param +from queue import Empty +import cuml.model_selection as dsel +import cuml.datasets as data +import pytest +from cuml.internals.safe_imports import cpu_only_import import gc import multiprocessing as mp import time import math -import cupy as cp -import numpy as np -import pytest -import cuml.datasets as data -import cuml.model_selection as dsel -from queue import Empty -from cuml.testing.utils import unit_param, quality_param, stress_param - -import sklearn.svm as sk -import cuml.svm as cu -import cuml -import cuml.internals.logger as logger +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') SEED = 42 @@ -104,22 +105,22 @@ def make_classification_dataset(datatype, nrows, ncols, nclasses): max(nclasses * 2, math.ceil(ncols / 10)))) n_clusters_per_class = min( 2, max(1, int(2**n_real_features / nclasses)) - ) + ) n_redundant = min( ncols - n_real_features, max(2, math.ceil(ncols / 20))) try: X, y = data.make_classification( - dtype=datatype, - n_samples=nrows + 1000, - n_features=ncols, - random_state=SEED, - class_sep=1.0, - n_informative=n_real_features, - n_clusters_per_class=n_clusters_per_class, - n_redundant=n_redundant, - n_classes=nclasses - ) + dtype=datatype, + n_samples=nrows + 1000, + n_features=ncols, + random_state=SEED, + class_sep=1.0, + n_informative=n_real_features, + n_clusters_per_class=n_clusters_per_class, + n_redundant=n_redundant, + n_classes=nclasses + ) r = dsel.train_test_split(X, y, random_state=SEED, train_size=nrows) diff --git a/python/cuml/tests/test_make_blobs.py b/python/cuml/tests/test_make_blobs.py index 19d6036d00..c7171bb346 100644 --- a/python/cuml/tests/test_make_blobs.py +++ b/python/cuml/tests/test_make_blobs.py @@ -15,7 +15,8 @@ import cuml import pytest -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') # Testing parameters for scalar parameter tests diff --git a/python/cuml/tests/test_make_classification.py b/python/cuml/tests/test_make_classification.py index 6f06086ba1..0fc402ef92 100644 --- a/python/cuml/tests/test_make_classification.py +++ b/python/cuml/tests/test_make_classification.py @@ -14,13 +14,14 @@ # limitations under the License. 
# +from cuml.testing.utils import array_equal +from cuml.datasets.classification import make_classification +from cuml.internals.safe_imports import gpu_only_import import pytest from functools import partial -import numpy as np -import cupy as cp - -from cuml.datasets.classification import make_classification -from cuml.testing.utils import array_equal +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') @pytest.mark.parametrize('n_samples', [500, 1000]) diff --git a/python/cuml/tests/test_mbsgd_classifier.py b/python/cuml/tests/test_mbsgd_classifier.py index 0fda4b7724..ee168bb799 100644 --- a/python/cuml/tests/test_mbsgd_classifier.py +++ b/python/cuml/tests/test_mbsgd_classifier.py @@ -12,17 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import pytest -import cupy as cp - -from cuml.linear_model import MBSGDClassifier as cumlMBSGClassifier -from cuml.testing.utils import unit_param, quality_param, stress_param - -from sklearn.linear_model import SGDClassifier -from cuml.datasets import make_classification -from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score +from cuml.datasets import make_classification +from sklearn.linear_model import SGDClassifier +from cuml.testing.utils import unit_param, quality_param, stress_param +from cuml.linear_model import MBSGDClassifier as cumlMBSGClassifier +from cuml.internals.safe_imports import gpu_only_import +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') @pytest.fixture(scope="module", params=[ diff --git a/python/cuml/tests/test_mbsgd_regressor.py b/python/cuml/tests/test_mbsgd_regressor.py index 6787f294f3..e1efe6299a 100644 --- a/python/cuml/tests/test_mbsgd_regressor.py +++ b/python/cuml/tests/test_mbsgd_regressor.py @@ -12,17 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import pytest -import cupy as cp - -from cuml.linear_model import MBSGDRegressor as cumlMBSGRegressor -from cuml.metrics import r2_score -from cuml.testing.utils import unit_param, quality_param, stress_param - -from sklearn.linear_model import SGDRegressor -from cuml.datasets import make_regression from sklearn.model_selection import train_test_split +from cuml.datasets import make_regression +from sklearn.linear_model import SGDRegressor +from cuml.testing.utils import unit_param, quality_param, stress_param +from cuml.metrics import r2_score +from cuml.linear_model import MBSGDRegressor as cumlMBSGRegressor +from cuml.internals.safe_imports import gpu_only_import +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') @pytest.fixture(scope="module", params=[ diff --git a/python/cuml/tests/test_meta_estimators.py b/python/cuml/tests/test_meta_estimators.py index 58b1f392b5..2a36b7374a 100644 --- a/python/cuml/tests/test_meta_estimators.py +++ b/python/cuml/tests/test_meta_estimators.py @@ -14,21 +14,18 @@ # limitations under the License. 
# +from cuml.svm import SVC +from cuml.preprocessing import StandardScaler +from sklearn.datasets import load_iris +from cuml.model_selection import train_test_split +from cuml.datasets import make_regression, make_classification +from cuml.testing.utils import ClassEnumerator +from cuml.model_selection import GridSearchCV +from cuml.pipeline import Pipeline, make_pipeline import pytest import cuml -import cupy - -from cuml.pipeline import Pipeline, make_pipeline -from cuml.model_selection import GridSearchCV - -from cuml.testing.utils import ClassEnumerator - -from cuml.datasets import make_regression, make_classification -from cuml.model_selection import train_test_split -from sklearn.datasets import load_iris - -from cuml.preprocessing import StandardScaler -from cuml.svm import SVC +from cuml.internals.safe_imports import gpu_only_import +cupy = gpu_only_import('cupy') def test_pipeline(): diff --git a/python/cuml/tests/test_metrics.py b/python/cuml/tests/test_metrics.py index f34c39e75b..28d5a26f1b 100644 --- a/python/cuml/tests/test_metrics.py +++ b/python/cuml/tests/test_metrics.py @@ -14,6 +14,56 @@ # limitations under the License. # +import platform +from cuml.metrics.cluster import v_measure_score +from sklearn.metrics.cluster import v_measure_score as sklearn_v_measure_score +from scipy.special import rel_entr as scipy_kl_divergence +from sklearn.metrics import pairwise_distances as sklearn_pairwise_distances +from cuml.metrics import pairwise_distances, sparse_pairwise_distances, \ + PAIRWISE_DISTANCE_METRICS, PAIRWISE_DISTANCE_SPARSE_METRICS +from sklearn.metrics import precision_recall_curve \ + as sklearn_precision_recall_curve +from sklearn.metrics import roc_auc_score as sklearn_roc_auc_score +from cuml.metrics import log_loss +from cuml.metrics import precision_recall_curve +from cuml.metrics import roc_auc_score +from cuml.common.sparsefuncs import csr_row_normalize_l1 +from cuml.common import has_scipy +from sklearn.metrics import mean_squared_log_error as sklearn_msle +from sklearn.metrics import mean_absolute_error as sklearn_mae +from cuml.metrics import confusion_matrix +from sklearn.metrics import confusion_matrix as sk_confusion_matrix +from sklearn.metrics import mean_squared_error as sklearn_mse +from cuml.metrics.regression import mean_squared_error, \ + mean_squared_log_error, mean_absolute_error +from cuml.model_selection import train_test_split +from cuml.metrics.cluster import entropy +from cuml.metrics import kl_divergence as cu_kl_divergence +from cuml.metrics import hinge_loss as cuml_hinge +from cuml import LogisticRegression as cu_log +from sklearn import preprocessing +from sklearn.preprocessing import StandardScaler +from sklearn.metrics.cluster import silhouette_samples as sk_silhouette_samples +from sklearn.metrics.cluster import silhouette_score as sk_silhouette_score +from sklearn.metrics.cluster import mutual_info_score as sk_mutual_info_score +from sklearn.metrics.cluster import completeness_score as sk_completeness_score +from sklearn.metrics.cluster import homogeneity_score as sk_homogeneity_score +from sklearn.metrics.cluster import adjusted_rand_score as sk_ars +from sklearn.metrics import log_loss as sklearn_log_loss +from sklearn.metrics import accuracy_score as sk_acc_score +from sklearn.datasets import make_classification, make_blobs +from sklearn.metrics import hinge_loss as sk_hinge +from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.testing.utils 
import get_handle, get_pattern, array_equal, \ + unit_param, quality_param, stress_param, generate_random_labels, \ + score_labeling_with_handle +from cuml.metrics.cluster import silhouette_samples as cu_silhouette_samples +from cuml.metrics.cluster import silhouette_score as cu_silhouette_score +from cuml.metrics import accuracy_score as cu_acc_score +from cuml.metrics.cluster import adjusted_rand_score as cu_ars +from cuml.ensemble import RandomForestClassifier as curfc +from cuml.internals.safe_imports import cpu_only_import import pytest import random @@ -22,69 +72,20 @@ import cuml import cuml.internals.logger as logger -import cupy as cp -import cupyx -import numpy as np -import cudf - -from cuml.ensemble import RandomForestClassifier as curfc -from cuml.metrics.cluster import adjusted_rand_score as cu_ars -from cuml.metrics import accuracy_score as cu_acc_score -from cuml.metrics.cluster import silhouette_score as cu_silhouette_score -from cuml.metrics.cluster import silhouette_samples as cu_silhouette_samples -from cuml.testing.utils import get_handle, get_pattern, array_equal, \ - unit_param, quality_param, stress_param, generate_random_labels, \ - score_labeling_with_handle +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') +np = cpu_only_import('numpy') +cudf = gpu_only_import('cudf') -from numba import cuda -from numpy.testing import assert_almost_equal -from sklearn.metrics import hinge_loss as sk_hinge -from sklearn.datasets import make_classification, make_blobs -from sklearn.metrics import accuracy_score as sk_acc_score -from sklearn.metrics import log_loss as sklearn_log_loss -from sklearn.metrics.cluster import adjusted_rand_score as sk_ars -from sklearn.metrics.cluster import homogeneity_score as sk_homogeneity_score -from sklearn.metrics.cluster import completeness_score as sk_completeness_score -from sklearn.metrics.cluster import mutual_info_score as sk_mutual_info_score -from sklearn.metrics.cluster import silhouette_score as sk_silhouette_score -from sklearn.metrics.cluster import silhouette_samples as sk_silhouette_samples -from sklearn.preprocessing import StandardScaler -from sklearn import preprocessing +cuda = gpu_only_import_from('numba', 'cuda') +assert_almost_equal = cpu_only_import_from( + 'numpy.testing', 'assert_almost_equal') -from cuml import LogisticRegression as cu_log -from cuml.metrics import hinge_loss as cuml_hinge -from cuml.metrics import kl_divergence as cu_kl_divergence -from cuml.metrics.cluster import entropy -from cuml.model_selection import train_test_split -from cuml.metrics.regression import mean_squared_error, \ - mean_squared_log_error, mean_absolute_error -from sklearn.metrics import mean_squared_error as sklearn_mse -from sklearn.metrics import confusion_matrix as sk_confusion_matrix - -from cuml.metrics import confusion_matrix -from sklearn.metrics import mean_absolute_error as sklearn_mae -from sklearn.metrics import mean_squared_log_error as sklearn_msle - -from cuml.common import has_scipy -from cuml.common.sparsefuncs import csr_row_normalize_l1 - -from cuml.metrics import roc_auc_score -from cuml.metrics import precision_recall_curve -from cuml.metrics import log_loss -from sklearn.metrics import roc_auc_score as sklearn_roc_auc_score -from sklearn.metrics import precision_recall_curve \ - as sklearn_precision_recall_curve -from cuml.metrics import pairwise_distances, sparse_pairwise_distances, \ - PAIRWISE_DISTANCE_METRICS, PAIRWISE_DISTANCE_SPARSE_METRICS 
-from sklearn.metrics import pairwise_distances as sklearn_pairwise_distances -from scipy.spatial import distance as scipy_pairwise_distances -from scipy.special import rel_entr as scipy_kl_divergence -from sklearn.metrics.cluster import v_measure_score as sklearn_v_measure_score -from cuml.metrics.cluster import v_measure_score +scipy_pairwise_distances = cpu_only_import_from('scipy.spatial', 'distance') -import platform IS_ARM = platform.processor() == "aarch64" diff --git a/python/cuml/tests/test_module_config.py b/python/cuml/tests/test_module_config.py index 9eb5f675b8..77fc0c08fa 100644 --- a/python/cuml/tests/test_module_config.py +++ b/python/cuml/tests/test_module_config.py @@ -14,15 +14,16 @@ # limitations under the License. # -import pytest - -import cudf +from numba.cuda import is_cuda_array, as_cuda_array +from cuml.internals.safe_imports import cpu_only_import import cuml -import cupy as cp -import numpy as np -import pandas as pd +import pytest -from numba.cuda import is_cuda_array, as_cuda_array +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') ############################################################################### diff --git a/python/cuml/tests/test_multiclass.py b/python/cuml/tests/test_multiclass.py index ebd8f44081..ad67fdcaa7 100644 --- a/python/cuml/tests/test_multiclass.py +++ b/python/cuml/tests/test_multiclass.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import numpy as np -import pytest +from cuml import LogisticRegression as cuLog +from cuml import multiclass as cu_multiclass import sys +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from cuml import multiclass as cu_multiclass -from cuml import LogisticRegression as cuLog # As tests directory is not a module, we need to add it to the path sys.path.insert(0, '.') diff --git a/python/cuml/tests/test_naive_bayes.py b/python/cuml/tests/test_naive_bayes.py index cbd4c3346a..37886544bc 100644 --- a/python/cuml/tests/test_naive_bayes.py +++ b/python/cuml/tests/test_naive_bayes.py @@ -14,30 +14,29 @@ # limitations under the License. 
# -import cupy as cp - -import pytest - -from sklearn.metrics import accuracy_score -from cuml.naive_bayes import MultinomialNB -from cuml.naive_bayes import BernoulliNB -from cuml.naive_bayes import CategoricalNB -from cuml.naive_bayes import ComplementNB -from cuml.naive_bayes import GaussianNB -from cuml.internals.input_utils import sparse_scipy_to_cp -from cuml.datasets import make_classification - -from numpy.testing import assert_allclose, assert_array_equal -from numpy.testing import assert_array_almost_equal, assert_raises -from sklearn.naive_bayes import MultinomialNB as skNB -from sklearn.naive_bayes import BernoulliNB as skBNB -from sklearn.naive_bayes import CategoricalNB as skCNB -from sklearn.naive_bayes import ComplementNB as skComplementNB +from cuml.internals.safe_imports import cpu_only_import +import math from sklearn.naive_bayes import GaussianNB as skGNB +from sklearn.naive_bayes import ComplementNB as skComplementNB +from sklearn.naive_bayes import CategoricalNB as skCNB +from sklearn.naive_bayes import BernoulliNB as skBNB +from sklearn.naive_bayes import MultinomialNB as skNB +from numpy.testing import assert_array_almost_equal, assert_raises +from numpy.testing import assert_allclose, assert_array_equal +from cuml.datasets import make_classification +from cuml.internals.input_utils import sparse_scipy_to_cp +from cuml.naive_bayes import GaussianNB +from cuml.naive_bayes import ComplementNB +from cuml.naive_bayes import CategoricalNB +from cuml.naive_bayes import BernoulliNB +from cuml.naive_bayes import MultinomialNB +from sklearn.metrics import accuracy_score +import pytest +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') -import math -import numpy as np +np = cpu_only_import('numpy') @pytest.mark.parametrize("x_dtype", [cp.int32, cp.int64]) diff --git a/python/cuml/tests/test_nearest_neighbors.py b/python/cuml/tests/test_nearest_neighbors.py index 68c04f1b94..8c31ea328c 100644 --- a/python/cuml/tests/test_nearest_neighbors.py +++ b/python/cuml/tests/test_nearest_neighbors.py @@ -14,6 +14,13 @@ # limitations under the License. 
# +import gc +from cuml.common import has_scipy +import cuml +import sklearn +from cuml.internals.safe_imports import cpu_only_import_from +from numpy.testing import assert_array_equal, assert_allclose +from cuml.internals.safe_imports import cpu_only_import import pytest import math @@ -29,18 +36,13 @@ from cuml.internals import logger -import cupy as cp -import cupyx -import cudf -import pandas as pd -import numpy as np -from numpy.testing import assert_array_equal, assert_allclose -from scipy.sparse import isspmatrix_csr - -import sklearn -import cuml -from cuml.common import has_scipy -import gc +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') +cudf = gpu_only_import('cudf') +pd = cpu_only_import('pandas') +np = cpu_only_import('numpy') +isspmatrix_csr = cpu_only_import_from('scipy.sparse', 'isspmatrix_csr') pytestmark = pytest.mark.filterwarnings("ignore:((.|\n)*)#4020((.|\n)*):" @@ -200,7 +202,7 @@ def test_neighborhood_predictions(nrows, ncols, n_neighbors, n_clusters, (4, 10000, 128, 8), (8, 100, 512, 8), (8, 10000, 512, 16), - ]) +]) def test_ivfflat_pred(nrows, ncols, n_neighbors, nlist): algo_params = { 'nlist': nlist, @@ -285,9 +287,9 @@ def test_ivfsq_pred(qtype, encodeResidual, nrows, ncols, n_neighbors, nlist): @pytest.mark.parametrize("algo", ["brute", "ivfflat", "ivfpq", "ivfsq"]) @pytest.mark.parametrize("metric", [ - "l2", "euclidean", "sqeuclidean", - "cosine", "correlation" - ]) + "l2", "euclidean", "sqeuclidean", + "cosine", "correlation" +]) def test_ann_distances_metrics(algo, metric): X, y = make_blobs(n_samples=500, centers=2, n_features=128, random_state=0) @@ -475,7 +477,7 @@ def test_nn_downcast_fails(input_type, nrows, n_feats): ("dataframe", "distance", "numpy", True), ("ndarray", "connectivity", "cupy", False), ("ndarray", "distance", "numpy", False), - ]) +]) @pytest.mark.parametrize('nrows', [unit_param(100), stress_param(1000)]) @pytest.mark.parametrize('n_feats', [unit_param(5), stress_param(100)]) @pytest.mark.parametrize("p", [2, 5]) diff --git a/python/cuml/tests/test_one_hot_encoder.py b/python/cuml/tests/test_one_hot_encoder.py index 50ff016e8b..738987ad81 100644 --- a/python/cuml/tests/test_one_hot_encoder.py +++ b/python/cuml/tests/test_one_hot_encoder.py @@ -11,20 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import math - -import cupy as cp -import numpy as np -import pandas as pd -import pytest -from cudf import DataFrame -from cuml.preprocessing import OneHotEncoder +from sklearn.preprocessing import OneHotEncoder as SkOneHotEncoder from cuml.testing.utils import \ stress_param, \ from_df_to_numpy, \ assert_inverse_equal, \ generate_inputs_from_categories -from sklearn.preprocessing import OneHotEncoder as SkOneHotEncoder +from cuml.preprocessing import OneHotEncoder +from cuml.internals.safe_imports import gpu_only_import_from +import pytest +from cuml.internals.safe_imports import cpu_only_import +import math + +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') +DataFrame = gpu_only_import_from('cudf', 'DataFrame') def _from_df_to_cupy(df): diff --git a/python/cuml/tests/test_pca.py b/python/cuml/tests/test_pca.py index 60c517f927..4d504a2378 100644 --- a/python/cuml/tests/test_pca.py +++ b/python/cuml/tests/test_pca.py @@ -13,20 +13,20 @@ # limitations under the License. # -import numpy as np -import cupy as cp -import cupyx -import pytest - -from cuml import PCA as cuPCA +from cuml.common.exceptions import NotFittedError +from sklearn.datasets import make_blobs +from sklearn.decomposition import PCA as skPCA +from sklearn.datasets import make_multilabel_classification +from sklearn import datasets from cuml.testing.utils import get_handle, array_equal, unit_param, \ quality_param, stress_param - -from sklearn import datasets -from sklearn.datasets import make_multilabel_classification -from sklearn.decomposition import PCA as skPCA -from sklearn.datasets import make_blobs -from cuml.common.exceptions import NotFittedError +from cuml import PCA as cuPCA +import pytest +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') @pytest.mark.parametrize('datatype', [np.float32, np.float64]) diff --git a/python/cuml/tests/test_pickle.py b/python/cuml/tests/test_pickle.py index e1d80d329a..9ecc3826e5 100644 --- a/python/cuml/tests/test_pickle.py +++ b/python/cuml/tests/test_pickle.py @@ -13,20 +13,19 @@ # limitations under the License. # -import cuml -import numpy as np -import pickle -import pytest - -from cuml.tsa.arima import ARIMA +from sklearn.model_selection import train_test_split +from sklearn.manifold import trustworthiness +from sklearn.datasets import load_iris, make_classification, make_regression +from sklearn.base import clone from cuml.testing.utils import array_equal, unit_param, stress_param, \ ClassEnumerator, get_classes_from_package, compare_svm, \ compare_probabilistic_svm - -from sklearn.base import clone -from sklearn.datasets import load_iris, make_classification, make_regression -from sklearn.manifold import trustworthiness -from sklearn.model_selection import train_test_split +from cuml.tsa.arima import ARIMA +import pytest +import pickle +import cuml +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') regression_config = ClassEnumerator(module=cuml.linear_model) diff --git a/python/cuml/tests/test_preprocessing.py b/python/cuml/tests/test_preprocessing.py index 5f571d8fd3..cfcf749361 100644 --- a/python/cuml/tests/test_preprocessing.py +++ b/python/cuml/tests/test_preprocessing.py @@ -13,6 +13,7 @@ # limitations under the License. 
# +from cuml.internals.safe_imports import gpu_only_import import pytest from cuml.preprocessing import \ @@ -79,10 +80,11 @@ from cuml.testing.test_preproc_utils import assert_allclose from cuml.metrics import pairwise_kernels -import numpy as np -import cupy as cp -import cupyx as cpx -import scipy +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +cpx = gpu_only_import('cupyx') +scipy = cpu_only_import('scipy') @pytest.mark.parametrize("feature_range", [(0, 1), (.1, 0.8)]) diff --git a/python/cuml/tests/test_prims.py b/python/cuml/tests/test_prims.py index fdc57502ff..d4d7378502 100644 --- a/python/cuml/tests/test_prims.py +++ b/python/cuml/tests/test_prims.py @@ -13,6 +13,7 @@ # limitations under the License. # +from cuml.internals.safe_imports import cpu_only_import from cuml.prims.label import make_monotonic from cuml.prims.label import invert_labels from cuml.prims.label import check_labels @@ -21,8 +22,9 @@ import pytest -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') @pytest.mark.parametrize("arr_type", ["np", "cp"]) diff --git a/python/cuml/tests/test_qn.py b/python/cuml/tests/test_qn.py index 4935510eb3..de9e8e8578 100644 --- a/python/cuml/tests/test_qn.py +++ b/python/cuml/tests/test_qn.py @@ -13,14 +13,15 @@ # limitations under the License. # -import pytest -import numpy as np -import cupy as cp - -from cuml.solvers import QN as cuQN -from cuml.model_selection import train_test_split -from cuml.datasets.classification import make_classification from cuml.metrics import accuracy_score +from cuml.datasets.classification import make_classification +from cuml.model_selection import train_test_split +from cuml.solvers import QN as cuQN +from cuml.internals.safe_imports import gpu_only_import +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') # todo: add util functions to better compare against precomputed solutions @@ -430,16 +431,16 @@ def test_qn(loss, dtype, penalty, l1_strength, l2_strength, fit_intercept): precomputed_X = [ - [-0.2047076594847130, 0.4789433380575482], - [-0.5194387150567381, -0.5557303043474900], - [1.9657805725027142, 1.3934058329729904], - [0.0929078767437177, 0.2817461528302025], - [0.7690225676118387, 1.2464347363862822], - [1.0071893575830049, -1.2962211091122635], - [0.2749916334321240, 0.2289128789353159], - [1.3529168351654497, 0.8864293405915888], - [-2.0016373096603974, -0.3718425371402544], - [1.6690253095248706, -0.4385697358355719]] + [-0.2047076594847130, 0.4789433380575482], + [-0.5194387150567381, -0.5557303043474900], + [1.9657805725027142, 1.3934058329729904], + [0.0929078767437177, 0.2817461528302025], + [0.7690225676118387, 1.2464347363862822], + [1.0071893575830049, -1.2962211091122635], + [0.2749916334321240, 0.2289128789353159], + [1.3529168351654497, 0.8864293405915888], + [-2.0016373096603974, -0.3718425371402544], + [1.6690253095248706, -0.4385697358355719]] precomputed_y_log = [1, 1, 1, 0, 1, 0, 1, 0, 1, 0] diff --git a/python/cuml/tests/test_random_forest.py b/python/cuml/tests/test_random_forest.py index f7377e5e71..7864d63087 100644 --- a/python/cuml/tests/test_random_forest.py +++ b/python/cuml/tests/test_random_forest.py @@ -13,35 +13,35 @@ # limitations under the License. 
# -import pytest - -import warnings -import cudf -import numpy as np -import random -import json -import os - -from numba import cuda - -import cuml -from cuml.ensemble import RandomForestClassifier as curfc -from cuml.ensemble import RandomForestRegressor as curfr -from cuml.metrics import r2_score -from cuml.testing.utils import get_handle, unit_param, quality_param, \ - stress_param -import cuml.internals.logger as logger - -from sklearn.ensemble import RandomForestClassifier as skrfc -from sklearn.ensemble import RandomForestRegressor as skrfr -from sklearn.metrics import accuracy_score, mean_squared_error, \ - mean_tweedie_deviance +import treelite +from sklearn.model_selection import train_test_split from sklearn.datasets import fetch_california_housing, \ make_classification, make_regression, load_iris, load_breast_cancer, \ load_boston -from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score, mean_squared_error, \ + mean_tweedie_deviance +from sklearn.ensemble import RandomForestRegressor as skrfr +from sklearn.ensemble import RandomForestClassifier as skrfc +import cuml.internals.logger as logger +from cuml.testing.utils import get_handle, unit_param, quality_param, \ + stress_param +from cuml.metrics import r2_score +from cuml.ensemble import RandomForestRegressor as curfr +from cuml.ensemble import RandomForestClassifier as curfc +import cuml +from cuml.internals.safe_imports import gpu_only_import_from +import os +import json +import random +from cuml.internals.safe_imports import cpu_only_import +import pytest -import treelite +import warnings +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +np = cpu_only_import('numpy') + +cuda = gpu_only_import_from('numba', 'cuda') pytestmark = pytest.mark.filterwarnings("ignore: For reproducible results(.*)" @@ -312,7 +312,7 @@ def test_rf_classification(small_clf, datatype, max_samples, max_features): ) @pytest.mark.parametrize("datatype", [np.float32, np.float64]) def test_rf_classification_unorder( - small_clf, datatype, max_samples, max_features=1, a=2, b=5): + small_clf, datatype, max_samples, max_features=1, a=2, b=5): use_handle = True X, y = small_clf diff --git a/python/cuml/tests/test_random_projection.py b/python/cuml/tests/test_random_projection.py index 64c43a2ab3..7388ac9ede 100644 --- a/python/cuml/tests/test_random_projection.py +++ b/python/cuml/tests/test_random_projection.py @@ -13,19 +13,17 @@ # limitations under the License. 
# -import numpy as np -import pytest - -from cuml.random_projection import GaussianRandomProjection, \ - SparseRandomProjection -from cuml.random_projection import johnson_lindenstrauss_min_dim \ - as cuml_johnson_lindenstrauss_min_dim - -from sklearn.random_projection import johnson_lindenstrauss_min_dim \ - as sklearn_johnson_lindenstrauss_min_dim -from sklearn.datasets import make_blobs - from cuml.common import has_scipy +from sklearn.datasets import make_blobs +from sklearn.random_projection import johnson_lindenstrauss_min_dim \ + as sklearn_johnson_lindenstrauss_min_dim +from cuml.random_projection import johnson_lindenstrauss_min_dim \ + as cuml_johnson_lindenstrauss_min_dim +from cuml.random_projection import GaussianRandomProjection, \ + SparseRandomProjection +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') @pytest.mark.parametrize('datatype', [np.float32, np.float64]) diff --git a/python/cuml/tests/test_serialize.py b/python/cuml/tests/test_serialize.py index b0e443e1a0..359298e945 100644 --- a/python/cuml/tests/test_serialize.py +++ b/python/cuml/tests/test_serialize.py @@ -13,13 +13,12 @@ # limitations under the License. # -import cupy as cp -import cupyx -import pickle - -from cuml.naive_bayes.naive_bayes import MultinomialNB - from distributed.protocol.serialize import serialize as ser +from cuml.naive_bayes.naive_bayes import MultinomialNB +import pickle +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') def test_naive_bayes_cuda(): diff --git a/python/cuml/tests/test_sgd.py b/python/cuml/tests/test_sgd.py index 3587972765..9fc5a86e27 100644 --- a/python/cuml/tests/test_sgd.py +++ b/python/cuml/tests/test_sgd.py @@ -13,15 +13,15 @@ # limitations under the License. # -import numpy as np -import pytest - -import cudf - +from sklearn.model_selection import train_test_split +from sklearn.datasets import make_blobs from cuml.solvers import SGD as cumlSGD +from cuml.internals.safe_imports import gpu_only_import +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from sklearn.datasets import make_blobs -from sklearn.model_selection import train_test_split +cudf = gpu_only_import('cudf') @pytest.mark.parametrize('lrate', ['constant', 'invscaling', 'adaptive']) diff --git a/python/cuml/tests/test_simpl_set.py b/python/cuml/tests/test_simpl_set.py index e7ae152d96..681607180b 100644 --- a/python/cuml/tests/test_simpl_set.py +++ b/python/cuml/tests/test_simpl_set.py @@ -13,20 +13,21 @@ # limitations under the License. 
# -import pytest -from cuml.datasets import make_blobs -import numpy as np -import cupy as cp -import umap.distances as dist -from cuml.manifold.umap import UMAP -from cuml.neighbors import NearestNeighbors - -from umap.umap_ import fuzzy_simplicial_set as ref_fuzzy_simplicial_set -from cuml.manifold.umap import fuzzy_simplicial_set \ - as cu_fuzzy_simplicial_set -from umap.umap_ import simplicial_set_embedding as ref_simplicial_set_embedding from cuml.manifold.umap import simplicial_set_embedding \ as cu_simplicial_set_embedding +from umap.umap_ import simplicial_set_embedding as ref_simplicial_set_embedding +from cuml.manifold.umap import fuzzy_simplicial_set \ + as cu_fuzzy_simplicial_set +from umap.umap_ import fuzzy_simplicial_set as ref_fuzzy_simplicial_set +from cuml.neighbors import NearestNeighbors +from cuml.manifold.umap import UMAP +import umap.distances as dist +from cuml.internals.safe_imports import gpu_only_import +import pytest +from cuml.datasets import make_blobs +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') def correctness_dense(a, b, rtol=0.1, threshold=0.95): diff --git a/python/cuml/tests/test_sparsefuncs.py b/python/cuml/tests/test_sparsefuncs.py index a9db20375b..6e179f5ba0 100644 --- a/python/cuml/tests/test_sparsefuncs.py +++ b/python/cuml/tests/test_sparsefuncs.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from cuml.internals.safe_imports import gpu_only_import from cuml.common.sparsefuncs import csr_row_normalize_l1 from cuml.common.sparsefuncs import csr_row_normalize_l2 from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l1 from sklearn.utils.sparsefuncs_fast import inplace_csr_row_normalize_l2 import pytest -import numpy as np -import scipy.sparse as sp -import cupy as cp -import cupyx +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +sp = cpu_only_import('scipy.sparse') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') @pytest.mark.parametrize('norm, ref_norm', [ diff --git a/python/cuml/tests/test_stationarity.py b/python/cuml/tests/test_stationarity.py index 095bc1c3a1..5cf403496c 100644 --- a/python/cuml/tests/test_stationarity.py +++ b/python/cuml/tests/test_stationarity.py @@ -15,13 +15,13 @@ # TODO: update! -import pytest - -import numpy as np +from cuml.tsa import stationarity +from statsmodels.tsa import stattools import warnings +import pytest -from statsmodels.tsa import stattools -from cuml.tsa import stationarity +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') ############################################################################### diff --git a/python/cuml/tests/test_stats.py b/python/cuml/tests/test_stats.py index 7afd2b6e61..19782b9e7c 100644 --- a/python/cuml/tests/test_stats.py +++ b/python/cuml/tests/test_stats.py @@ -13,12 +13,12 @@ # limitations under the License. 
# -import pytest -import cupy as cp -import cupyx - -from cuml.prims.stats import cov from cuml.testing.utils import array_equal +from cuml.prims.stats import cov +import pytest +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') @pytest.mark.parametrize("nrows", [1000]) diff --git a/python/cuml/tests/test_strategies.py b/python/cuml/tests/test_strategies.py index b019972875..5eef6a737a 100644 --- a/python/cuml/tests/test_strategies.py +++ b/python/cuml/tests/test_strategies.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import cupy as cp -import numpy as np -from cuml.internals.array import CumlArray +from hypothesis.extra.numpy import floating_dtypes +from hypothesis import strategies as st +from hypothesis import given, settings, HealthCheck +from cuml.testing.utils import normalized_shape, series_squeezed_shape from cuml.testing.strategies import ( create_cuml_array_input, cuml_array_dtypes, @@ -27,10 +28,11 @@ standard_datasets, standard_regression_datasets, ) -from cuml.testing.utils import normalized_shape, series_squeezed_shape -from hypothesis import given, settings, HealthCheck -from hypothesis import strategies as st -from hypothesis.extra.numpy import floating_dtypes +from cuml.internals.array import CumlArray +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') @given( diff --git a/python/cuml/tests/test_stratified_kfold.py b/python/cuml/tests/test_stratified_kfold.py index 226cc6e0f2..77e3d1e355 100644 --- a/python/cuml/tests/test_stratified_kfold.py +++ b/python/cuml/tests/test_stratified_kfold.py @@ -13,11 +13,11 @@ # limitations under the License. # -import cudf -import cupy as cp -import pytest - from cuml.model_selection import StratifiedKFold +import pytest +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') def get_x_y(n_samples, n_classes): diff --git a/python/cuml/tests/test_svm.py b/python/cuml/tests/test_svm.py index 8d3d2110d8..3dcb4b8242 100644 --- a/python/cuml/tests/test_svm.py +++ b/python/cuml/tests/test_svm.py @@ -13,28 +13,30 @@ # limitations under the License. 
# -import pytest -import cupy as cp -import numpy as np -from numba import cuda - -import cuml -import cuml.svm as cu_svm -from cuml.common import input_to_cuml_array +import platform +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import train_test_split +from sklearn.metrics import mean_squared_error +from sklearn.datasets import make_classification, make_gaussian_quantiles +from sklearn.datasets import make_regression, make_friedman1 +from sklearn.datasets import load_iris, make_blobs +from sklearn import svm from cuml.testing.utils import unit_param, quality_param, stress_param, \ compare_svm, compare_probabilistic_svm, svm_array_equal +from cuml.common import input_to_cuml_array +import cuml.svm as cu_svm +import cuml +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import cpu_only_import +import pytest +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') +cuda = gpu_only_import_from('numba', 'cuda') -from sklearn import svm -from sklearn.datasets import load_iris, make_blobs -from sklearn.datasets import make_regression, make_friedman1 -from sklearn.datasets import make_classification, make_gaussian_quantiles -from sklearn.metrics import mean_squared_error -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler -import cudf +cudf = gpu_only_import('cudf') -import platform IS_ARM = platform.processor() == "aarch64" diff --git a/python/cuml/tests/test_target_encoder.py b/python/cuml/tests/test_target_encoder.py index de700b35cd..5488aadb48 100644 --- a/python/cuml/tests/test_target_encoder.py +++ b/python/cuml/tests/test_target_encoder.py @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cuml.preprocessing.TargetEncoder import TargetEncoder -import cudf -import pandas -import numpy as np -import cupy as cp -from cuml.testing.utils import array_equal import pytest +from cuml.testing.utils import array_equal +from cuml.internals.safe_imports import cpu_only_import +from cuml.preprocessing.TargetEncoder import TargetEncoder +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +pandas = cpu_only_import('pandas') +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') def test_targetencoder_fit_transform(): @@ -268,10 +270,10 @@ def test_transform_with_index(): def test_get_params(): params = { - 'n_folds': 5, - 'smooth': 1, - 'seed': 49, - 'split_method': 'customize' + 'n_folds': 5, + 'smooth': 1, + 'seed': 49, + 'split_method': 'customize' } encoder = TargetEncoder(**params) p2 = encoder.get_params() diff --git a/python/cuml/tests/test_text_feature_extraction.py b/python/cuml/tests/test_text_feature_extraction.py index 09319fb492..e21a378486 100644 --- a/python/cuml/tests/test_text_feature_extraction.py +++ b/python/cuml/tests/test_text_feature_extraction.py @@ -13,19 +13,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import cpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import_from +from sklearn.feature_extraction.text import TfidfVectorizer as SkTfidfVect +from sklearn.feature_extraction.text import HashingVectorizer as SkHashVect +from sklearn.feature_extraction.text import CountVectorizer as SkCountVect +import pytest from cuml.feature_extraction.text import CountVectorizer from cuml.feature_extraction.text import TfidfVectorizer from cuml.feature_extraction.text import HashingVectorizer -import cupy as cp -import pytest -from sklearn.feature_extraction.text import CountVectorizer as SkCountVect -from sklearn.feature_extraction.text import HashingVectorizer as SkHashVect - -from sklearn.feature_extraction.text import TfidfVectorizer as SkTfidfVect -from cudf import Series -from numpy.testing import assert_array_equal -import numpy as np -import pandas as pd +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') + +Series = gpu_only_import_from('cudf', 'Series') +assert_array_equal = cpu_only_import_from( + 'numpy.testing', 'assert_array_equal') +np = cpu_only_import('numpy') +pd = cpu_only_import('pandas') def test_count_vectorizer(): diff --git a/python/cuml/tests/test_tfidf.py b/python/cuml/tests/test_tfidf.py index 8756500dfb..e9a67e895e 100644 --- a/python/cuml/tests/test_tfidf.py +++ b/python/cuml/tests/test_tfidf.py @@ -13,12 +13,14 @@ # limitations under the License. # -import pytest -import numpy as np -import cupy as cp -import cupyx -from cuml.feature_extraction.text import TfidfTransformer from sklearn.feature_extraction.text import TfidfTransformer as SkTfidfTransfo +from cuml.feature_extraction.text import TfidfTransformer +from cuml.internals.safe_imports import gpu_only_import +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +cupyx = gpu_only_import('cupyx') # data_ids correspond to data, order is important diff --git a/python/cuml/tests/test_thirdparty.py b/python/cuml/tests/test_thirdparty.py index fac2bec954..f60a3b0fe1 100644 --- a/python/cuml/tests/test_thirdparty.py +++ b/python/cuml/tests/test_thirdparty.py @@ -13,32 +13,31 @@ # limitations under the License. 
# -import pytest - -import numpy as np -import cupy as cp -import cupyx as cpx -from cuml._thirdparty.sklearn.utils.validation import check_X_y - -from cuml._thirdparty.sklearn.utils.extmath import row_norms as cu_row_norms, \ - _incremental_mean_and_var as cu_incremental_mean_and_var -from sklearn.utils.extmath import row_norms as sk_row_norms, \ - _incremental_mean_and_var as sk_incremental_mean_and_var - -from cuml._thirdparty.sklearn.utils.sparsefuncs import \ - inplace_csr_column_scale as cu_inplace_csr_column_scale, \ - inplace_csr_row_scale as cu_inplace_csr_row_scale, \ - inplace_column_scale as cu_inplace_column_scale, \ - mean_variance_axis as cu_mean_variance_axis, \ - min_max_axis as cu_min_max_axis +from cuml.testing.test_preproc_utils import assert_allclose from sklearn.utils.sparsefuncs import \ inplace_csr_column_scale as sk_inplace_csr_column_scale, \ inplace_csr_row_scale as sk_inplace_csr_row_scale, \ inplace_column_scale as sk_inplace_column_scale, \ mean_variance_axis as sk_mean_variance_axis, \ min_max_axis as sk_min_max_axis +from cuml._thirdparty.sklearn.utils.sparsefuncs import \ + inplace_csr_column_scale as cu_inplace_csr_column_scale, \ + inplace_csr_row_scale as cu_inplace_csr_row_scale, \ + inplace_column_scale as cu_inplace_column_scale, \ + mean_variance_axis as cu_mean_variance_axis, \ + min_max_axis as cu_min_max_axis +from sklearn.utils.extmath import row_norms as sk_row_norms, \ + _incremental_mean_and_var as sk_incremental_mean_and_var +from cuml._thirdparty.sklearn.utils.extmath import row_norms as cu_row_norms, \ + _incremental_mean_and_var as cu_incremental_mean_and_var +from cuml._thirdparty.sklearn.utils.validation import check_X_y +from cuml.internals.safe_imports import gpu_only_import +import pytest -from cuml.testing.test_preproc_utils import assert_allclose +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +cpx = gpu_only_import('cupyx') @pytest.fixture(scope="session") @@ -184,15 +183,15 @@ def test_min_max_axis(failure_logger, sparse_random_dataset, axis, ignore_nan): params=["cupy-csr", "cupy-csc"]) def sparse_extremes(request, random_seed): X = cp.array([ - [-0.9933658, 0.871748, 0.44418066], - [0.87808335, cp.nan, 0.18183318], - [cp.nan, 0.25030251, -0.7269053], - [cp.nan, 0.17725405, cp.nan], - [cp.nan, cp.nan, cp.nan], - [0.0, 0.0, 0.44418066], - [0.0, 0.0, 0.0], - [0.0, 0.0, cp.nan], - [0.0, cp.nan, cp.nan]]) + [-0.9933658, 0.871748, 0.44418066], + [0.87808335, cp.nan, 0.18183318], + [cp.nan, 0.25030251, -0.7269053], + [cp.nan, 0.17725405, cp.nan], + [cp.nan, cp.nan, cp.nan], + [0.0, 0.0, 0.44418066], + [0.0, 0.0, 0.0], + [0.0, 0.0, cp.nan], + [0.0, cp.nan, cp.nan]]) if request.param == 'cupy-csr': X_sparse = cpx.scipy.sparse.csr_matrix(X) elif request.param == 'cupy-csc': diff --git a/python/cuml/tests/test_train_test_split.py b/python/cuml/tests/test_train_test_split.py index 59f99c5415..14d4b33c2a 100644 --- a/python/cuml/tests/test_train_test_split.py +++ b/python/cuml/tests/test_train_test_split.py @@ -13,14 +13,17 @@ # limitations under the License. 
# -import cudf -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.datasets import make_classification +from cuml.model_selection import train_test_split import pytest +from cuml.internals.safe_imports import cpu_only_import +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +cp = gpu_only_import('cupy') +np = cpu_only_import('numpy') -from cuml.model_selection import train_test_split -from cuml.datasets import make_classification -from numba import cuda +cuda = gpu_only_import_from('numba', 'cuda') test_array_input_types = [ 'numba', 'cupy' diff --git a/python/cuml/tests/test_trustworthiness.py b/python/cuml/tests/test_trustworthiness.py index 6ad6574b56..149f784032 100644 --- a/python/cuml/tests/test_trustworthiness.py +++ b/python/cuml/tests/test_trustworthiness.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from cuml.internals.safe_imports import cpu_only_import import pytest from sklearn.manifold import trustworthiness as sklearn_trustworthiness from cuml.metrics import trustworthiness as cuml_trustworthiness @@ -19,8 +20,9 @@ from sklearn.datasets import make_blobs from umap import UMAP -import cudf -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +np = cpu_only_import('numpy') @pytest.mark.parametrize('input_type', ['ndarray', 'dataframe']) diff --git a/python/cuml/tests/test_tsne.py b/python/cuml/tests/test_tsne.py index bd9a58107a..5b94641559 100644 --- a/python/cuml/tests/test_tsne.py +++ b/python/cuml/tests/test_tsne.py @@ -13,19 +13,20 @@ # limitations under the License. # -import numpy as np -import pytest -import scipy -import cupyx - -from cuml.manifold import TSNE -from cuml.testing.utils import array_equal, stress_param +from sklearn.manifold import TSNE as skTSNE +from sklearn import datasets +from sklearn.manifold import trustworthiness +from sklearn.datasets import make_blobs from cuml.neighbors import NearestNeighbors as cuKNN +from cuml.testing.utils import array_equal, stress_param +from cuml.manifold import TSNE +from cuml.internals.safe_imports import gpu_only_import +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +scipy = cpu_only_import('scipy') +cupyx = gpu_only_import('cupyx') -from sklearn.datasets import make_blobs -from sklearn.manifold import trustworthiness -from sklearn import datasets -from sklearn.manifold import TSNE as skTSNE pytestmark = pytest.mark.filterwarnings("ignore:Method 'fft' is " "experimental::") @@ -34,10 +35,10 @@ DEFAULT_PERPLEXITY = 30 test_datasets = { - "digits": datasets.load_digits(), - "boston": datasets.load_boston(), - "diabetes": datasets.load_diabetes(), - "cancer": datasets.load_breast_cancer(), + "digits": datasets.load_digits(), + "boston": datasets.load_boston(), + "diabetes": datasets.load_diabetes(), + "cancer": datasets.load_breast_cancer(), } diff --git a/python/cuml/tests/test_tsvd.py b/python/cuml/tests/test_tsvd.py index d108e53097..82405226f6 100644 --- a/python/cuml/tests/test_tsvd.py +++ b/python/cuml/tests/test_tsvd.py @@ -12,17 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import numpy as np -import pytest - -from cuml import TruncatedSVD as cuTSVD -from cuml.testing.utils import get_handle +from sklearn.utils import check_random_state +from sklearn.decomposition import TruncatedSVD as skTSVD +from sklearn.datasets import make_blobs from cuml.testing.utils import array_equal, unit_param, \ quality_param, stress_param - -from sklearn.datasets import make_blobs -from sklearn.decomposition import TruncatedSVD as skTSVD -from sklearn.utils import check_random_state +from cuml.testing.utils import get_handle +from cuml import TruncatedSVD as cuTSVD +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') @pytest.mark.parametrize('datatype', [np.float32, np.float64]) diff --git a/python/cuml/tests/test_umap.py b/python/cuml/tests/test_umap.py index 4bb5833dda..9173570bcd 100644 --- a/python/cuml/tests/test_umap.py +++ b/python/cuml/tests/test_umap.py @@ -17,29 +17,28 @@ # Please install UMAP before running the code # use 'conda install -c conda-forge umap-learn' command to install it -import numpy as np -import pytest -import umap -import copy - -import cupyx -import scipy.sparse -import cupy as cp - -from cuml.manifold.umap import UMAP as cuUMAP +from sklearn.metrics import adjusted_rand_score +from sklearn.manifold import trustworthiness +from sklearn.datasets import make_blobs +from sklearn.cluster import KMeans +from sklearn import datasets +from cuml.internals import logger +import joblib +from sklearn.neighbors import NearestNeighbors from cuml.testing.utils import array_equal, unit_param, \ quality_param, stress_param -from sklearn.neighbors import NearestNeighbors - -import joblib +from cuml.manifold.umap import UMAP as cuUMAP +import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +import copy +import umap +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -from cuml.internals import logger +cupyx = gpu_only_import('cupyx') +scipy_sparse = cpu_only_import('scipy.sparse') -from sklearn import datasets -from sklearn.cluster import KMeans -from sklearn.datasets import make_blobs -from sklearn.manifold import trustworthiness -from sklearn.metrics import adjusted_rand_score dataset_names = ['iris', 'digits', 'wine', 'blobs'] @@ -159,10 +158,10 @@ def test_umap_transform_on_digits_sparse(target_metric, input_type, if input_type == 'cupy': sp_prefix = cupyx.scipy.sparse else: - sp_prefix = scipy.sparse + sp_prefix = scipy_sparse data = sp_prefix.csr_matrix( - scipy.sparse.csr_matrix(digits.data[digits_selection])) + scipy_sparse.csr_matrix(digits.data[digits_selection])) fitter = cuUMAP(n_neighbors=15, verbose=logger.level_info, @@ -173,7 +172,7 @@ def test_umap_transform_on_digits_sparse(target_metric, input_type, target_metric=target_metric) new_data = sp_prefix.csr_matrix( - scipy.sparse.csr_matrix(digits.data[~digits_selection])) + scipy_sparse.csr_matrix(digits.data[~digits_selection])) if xform_method == 'fit': fitter.fit(data, convert_dtype=True) @@ -298,7 +297,7 @@ def test_umap_fit_transform_score_default(target_metric): cuml_score = adjusted_rand_score(labels, KMeans(10).fit_predict( - cuml_embedding)) + cuml_embedding)) score = adjusted_rand_score(labels, KMeans(10).fit_predict(embedding)) @@ -605,12 +604,12 @@ def test_umap_distance_metrics_fit_transform_trust_on_sparse_input(metric): centers=5, random_state=42) data_selection = np.random.RandomState(42).choice( - [True, False], 1000, replace=True, p=[0.75, 0.25]) + [True, False], 
1000, replace=True, p=[0.75, 0.25]) if metric == 'jaccard': data = data >= 0 - new_data = scipy.sparse.csr_matrix(data[~data_selection]) + new_data = scipy_sparse.csr_matrix(data[~data_selection]) umap_model = umap.UMAP(n_neighbors=10, min_dist=0.01, metric=metric, init='random') diff --git a/python/cuml/tests/test_utils.py b/python/cuml/tests/test_utils.py index e5f21e8e12..6b6840c905 100644 --- a/python/cuml/tests/test_utils.py +++ b/python/cuml/tests/test_utils.py @@ -12,14 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import pytest -import numpy as np -from cuml.testing.utils import array_equal, assert_array_equal -from hypothesis import given, note -from hypothesis import strategies as st -from hypothesis import target from hypothesis.extra.numpy import (array_shapes, arrays, floating_dtypes, integer_dtypes) +from hypothesis import target +from hypothesis import strategies as st +from hypothesis import given, note +from cuml.testing.utils import array_equal, assert_array_equal +import pytest +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') @given( diff --git a/python/cuml/thirdparty_adapters/adapters.py b/python/cuml/thirdparty_adapters/adapters.py index 9aaf78c4e5..5e612a3059 100644 --- a/python/cuml/thirdparty_adapters/adapters.py +++ b/python/cuml/thirdparty_adapters/adapters.py @@ -14,21 +14,25 @@ # limitations under the License. # -import numpy as np -import cupy as cp -from cuml.internals.input_utils import input_to_cupy_array, input_to_host_array -from cuml.internals.global_settings import GlobalSettings -from cupyx.scipy.sparse import csr_matrix as gpu_csr_matrix -from cupyx.scipy.sparse import csc_matrix as gpu_csc_matrix -from cupyx.scipy.sparse import csc_matrix as gpu_coo_matrix -from scipy.sparse import csr_matrix as cpu_csr_matrix -from scipy.sparse import csc_matrix as cpu_csc_matrix -from scipy.sparse import csc_matrix as cpu_coo_matrix -from scipy import sparse as cpu_sparse from cupyx.scipy import sparse as gpu_sparse - -from pandas import DataFrame as pdDataFrame -from cudf import DataFrame as cuDataFrame +from scipy import sparse as cpu_sparse +from scipy.sparse import csc_matrix as cpu_coo_matrix +from scipy.sparse import csc_matrix as cpu_csc_matrix +from cuml.internals.safe_imports import cpu_only_import_from +from cupyx.scipy.sparse import csc_matrix as gpu_coo_matrix +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.global_settings import GlobalSettings +from cuml.internals.input_utils import input_to_cupy_array, input_to_host_array +from cuml.internals.safe_imports import gpu_only_import +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') +cp = gpu_only_import('cupy') +gpu_csr_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'csr_matrix') +gpu_csc_matrix = gpu_only_import_from('cupyx.scipy.sparse', 'csc_matrix') +cpu_csr_matrix = cpu_only_import_from('scipy.sparse', 'csr_matrix') + +pdDataFrame = cpu_only_import_from('pandas', 'DataFrame') +cuDataFrame = gpu_only_import_from('cudf', 'DataFrame') numeric_types = [ np.int8, np.int16, np.int32, np.int64, diff --git a/python/cuml/thirdparty_adapters/sparsefuncs_fast.py b/python/cuml/thirdparty_adapters/sparsefuncs_fast.py index 8cc043aca5..631d1541b9 100644 --- a/python/cuml/thirdparty_adapters/sparsefuncs_fast.py +++ b/python/cuml/thirdparty_adapters/sparsefuncs_fast.py @@ -15,10 +15,12 @@ # -import cupy as cp -import cupyx as cpx 
-from numba import cuda from math import ceil +from cuml.internals.safe_imports import gpu_only_import_from +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +cpx = gpu_only_import('cupyx') +cuda = gpu_only_import_from('numba', 'cuda') def csr_mean_variance_axis0(X): diff --git a/python/cuml/tsa/arima.pyx b/python/cuml/tsa/arima.pyx index cce7e5234c..e450251f83 100644 --- a/python/cuml/tsa/arima.pyx +++ b/python/cuml/tsa/arima.pyx @@ -16,7 +16,8 @@ # distutils: language = c++ -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import sys import nvtx diff --git a/python/cuml/tsa/auto_arima.pyx b/python/cuml/tsa/auto_arima.pyx index df9bc69c36..43ccc1a413 100644 --- a/python/cuml/tsa/auto_arima.pyx +++ b/python/cuml/tsa/auto_arima.pyx @@ -23,9 +23,11 @@ import itertools from libc.stdint cimport uintptr_t from libcpp cimport bool from libcpp.vector cimport vector -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') -import cupy as cp +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') import cuml.internals from cuml.internals import logger diff --git a/python/cuml/tsa/batched_lbfgs.py b/python/cuml/tsa/batched_lbfgs.py index e6d418eda4..4751120bc7 100644 --- a/python/cuml/tsa/batched_lbfgs.py +++ b/python/cuml/tsa/batched_lbfgs.py @@ -14,11 +14,11 @@ # limitations under the License. # -import cuml.internals.logger as logger -import numpy as np -import nvtx - from cuml.common import has_scipy +import nvtx +import cuml.internals.logger as logger +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') def _fd_fprime(x, f, h): diff --git a/python/cuml/tsa/holtwinters.pyx b/python/cuml/tsa/holtwinters.pyx index 753b179de6..62f09935dd 100644 --- a/python/cuml/tsa/holtwinters.pyx +++ b/python/cuml/tsa/holtwinters.pyx @@ -15,9 +15,12 @@ # distutils: language = c++ -import cudf -import cupy as cp -import numpy as np +from cuml.internals.safe_imports import gpu_only_import +cudf = gpu_only_import('cudf') +from cuml.internals.safe_imports import gpu_only_import +cp = gpu_only_import('cupy') +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from libc.stdint cimport uintptr_t import cuml.internals diff --git a/python/cuml/tsa/seasonality.pyx b/python/cuml/tsa/seasonality.pyx index 98e701bb0c..37f94f716d 100644 --- a/python/cuml/tsa/seasonality.pyx +++ b/python/cuml/tsa/seasonality.pyx @@ -15,7 +15,8 @@ # distutils: language = c++ -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') import cuml.internals from cuml.internals.array import CumlArray diff --git a/python/cuml/tsa/stationarity.pyx b/python/cuml/tsa/stationarity.pyx index 1c51129e5f..d5ea6a589b 100644 --- a/python/cuml/tsa/stationarity.pyx +++ b/python/cuml/tsa/stationarity.pyx @@ -16,7 +16,8 @@ # distutils: language = c++ import ctypes -import numpy as np +from cuml.internals.safe_imports import cpu_only_import +np = cpu_only_import('numpy') from libc.stdint cimport uintptr_t from libcpp cimport bool as boolcpp