Skip to content

Commit

Permalink
Make all CPU/GPU only imports "safe" for respective package (#5117)
Browse files Browse the repository at this point in the history
Authors:
  - William Hicks (https://github.com/wphicks)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: #5117
  • Loading branch information
wphicks authored Jan 9, 2023
1 parent 62a2661 commit 199c7d4
Show file tree
Hide file tree
Showing 249 changed files with 2,667 additions and 2,355 deletions.
7 changes: 4 additions & 3 deletions cpp/examples/kmeans/prepare_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@
# Based on
# https://github.com/h2oai/h2o4gpu/blob/master/examples/py/demos/H2O4GPU_KMeans_Homesite.ipynb
# as received on December 6th 2018
import pandas as pd
import numpy as np
import sys
import os
import sys
from cuml.internals.safe_imports import cpu_only_import
pd = cpu_only_import('pandas')
np = cpu_only_import('numpy')

# read the data
train_file = "train.csv"
Expand Down
33 changes: 17 additions & 16 deletions cpp/examples/symreg/prepare_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
# limitations under the License.
#

import numpy as np
from sklearn.model_selection import train_test_split
from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')

rng = np.random.RandomState(seed=2021)

Expand All @@ -26,21 +27,21 @@
X_test = rng.uniform(-1, 1, 100).reshape(50, 2)
y_test = X_test[:, 0]**2 - X_test[:, 1]**2 + X_test[:, 1] - 1

print("Training set has n_rows=%d n_cols=%d" %(X_train.shape))
print("Test set has n_rows=%d n_cols=%d" %(X_test.shape))
print("Training set has n_rows=%d n_cols=%d" % (X_train.shape))
print("Test set has n_rows=%d n_cols=%d" % (X_test.shape))

train_data = "train_data.txt"
test_data = "test_data.txt"
train_labels = "train_labels.txt"
test_labels = "test_labels.txt"
train_data = "train_data.txt"
test_data = "test_data.txt"
train_labels = "train_labels.txt"
test_labels = "test_labels.txt"

# Save all datasets in col-major format
np.savetxt(train_data, X_train.T,fmt='%.7f')
np.savetxt(test_data, X_test.T,fmt='%.7f')
np.savetxt(train_labels, y_train,fmt='%.7f')
np.savetxt(test_labels, y_test,fmt='%.7f')

print("Wrote %d values to %s"%(X_train.size,train_data))
print("Wrote %d values to %s"%(X_test.size,test_data))
print("Wrote %d values to %s"%(y_train.size,train_labels))
print("Wrote %d values to %s"%(y_test.size,test_labels))
np.savetxt(train_data, X_train.T, fmt='%.7f')
np.savetxt(test_data, X_test.T, fmt='%.7f')
np.savetxt(train_labels, y_train, fmt='%.7f')
np.savetxt(test_labels, y_test, fmt='%.7f')

print("Wrote %d values to %s" % (X_train.size, train_data))
print("Wrote %d values to %s" % (X_test.size, test_data))
print("Wrote %d values to %s" % (y_train.size, train_labels))
print("Wrote %d values to %s" % (y_test.size, test_labels))
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
# Authors mentioned above do not endorse or promote this production.


from ..preprocessing import FunctionTransformer
from ....thirdparty_adapters import check_array
from ..utils.validation import check_is_fitted
from ..utils.skl_dependencies import TransformerMixin, BaseComposition, \
BaseEstimator
from cuml.internals import _deprecate_pos_args
from cuml.internals.array_sparse import SparseCumlArray
from cuml.internals.global_settings import _global_settings_data
import cuml
from itertools import chain
from itertools import compress
from joblib import Parallel
Expand All @@ -24,25 +33,18 @@
from collections import defaultdict
import warnings

from scipy import sparse as sp_sparse
from cupyx.scipy import sparse as cu_sparse
import numpy as cpu_np
import cupy as np
import numba
from cuml.internals.safe_imports import cpu_only_import_from
from cuml.internals.safe_imports import gpu_only_import_from
from cuml.internals.safe_imports import cpu_only_import
from cuml.internals.safe_imports import gpu_only_import

import pandas as pd
import cudf

import cuml
from cuml.internals.global_settings import _global_settings_data
from cuml.internals.array_sparse import SparseCumlArray
from cuml.internals.import_utils import has_sklearn
from cuml.internals import _deprecate_pos_args
from ..utils.skl_dependencies import TransformerMixin, BaseComposition, \
BaseEstimator
from ..utils.validation import check_is_fitted
from ....thirdparty_adapters import check_array
from ..preprocessing import FunctionTransformer
cpu_np = cpu_only_import('numpy')
cu_sparse = gpu_only_import_from('cupyx.scipy', 'sparse')
np = gpu_only_import('cupy')
numba = gpu_only_import('numba')
pd = cpu_only_import('pandas')
sp_sparse = cpu_only_import_from('scipy', 'sparse')
cudf = gpu_only_import('cudf')


_ERR_MSG_1DCOLUMN = ("1D data passed to a transformer that expects 2D data. "
Expand Down Expand Up @@ -361,6 +363,7 @@ def delayed_function(*args, **kwargs):

class _FuncWrapper:
"""Load the global configuration before calling the function."""

def __init__(self, function):
self.function = function
self.config = _global_settings_data.shared_state
Expand Down Expand Up @@ -825,7 +828,7 @@ def _fit_transform(self, X, y, func, fitted=False):
message_clsname='ColumnTransformer',
message=self._log_message(name, idx, len(transformers)))
for idx, (name, trans, column, weight) in enumerate(
self._iter(fitted=fitted, replace_strings=True), 1))
self._iter(fitted=fitted, replace_strings=True), 1))
except ValueError as e:
if "Expected 2D array, got 1D array instead" in str(e):
raise ValueError(_ERR_MSG_1DCOLUMN) from e
Expand Down Expand Up @@ -1161,6 +1164,7 @@ class make_column_selector:
[-0.30151134, 0. , 1. , 0. ],
[ 0.90453403, 0. , 0. , 1. ]])
"""

def __init__(self, pattern=None, *, dtype_include=None,
dtype_exclude=None):
self.pattern = pattern
Expand Down
62 changes: 31 additions & 31 deletions python/cuml/_thirdparty/sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,42 +15,42 @@
# Authors mentioned above do not endorse or promote this production.


from ....internals.memory_utils import using_output_type
from ....internals import _deprecate_pos_args
from ....internals import api_return_generic
from ....common.array_descriptor import CumlArrayDescriptor
from ....internals.array_sparse import SparseCumlArray
from ....internals.array import CumlArray
from ....thirdparty_adapters.sparsefuncs_fast import \
(inplace_csr_row_normalize_l1, inplace_csr_row_normalize_l2,
csr_polynomial_expansion)
from ..utils.sparsefuncs import (inplace_column_scale,
min_max_axis,
mean_variance_axis)
from ..utils.validation import (check_is_fitted, FLOAT_DTYPES,
check_random_state)
from ..utils.extmath import _incremental_mean_and_var
from ..utils.extmath import row_norms
from ....thirdparty_adapters import check_array
from cuml.internals.mixins import AllowNaNTagMixin, SparseInputTagMixin, \
StatelessTagMixin
from ..utils.skl_dependencies import BaseEstimator, TransformerMixin
from scipy.special import boxcox
from scipy import optimize
from cuml.internals.safe_imports import cpu_only_import_from
from cuml.internals.safe_imports import gpu_only_import_from
from cuml.internals.safe_imports import gpu_only_import
from itertools import chain, combinations
import numbers
import warnings
from itertools import combinations_with_replacement as combinations_w_r

import numpy as cpu_np
import cupy as np
from cupyx.scipy import sparse
from scipy import stats
from scipy import optimize
from scipy.special import boxcox

from ..utils.skl_dependencies import BaseEstimator, TransformerMixin
from cuml.internals.mixins import AllowNaNTagMixin, SparseInputTagMixin, \
StatelessTagMixin
from ....thirdparty_adapters import check_array
from ..utils.extmath import row_norms
from ..utils.extmath import _incremental_mean_and_var
from ..utils.validation import (check_is_fitted, FLOAT_DTYPES,
check_random_state)

from ..utils.sparsefuncs import (inplace_column_scale,
min_max_axis,
mean_variance_axis)
from cuml.internals.safe_imports import cpu_only_import
cpu_np = cpu_only_import('numpy')
np = gpu_only_import('cupy')
sparse = gpu_only_import_from('cupyx.scipy', 'sparse')
stats = cpu_only_import_from('scipy', 'stats')

from ....thirdparty_adapters.sparsefuncs_fast import \
(inplace_csr_row_normalize_l1, inplace_csr_row_normalize_l2,
csr_polynomial_expansion)

from ....internals.array import CumlArray
from ....internals.array_sparse import SparseCumlArray
from ....common.array_descriptor import CumlArrayDescriptor
from ....internals import api_return_generic
from ....internals import _deprecate_pos_args

from ....internals.memory_utils import using_output_type

BOUNDS_THRESHOLD = 1e-7

Expand Down Expand Up @@ -723,7 +723,7 @@ def partial_fit(self, X, y=None) -> "StandardScaler":

if not hasattr(self, 'n_samples_seen_'):
self.n_samples_seen_ = (
X.shape[0] - counts_nan).astype(np.int64, copy=False)
X.shape[0] - counts_nan).astype(np.int64, copy=False)

if self.with_std:
# First pass
Expand Down
32 changes: 16 additions & 16 deletions python/cuml/_thirdparty/sklearn/preprocessing/_discretization.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,23 @@
# Authors mentioned above do not endorse or promote this production.


import numbers
import cupy as np
import numpy as cpu_np
import warnings

from cuml.preprocessing import OneHotEncoder
from cuml.cluster import KMeans

from ..utils.skl_dependencies import BaseEstimator, TransformerMixin
from cuml.internals.mixins import SparseInputTagMixin
from ..utils.validation import check_is_fitted
from ..utils.validation import FLOAT_DTYPES
from ....thirdparty_adapters import check_array
from ....internals.array_sparse import SparseCumlArray
from ....common.array_descriptor import CumlArrayDescriptor
from ....internals.memory_utils import using_output_type
from ....internals import _deprecate_pos_args
from ....internals.memory_utils import using_output_type
from ....common.array_descriptor import CumlArrayDescriptor
from ....internals.array_sparse import SparseCumlArray
from ....thirdparty_adapters import check_array
from ..utils.validation import FLOAT_DTYPES
from ..utils.validation import check_is_fitted
from cuml.internals.mixins import SparseInputTagMixin
from ..utils.skl_dependencies import BaseEstimator, TransformerMixin
from cuml.cluster import KMeans
from cuml.preprocessing import OneHotEncoder
import warnings
from cuml.internals.safe_imports import cpu_only_import
import numbers
from cuml.internals.safe_imports import gpu_only_import
np = gpu_only_import('cupy')
cpu_np = cpu_only_import('numpy')


def digitize(x, bins):
Expand Down
36 changes: 19 additions & 17 deletions python/cuml/_thirdparty/sklearn/preprocessing/_imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,28 @@
# Authors mentioned above do not endorse or promote this production.


import numbers
import warnings

import numpy
import cupy as np
import cuml
from cupyx.scipy import sparse

from ....internals import _deprecate_pos_args
from ....common.array_descriptor import CumlArrayDescriptor
from ....internals.array_sparse import SparseCumlArray
from ..utils.validation import FLOAT_DTYPES
from ..utils.validation import check_is_fitted
from cuml.internals.mixins import AllowNaNTagMixin, SparseInputTagMixin, \
StringInputTagMixin
from ..utils.skl_dependencies import BaseEstimator, TransformerMixin
from ....thirdparty_adapters import (_get_mask,
_masked_column_median,
_masked_column_mean,
_masked_column_mode)
from ..utils.skl_dependencies import BaseEstimator, TransformerMixin
from cuml.internals.mixins import AllowNaNTagMixin, SparseInputTagMixin, \
StringInputTagMixin
from ..utils.validation import check_is_fitted
from ..utils.validation import FLOAT_DTYPES
from ....internals.array_sparse import SparseCumlArray
from ....common.array_descriptor import CumlArrayDescriptor
from ....internals import _deprecate_pos_args
from cuml.internals.safe_imports import gpu_only_import_from
import cuml
from cuml.internals.safe_imports import gpu_only_import
import numbers
import warnings

from cuml.internals.safe_imports import cpu_only_import
numpy = cpu_only_import('numpy')
np = gpu_only_import('cupy')
sparse = gpu_only_import_from('cupyx.scipy', 'sparse')


def is_scalar_nan(x):
Expand Down Expand Up @@ -136,7 +138,7 @@ def _concatenate_indicator(self, X_imputed, X_indicator):
"Data from the missing indicator are not provided. Call "
"_fit_indicator and _transform_indicator in the imputer "
"implementation."
)
)

return hstack((X_imputed, X_indicator))

Expand Down
8 changes: 5 additions & 3 deletions python/cuml/_thirdparty/sklearn/utils/extmath.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
# Authors mentioned above do not endorse or promote this production.


import cupy as np
import cupyx
from cupyx.scipy import sparse
from cuml.internals.safe_imports import gpu_only_import_from
from cuml.internals.safe_imports import gpu_only_import
np = gpu_only_import('cupy')
cupyx = gpu_only_import('cupyx')
sparse = gpu_only_import_from('cupyx.scipy', 'sparse')


def row_norms(X, squared=False):
Expand Down
13 changes: 8 additions & 5 deletions python/cuml/_thirdparty/sklearn/utils/sparsefuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@
# Authors mentioned above do not endorse or promote this production.


from scipy import sparse as cpu_sp
from cupyx.scipy import sparse as gpu_sp
import cupy as np
import numpy as cpu_np

from ....thirdparty_adapters.sparsefuncs_fast import (
csr_mean_variance_axis0 as _csr_mean_var_axis0,
csc_mean_variance_axis0 as _csc_mean_var_axis0)
from cuml.internals.safe_imports import cpu_only_import
from cuml.internals.safe_imports import gpu_only_import
from cuml.internals.safe_imports import gpu_only_import_from
from cuml.internals.safe_imports import cpu_only_import_from
cpu_sp = cpu_only_import_from('scipy', 'sparse')
gpu_sp = gpu_only_import_from('cupyx.scipy', 'sparse')
np = gpu_only_import('cupy')
cpu_np = cpu_only_import('numpy')


def iscsr(X):
Expand Down
15 changes: 8 additions & 7 deletions python/cuml/_thirdparty/sklearn/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@
# Authors mentioned above do not endorse or promote this production.


import numbers
import numpy as np
import cupy as cp
import cupyx.scipy.sparse as sp
from inspect import isclass

from ....common.exceptions import NotFittedError
from ....thirdparty_adapters import check_array
from ....common.exceptions import NotFittedError
from inspect import isclass
from cuml.internals.safe_imports import gpu_only_import
import numbers
from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')
cp = gpu_only_import('cupy')
sp = gpu_only_import('cupyx.scipy.sparse')


FLOAT_DTYPES = (np.float64, np.float32, np.float16)
Expand Down
Loading

0 comments on commit 199c7d4

Please sign in to comment.