Merge pull request #5911 from dantegd/branch-24.08-merge-24.06

Fix conflict of forward-merge #5905 of branch-24.06 into branch-24.08
rapidsai · Jun 3, 2024 · 9ca9f12 · 9ca9f12
2 parents 1560886 + 5e13cbf
commit 9ca9f12
Show file tree

Hide file tree

Showing 41 changed files with 387 additions and 169 deletions.
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
@@ -7,6 +7,11 @@ FROM ${BASE} as pip-base
 
 ENV DEFAULT_VIRTUAL_ENV=rapids
 
+RUN apt update -y \
+ && DEBIAN_FRONTEND=noninteractive apt install -y \
+    libblas-dev liblapack-dev \
+ && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;
+
 FROM ${BASE} as conda-base
 
 ENV DEFAULT_CONDA_ENV=rapids

diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {

diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,20 +5,17 @@
     "args": {
       "CUDA": "11.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.08-cpp-cuda11.8-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.08-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.8": {
-      "version": "1.15.0"
-    },
-    "ghcr.io/rapidsai/devcontainers/features/cuda:24.8": {
+    "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
       "version": "11.8",
       "installcuBLAS": true,
       "installcuSOLVER": true,
@@ -28,7 +25,6 @@
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {}
   },
   "overrideFeatureInstallOrder": [
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],

diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json
@@ -11,7 +11,7 @@
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.2-conda"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.2-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {

diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json
@@ -5,20 +5,17 @@
     "args": {
       "CUDA": "12.2",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.08-cpp-cuda12.2-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.08-cpp-cuda12.2-ucx1.15.0-openmpi-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.2-pip"
+    "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.2-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/ucx:24.8": {
-      "version": "1.15.0"
-    },
-    "ghcr.io/rapidsai/devcontainers/features/cuda:24.8": {
+    "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": {
       "version": "12.2",
       "installcuBLAS": true,
       "installcuSOLVER": true,
@@ -28,7 +25,6 @@
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {}
   },
   "overrideFeatureInstallOrder": [
-    "ghcr.io/rapidsai/devcontainers/features/ucx",
     "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -33,7 +33,7 @@ jobs:
     with:
       enable_check_generated_files: false
       ignored_pr_jobs: >-
-        conda-python-tests-cudf-pandas-integration
+        optional-job-conda-python-tests-cudf-pandas-integration
   clang-tidy:
     needs: checks
     secrets: inherit
@@ -77,11 +77,12 @@ jobs:
     with:
       build_type: pull-request
       script: "ci/test_python_singlegpu.sh"
-  conda-python-tests-cudf-pandas-integration:
+  optional-job-conda-python-tests-cudf-pandas-integration:
     needs: conda-python-build
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08
     with:
+      matrix_filter: map(select(.ARCH == "amd64"))
       build_type: pull-request
       script: "ci/test_python_integration.sh"
   conda-python-tests-dask:

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -63,7 +63,7 @@ dependencies:
 - recommonmark
 - rmm==24.8.*
 - scikit-build-core>=0.7.0
-- scikit-learn==1.2
+- scikit-learn==1.5
 - scipy>=1.8.0
 - seaborn
 - sphinx-copybutton

diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -59,7 +59,7 @@ dependencies:
 - recommonmark
 - rmm==24.8.*
 - scikit-build-core>=0.7.0
-- scikit-learn==1.2
+- scikit-learn==1.5
 - scipy>=1.8.0
 - seaborn
 - sphinx-copybutton

diff --git a/cpp/bench/CMakeLists.txt b/cpp/bench/CMakeLists.txt
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -61,6 +61,10 @@ if(BUILD_CUML_BENCH)
   set_target_properties(
     ${CUML_CPP_BENCH_TARGET}
     PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib"
+               CXX_STANDARD                      17
+               CXX_STANDARD_REQUIRED             ON
+               CUDA_STANDARD                     17
+               CUDA_STANDARD_REQUIRED            ON
   )
 
   install(

diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,8 +31,8 @@ endif()
 list(APPEND CUML_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations,-Wno-error=sign-compare)
 
 if(DISABLE_DEPRECATION_WARNINGS)
-    list(APPEND CUML_CXX_FLAGS -Wno-deprecated-declarations)
-    list(APPEND CUML_CUDA_FLAGS -Wno-deprecated-declarations -Xcompiler=-Wno-deprecated-declarations)
+    list(APPEND CUML_CXX_FLAGS -Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
+    list(APPEND CUML_CUDA_FLAGS -Wno-deprecated-declarations -Xcompiler=-Wno-deprecated-declarations -DRAFT_HIDE_DEPRECATION_WARNINGS)
 endif()
 
 # make sure we produce smallest binary size

diff --git a/cpp/src/arima/batched_arima.cu b/cpp/src/arima/batched_arima.cu
@@ -84,7 +84,7 @@ struct is_missing {
   typedef T argument_type;
   typedef T result_type;
 
-  __thrust_exec_check_disable__ __device__ const T operator()(const T& x) const { return isnan(x); }
+  __device__ const T operator()(const T& x) const { return isnan(x); }
 };  // end is_missing
 
 bool detect_missing(raft::handle_t& handle, const double* d_y, int n_elem)

diff --git a/cpp/src/hdbscan/condensed_hierarchy.cu b/cpp/src/hdbscan/condensed_hierarchy.cu
@@ -26,6 +26,7 @@
 #include <rmm/exec_policy.hpp>
 
 #include <cub/cub.cuh>
+#include <cuda/functional>
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
 #include <thrust/execution_policy.h>
@@ -156,7 +157,7 @@ void CondensedHierarchy<value_idx, value_t>::condense(value_idx* full_parents,
     thrust::cuda::par.on(stream),
     full_sizes,
     full_sizes + size,
-    [=] __device__(value_idx a) { return a != -1; },
+    cuda::proclaim_return_type<bool>([=] __device__(value_idx a) -> bool { return a != -1; }),
     0,
     thrust::plus<value_idx>());
 

diff --git a/cpp/src/hdbscan/detail/select.cuh b/cpp/src/hdbscan/detail/select.cuh
@@ -216,13 +216,14 @@ void excess_of_mass(const raft::handle_t& handle,
     value_t subtree_stability = 0.0;
 
     if (indptr_h[node + 1] - indptr_h[node] > 0) {
-      subtree_stability = thrust::transform_reduce(
-        exec_policy,
-        children + indptr_h[node],
-        children + indptr_h[node + 1],
-        [=] __device__(value_idx a) { return stability[a]; },
-        0.0,
-        thrust::plus<value_t>());
+      subtree_stability =
+        thrust::transform_reduce(exec_policy,
+                                 children + indptr_h[node],
+                                 children + indptr_h[node + 1],
+                                 cuda::proclaim_return_type<value_t>(
+                                   [=] __device__(value_idx a) -> value_t { return stability[a]; }),
+                                 0.0,
+                                 thrust::plus<value_t>());
     }
 
     if (subtree_stability > node_stability || cluster_sizes_h[node] > max_cluster_size) {

diff --git a/cpp/src/hdbscan/detail/utils.h b/cpp/src/hdbscan/detail/utils.h
@@ -114,7 +114,7 @@ Common::CondensedHierarchy<value_idx, value_t> make_cluster_tree(
     thrust_policy,
     sizes,
     sizes + condensed_tree.get_n_edges(),
-    [=] __device__(value_idx a) { return a > 1; },
+    cuda::proclaim_return_type<bool>([=] __device__(value_idx a) -> bool { return a > 1; }),
     0,
     thrust::plus<value_idx>());
 

diff --git a/cpp/src/tsne/distances.cuh b/cpp/src/tsne/distances.cuh
@@ -33,6 +33,7 @@
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/functional>
 #include <thrust/functional.h>
 #include <thrust/transform_reduce.h>
 
@@ -162,7 +163,8 @@ void get_distances(const raft::handle_t& handle,
 template <typename value_t>
 void normalize_distances(value_t* distances, const size_t total_nn, cudaStream_t stream)
 {
-  auto abs_f      = [] __device__(const value_t& x) { return abs(x); };
+  auto abs_f = cuda::proclaim_return_type<value_t>(
+    [] __device__(const value_t& x) -> value_t { return abs(x); });
   value_t maxNorm = thrust::transform_reduce(rmm::exec_policy(stream),
                                              distances,
                                              distances + total_nn,

diff --git a/cpp/src/tsne/utils.cuh b/cpp/src/tsne/utils.cuh
@@ -39,6 +39,7 @@
 #include <sys/time.h>
 #include <unistd.h>
 
+#include <cfloat>
 #include <chrono>
 #include <iostream>
 

diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
@@ -85,6 +85,10 @@ function(ConfigureTest)
   set_target_properties(
     ${_CUML_TEST_NAME}
     PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib"
+               CXX_STANDARD                      17
+               CXX_STANDARD_REQUIRED             ON
+               CUDA_STANDARD                     17
+               CUDA_STANDARD_REQUIRED            ON
   )
 
   set(_CUML_TEST_COMPONENT_NAME testing)

diff --git a/dependencies.yaml b/dependencies.yaml
@@ -356,7 +356,7 @@ dependencies:
           # https://github.com/pydata/pydata-sphinx-theme/issues/1539
           - pydata-sphinx-theme!=0.14.2
           - recommonmark
-          - &scikit_learn scikit-learn==1.2
+          - &scikit_learn scikit-learn==1.5
           - sphinx<6
           - sphinx-copybutton
           - sphinx-markdown-tables

diff --git a/python/cuml/_thirdparty/sklearn/preprocessing/_data.py b/python/cuml/_thirdparty/sklearn/preprocessing/_data.py
@@ -14,6 +14,19 @@
 # This code is under BSD 3 clause license.
 # Authors mentioned above do not endorse or promote this production.
 
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 from ....internals.memory_utils import using_output_type
 from ....internals import _deprecate_pos_args
@@ -32,6 +45,7 @@
 from ..utils.extmath import _incremental_mean_and_var
 from ..utils.extmath import row_norms
 from ....thirdparty_adapters import check_array
+from sklearn.utils._indexing import resample
 from cuml.internals.mixins import AllowNaNTagMixin, SparseInputTagMixin, \
     StatelessTagMixin
 from ..utils.skl_dependencies import BaseEstimator, TransformerMixin
@@ -2284,17 +2298,14 @@ def _dense_fit(self, X, random_state):
         n_samples, n_features = X.shape
         references = np.asnumpy(self.references_ * 100)
 
-        self.quantiles_ = []
-        for col in X.T:
-            if self.subsample < n_samples:
-                subsample_idx = random_state.choice(n_samples,
-                                                    size=self.subsample,
-                                                    replace=False)
-                col = col.take(subsample_idx)
-            self.quantiles_.append(
-                cpu_np.nanpercentile(np.asnumpy(col), references)
+        X = np.asnumpy(X)
+        if self.subsample is not None and self.subsample < n_samples:
+            # Take a subsample of `X`
+            X = resample(
+                X, replace=False, n_samples=self.subsample, random_state=random_state
             )
-        self.quantiles_ = cpu_np.transpose(self.quantiles_)
+
+        self.quantiles_ = cpu_np.nanpercentile(X, references, axis=0)
         # Due to floating-point precision error in `np.nanpercentile`,
         # make sure that quantiles are monotonically increasing.
         # Upstream issue in numpy:

diff --git a/python/cuml/_thirdparty/sklearn/preprocessing/_discretization.py b/python/cuml/_thirdparty/sklearn/preprocessing/_discretization.py
@@ -10,6 +10,20 @@
 # This code is under BSD 3 clause license.
 # Authors mentioned above do not endorse or promote this production.
 
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 
 from ....internals import _deprecate_pos_args
 from ....internals.memory_utils import using_output_type
@@ -240,7 +254,7 @@ def fit(self, X, y=None) -> "KBinsDiscretizer":
         if 'onehot' in self.encode:
             self._encoder = OneHotEncoder(
                 categories=np.array([np.arange(i) for i in self.n_bins_]),
-                sparse=self.encode == 'onehot', output_type='cupy')
+                sparse_output=self.encode == 'onehot', output_type='cupy')
             # Fit the OneHotEncoder with toy datasets
             # so that it's ready for use after the KBinsDiscretizer is fitted
             self._encoder.fit(np.zeros((1, len(self.n_bins_)), dtype=int))