diff --git a/cpp/src/hdbscan/detail/soft_clustering.cuh b/cpp/src/hdbscan/detail/soft_clustering.cuh index 10085d0e49..2ace77f5d0 100644 --- a/cpp/src/hdbscan/detail/soft_clustering.cuh +++ b/cpp/src/hdbscan/detail/soft_clustering.cuh @@ -105,7 +105,7 @@ void dist_membership_vector(const raft::handle_t& handle, distance( handle, query + batch_offset * n, exemplars_dense.data(), dist.data(), samples_per_batch, n_exemplars, n, true); break; - default: ASSERT(false, "Incorrect metric passed!"); + default: RAFT_EXPECTS(false, "Incorrect metric passed!"); } // compute the minimum distances to exemplars of each cluster @@ -396,7 +396,8 @@ void all_points_membership_vectors(const raft::handle_t& handle, size_t n = prediction_data.n_cols; if (batch_size > m) batch_size = m; - RAFT_EXPECTS(0 < batch_size && batch_size <= m, "Invalid batch_size. batch_size should be > 0 and <= the number of samples in the training data"); + RAFT_EXPECTS(0 < batch_size && batch_size <= m, + "Invalid batch_size. batch_size should be > 0 and <= the number of samples in the training data"); auto parents = condensed_tree.get_parents(); auto children = condensed_tree.get_children(); @@ -522,7 +523,8 @@ void membership_vector(const raft::handle_t& handle, value_t* lambdas = condensed_tree.get_lambdas(); if (batch_size > n_prediction_points) batch_size = n_prediction_points; - RAFT_EXPECTS(0 < batch_size && batch_size <= n_prediction_points, "Invalid batch_size. batch_size should be > 0 and <= the number of samples in the training data"); + RAFT_EXPECTS(0 < batch_size && batch_size <= n_prediction_points, + "Invalid batch_size. batch_size should be > 0 and <= the number of prediction points"); rmm::device_uvector dist_membership_vec(n_prediction_points * n_selected_clusters, stream); diff --git a/python/cuml/cluster/hdbscan/prediction.pyx b/python/cuml/cluster/hdbscan/prediction.pyx index c3db0be8d7..6a6803b186 100644 --- a/python/cuml/cluster/hdbscan/prediction.pyx +++ b/python/cuml/cluster/hdbscan/prediction.pyx @@ -146,12 +146,12 @@ def all_points_membership_vectors(clusterer, batch_size=4096): had ``prediction_data=True`` set. batch_size : int, optional, default=min(4096, n_rows) - Lowers memory requirement by computing distance-based membership in - smaller batches of points in the training data. Batch size of 0 uses - all of the training points, batch size of 1000 computes distances for - 1000 points at a time. The default batch_size is 4096. If the number - of rows in the original dataset is less than 4096, this defaults to - the number of rows. + Lowers memory requirement by computing distance-based membership + in smaller batches of points in the training data. A batch size + of 1000 computes distance based memberships for 1000 points at a + time. The default batch size is 4096. If the number of rows in + the original dataset is less than 4096, this defaults to the + number of rows. Returns ------- @@ -251,12 +251,12 @@ def membership_vector(clusterer, points_to_predict, batch_size=4096, convert_dty clusterer was fit. batch_size : int, optional, default=min(4096, n_points_to_predict) - Lowers memory requirement by computing distance-based membership in - smaller batches of points in the training data. Batch size of 0 uses - all of the training points, batch size of 1000 computes distances for - 1000 points at a time. The default batch_size is 4096. If the number - of rows in the original dataset is less than 4096, this defaults to - the number of rows. + Lowers memory requirement by computing distance-based membership + in smaller batches of points in the prediction data.
A batch size + of 1000 computes distance based memberships for 1000 points at a + time. The default batch_size is 4096. If the number of rows in + the prediction dataset is less than 4096, this defaults to the + number of rows. Returns -------