Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hiding symbols from shared object libraries #1723

Closed
wants to merge 10 commits into from
18 changes: 9 additions & 9 deletions cpp/include/raft/cluster/detail/agglomerative.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,9 @@ void build_dendrogram_host(raft::resources const& handle,
}

template <typename value_idx>
__global__ void write_levels_kernel(const value_idx* children,
value_idx* parents,
value_idx n_vertices)
_RAFT_KERNEL void write_levels_kernel(const value_idx* children,
value_idx* parents,
value_idx n_vertices)
{
value_idx tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < n_vertices) {
Expand All @@ -179,12 +179,12 @@ __global__ void write_levels_kernel(const value_idx* children,
* @param labels
*/
template <typename value_idx>
__global__ void inherit_labels(const value_idx* children,
const value_idx* levels,
std::size_t n_leaves,
value_idx* labels,
int cut_level,
value_idx n_vertices)
_RAFT_KERNEL void inherit_labels(const value_idx* children,
const value_idx* levels,
std::size_t n_leaves,
value_idx* labels,
int cut_level,
value_idx n_vertices)
{
value_idx tid = blockDim.x * blockIdx.x + threadIdx.x;

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/cluster/detail/connectivities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ struct distance_graph_impl<raft::cluster::LinkageDistance::KNN_GRAPH, value_idx,
};

template <typename value_idx>
__global__ void fill_indices2(value_idx* indices, size_t m, size_t nnz)
_RAFT_KERNEL void fill_indices2(value_idx* indices, size_t m, size_t nnz)
{
value_idx tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid >= nnz) return;
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/cluster/detail/kmeans_balanced.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ template <uint32_t BlockDimY,
typename LabelT,
typename CounterT,
typename MappingOpT>
__global__ void __launch_bounds__((WarpSize * BlockDimY))
_RAFT_KERNEL void __launch_bounds__((WarpSize * BlockDimY))
adjust_centers_kernel(MathT* centers, // [n_clusters, dim]
IdxT n_clusters,
IdxT dim,
Expand Down
46 changes: 23 additions & 23 deletions cpp/include/raft/cluster/detail/kmeans_deprecated.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,12 @@ constexpr unsigned int BSIZE_DIV_WSIZE = (BLOCK_SIZE / WARP_SIZE);
* initialized to zero.
*/
template <typename index_type_t, typename value_type_t>
static __global__ void computeDistances(index_type_t n,
index_type_t d,
index_type_t k,
const value_type_t* __restrict__ obs,
const value_type_t* __restrict__ centroids,
value_type_t* __restrict__ dists)
static _RAFT_KERNEL void computeDistances(index_type_t n,
index_type_t d,
index_type_t k,
const value_type_t* __restrict__ obs,
const value_type_t* __restrict__ centroids,
value_type_t* __restrict__ dists)
{
// Loop index
index_type_t i;
Expand Down Expand Up @@ -173,11 +173,11 @@ static __global__ void computeDistances(index_type_t n,
* cluster. Entries must be initialized to zero.
*/
template <typename index_type_t, typename value_type_t>
static __global__ void minDistances(index_type_t n,
index_type_t k,
value_type_t* __restrict__ dists,
index_type_t* __restrict__ codes,
index_type_t* __restrict__ clusterSizes)
static _RAFT_KERNEL void minDistances(index_type_t n,
index_type_t k,
value_type_t* __restrict__ dists,
index_type_t* __restrict__ codes,
index_type_t* __restrict__ clusterSizes)
{
// Loop index
index_type_t i, j;
Expand Down Expand Up @@ -233,11 +233,11 @@ static __global__ void minDistances(index_type_t n,
* @param code_new Index associated with new centroid.
*/
template <typename index_type_t, typename value_type_t>
static __global__ void minDistances2(index_type_t n,
value_type_t* __restrict__ dists_old,
const value_type_t* __restrict__ dists_new,
index_type_t* __restrict__ codes_old,
index_type_t code_new)
static _RAFT_KERNEL void minDistances2(index_type_t n,
value_type_t* __restrict__ dists_old,
const value_type_t* __restrict__ dists_new,
index_type_t* __restrict__ codes_old,
index_type_t code_new)
{
// Loop index
index_type_t i = threadIdx.x + blockIdx.x * blockDim.x;
Expand Down Expand Up @@ -275,9 +275,9 @@ static __global__ void minDistances2(index_type_t n,
* cluster. Entries must be initialized to zero.
*/
template <typename index_type_t>
static __global__ void computeClusterSizes(index_type_t n,
const index_type_t* __restrict__ codes,
index_type_t* __restrict__ clusterSizes)
static _RAFT_KERNEL void computeClusterSizes(index_type_t n,
const index_type_t* __restrict__ codes,
index_type_t* __restrict__ clusterSizes)
{
index_type_t i = threadIdx.x + blockIdx.x * blockDim.x;
while (i < n) {
Expand Down Expand Up @@ -308,10 +308,10 @@ static __global__ void computeClusterSizes(index_type_t n,
* column is the mean position of a cluster).
*/
template <typename index_type_t, typename value_type_t>
static __global__ void divideCentroids(index_type_t d,
index_type_t k,
const index_type_t* __restrict__ clusterSizes,
value_type_t* __restrict__ centroids)
static _RAFT_KERNEL void divideCentroids(index_type_t d,
index_type_t k,
const index_type_t* __restrict__ clusterSizes,
value_type_t* __restrict__ centroids)
{
// Global indices
index_type_t gidx, gidy;
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/raft/common/detail/scatter.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,7 +22,7 @@
namespace raft::detail {

template <typename DataT, int VecLen, typename Lambda, typename IdxT>
__global__ void scatterKernel(DataT* out, const DataT* in, const IdxT* idx, IdxT len, Lambda op)
_RAFT_KERNEL void scatterKernel(DataT* out, const DataT* in, const IdxT* idx, IdxT len, Lambda op)
{
typedef TxN_t<DataT, VecLen> DataVec;
typedef TxN_t<IdxT, VecLen> IdxVec;
Expand Down
8 changes: 8 additions & 0 deletions cpp/include/raft/core/detail/macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@
#define RAFT_DEVICE_INLINE_FUNCTION _RAFT_DEVICE _RAFT_FORCEINLINE
#endif

#if defined(_RAFT_HAS_CUDA)
// Kernel declaration specifier used throughout RAFT in place of a bare
// `__global__`. Expands to `static __global__` so every kernel defined in a
// header gets internal linkage in each translation unit — keeping the kernel
// symbols out of the shared library's exported symbol table (the purpose of
// this change set, "Hiding symbols from shared object libraries").
#define _RAFT_KERNEL static __global__
#else
// Non-CUDA builds: expands to nothing, so `_RAFT_KERNEL void f(...)` parses as
// a plain function declaration with default (external) linkage.
#define _RAFT_KERNEL
#endif

// NOTE(review): defined empty and not used anywhere in the visible diff —
// presumably a placeholder for a future function-linkage specifier; confirm.
#define _RAFT_FUNC

// The RAFT_INLINE_CONDITIONAL is a conditional inline specifier that removes
// the inline specification when RAFT_COMPILED is defined.
//
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/distance/detail/compress_to_bits.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace raft::distance::detail {
* Note: the division (`/`) is a ceilDiv.
*/
template <typename T = uint64_t, typename = std::enable_if_t<std::is_integral<T>::value>>
__global__ void compress_to_bits_kernel(
_RAFT_KERNEL void compress_to_bits_kernel(
raft::device_matrix_view<const bool, int, raft::layout_c_contiguous> in,
raft::device_matrix_view<T, int, raft::layout_c_contiguous> out)
{
Expand Down
30 changes: 15 additions & 15 deletions cpp/include/raft/distance/detail/fused_l2_nn.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ struct MinReduceOpImpl {
};

template <typename DataT, typename OutT, typename IdxT, typename ReduceOpT>
__global__ void initKernel(OutT* min, IdxT m, DataT maxVal, ReduceOpT redOp)
_RAFT_KERNEL void initKernel(OutT* min, IdxT m, DataT maxVal, ReduceOpT redOp)
{
auto tid = IdxT(blockIdx.x) * blockDim.x + threadIdx.x;
if (tid < m) { redOp.init(min + tid, maxVal); }
Expand Down Expand Up @@ -139,20 +139,20 @@ template <typename DataT,
typename KVPReduceOpT,
typename OpT,
typename FinalLambda>
__global__ __launch_bounds__(P::Nthreads, 2) void fusedL2NNkernel(OutT* min,
const DataT* x,
const DataT* y,
const DataT* xn,
const DataT* yn,
IdxT m,
IdxT n,
IdxT k,
DataT maxVal,
int* mutex,
ReduceOpT redOp,
KVPReduceOpT pairRedOp,
OpT distance_op,
FinalLambda fin_op)
_RAFT_KERNEL __launch_bounds__(P::Nthreads, 2) void fusedL2NNkernel(OutT* min,
const DataT* x,
const DataT* y,
const DataT* xn,
const DataT* yn,
IdxT m,
IdxT n,
IdxT k,
DataT maxVal,
int* mutex,
ReduceOpT redOp,
KVPReduceOpT pairRedOp,
OpT distance_op,
FinalLambda fin_op)
{
// compile only if below non-ampere arch.
#if __CUDA_ARCH__ < 800
Expand Down
10 changes: 5 additions & 5 deletions cpp/include/raft/distance/detail/kernels/kernel_matrices.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace raft::distance::kernels::detail {
* @param offset
*/
template <typename math_t, typename exp_t>
__global__ void polynomial_kernel_nopad(
_RAFT_KERNEL void polynomial_kernel_nopad(
math_t* inout, size_t len, exp_t exponent, math_t gain, math_t offset)
{
for (size_t tid = threadIdx.x + blockIdx.x * blockDim.x; tid < len;
Expand All @@ -56,7 +56,7 @@ __global__ void polynomial_kernel_nopad(
* @param offset
*/
template <typename math_t, typename exp_t>
__global__ void polynomial_kernel(
_RAFT_KERNEL void polynomial_kernel(
math_t* inout, int ld, int rows, int cols, exp_t exponent, math_t gain, math_t offset)
{
for (size_t tidy = threadIdx.y + blockIdx.y * blockDim.y; tidy < cols;
Expand All @@ -75,7 +75,7 @@ __global__ void polynomial_kernel(
* @param offset
*/
template <typename math_t>
__global__ void tanh_kernel_nopad(math_t* inout, size_t len, math_t gain, math_t offset)
_RAFT_KERNEL void tanh_kernel_nopad(math_t* inout, size_t len, math_t gain, math_t offset)
{
for (size_t tid = threadIdx.x + blockIdx.x * blockDim.x; tid < len;
tid += blockDim.x * gridDim.x) {
Expand All @@ -93,7 +93,7 @@ __global__ void tanh_kernel_nopad(math_t* inout, size_t len, math_t gain, math_t
* @param offset
*/
template <typename math_t>
__global__ void tanh_kernel(math_t* inout, int ld, int rows, int cols, math_t gain, math_t offset)
_RAFT_KERNEL void tanh_kernel(math_t* inout, int ld, int rows, int cols, math_t gain, math_t offset)
{
for (size_t tidy = threadIdx.y + blockIdx.y * blockDim.y; tidy < cols;
tidy += blockDim.y * gridDim.y)
Expand Down Expand Up @@ -121,7 +121,7 @@ __global__ void tanh_kernel(math_t* inout, int ld, int rows, int cols, math_t ga
* @param gain
*/
template <typename math_t>
__global__ void rbf_kernel_expanded(
_RAFT_KERNEL void rbf_kernel_expanded(
math_t* inout, int ld, int rows, int cols, math_t* norm_x, math_t* norm_y, math_t gain)
{
for (size_t tidy = threadIdx.y + blockIdx.y * blockDim.y; tidy < cols;
Expand Down
36 changes: 18 additions & 18 deletions cpp/include/raft/distance/detail/masked_nn.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,24 @@ template <typename DataT,
typename KVPReduceOpT,
typename CoreLambda,
typename FinalLambda>
__global__ __launch_bounds__(P::Nthreads, 2) void masked_l2_nn_kernel(OutT* min,
const DataT* x,
const DataT* y,
const DataT* xn,
const DataT* yn,
const uint64_t* adj,
const IdxT* group_idxs,
IdxT num_groups,
IdxT m,
IdxT n,
IdxT k,
bool sqrt,
DataT maxVal,
int* mutex,
ReduceOpT redOp,
KVPReduceOpT pairRedOp,
CoreLambda core_op,
FinalLambda fin_op)
_RAFT_KERNEL __launch_bounds__(P::Nthreads, 2) void masked_l2_nn_kernel(OutT* min,
const DataT* x,
const DataT* y,
const DataT* xn,
const DataT* yn,
const uint64_t* adj,
const IdxT* group_idxs,
IdxT num_groups,
IdxT m,
IdxT n,
IdxT k,
bool sqrt,
DataT maxVal,
int* mutex,
ReduceOpT redOp,
KVPReduceOpT pairRedOp,
CoreLambda core_op,
FinalLambda fin_op)
{
extern __shared__ char smem[];

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ template <typename Policy,
typename DataT,
typename OutT,
typename FinOpT>
__global__ __launch_bounds__(Policy::Nthreads, 2) void pairwise_matrix_kernel(
_RAFT_KERNEL __launch_bounds__(Policy::Nthreads, 2) void pairwise_matrix_kernel(
OpT distance_op, pairwise_matrix_params<IdxT, DataT, OutT, FinOpT> params)
{
// Early exit to minimize the size of the kernel when it is not supposed to be compiled.
Expand Down
16 changes: 8 additions & 8 deletions cpp/include/raft/label/detail/classlabels.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -119,13 +119,13 @@ void getOvrlabels(
// +/-1, return array with the new class labels and corresponding indices.

template <typename Type, int TPB_X, typename Lambda>
__global__ void map_label_kernel(Type* map_ids,
size_t N_labels,
Type* in,
Type* out,
size_t N,
Lambda filter_op,
bool zero_based = false)
_RAFT_KERNEL void map_label_kernel(Type* map_ids,
size_t N_labels,
Type* in,
Type* out,
size_t N,
Lambda filter_op,
bool zero_based = false)
{
int tid = threadIdx.x + blockIdx.x * TPB_X;
if (tid < N) {
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/raft/label/detail/merge_labels.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,7 +32,7 @@ namespace detail {
* For an additional cost we can build the graph with edges
* E={(A[i], B[i]) | M[i]=1} and make this step faster */
template <typename value_idx, int TPB_X = 256>
__global__ void __launch_bounds__(TPB_X)
_RAFT_KERNEL void __launch_bounds__(TPB_X)
propagate_label_kernel(const value_idx* __restrict__ labels_a,
const value_idx* __restrict__ labels_b,
value_idx* __restrict__ R,
Expand Down Expand Up @@ -65,7 +65,7 @@ __global__ void __launch_bounds__(TPB_X)
}

template <typename value_idx, int TPB_X = 256>
__global__ void __launch_bounds__(TPB_X)
_RAFT_KERNEL void __launch_bounds__(TPB_X)
reassign_label_kernel(value_idx* __restrict__ labels_a,
const value_idx* __restrict__ labels_b,
const value_idx* __restrict__ R,
Expand Down
10 changes: 5 additions & 5 deletions cpp/include/raft/linalg/detail/add.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -38,10 +38,10 @@ void add(OutT* out, const InT* in1, const InT* in2, IdxType len, cudaStream_t st
}

template <class InT, typename IdxType, typename OutT = InT>
__global__ void add_dev_scalar_kernel(OutT* outDev,
const InT* inDev,
const InT* singleScalarDev,
IdxType len)
_RAFT_KERNEL void add_dev_scalar_kernel(OutT* outDev,
const InT* inDev,
const InT* singleScalarDev,
IdxType len)
{
IdxType i = ((IdxType)blockIdx.x * (IdxType)blockDim.x) + threadIdx.x;
if (i < len) { outDev[i] = inDev[i] + *singleScalarDev; }
Expand Down
Loading