From 078d9a33a188d7ff2ed821f0df7667bf3c3babd5 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 13 Nov 2024 20:04:43 +0100 Subject: [PATCH] Use `group_load_store` compiler extension (#2123) * Use group_load_store compiler extension * Tune description of dpnp.bincount --- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 33 ++++++++++++++------- dpnp/dpnp_iface_histograms.py | 4 +-- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index b65d72a4024..7d1bdb738ff 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -41,6 +41,9 @@ using dpctl::tensor::kernels::alignment_utils::is_aligned; using dpctl::tensor::kernels::alignment_utils::required_alignment; +using sycl::ext::oneapi::experimental::group_load; +using sycl::ext::oneapi::experimental::group_store; + template constexpr T dispatch_erf_op(T elem) { @@ -523,33 +526,41 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) _DataType_input2, \ _DataType_output>) \ { \ - sycl::vec<_DataType_input1, vec_sz> x1 = \ - sg.load(input1_multi_ptr); \ - sycl::vec<_DataType_input2, vec_sz> x2 = \ - sg.load(input2_multi_ptr); \ + sycl::vec<_DataType_input1, vec_sz> x1{}; \ + sycl::vec<_DataType_input2, vec_sz> x2{}; \ + \ + group_load(sg, input1_multi_ptr, x1); \ + group_load(sg, input2_multi_ptr, x2); \ \ res_vec = __vec_operation__; \ } \ else /* input types don't match result type, so \ explicit casting is required */ \ { \ + sycl::vec<_DataType_input1, vec_sz> tmp_x1{}; \ + sycl::vec<_DataType_input2, vec_sz> tmp_x2{}; \ + \ + group_load(sg, input1_multi_ptr, tmp_x1); \ + group_load(sg, input2_multi_ptr, tmp_x2); \ + \ sycl::vec<_DataType_output, vec_sz> x1 = \ dpnp_vec_cast<_DataType_output, \ _DataType_input1, vec_sz>( \ - sg.load(input1_multi_ptr)); \ + tmp_x1); \ sycl::vec<_DataType_output, vec_sz> x2 = \ dpnp_vec_cast<_DataType_output, \ _DataType_input2, vec_sz>( \ - sg.load(input2_multi_ptr)); \ + tmp_x2); \ \ res_vec = __vec_operation__; \ } \ } \ else { \ - sycl::vec<_DataType_input1, vec_sz> x1 = \ - sg.load(input1_multi_ptr); \ - sycl::vec<_DataType_input2, vec_sz> x2 = \ - sg.load(input2_multi_ptr); \ + sycl::vec<_DataType_input1, vec_sz> x1{}; \ + sycl::vec<_DataType_input2, vec_sz> x2{}; \ + \ + group_load(sg, input1_multi_ptr, x1); \ + group_load(sg, input2_multi_ptr, x2); \ \ for (size_t k = 0; k < vec_sz; ++k) { \ const _DataType_output input1_elem = x1[k]; \ @@ -557,7 +568,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) res_vec[k] = __operation__; \ } \ } \ - sg.store(result_multi_ptr, res_vec); \ + group_store(sg, res_vec, result_multi_ptr); \ } \ else { \ for (size_t k = start + sg.get_local_id()[0]; \ diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 809cb4e5e99..7981f350b87 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -325,7 +325,7 @@ def bincount(x, weights=None, minlength=None): ------- out : dpnp.ndarray of ints The result of binning the input array. - The length of `out` is equal to ``np.amax(x) + 1``. + The length of `out` is equal to ``dpnp.max(x) + 1``. See Also -------- @@ -353,7 +353,7 @@ def bincount(x, weights=None, minlength=None): ... TypeError: x must be an integer array - A possible use of ``bincount`` is to perform sums over + A possible use of :obj:`dpnp.bincount` is to perform sums over variable-size chunks of an array, using the `weights` keyword. >>> w = np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6], dtype=np.float32) # weights