From e972a937a1b1c21f12c71f5f92214d1979493fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20Karpi=C5=84ski?= Date: Tue, 14 Dec 2021 13:30:44 +0100 Subject: [PATCH] Rename coalesced_pixels to coalesced_values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Szymon Karpiński --- dali/kernels/slice/slice_gpu.cuh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dali/kernels/slice/slice_gpu.cuh b/dali/kernels/slice/slice_gpu.cuh index 9dda873d522..62a81ed71f8 100644 --- a/dali/kernels/slice/slice_gpu.cuh +++ b/dali/kernels/slice/slice_gpu.cuh @@ -73,7 +73,7 @@ struct SliceBlockDesc { }; template -constexpr int coalesced_pixels = sizeof(OutputType) >= 4 ? 1 : 4 / sizeof(OutputType); +constexpr int coalesced_values = sizeof(OutputType) >= 4 ? 1 : 4 / sizeof(OutputType); /** * @brief Simplified algorithm when no padding is necessary @@ -90,9 +90,9 @@ __device__ void SliceFuncNoPad(OutputType *__restrict__ out, const InputType *__ return; } - for (; offset < block_end; offset += blockDim.x * coalesced_pixels) { + for (; offset < block_end; offset += blockDim.x * coalesced_values) { #pragma unroll - for (uint64_t i = 0; i < coalesced_pixels; i++) { + for (uint64_t i = 0; i < coalesced_values; i++) { uint64_t idx = offset + i; if (idx >= block_end) break; uint64_t out_idx = idx; @@ -138,9 +138,9 @@ __device__ void SliceFunc(OutputType *__restrict__ out, const InputType *__restr inner_in_extent = Dims > 1 ? 
in_strides[LastDim - 1] : in_shape[LastDim] * in_strides[LastDim]; } - for (; offset < block_end; offset += blockDim.x * coalesced_pixels) { + for (; offset < block_end; offset += blockDim.x * coalesced_values) { #pragma unroll - for (uint64_t i = 0; i < coalesced_pixels; i++) { + for (uint64_t i = 0; i < coalesced_values; i++) { uint64_t idx = offset + i; if (idx >= block_end) break; uint64_t out_idx = idx; @@ -179,7 +179,7 @@ __device__ void SliceFunc(OutputType *__restrict__ out, const InputType *__restr template __global__ void SliceKernel(const SliceSampleDesc *samples, const SliceBlockDesc *blocks) { int sampleIdx = blocks[blockIdx.x].sampleIdx; - uint64_t offset = blocks[blockIdx.x].offset + threadIdx.x * coalesced_pixels; + uint64_t offset = blocks[blockIdx.x].offset + threadIdx.x * coalesced_values; uint64_t block_end = blocks[blockIdx.x].offset + blocks[blockIdx.x].size; auto sample = samples[sampleIdx]; auto *out = static_cast(sample.out);