Skip to content

Commit

Permalink
Rename coalesced_pixels to coalesced_values
Browse files Browse the repository at this point in the history
Signed-off-by: Szymon Karpiński <[email protected]>
  • Loading branch information
szkarpinski committed Dec 14, 2021
1 parent 2259397 commit e972a93
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions dali/kernels/slice/slice_gpu.cuh
Original file line number Diff line number Diff line change
@@ -73,7 +73,7 @@ struct SliceBlockDesc {
};

template<typename OutputType>
-constexpr int coalesced_pixels = sizeof(OutputType) >= 4 ? 1 : 4 / sizeof(OutputType);
+constexpr int coalesced_values = sizeof(OutputType) >= 4 ? 1 : 4 / sizeof(OutputType);

/**
* @brief Simplified algorithm when no padding is necessary
@@ -90,9 +90,9 @@ __device__ void SliceFuncNoPad(OutputType *__restrict__ out, const InputType *__
return;
}

-for (; offset < block_end; offset += blockDim.x * coalesced_pixels<OutputType>) {
+for (; offset < block_end; offset += blockDim.x * coalesced_values<OutputType>) {
#pragma unroll
-for (uint64_t i = 0; i < coalesced_pixels<OutputType>; i++) {
+for (uint64_t i = 0; i < coalesced_values<OutputType>; i++) {
uint64_t idx = offset + i;
if (idx >= block_end) break;
uint64_t out_idx = idx;
@@ -138,9 +138,9 @@ __device__ void SliceFunc(OutputType *__restrict__ out, const InputType *__restr
inner_in_extent = Dims > 1 ? in_strides[LastDim - 1] : in_shape[LastDim] * in_strides[LastDim];
}

-for (; offset < block_end; offset += blockDim.x * coalesced_pixels<OutputType>) {
+for (; offset < block_end; offset += blockDim.x * coalesced_values<OutputType>) {
#pragma unroll
-for (uint64_t i = 0; i < coalesced_pixels<OutputType>; i++) {
+for (uint64_t i = 0; i < coalesced_values<OutputType>; i++) {
uint64_t idx = offset + i;
if (idx >= block_end) break;
uint64_t out_idx = idx;
@@ -179,7 +179,7 @@ __device__ void SliceFunc(OutputType *__restrict__ out, const InputType *__restr
template <typename OutputType, typename InputType, int Dims, bool SupportPad>
__global__ void SliceKernel(const SliceSampleDesc<Dims> *samples, const SliceBlockDesc *blocks) {
int sampleIdx = blocks[blockIdx.x].sampleIdx;
-uint64_t offset = blocks[blockIdx.x].offset + threadIdx.x * coalesced_pixels<OutputType>;
+uint64_t offset = blocks[blockIdx.x].offset + threadIdx.x * coalesced_values<OutputType>;
uint64_t block_end = blocks[blockIdx.x].offset + blocks[blockIdx.x].size;
auto sample = samples[sampleIdx];
auto *out = static_cast<OutputType*>(sample.out);
Expand Down

0 comments on commit e972a93

Please sign in to comment.