RVV: make clang happy by dropping VLAs in sgemm
thelastlin committed Aug 6, 2022
1 parent b09d8e9 commit 83d7d50
Showing 2 changed files with 30 additions and 0 deletions.
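Background: on RISC-V Vector, packn depends on the hardware vector length and is only known at run time, so the original "const float zeros[packn] = {0.f};" is an initialized variable-length array. The GCC builds tolerated it, while clang rejects initialized VLAs (typically with "variable-sized object may not be initialized"), so this commit switches to a heap-allocated zero buffer under #ifdef __clang__. A minimal standalone sketch of the pattern follows; the function and values are illustrative, not taken from ncnn.

#include <cstdio>

// Sketch only: demonstrates the clang-vs-GCC split used in the commit.
static void demo(int packn, const float* bias)
{
#ifdef __clang__
    // clang rejects an initialized VLA, so build the zero buffer on the heap.
    // new float[packn]() value-initializes every element to 0.f.
    float* _zero_tmp = new float[packn]();
    const float* zeros = _zero_tmp;
#else
    // GCC accepts the VLA form, so the stack buffer can stay.
    const float zeros[packn] = {0.f};
#endif

    const float* biasptr = bias ? bias : zeros;
    for (int i = 0; i < packn; i++)
        printf("%f\n", biasptr[i]);

#ifdef __clang__
    delete[] _zero_tmp;
#endif
}

int main()
{
    demo(4, nullptr); // no bias: prints four zeros
    return 0;
}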
src/layer/riscv/convolution_sgemm_packnto1.h: 15 additions, 0 deletions
@@ -190,14 +190,26 @@ static void im2col_sgemm_packnto1_rvv(const Mat& bottom_im2col, Mat& top_blob, c
 int nn_outch = outch / packn;
 int remain_outch_start = nn_outch * packn;

+#ifdef __clang__
+// clang complains about VLA in the following loop
+float* _zero_tmp = new float [packn]();
+for(int _zero_clean_idx =0; _zero_clean_idx < packn; _zero_clean_idx++)
+{
+_zero_tmp[_zero_clean_idx] = 0.f;
+}
+#endif // __clang__
 #pragma omp parallel for num_threads(opt.num_threads)
 for (int pp = 0; pp < nn_outch; pp++)
 {
 int p = pp * packn;

 float* outptr0 = top_blob.channel(p);

+#ifdef __clang__
+const float* zeros = _zero_tmp;
+#else
 const float zeros[packn] = {0.f};
+#endif // __clang__
 const float* biasptr = bias ? bias + p : zeros;

 int i = 0;
@@ -343,6 +355,9 @@ static void im2col_sgemm_packnto1_rvv(const Mat& bottom_im2col, Mat& top_blob, c
 outptr0 += 1;
 }
 }
+#ifdef __clang__
+delete[] _zero_tmp;
+#endif

 #pragma omp parallel for num_threads(opt.num_threads)
 for (int p = remain_outch_start; p < outch; p++)
src/layer/riscv/convolution_sgemm_packnto1_fp16s.h: 15 additions, 0 deletions
@@ -190,14 +190,26 @@ static void im2col_sgemm_packnto1_fp16sa_rvv(const Mat& bottom_im2col, Mat& top_
 int nn_outch = outch / packn;
 int remain_outch_start = nn_outch * packn;

+// make clang happy with the following loop
+#ifdef __clang__
+__fp16* _zero_tmp = new __fp16 [packn]();
+for(int _zero_clean_idx =0; _zero_clean_idx < packn; _zero_clean_idx++)
+{
+_zero_tmp[_zero_clean_idx] = 0.f;
+}
+#endif // __clang__
 #pragma omp parallel for num_threads(opt.num_threads)
 for (int pp = 0; pp < nn_outch; pp++)
 {
 int p = pp * packn;

 __fp16* outptr0 = top_blob.channel(p);

+#ifdef __clang__
+const __fp16* zeros = _zero_tmp;
+#else
 const __fp16 zeros[packn] = {0.f};
+#endif // __clang__
 const __fp16* biasptr = bias ? bias + p : zeros;

 int i = 0;
@@ -343,6 +355,9 @@ static void im2col_sgemm_packnto1_fp16sa_rvv(const Mat& bottom_im2col, Mat& top_
 outptr0 += 1;
 }
 }
+#ifdef __clang__
+delete[] _zero_tmp;
+#endif // __clang__

 #pragma omp parallel for num_threads(opt.num_threads)
 for (int p = remain_outch_start; p < outch; p++)
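Two notes on the change as committed: new float[packn]() already value-initializes every element to 0.f, so the explicit clearing loop is redundant but harmless, and because the zero buffer is only read inside the OpenMP parallel loops, sharing one allocation across threads is safe. An RAII alternative that avoids the manual delete[] is sketched below; this is a different design choice, not what the commit does.

#include <vector>

void demo_raii(int packn /* runtime RVV packing width */, const float* bias)
{
    std::vector<float> zero_tmp(packn, 0.f); // zeroed buffer, freed automatically
    const float* zeros = zero_tmp.data();
    const float* biasptr = bias ? bias : zeros;
    (void)biasptr; // the sgemm loops would read from biasptr here
}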
