RVV: replace word_type with size_t (Tencent#4100, Tencent#4118)
thelastlin committed Oct 1, 2022
1 parent c20a7ca commit 6948e83
Showing 2 changed files with 18 additions and 18 deletions.
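The rename follows the RVV intrinsic API: the vsetvl_* intrinsics return size_t, and the word_type alias used by older intrinsic headers is not part of current toolchains. Below is a minimal sketch (not part of the commit) of the strip-mining pattern the changed lines sit in, assuming the pre-1.0, non-__riscv_-prefixed intrinsic names this code targets; the function and variable names are illustrative.

#include <riscv_vector.h>

// Illustrative strip-mined loop: vsetvl_e32m8() yields the number of elements
// handled per pass as a size_t, so the vector-length variable is size_t too.
static void add_scalar_inplace_f32(float* ptr, float b, int n)
{
    while (n > 0)
    {
        size_t vl = vsetvl_e32m8(n);              // elements processed this iteration
        vfloat32m8_t _p = vle32_v_f32m8(ptr, vl); // load up to vl floats
        _p = vfadd_vf_f32m8(_p, b, vl);           // add the scalar b
        vse32_v_f32m8(ptr, _p, vl);               // store them back
        ptr += vl;
        n -= vl;
    }
}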
16 changes: 8 additions & 8 deletions src/layer/riscv/layernorm_riscv.cpp
@@ -48,7 +48,7 @@ static inline int layernorm_rvv_pack1_procedure(int size, float* ptr, const floa
float* ptr_sum = ptr;
while (n > 0)
{
-word_type vl = vsetvl_e32m8(n);
+size_t vl = vsetvl_e32m8(n);
vfloat32m8_t _p = vle32_v_f32m8(ptr_sum, vl);
_sum = vfredusum_vs_f32m8_f32m1(_sum, _p, /* scalar */ _sum, vl);
// _sqsum = vfredusum_vs_f32m8_f32m1(_sqsum, vfmul_vv_f32m8(_p, _p, vl), /* scalar */ _sqsum, vl);
@@ -64,7 +64,7 @@ static inline int layernorm_rvv_pack1_procedure(int size, float* ptr, const floa
float* ptr_sqsum = ptr;
while (n > 0)
{
-word_type vl = vsetvl_e32m8(n);
+size_t vl = vsetvl_e32m8(n);
vfloat32m8_t _p = vle32_v_f32m8(ptr_sqsum, vl);
_p = vfsub_vf_f32m8(_p, mean, vl);
_sqsum = vfredusum_vs_f32m8_f32m1(_sqsum, vfmul_vv_f32m8(_p, _p, vl), /* scalar */ _sqsum, vl);
@@ -88,7 +88,7 @@ static inline int layernorm_rvv_pack1_procedure(int size, float* ptr, const floa
{
while (n > 0)
{
-word_type vl = vsetvl_e32m8(n);
+size_t vl = vsetvl_e32m8(n);
vfloat32m8_t _p = vle32_v_f32m8(ptr_store, vl);
_p = vfmul_vf_f32m8(_p, a, vl);
vfloat32m8_t _gamma = vle32_v_f32m8(ptr_gamma, vl);
@@ -107,7 +107,7 @@ static inline int layernorm_rvv_pack1_procedure(int size, float* ptr, const floa
{
while (n > 0)
{
-word_type vl = vsetvl_e32m8(n);
+size_t vl = vsetvl_e32m8(n);
vfloat32m8_t _p = vle32_v_f32m8(ptr_store, vl);
_p = vfmul_vf_f32m8(_p, a, vl);
_p = vfadd_vf_f32m8(_p, b, vl);
@@ -120,7 +120,7 @@ static inline int layernorm_rvv_pack1_procedure(int size, float* ptr, const floa
return 0;
}

-static inline int layernorm_rvv_packn_procedure(int size, float* ptr, const float* gamma_data, const float* beta_data, float eps, int affine, const word_type vl)
+static inline int layernorm_rvv_packn_procedure(int size, float* ptr, const float* gamma_data, const float* beta_data, float eps, int affine, const size_t vl)
{
// mean and var
vfloat32m1_t _sum = vfmv_v_f_f32m1(0.f, vl);
@@ -305,7 +305,7 @@ int LayerNorm_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) co
#if __riscv_vector
if (elempack == packn)
{
-const word_type vl = vsetvl_e32m1(packn);
+const size_t vl = vsetvl_e32m1(packn);
if (dims == 2)
{
int w = bottom_top_blob.w;
@@ -419,7 +419,7 @@ int LayerNorm_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& o
const int packn = csrr_vlenb() / 2; // fp16
if (elempack == packn)
{
-const word_type vl = vsetvl_e16m1(packn);
+const size_t vl = vsetvl_e16m1(packn);
if (dims == 2)
{
int w = bottom_top_blob.w;
@@ -532,7 +532,7 @@ int LayerNorm_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option&
const int packn = csrr_vlenb() / 2; // fp16
if (elempack == packn)
{
-const word_type vl = vsetvl_e16m1(packn);
+const size_t vl = vsetvl_e16m1(packn);
if (dims == 2)
{
int w = bottom_top_blob.w;
20 changes: 10 additions & 10 deletions src/layer/riscv/layernorm_rvv_fp16.h
@@ -24,7 +24,7 @@ static inline int layernorm_rvv_pack1_fp16s_procedure(int size, __fp16* ptr, con
__fp16* ptr_sum = ptr;
while (n > 0)
{
-word_type vl = vsetvl_e16m4(n);
+size_t vl = vsetvl_e16m4(n);
vfloat32m8_t _p = vfwcvt_f_f_v_f32m8(vle16_v_f16m4(ptr_sum, vl), vl);
_sum = vfredusum_vs_f32m8_f32m1(_sum, _p, /* scalar */ _sum, vl);
// _sqsum = vfredusum_vs_f32m8_f32m1(_sqsum, vfmul_vv_f32m8(_p, _p, vl), /* scalar */ _sqsum, vl);
@@ -40,7 +40,7 @@ static inline int layernorm_rvv_pack1_fp16s_procedure(int size, __fp16* ptr, con
__fp16* ptr_sqsum = ptr;
while (n > 0)
{
-word_type vl = vsetvl_e16m4(n);
+size_t vl = vsetvl_e16m4(n);
vfloat32m8_t _p = vfwcvt_f_f_v_f32m8(vle16_v_f16m4(ptr_sqsum, vl), vl);
_p = vfsub_vf_f32m8(_p, mean, vl);
_sqsum = vfredusum_vs_f32m8_f32m1(_sqsum, vfmul_vv_f32m8(_p, _p, vl), /* scalar */ _sqsum, vl);
@@ -64,7 +64,7 @@ static inline int layernorm_rvv_pack1_fp16s_procedure(int size, __fp16* ptr, con
{
while (n > 0)
{
-word_type vl = vsetvl_e16m4(n);
+size_t vl = vsetvl_e16m4(n);
vfloat32m8_t _p = vfwcvt_f_f_v_f32m8(vle16_v_f16m4(ptr_store, vl), vl);
_p = vfmul_vf_f32m8(_p, a, vl);
vfloat32m8_t _gamma = vle32_v_f32m8(ptr_gamma, vl);
@@ -83,7 +83,7 @@ static inline int layernorm_rvv_pack1_fp16s_procedure(int size, __fp16* ptr, con
{
while (n > 0)
{
-word_type vl = vsetvl_e16m4(n);
+size_t vl = vsetvl_e16m4(n);
vfloat32m8_t _p = vfwcvt_f_f_v_f32m8(vle16_v_f16m4(ptr_store, vl), vl);
_p = vfmul_vf_f32m8(_p, a, vl);
_p = vfadd_vf_f32m8(_p, b, vl);
@@ -96,7 +96,7 @@ static inline int layernorm_rvv_pack1_fp16s_procedure(int size, __fp16* ptr, con
return 0;
}

-static inline int layernorm_rvv_packn_fp16s_procedure(int size, __fp16* ptr, const float* gamma_data, const float* beta_data, float eps, int affine, const word_type vl)
+static inline int layernorm_rvv_packn_fp16s_procedure(int size, __fp16* ptr, const float* gamma_data, const float* beta_data, float eps, int affine, const size_t vl)
{
// mean and var
// f16m1 => f32m2
@@ -160,7 +160,7 @@ static inline int layernorm_rvv_pack1_fp16sa_procedure(int size, __fp16* ptr, co
__fp16* ptr_sum = ptr;
while (n > 0)
{
-word_type vl = vsetvl_e16m8(n);
+size_t vl = vsetvl_e16m8(n);
vfloat16m8_t _p = vle16_v_f16m8(ptr_sum, vl);
_sum = vfredusum_vs_f16m8_f16m1(_sum, _p, /* scalar */ _sum, vl);
// _sqsum = vfredusum_vs_f32m8_f32m1(_sqsum, vfmul_vv_f32m8(_p, _p, vl), /* scalar */ _sqsum, vl);
@@ -176,7 +176,7 @@ static inline int layernorm_rvv_pack1_fp16sa_procedure(int size, __fp16* ptr, co
__fp16* ptr_sqsum = ptr;
while (n > 0)
{
-word_type vl = vsetvl_e16m8(n);
+size_t vl = vsetvl_e16m8(n);
vfloat16m8_t _p = vle16_v_f16m8(ptr_sqsum, vl);
_p = vfsub_vf_f16m8(_p, mean, vl);
_sqsum = vfredusum_vs_f16m8_f16m1(_sqsum, vfmul_vv_f16m8(_p, _p, vl), /* scalar */ _sqsum, vl);
@@ -200,7 +200,7 @@ static inline int layernorm_rvv_pack1_fp16sa_procedure(int size, __fp16* ptr, co
{
while (n > 0)
{
-word_type vl = vsetvl_e16m4(n);
+size_t vl = vsetvl_e16m4(n);
vfloat16m4_t _p = vle16_v_f16m4(ptr_store, vl);
_p = vfmul_vf_f16m4(_p, a, vl);
vfloat16m4_t _gamma = vfncvt_f_f_w_f16m4(vle32_v_f32m8(ptr_gamma, vl), vl);
@@ -219,7 +219,7 @@ static inline int layernorm_rvv_pack1_fp16sa_procedure(int size, __fp16* ptr, co
{
while (n > 0)
{
-word_type vl = vsetvl_e16m8(n);
+size_t vl = vsetvl_e16m8(n);
vfloat16m8_t _p = vle16_v_f16m8(ptr_store, vl);
_p = vfmul_vf_f16m8(_p, a, vl);
_p = vfadd_vf_f16m8(_p, b, vl);
@@ -232,7 +232,7 @@ static inline int layernorm_rvv_pack1_fp16sa_procedure(int size, __fp16* ptr, co
return 0;
}

-static inline int layernorm_rvv_packn_fp16sa_procedure(int size, __fp16* ptr, const float* gamma_data, const float* beta_data, float eps, int affine, const word_type vl)
+static inline int layernorm_rvv_packn_fp16sa_procedure(int size, __fp16* ptr, const float* gamma_data, const float* beta_data, float eps, int affine, const size_t vl)
{
// mean and var
vfloat16m1_t _sum = vfmv_v_f_f16m1(0.f, vl);
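In the packn paths above, vl is not recomputed inside a loop; it is fixed by the packing factor and passed to the procedure as a const size_t parameter. A hedged sketch of that calling pattern, not taken from the commit; my_vlenb() stands in for the csrr_vlenb() helper seen in the diff, and the names are illustrative.

#include <riscv_vector.h>
#include <stddef.h>

// Stand-in for ncnn's csrr_vlenb() helper seen in the diff: read the VLENB CSR,
// i.e. the vector register width in bytes.
static inline int my_vlenb()
{
    int vlenb = 0;
    asm volatile("csrr %0, vlenb" : "=r"(vlenb));
    return vlenb;
}

// Fixed-vl pattern used by the packn procedures: the vector length is derived
// once from the fp16 packing factor and then handed down as a const size_t.
static size_t packn_vl_fp16()
{
    const int packn = my_vlenb() / 2;   // fp16: VLEN/16 lanes per packed element
    return vsetvl_e16m1(packn);         // size_t in the current intrinsics
}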
