Replace the hard-coded RVV vector lengths (16 and 4) with the named macros
RVV_VEC8_INST_LEN and RVV_VEC32_INST_LEN, keep the comment recording the
VLEN assumption, and make the RVV flb_vector8_is_highbit_set() compare the
reduced maximum against 0x7F, as the NEON branch does.

diff --git a/include/fluent-bit/flb_simd.h b/include/fluent-bit/flb_simd.h
index a0fa3f3b32b..19e212c67b6 100644
--- a/include/fluent-bit/flb_simd.h
+++ b/include/fluent-bit/flb_simd.h
@@ -77,8 +77,9 @@ typedef uint32x4_t flb_vector32;
 typedef vuint8m1_t flb_vector8;
 typedef vuint32m1_t flb_vector32;
 
 /* Currently, VLEN is assumed to 128. */
-#define RVV_VEC_INST_LEN (128 / 8) /* 16 */
+#define RVV_VEC8_INST_LEN (128 / 8) /* 16 */
+#define RVV_VEC32_INST_LEN (128 / 8 / 4) /* 4 */
 
 #else
 /*
@@ -116,7 +117,7 @@ static inline void flb_vector8_load(flb_vector8 *v, const uint8_t *s)
 #elif defined(FLB_SIMD_NEON)
     *v = vld1q_u8(s);
 #elif defined(FLB_SIMD_RVV)
-    *v = __riscv_vle8_v_u8m1(s, 16);
+    *v = __riscv_vle8_v_u8m1(s, RVV_VEC8_INST_LEN);
 #else
     memset(v, 0, sizeof(flb_vector8));
 #endif
@@ -153,7 +154,7 @@ static inline flb_vector8 flb_vector8_ssub(const flb_vector8 v1, const flb_vecto
 #elif defined(FLB_SIMD_NEON)
     return vqsubq_u8(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-    return __riscv_vssubu_vv_u8m1(v1, v2, 16);
+    return __riscv_vssubu_vv_u8m1(v1, v2, RVV_VEC8_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
@@ -170,8 +171,10 @@ static inline flb_vector8 flb_vector8_eq(const flb_vector8 v1, const flb_vector8
 #elif defined(FLB_SIMD_NEON)
     return vceqq_u8(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-    vbool8_t ret = __riscv_vmseq_vv_u8m1_b8(v1, v2, 16);
-    return __riscv_vmerge_vvm_u8m1(__riscv_vmv_v_x_u8m1(0, 16), __riscv_vmv_v_x_u8m1(UINT8_MAX, 16), ret, 16);
+    vbool8_t ret = __riscv_vmseq_vv_u8m1_b8(v1, v2, RVV_VEC8_INST_LEN);
+    return __riscv_vmerge_vvm_u8m1(__riscv_vmv_v_x_u8m1(0, RVV_VEC8_INST_LEN),
+                                   __riscv_vmv_v_x_u8m1(UINT8_MAX, RVV_VEC8_INST_LEN),
+                                   ret, RVV_VEC8_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
@@ -184,8 +187,10 @@ static inline flb_vector32 flb_vector32_eq(const flb_vector32 v1, const flb_vect
 #elif defined(FLB_SIMD_NEON)
     return vceqq_u32(v1, v2);
 #elif defined(FLB_SIMD_RVV)
-    vbool32_t ret = __riscv_vmseq_vv_u32m1_b32(v1, v2, 4);
-    return __riscv_vmerge_vvm_u32m1(__riscv_vmv_v_x_u32m1(0, 4), __riscv_vmv_v_x_u32m1(UINT32_MAX, 4), ret, 4);
+    vbool32_t ret = __riscv_vmseq_vv_u32m1_b32(v1, v2, RVV_VEC32_INST_LEN);
+    return __riscv_vmerge_vvm_u32m1(__riscv_vmv_v_x_u32m1(0, RVV_VEC32_INST_LEN),
+                                    __riscv_vmv_v_x_u32m1(UINT32_MAX, RVV_VEC32_INST_LEN),
+                                    ret, RVV_VEC32_INST_LEN);
 #endif
 }
 #endif /* ! FLB_SIMD_NONE */
@@ -200,7 +205,7 @@ static inline flb_vector8 flb_vector8_broadcast(const uint8_t c)
 #elif defined(FLB_SIMD_NEON)
     return vdupq_n_u8(c);
 #elif defined(FLB_SIMD_RVV)
-    return __riscv_vmv_v_x_u8m1(c, 16);
+    return __riscv_vmv_v_x_u8m1(c, RVV_VEC8_INST_LEN);
 #else
     return ~UINT64CONST(0) / 0xFF * c;
 #endif
@@ -216,7 +221,10 @@ static inline bool flb_vector8_is_highbit_set(const flb_vector8 v)
 #elif defined(FLB_SIMD_NEON)
     return vmaxvq_u8(v) > 0x7F;
 #elif defined(FLB_SIMD_RVV)
-    return __riscv_vmv_x_s_u8m1_u8(__riscv_vredmaxu_vs_u8m1_u8m1(v, __riscv_vmv_v_x_u8m1(0, 16), 16));
+    /* A high bit is set in some byte iff the largest byte exceeds 0x7F. */
+    return __riscv_vmv_x_s_u8m1_u8(__riscv_vredmaxu_vs_u8m1_u8m1(v,
+                                   __riscv_vmv_v_x_u8m1(0, RVV_VEC8_INST_LEN),
+                                   RVV_VEC8_INST_LEN)) > 0x7F;
 #else
     return v & flb_vector8_broadcast(0x80);
 #endif
diff --git a/src/flb_utils.c b/src/flb_utils.c
index 7ba7da534c5..20d09b04d5c 100644
--- a/src/flb_utils.c
+++ b/src/flb_utils.c
@@ -802,7 +802,7 @@ int flb_utils_write_str(char *buf, int *off, size_t size, const char *str, size_
     char tmp[16];
     char *p;
 #if defined(FLB_SIMD_RVV)
-    const size_t inst_len = RVV_VEC_INST_LEN;
+    const size_t inst_len = RVV_VEC8_INST_LEN;
 #else
     const size_t inst_len = sizeof(flb_vector8);
 #endif