Replace per-word loops on wide data with memset/memcpy helpers (include/verilated_funcs.h)

Adds three always-inline helpers, VL_MEMSET_ZERO_W, VL_MEMSET_ONES_W and
VL_MEMCPY_W, each taking a count of EData words, and uses them in place of the
hand-written fill/copy loops in VL_CLEAN_WW, VL_ZERO_W, VL_ALLONES_W,
VL_ASSIGN_W, VL_EXTEND_W{I,Q,W}, VL_EXTENDS_W{I,Q,W}, the nine VL_CONCAT_W??
variants, VL_SHIFTRS_WWW, VL_COND_WIWW and the VL_C_END_ macro.

Review notes:
  * The helpers compute the byte length as words * sizeof(EData), which is
    evaluated as an unsigned size_t. A negative word count would therefore
    become a huge length, whereas the replaced loops simply did not iterate.
    Every call site must pass a count >= 0; confirm this for VL_C_END_ and for
    the "VL_WORDS_I(obits) - VL_WQ_WORDS_E" call sites.
  * std::memcpy requires that source and destination do not overlap. The
    replaced loops in VL_ASSIGN_W, VL_CLEAN_WW and VL_COND_WIWW tolerated
    owp == lwp. Confirm that no caller passes the same buffer as both source
    and destination.
  * The sign-extension rewrites branch on VL_SIGN_E(...) instead of or-ing with
    VL_SIGNONES_E(...). Presumably equivalent; TODO confirm against the macro
    definitions, which are not part of this patch.
  * std::memset / std::memcpy need <cstring> to be visible in this header;
    the include block is not part of this patch, so confirm it.
  * This patch text was rebuilt from a copy whose line breaks were collapsed
    and whose static_cast template arguments were stripped. The casts are
    restored as <WDataOutP> in the new helpers (their declared return type) and
    as <EData> / <IData> on removed and unchanged lines. Indentation, signature
    wrapping and blank context lines follow 4-space / 99-column formatting and
    were validated only against the hunk line counts. Apply with whitespace
    tolerance and verify against the upstream file.

diff --git a/include/verilated_funcs.h b/include/verilated_funcs.h
index 4cf04ff20c7..e550b4bca23 100644
--- a/include/verilated_funcs.h
+++ b/include/verilated_funcs.h
@@ -325,6 +325,19 @@ uint64_t vl_time_pow10(int n) VL_PURE;
 //===================================================================
 // SETTING OPERATORS
 
+VL_ATTR_ALWINLINE
+static inline WDataOutP VL_MEMSET_ZERO_W(WDataOutP owp, int words) VL_MT_SAFE {
+    return static_cast<WDataOutP>(std::memset(owp, 0, words * sizeof(EData)));
+}
+VL_ATTR_ALWINLINE
+static inline WDataOutP VL_MEMSET_ONES_W(WDataOutP owp, int words) VL_MT_SAFE {
+    return static_cast<WDataOutP>(std::memset(owp, 0xff, words * sizeof(EData)));
+}
+VL_ATTR_ALWINLINE
+static inline WDataOutP VL_MEMCPY_W(WDataOutP owp, WDataInP const iwp, int words) VL_MT_SAFE {
+    return static_cast<WDataOutP>(std::memcpy(owp, iwp, words * sizeof(EData)));
+}
+
 // Output clean
 // EMIT_RULE: VL_CLEAN: oclean=clean; obits=lbits;
 #define VL_CLEAN_II(obits, lbits, lhs) ((lhs) & (VL_MASK_I(obits)))
@@ -339,18 +352,16 @@ static inline WDataOutP _vl_clean_inplace_w(int obits, WDataOutP owp) VL_MT_SAFE
 }
 static inline WDataOutP VL_CLEAN_WW(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE {
     const int words = VL_WORDS_I(obits);
-    for (int i = 0; (i < (words - 1)); ++i) owp[i] = lwp[i];
+    VL_MEMCPY_W(owp, lwp, words - 1);
     owp[words - 1] = lwp[words - 1] & VL_MASK_E(obits);
     return owp;
 }
 static inline WDataOutP VL_ZERO_W(int obits, WDataOutP owp) VL_MT_SAFE {
-    const int words = VL_WORDS_I(obits);
-    for (int i = 0; i < words; ++i) owp[i] = 0;
-    return owp;
+    return VL_MEMSET_ZERO_W(owp, VL_WORDS_I(obits));
 }
 static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE {
     const int words = VL_WORDS_I(obits);
-    for (int i = 0; i < (words - 1); ++i) owp[i] = ~VL_EUL(0);
+    VL_MEMSET_ONES_W(owp, words - 1);
     owp[words - 1] = VL_MASK_E(obits);
     return owp;
 }
@@ -359,9 +370,7 @@ static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE {
 // For now, we always have a clean rhs.
 // Note: If a ASSIGN isn't clean, use VL_ASSIGNCLEAN instead to do the same thing.
 static inline WDataOutP VL_ASSIGN_W(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE {
-    const int words = VL_WORDS_I(obits);
-    for (int i = 0; i < words; ++i) owp[i] = lwp[i];
-    return owp;
+    return VL_MEMCPY_W(owp, lwp, VL_WORDS_I(obits));
 }
 
 // EMIT_RULE: VL_ASSIGNBIT: rclean=clean;
@@ -519,19 +528,20 @@ static inline void VL_ASSIGNBIT_WO(int bit, WDataOutP owp) VL_MT_SAFE {
 static inline WDataOutP VL_EXTEND_WI(int obits, int, WDataOutP owp, IData ld) VL_MT_SAFE {
     // Note for extracts that obits != lbits
     owp[0] = ld;
-    for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
     return owp;
 }
 static inline WDataOutP VL_EXTEND_WQ(int obits, int, WDataOutP owp, QData ld) VL_MT_SAFE {
     VL_SET_WQ(owp, ld);
-    for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
     return owp;
 }
 static inline WDataOutP VL_EXTEND_WW(int obits, int lbits, WDataOutP owp,
                                      WDataInP const lwp) VL_MT_SAFE {
-    for (int i = 0; i < VL_WORDS_I(lbits); ++i) owp[i] = lwp[i];
-    for (int i = VL_WORDS_I(lbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0;
-    return owp;
+    const int lwords = VL_WORDS_I(lbits);
+    VL_PREFETCH_RD(lwp);
+    VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords);
+    return VL_MEMCPY_W(owp, lwp, lwords);
 }
 
 // EMIT_RULE: VL_EXTENDS: oclean=*dirty*; obits=lbits;
@@ -547,26 +557,37 @@ static inline QData VL_EXTENDS_QQ(int, int lbits, QData lhs) VL_PURE {
 }
 
 static inline WDataOutP VL_EXTENDS_WI(int obits, int lbits, WDataOutP owp, IData ld) VL_MT_SAFE {
-    const EData sign = VL_SIGNONES_E(lbits, static_cast<EData>(ld));
-    owp[0] = ld | (sign & ~VL_MASK_E(lbits));
-    for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = sign;
+    owp[0] = ld;
+    if (VL_SIGN_E(lbits, owp[0])) {
+        owp[0] |= ~VL_MASK_E(lbits);
+        VL_MEMSET_ONES_W(owp + 1, VL_WORDS_I(obits) - 1);
+    } else {
+        VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
+    }
     return owp;
 }
 static inline WDataOutP VL_EXTENDS_WQ(int obits, int lbits, WDataOutP owp, QData ld) VL_MT_SAFE {
     VL_SET_WQ(owp, ld);
-    const EData sign = VL_SIGNONES_E(lbits, owp[1]);
-    owp[1] |= sign & ~VL_MASK_E(lbits);
-    for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = sign;
+    if (VL_SIGN_E(lbits, owp[1])) {
+        owp[1] |= ~VL_MASK_E(lbits);
+        VL_MEMSET_ONES_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
+    } else {
+        VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
+    }
     return owp;
 }
 static inline WDataOutP VL_EXTENDS_WW(int obits, int lbits, WDataOutP owp,
                                       WDataInP const lwp) VL_MT_SAFE {
-    for (int i = 0; i < VL_WORDS_I(lbits) - 1; ++i) owp[i] = lwp[i];
-    const int lmsw = VL_WORDS_I(lbits) - 1;
-    const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]);
-    owp[lmsw] = lwp[lmsw] | (sign & ~VL_MASK_E(lbits));
-    for (int i = VL_WORDS_I(lbits); i < VL_WORDS_I(obits); ++i) owp[i] = sign;
-    return owp;
+    const int lwords = VL_WORDS_I(lbits);
+    VL_PREFETCH_RD(lwp);
+    owp[lwords - 1] = lwp[lwords - 1];
+    if (VL_SIGN_E(lbits, lwp[lwords - 1])) {
+        owp[lwords - 1] |= ~VL_MASK_E(lbits);
+        VL_MEMSET_ONES_W(owp + lwords, VL_WORDS_I(obits) - lwords);
+    } else {
+        VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords);
+    }
+    return VL_MEMCPY_W(owp, lwp, lwords - 1);
 }
 
 //===================================================================
@@ -1549,63 +1570,66 @@ static inline QData VL_DYN_TO_Q(const VlQueue<T>& q, int elem_size) {
 static inline WDataOutP VL_CONCAT_WII(int obits, int lbits, int rbits, WDataOutP owp, IData ld,
                                       IData rd) VL_MT_SAFE {
     owp[0] = rd;
-    for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
     _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits);
     return owp;
 }
 static inline WDataOutP VL_CONCAT_WWI(int obits, int lbits, int rbits, WDataOutP owp,
                                       WDataInP const lwp, IData rd) VL_MT_SAFE {
     owp[0] = rd;
-    for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
     _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits);
     return owp;
 }
 static inline WDataOutP VL_CONCAT_WIW(int obits, int lbits, int rbits, WDataOutP owp, IData ld,
                                       WDataInP const rwp) VL_MT_SAFE {
-    for (int i = 0; i < VL_WORDS_I(rbits); ++i) owp[i] = rwp[i];
-    for (int i = VL_WORDS_I(rbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    const int rwords = VL_WORDS_I(rbits);
+    VL_MEMCPY_W(owp, rwp, rwords);
+    VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords);
     _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits);
     return owp;
 }
 static inline WDataOutP VL_CONCAT_WIQ(int obits, int lbits, int rbits, WDataOutP owp, IData ld,
                                       QData rd) VL_MT_SAFE {
     VL_SET_WQ(owp, rd);
-    for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
     _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits);
     return owp;
 }
 static inline WDataOutP VL_CONCAT_WQI(int obits, int lbits, int rbits, WDataOutP owp, QData ld,
                                       IData rd) VL_MT_SAFE {
     owp[0] = rd;
-    for (int i = 1; i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
     _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits);
     return owp;
 }
 static inline WDataOutP VL_CONCAT_WQQ(int obits, int lbits, int rbits, WDataOutP owp, QData ld,
                                       QData rd) VL_MT_SAFE {
     VL_SET_WQ(owp, rd);
-    for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
     _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits);
     return owp;
 }
 static inline WDataOutP VL_CONCAT_WWQ(int obits, int lbits, int rbits, WDataOutP owp,
                                       WDataInP const lwp, QData rd) VL_MT_SAFE {
     VL_SET_WQ(owp, rd);
-    for (int i = VL_WQ_WORDS_E; i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
     _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits);
     return owp;
 }
 static inline WDataOutP VL_CONCAT_WQW(int obits, int lbits, int rbits, WDataOutP owp, QData ld,
                                       WDataInP const rwp) VL_MT_SAFE {
-    for (int i = 0; i < VL_WORDS_I(rbits); ++i) owp[i] = rwp[i];
-    for (int i = VL_WORDS_I(rbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    const int rwords = VL_WORDS_I(rbits);
+    VL_MEMCPY_W(owp, rwp, rwords);
+    VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords);
     _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits);
     return owp;
 }
 static inline WDataOutP VL_CONCAT_WWW(int obits, int lbits, int rbits, WDataOutP owp,
                                       WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE {
-    for (int i = 0; i < VL_WORDS_I(rbits); ++i) owp[i] = rwp[i];
-    for (int i = VL_WORDS_I(rbits); i < VL_WORDS_I(obits); ++i) owp[i] = 0;
+    const int rwords = VL_WORDS_I(rbits);
+    VL_MEMCPY_W(owp, rwp, rwords);
+    VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords);
     _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits);
     return owp;
 }
@@ -1829,10 +1853,13 @@ static inline WDataOutP VL_SHIFTRS_WWW(int obits, int lbits, int rbits, WDataOut
     EData overshift = 0;  // Huge shift 1>>32 or more
     for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i];
     if (VL_UNLIKELY(overshift || rwp[0] >= static_cast<IData>(obits))) {
-        const int lmsw = VL_WORDS_I(obits) - 1;
-        const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]);
-        for (int j = 0; j <= lmsw; ++j) owp[j] = sign;
-        owp[lmsw] &= VL_MASK_E(lbits);
+        const int owords = VL_WORDS_I(obits);
+        if (VL_SIGN_E(lbits, lwp[owords - 1])) {
+            VL_MEMSET_ONES_W(owp, owords);
+            owp[owords - 1] &= VL_MASK_E(lbits);
+        } else {
+            VL_MEMSET_ZERO_W(owp, owords);
+        }
         return owp;
     }
     return VL_SHIFTRS_WWI(obits, lbits, 32, owp, lwp, rwp[0]);
@@ -2043,9 +2070,7 @@ static inline void VL_ASSIGNSEL_WW(int rbits, int obits, int lsb, WDataOutP iowp
 
 static inline WDataOutP VL_COND_WIWW(int obits, WDataOutP owp, int cond, WDataInP const w1p,
                                      WDataInP const w2p) VL_MT_SAFE {
-    const int words = VL_WORDS_I(obits);
-    for (int i = 0; i < words; ++i) owp[i] = cond ? w1p[i] : w2p[i];
-    return owp;
+    return VL_MEMCPY_W(owp, cond ? w1p : w2p, VL_WORDS_I(obits));
 }
 
 //======================================================================
@@ -2058,7 +2083,7 @@ static inline WDataOutP VL_COND_WIWW(int obits, WDataOutP owp, int cond, WDataIn
 // If changing the number of functions here, also change EMITCINLINES_NUM_CONSTW
 
 #define VL_C_END_(obits, wordsSet) \
-    for (int i = (wordsSet); i < VL_WORDS_I(obits); ++i) o[i] = 0; \
+    VL_MEMSET_ZERO_W(o + (wordsSet), VL_WORDS_I(obits) - (wordsSet)); \
     return o
 
 // clang-format off