Skip to content

Commit

Permalink
Improve W primitive operations with -Oz
Browse files Browse the repository at this point in the history
  • Loading branch information
gezalore committed Dec 2, 2023
1 parent f048cff commit 7de0206
Showing 1 changed file with 70 additions and 45 deletions.
115 changes: 70 additions & 45 deletions include/verilated_funcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,19 @@ uint64_t vl_time_pow10(int n) VL_PURE;
//===================================================================
// SETTING OPERATORS

// Fill `words` EData-sized words starting at owp with zero bytes; returns owp.
// The byte count is computed as an unsigned product, so a negative `words`
// would turn into a huge count - callers must pass words >= 0.
VL_ATTR_ALWINLINE
static WDataOutP VL_MEMSET_ZERO_W(WDataOutP owp, int words) VL_MT_SAFE {
    void* const filledp = std::memset(owp, 0, words * sizeof(EData));
    return static_cast<WDataOutP>(filledp);
}
// Fill `words` EData-sized words starting at owp with 0xff bytes (every bit set); returns owp.
// The byte count is computed as an unsigned product, so a negative `words`
// would turn into a huge count - callers must pass words >= 0.
VL_ATTR_ALWINLINE
static WDataOutP VL_MEMSET_ONES_W(WDataOutP owp, int words) VL_MT_SAFE {
    void* const filledp = std::memset(owp, 0xff, words * sizeof(EData));
    return static_cast<WDataOutP>(filledp);
}
// Copy `words` EData-sized words from iwp to owp via std::memcpy; returns owp.
// The byte count is computed as an unsigned product, so a negative `words`
// would turn into a huge count - callers must pass words >= 0.
VL_ATTR_ALWINLINE
static WDataOutP VL_MEMCPY_W(WDataOutP owp, WDataInP const iwp, int words) VL_MT_SAFE {
    void* const copiedp = std::memcpy(owp, iwp, words * sizeof(EData));
    return static_cast<WDataOutP>(copiedp);
}

// Output clean
// EMIT_RULE: VL_CLEAN: oclean=clean; obits=lbits;
// ANDs lhs with VL_MASK_I(obits); the lbits argument is accepted but not used.
#define VL_CLEAN_II(obits, lbits, lhs) ((VL_MASK_I(obits)) & (lhs))
Expand All @@ -339,18 +352,16 @@ static inline WDataOutP _vl_clean_inplace_w(int obits, WDataOutP owp) VL_MT_SAFE
}
// Copy VL_WORDS_I(obits) words from lwp to owp, ANDing the most-significant
// word with VL_MASK_E(obits). Returns owp.
static inline WDataOutP VL_CLEAN_WW(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE {
    const int words = VL_WORDS_I(obits);
    // Every word below the top one is copied unchanged (single bulk copy,
    // the former per-word loop duplicated this work)
    VL_MEMCPY_W(owp, lwp, words - 1);
    // Only the top word is masked
    owp[words - 1] = lwp[words - 1] & VL_MASK_E(obits);
    return owp;
}
// Zero all VL_WORDS_I(obits) words of owp. Returns owp.
// Uses the shared VL_MEMSET_ZERO_W helper; the previous per-word loop with its
// early return left the helper call unreachable.
static inline WDataOutP VL_ZERO_W(int obits, WDataOutP owp) VL_MT_SAFE {
    return VL_MEMSET_ZERO_W(owp, VL_WORDS_I(obits));
}
// Set owp to all ones across VL_WORDS_I(obits) words: every word below the top
// one gets all bits set, the top word is set to VL_MASK_E(obits). Returns owp.
static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE {
    const int words = VL_WORDS_I(obits);
    // Bulk fill of the lower words (the former per-word loop duplicated this work)
    VL_MEMSET_ONES_W(owp, words - 1);
    owp[words - 1] = VL_MASK_E(obits);
    return owp;
}
Expand All @@ -359,9 +370,7 @@ static inline WDataOutP VL_ALLONES_W(int obits, WDataOutP owp) VL_MT_SAFE {
// For now, we always have a clean rhs.
// Note: If a ASSIGN isn't clean, use VL_ASSIGNCLEAN instead to do the same thing.
// Copy VL_WORDS_I(obits) words from lwp to owp without any masking. Returns owp.
// Goes through VL_MEMCPY_W (std::memcpy), so owp and lwp are expected not to
// partially overlap.
static inline WDataOutP VL_ASSIGN_W(int obits, WDataOutP owp, WDataInP const lwp) VL_MT_SAFE {
    return VL_MEMCPY_W(owp, lwp, VL_WORDS_I(obits));
}

// EMIT_RULE: VL_ASSIGNBIT: rclean=clean;
Expand Down Expand Up @@ -519,19 +528,20 @@ static inline void VL_ASSIGNBIT_WO(int bit, WDataOutP owp) VL_MT_SAFE {
// Zero-extend the IData value ld into the VL_WORDS_I(obits)-word vector owp:
// word 0 receives ld, all remaining words are cleared. Returns owp.
// The second (unnamed) int parameter is ignored.
static inline WDataOutP VL_EXTEND_WI(int obits, int, WDataOutP owp, IData ld) VL_MT_SAFE {
    // Note for extracts that obits != lbits
    owp[0] = ld;
    // Clear everything above word 0 in one call (replaces the duplicated per-word loop)
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    return owp;
}
// Zero-extend the QData value ld into the VL_WORDS_I(obits)-word vector owp:
// VL_SET_WQ stores ld, then the words from index VL_WQ_WORDS_E upward are
// cleared. Returns owp. The second (unnamed) int parameter is ignored.
static inline WDataOutP VL_EXTEND_WQ(int obits, int, WDataOutP owp, QData ld) VL_MT_SAFE {
    VL_SET_WQ(owp, ld);
    // Clear the words above the stored value in one call (replaces the duplicated loop)
    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    return owp;
}
// Zero-extend the VL_WORDS_I(lbits)-word vector lwp into the
// VL_WORDS_I(obits)-word vector owp: the source words are copied and all words
// above them are cleared. Returns owp.
// NOTE(review): the zero-fill count is VL_WORDS_I(obits) - VL_WORDS_I(lbits), so
// this requires obits to span at least as many words as lbits - confirm callers.
static inline WDataOutP VL_EXTEND_WW(int obits, int lbits, WDataOutP owp,
                                     WDataInP const lwp) VL_MT_SAFE {
    const int lwords = VL_WORDS_I(lbits);
    // Hint the source before the zero-fill so it is likely resident for the copy
    VL_PREFETCH_RD(lwp);
    VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords);
    return VL_MEMCPY_W(owp, lwp, lwords);
}

// EMIT_RULE: VL_EXTENDS: oclean=*dirty*; obits=lbits;
Expand All @@ -547,26 +557,37 @@ static inline QData VL_EXTENDS_QQ(int, int lbits, QData lhs) VL_PURE {
}

// Sign-extend the lbits-wide IData value ld into the VL_WORDS_I(obits)-word
// vector owp. Returns owp.
// When VL_SIGN_E(lbits, ld) is set, the bits of word 0 outside VL_MASK_E(lbits)
// and all higher words are filled with ones; otherwise the higher words are
// cleared and word 0 is ld unchanged.
static inline WDataOutP VL_EXTENDS_WI(int obits, int lbits, WDataOutP owp, IData ld) VL_MT_SAFE {
    owp[0] = ld;
    if (VL_SIGN_E(lbits, owp[0])) {
        // Negative: set the bits of word 0 outside the mask, then fill above with ones
        owp[0] |= ~VL_MASK_E(lbits);
        VL_MEMSET_ONES_W(owp + 1, VL_WORDS_I(obits) - 1);
    } else {
        // Non-negative: upper words are simply zero
        VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    }
    return owp;
}
// Sign-extend the lbits-wide QData value ld into the VL_WORDS_I(obits)-word
// vector owp. Returns owp.
// VL_SET_WQ stores ld first; the sign is then tested on owp[1] with VL_SIGN_E.
// When set, the bits of owp[1] outside VL_MASK_E(lbits) and all words from
// VL_WQ_WORDS_E upward are filled with ones; otherwise those words are cleared.
static inline WDataOutP VL_EXTENDS_WQ(int obits, int lbits, WDataOutP owp, QData ld) VL_MT_SAFE {
    VL_SET_WQ(owp, ld);
    if (VL_SIGN_E(lbits, owp[1])) {
        // Negative: set the bits of word 1 outside the mask, then fill above with ones
        owp[1] |= ~VL_MASK_E(lbits);
        VL_MEMSET_ONES_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    } else {
        // Non-negative: upper words are simply zero
        VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    }
    return owp;
}
// Sign-extend the VL_WORDS_I(lbits)-word vector lwp into the
// VL_WORDS_I(obits)-word vector owp. Returns owp.
// The top source word is copied first and its sign tested with VL_SIGN_E. When
// set, the bits of that word outside VL_MASK_E(lbits) and all higher output
// words are filled with ones; otherwise the higher words are cleared. The
// source words below the top one are copied last.
// NOTE(review): the fill count is VL_WORDS_I(obits) - VL_WORDS_I(lbits), so this
// requires obits to span at least as many words as lbits - confirm callers.
static inline WDataOutP VL_EXTENDS_WW(int obits, int lbits, WDataOutP owp,
                                      WDataInP const lwp) VL_MT_SAFE {
    const int lwords = VL_WORDS_I(lbits);
    VL_PREFETCH_RD(lwp);
    owp[lwords - 1] = lwp[lwords - 1];
    if (VL_SIGN_E(lbits, lwp[lwords - 1])) {
        // Negative: set the bits of the top source word outside the mask, fill above with ones
        owp[lwords - 1] |= ~VL_MASK_E(lbits);
        VL_MEMSET_ONES_W(owp + lwords, VL_WORDS_I(obits) - lwords);
    } else {
        // Non-negative: upper words are simply zero
        VL_MEMSET_ZERO_W(owp + lwords, VL_WORDS_I(obits) - lwords);
    }
    // Remaining (lower) source words are copied unchanged
    return VL_MEMCPY_W(owp, lwp, lwords - 1);
}

//===================================================================
Expand Down Expand Up @@ -1549,63 +1570,66 @@ static inline QData VL_DYN_TO_Q(const VlQueue<T>& q, int elem_size) {
// Concatenate two IData values into the VL_WORDS_I(obits)-word vector owp:
// rd is stored in word 0, the remaining words are cleared, then ld is inserted
// with _vl_insert_WI using the bounds (rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WII(int obits, int lbits, int rbits, WDataOutP owp, IData ld,
                                      IData rd) VL_MT_SAFE {
    owp[0] = rd;
    // Clear everything above word 0 before inserting (replaces the duplicated per-word loop)
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
// Concatenate the wide value lwp with the IData value rd into the
// VL_WORDS_I(obits)-word vector owp: rd is stored in word 0, the remaining
// words are cleared, then lwp is inserted with _vl_insert_WW using the bounds
// (rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WWI(int obits, int lbits, int rbits, WDataOutP owp,
                                      WDataInP const lwp, IData rd) VL_MT_SAFE {
    owp[0] = rd;
    // Clear everything above word 0 before inserting (replaces the duplicated per-word loop)
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits);
    return owp;
}
// Concatenate the IData value ld with the wide value rwp into the
// VL_WORDS_I(obits)-word vector owp: the VL_WORDS_I(rbits) words of rwp are
// copied to the bottom, the words above are cleared, then ld is inserted with
// _vl_insert_WI using the bounds (rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WIW(int obits, int lbits, int rbits, WDataOutP owp, IData ld,
                                      WDataInP const rwp) VL_MT_SAFE {
    const int rwords = VL_WORDS_I(rbits);
    // Bulk copy + bulk clear (replaces the duplicated per-word loops)
    VL_MEMCPY_W(owp, rwp, rwords);
    VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords);
    _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
// Concatenate the IData value ld with the QData value rd into the
// VL_WORDS_I(obits)-word vector owp: VL_SET_WQ stores rd, the words from
// VL_WQ_WORDS_E upward are cleared, then ld is inserted with _vl_insert_WI
// using the bounds (rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WIQ(int obits, int lbits, int rbits, WDataOutP owp, IData ld,
                                      QData rd) VL_MT_SAFE {
    VL_SET_WQ(owp, rd);
    // Clear the words above rd before inserting (replaces the duplicated per-word loop)
    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    _vl_insert_WI(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
// Concatenate the QData value ld with the IData value rd into the
// VL_WORDS_I(obits)-word vector owp: rd is stored in word 0, the remaining
// words are cleared, then ld is inserted with _vl_insert_WQ using the bounds
// (rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WQI(int obits, int lbits, int rbits, WDataOutP owp, QData ld,
                                      IData rd) VL_MT_SAFE {
    owp[0] = rd;
    // Clear everything above word 0 before inserting (replaces the duplicated per-word loop)
    VL_MEMSET_ZERO_W(owp + 1, VL_WORDS_I(obits) - 1);
    _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
// Concatenate two QData values into the VL_WORDS_I(obits)-word vector owp:
// VL_SET_WQ stores rd, the words from VL_WQ_WORDS_E upward are cleared, then ld
// is inserted with _vl_insert_WQ using the bounds (rbits + lbits - 1, rbits).
// Returns owp.
static inline WDataOutP VL_CONCAT_WQQ(int obits, int lbits, int rbits, WDataOutP owp, QData ld,
                                      QData rd) VL_MT_SAFE {
    VL_SET_WQ(owp, rd);
    // Clear the words above rd before inserting (replaces the duplicated per-word loop)
    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
// Concatenate the wide value lwp with the QData value rd into the
// VL_WORDS_I(obits)-word vector owp: VL_SET_WQ stores rd, the words from
// VL_WQ_WORDS_E upward are cleared, then lwp is inserted with _vl_insert_WW
// using the bounds (rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WWQ(int obits, int lbits, int rbits, WDataOutP owp,
                                      WDataInP const lwp, QData rd) VL_MT_SAFE {
    VL_SET_WQ(owp, rd);
    // Clear the words above rd before inserting (replaces the duplicated per-word loop)
    VL_MEMSET_ZERO_W(owp + VL_WQ_WORDS_E, VL_WORDS_I(obits) - VL_WQ_WORDS_E);
    _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits);
    return owp;
}
// Concatenate the QData value ld with the wide value rwp into the
// VL_WORDS_I(obits)-word vector owp: the VL_WORDS_I(rbits) words of rwp are
// copied to the bottom, the words above are cleared, then ld is inserted with
// _vl_insert_WQ using the bounds (rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WQW(int obits, int lbits, int rbits, WDataOutP owp, QData ld,
                                      WDataInP const rwp) VL_MT_SAFE {
    const int rwords = VL_WORDS_I(rbits);
    // Bulk copy + bulk clear (replaces the duplicated per-word loops)
    VL_MEMCPY_W(owp, rwp, rwords);
    VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords);
    _vl_insert_WQ(owp, ld, rbits + lbits - 1, rbits);
    return owp;
}
// Concatenate two wide values into the VL_WORDS_I(obits)-word vector owp: the
// VL_WORDS_I(rbits) words of rwp are copied to the bottom, the words above are
// cleared, then lwp is inserted with _vl_insert_WW using the bounds
// (rbits + lbits - 1, rbits). Returns owp.
static inline WDataOutP VL_CONCAT_WWW(int obits, int lbits, int rbits, WDataOutP owp,
                                      WDataInP const lwp, WDataInP const rwp) VL_MT_SAFE {
    const int rwords = VL_WORDS_I(rbits);
    // Bulk copy + bulk clear (replaces the duplicated per-word loops)
    VL_MEMCPY_W(owp, rwp, rwords);
    VL_MEMSET_ZERO_W(owp + rwords, VL_WORDS_I(obits) - rwords);
    _vl_insert_WW(owp, lwp, rbits + lbits - 1, rbits);
    return owp;
}
Expand Down Expand Up @@ -1829,10 +1853,13 @@ static inline WDataOutP VL_SHIFTRS_WWW(int obits, int lbits, int rbits, WDataOut
EData overshift = 0; // Huge shift 1>>32 or more
for (int i = 1; i < VL_WORDS_I(rbits); ++i) overshift |= rwp[i];
if (VL_UNLIKELY(overshift || rwp[0] >= static_cast<IData>(obits))) {
const int lmsw = VL_WORDS_I(obits) - 1;
const EData sign = VL_SIGNONES_E(lbits, lwp[lmsw]);
for (int j = 0; j <= lmsw; ++j) owp[j] = sign;
owp[lmsw] &= VL_MASK_E(lbits);
const int owords = VL_WORDS_I(obits);
if (VL_SIGN_E(lbits, lwp[owords - 1])) {
VL_MEMSET_ONES_W(owp, owords);
owp[owords - 1] &= VL_MASK_E(lbits);
} else {
VL_MEMSET_ZERO_W(owp, owords);
}
return owp;
}
return VL_SHIFTRS_WWI(obits, lbits, 32, owp, lwp, rwp[0]);
Expand Down Expand Up @@ -2043,9 +2070,7 @@ static inline void VL_ASSIGNSEL_WW(int rbits, int obits, int lsb, WDataOutP iowp

// Wide conditional select: copy VL_WORDS_I(obits) words into owp from w1p when
// cond is non-zero, otherwise from w2p. Returns owp.
// The source is chosen once and copied through VL_MEMCPY_W (std::memcpy), so
// owp is expected not to partially overlap the selected source.
static inline WDataOutP VL_COND_WIWW(int obits, WDataOutP owp, int cond, WDataInP const w1p,
                                     WDataInP const w2p) VL_MT_SAFE {
    return VL_MEMCPY_W(owp, cond ? w1p : w2p, VL_WORDS_I(obits));
}

//======================================================================
Expand All @@ -2058,7 +2083,7 @@ static inline WDataOutP VL_COND_WIWW(int obits, WDataOutP owp, int cond, WDataIn
// If changing the number of functions here, also change EMITCINLINES_NUM_CONSTW

// Zero the words of `o` from index wordsSet up to VL_WORDS_I(obits), then
// return `o`. Expands to statements ending in an unterminated `return o`, so
// the use site supplies the trailing semicolon.
#define VL_C_END_(obits, wordsSet) \
    VL_MEMSET_ZERO_W(o + (wordsSet), VL_WORDS_I(obits) - (wordsSet)); \
    return o

// clang-format off
Expand Down

0 comments on commit 7de0206

Please sign in to comment.