From 0fb0a04d71cb353b9af2537c5294b3d895763464 Mon Sep 17 00:00:00 2001 From: Paul Date: Sat, 29 Jul 2023 15:34:22 -0400 Subject: [PATCH] Handle __restrict errors by using or providing in place operations (#14) Addresses https://github.com/surge-synthesizer/surge/issues/7081 --- include/sst/effects/Bonsai.h | 172 +++++++++++++++++++++++++--------- include/sst/effects/Delay.h | 6 +- include/sst/effects/Reverb1.h | 2 +- 3 files changed, 131 insertions(+), 49 deletions(-) diff --git a/include/sst/effects/Bonsai.h b/include/sst/effects/Bonsai.h index f4a53a3..7694ee7 100644 --- a/include/sst/effects/Bonsai.h +++ b/include/sst/effects/Bonsai.h @@ -213,6 +213,23 @@ inline void sum2(float *__restrict one, float two, float *__restrict dst) dst[i] = one[i] + two; } } +template +inline void sum2(float *__restrict srcDst, float *__restrict plus) +{ + for (auto i = 0U; i < blockSize; ++i) + { + srcDst[i] = srcDst[i] + plus[i]; + } +} +template +inline void sum2(float *__restrict srcDst, float plus) +{ + for (auto i = 0U; i < blockSize; ++i) + { + srcDst[i] = srcDst[i] + plus; + } +} + template inline void sum3(float *__restrict one, float *__restrict two, float *__restrict three, float *__restrict dst) @@ -239,6 +256,15 @@ inline void minus2(float *__restrict one, float *__restrict two, float *__restri } } +template +inline void minus2(float *__restrict sdst, float *__restrict two) +{ + for (auto i = 0U; i < blockSize; ++i) + { + sdst[i] = sdst[i] - two[i]; + } +} + template inline void mul(float *__restrict src1, float src2, float *__restrict dst) { @@ -256,6 +282,25 @@ inline void mul(float *__restrict src1, float *__restrict src2, float *__restric } } +template +inline void mul(float *__restrict src1, float *__restrict src2) +{ + for (auto i = 0U; i < blockSize; ++i) + { + src1[i] = src1[i] * src2[i]; + } +} + +template +inline void mul(float *__restrict src1, float by) +{ + for (auto i = 0U; i < blockSize; ++i) + { + src1[i] = src1[i] * by; + } +} + + template inline void div(float *__restrict src1, float src2, float *__restrict dst) { @@ -349,7 +394,7 @@ inline float clip_inv_sinh(float invlevel, float level, float src) return logf(abs2x + sqrt(abs2x * abs2x + 1)) * 0.5 * sgn(scaledown) * level; } template -inline void clip_inv_sinh(float invlevel, float level, float *__restrict src, float *__restrict dst) +inline void clip_inv_sinh(float invlevel, float level, float * src, float * dst) { for (auto i = 0U; i < blockSize; ++i) { @@ -359,7 +404,7 @@ inline void clip_inv_sinh(float invlevel, float level, float *__restrict src, fl } } template -inline void clip_inv_sinh(float level, float *__restrict src, float *__restrict dst) +inline void clip_inv_sinh(float level, float * src, float *dst) { const float invlevel = 1 / level; for (auto i = 0U; i < blockSize; ++i) @@ -370,7 +415,7 @@ inline void clip_inv_sinh(float level, float *__restrict src, float *__restrict } } template -inline void clip_inv_sinh(float *__restrict level, float *__restrict src, float *__restrict dst) +inline void clip_inv_sinh(float * level, float * src, float *dst) { for (auto i = 0U; i < blockSize; ++i) { @@ -416,7 +461,7 @@ inline float fasttanh78(float x, float invlevel, float level) return fasttanh78(x * invlevel) * level; } template -inline void clip_tanh78(float invlevel, float level, float *__restrict src, float *__restrict dst) +inline void clip_tanh78(float invlevel, float level, float * src, float *dst) { for (auto i = 0U; i < blockSize; ++i) { @@ -424,7 +469,7 @@ inline void clip_tanh78(float invlevel, float level, float *__restrict src, floa } } template -inline void clip_tanh78(float level, float *__restrict src, float *__restrict dst) +inline void clip_tanh78(float level, float * src, float *dst) { const float invlevel = 1 / level; for (auto i = 0U; i < blockSize; ++i) @@ -433,7 +478,7 @@ inline void clip_tanh78(float level, float *__restrict src, float *__restrict ds } } template -inline void clip_tanh78(float *__restrict level, float *__restrict src, float *__restrict dst) +inline void clip_tanh78(float * level, float * src, float * dst) { for (auto i = 0U; i < blockSize; ++i) { @@ -472,8 +517,8 @@ inline void clip_tanh_foldback(float level, float *__restrict src, float *__rest } } template -inline void clip_tanh_foldback(float *__restrict level, float *__restrict src, - float *__restrict dst) +inline void clip_tanh_foldback(float * level, float * src, + float * dst) { for (auto i = 0U; i < blockSize; ++i) { @@ -511,7 +556,7 @@ inline void clip_sine_tanh(float level, float *__restrict src, float *__restrict } } template -inline void clip_sine_tanh(float *__restrict level, float *__restrict src, float *__restrict dst) +inline void clip_sine_tanh(float * level, float * src, float * dst) { for (auto i = 0U; i < blockSize; ++i) { @@ -638,6 +683,15 @@ inline void max(float *__restrict src, float value, float *__restrict dst) } } +template +inline void max(float *__restrict srcdst, float value) +{ + for (auto i = 0U; i < blockSize; ++i) + { + srcdst[i] = fmax(srcdst[i], value); + } +} + template inline void lerp(float *__restrict src1, float *__restrict src2, float mix, float *__restrict dst) { @@ -655,6 +709,16 @@ inline void lerp(float *__restrict src1, float *__restrict src2, float *__restri dst[i] = (src2[i] - src1[i]) * mix[i] + src1[i]; } } +// inplace version +template +inline void lerp(float *__restrict srcDest, float *__restrict src2, float *__restrict mix) +{ + for (auto i = 0U; i < blockSize; ++i) + { + srcDest[i] = (src2[i] - srcDest[i]) * mix[i] + srcDest[i]; + } +} + // TODO - move this to basic blocks as abs template inline void blockabs(float *__restrict src, float *__restrict dst) @@ -665,6 +729,14 @@ template inline void blockabs(float *__restrict src, float *_ } } +template inline void blockabs(float * src) +{ + for (auto i = 0U; i < blockSize; ++i) + { + src[i] = fabs(src[i]); + } +} + inline int32_t super_simple_noise(int32_t last) { return last * 1103515245 + 12345; } template inline void noise(float &last, float *__restrict dst) { @@ -1044,17 +1116,23 @@ inline void shelf_gain(float &last, float coef, float *__restrict pre, float bufA alignas(16)[blockSize] = {}; float bufB alignas(16)[blockSize] = {}; mul(pre, postdistgain, bufA); - mul(bufA, 6.f, bufA); - blockabs(bufA, bufA); + mul(bufA, 6.f); + blockabs(bufA); mul(post, 6.f, bufB); - blockabs(bufB, bufB); - minus2(bufB, bufA, bufB); + blockabs(bufB); + minus2(bufB, bufA); onepole_lp(last, coef, bufB, bufA); - blockabs(bufA, bufA); - sum2(bufA, 1.f, bufA); - div(1.f, bufA, bufA); - mul(bufA, bufA, dstsq); - mul(dstsq, bufA, dstcb); + blockabs(bufA); + sum2(bufA, 1.f); + // div(1.f, bufA, bufA); + //mul(bufA, bufA, dstsq); + //mul(dstsq, bufA, dstcb); + for (int i=0; i::tape_sat(float last[], int lastmin, float sat, int tilt1_post(last[lastmin + 16], last[lastmin + 17], bufA, bufB); // L } // tilt1_post(last[lastmin + 12], last[lastmin + 13], bufA, bufB); - mul(bufB, sat_halfsq_db, bufB); + mul(bufB, sat_halfsq_db); clip_inv_sinh(10, 0.1, bufB, bufB); // 1/0.15 shelf_gain(last[lastmin + 20], this->coef20, srcScaledL, sat_halfsq_db, bufB, shelfGain2, shelfGain3); @@ -1159,7 +1237,7 @@ inline void Bonsai::tape_sat(float last[], int lastmin, float sat, int high_shelf(last[lastmin + 22], this->coef8000, shelfGain3, bufA, dstL); // tilt1_post(last[lastmin + 16], last[lastmin + 17], bufC, bufB); - mul(bufC, sat_halfsq_db, bufC); + mul(bufC, sat_halfsq_db); clip_inv_sinh(10, 0.1, bufC, bufC); // 1/0.15 shelf_gain(last[lastmin + 23], this->coef20, srcScaledR, sat_halfsq_db, bufC, shelfGain2, shelfGain3); @@ -1210,11 +1288,11 @@ inline void Bonsai::bass_boost(float last[], int lastmin, float boost, { case 1: sum2(srcL, srcR, bufC); - mul(bufC, 0.5, bufC); + mul(bufC, 0.5); break; case 0: default: - sum2(srcL, bufC, bufC); + sum2(bufC, srcL); break; } onepole_hp(last[lastmin + 7], this->coef20, bufC, bufA); @@ -1229,20 +1307,20 @@ inline void Bonsai::bass_boost(float last[], int lastmin, float boost, mul(reused, lerp2_block, bufA); clampbi(0.01, reused, bufA); onepole_hp(last[lastmin + 13], this->coef200, bufA, branch3); - mul(branch3, lerp3_block, branch3); + mul(branch3, lerp3_block); mul(reused, lerp4_block, bufB); clip_tanh78(lerp5_block, bufB, bufB); onepole_lp(last[lastmin + 14], this->coef200, bufB, bufA); - mul(bufA, 2.f, bufA); + mul(bufA, 2.f); sum2(branch3, bufA, bufB); onepole_hp(last[lastmin + 15], this->coef30, bufB, bufA); sum3(branch1, branch2, bufA, bufB); - mul(bufB, 0.16666666666666666666666, bufB); + mul(bufB, 0.16666666666666666666666); // change the above constant to adjust the default gain, so the // slider is negative less often previous value: // 0.3333333333333333333333333 onepole_lp(last[lastmin + 16], this->coef500, bufB, bufA); - mul(bufA, boost_block, bufA); + mul(bufA, boost_block); clip_inv_sinh(lerp6_block, bufA, bufA); // mul(bufA, rerange01(dist01, 1.25, // 0.75), bufA); @@ -1251,12 +1329,12 @@ inline void Bonsai::bass_boost(float last[], int lastmin, float boost, switch (this->deformType(b_bass_boost)) { case 1: - sum2(srcR, dstL, dstR); - sum2(srcL, dstL, dstL); + sum2(dstR, srcR); + sum2(dstL, srcL); break; case 0: default: - sum2(srcL, dstL, dstL); + sum2(dstL, srcL); onepole_hp(last[lastmin + 18], this->coef20, srcR, bufA); onepole_lp(last[lastmin + 19], this->coef50, bufA, bufB); @@ -1270,25 +1348,25 @@ inline void Bonsai::bass_boost(float last[], int lastmin, float boost, mul(reused, lerp2_block, bufA); clampbi(0.01, reused, bufA); onepole_hp(last[lastmin + 24], this->coef200, bufA, branch3); - mul(branch3, lerp3_block, branch3); + mul(branch3, lerp3_block); mul(reused, lerp4_block, bufB); clip_tanh78(lerp5_block, bufB, bufB); onepole_lp(last[lastmin + 25], this->coef200, bufB, bufA); - mul(bufA, 2.f, bufA); + mul(bufA, 2.f); sum2(branch3, bufA, bufB); onepole_hp(last[lastmin + 26], this->coef30, bufB, bufA); sum3(branch1, branch2, bufA, bufB); - mul(bufB, 0.16666666666666666666666, bufB); + mul(bufB, 0.16666666666666666666666); // change the above constant to adjust the default gain, so the // slider is negative less often previous value: // 0.3333333333333333333333333 onepole_lp(last[lastmin + 27], this->coef500, bufB, bufA); - mul(bufA, boost_block, bufA); + mul(bufA, boost_block); clip_inv_sinh(lerp6_block, bufA, bufA); // mul(bufA, rerange01(dist01, 1.25, // 0.75), bufA); onepole_lp(last[lastmin + 28], this->coef500, bufA, dstR); - sum2(srcR, dstR, dstR); + sum2(dstR, srcR); break; } } @@ -1317,10 +1395,14 @@ Bonsai::noise_channel(float last[], int lastmin, float *__restrict sen minus2(bufA, bufC, bufB); mul(bufB, sr_scaled, bufC); blockabs(bufC, bufB); - minus2(bufB, threshold, bufB); - max(bufB, 0.f, bufB); - mul(bufB, bufB, bufB); - mul(bufB, sens_lp_scale, bufB); + minus2(bufB, threshold); + max(bufB, 0.f); + // mul(bufB, bufB, bufB); + for (int i=0; i(bufB, sens_lp_scale); onepole_lp(last[lastmin + 6], sens_lp_coef, bufB, bufA); mul(bufA, noise_filt, bufB); onepole_hp(last[lastmin + 7], this->coef500, bufB, dst); @@ -1339,7 +1421,7 @@ inline void Bonsai::tape_noise(float last[], int lastmin, const float noise(last[lastmin + 0], bufA); noise(last[lastmin + 1], bufB); - mul(bufB, 0.5, bufB); + mul(bufB, 0.5); sum2(bufA, bufB, noiseL); minus2(bufA, bufB, noiseR); @@ -1359,14 +1441,14 @@ inline void Bonsai::tape_noise(float last[], int lastmin, const float noise_channel(last, lastmin + 6, sens_lp_scale, sens_lp_coef, threshold, sr_scaled, srcL, noiseL, bufA); - mul(bufA, gain_adj, bufA); + mul(bufA, gain_adj); clip_tanh78(10, 0.1, bufA, bufA); onepole_lp(last[lastmin + 14], this->coef2000, bufA, bufB); sum2(srcL, bufB, dstL); noise_channel(last, lastmin + 15, sens_lp_scale, sens_lp_coef, threshold, sr_scaled, srcR, noiseR, bufB); - mul(bufB, gain_adj, bufB); + mul(bufB, gain_adj); clip_tanh78(10, 0.1, bufB, bufB); onepole_lp(last[lastmin + 23], this->coef2000, bufB, bufA); sum2(srcR, bufA, dstR); @@ -1468,10 +1550,10 @@ inline void Bonsai::processBlock(float *__restrict dataL, float *__res age(last, 84, this->floatValue(b_dull), noiseL, noiseR, agedL, agedR); onepole_hp(last[100], coef10, agedL, outL); onepole_hp(last[101], coef10, agedR, outR); - mul(outL, gainOut, outL); - mul(outR, gainOut, outR); - lerp(dataL, outL, mixVal, dataL); - lerp(dataR, outR, mixVal, dataR); + mul(outL, gainOut); + mul(outR, gainOut); + lerp(dataL, outL, mixVal); + lerp(dataR, outR, mixVal); } } // namespace sst::effects::bonsai diff --git a/include/sst/effects/Delay.h b/include/sst/effects/Delay.h index 1b09a11..8dd051f 100644 --- a/include/sst/effects/Delay.h +++ b/include/sst/effects/Delay.h @@ -355,8 +355,8 @@ template inline void Delay::processBlock(float *da // negative feedback if (FBsign) { - mech::mul_block(tbufferL, -1.f, tbufferL); - mech::mul_block(tbufferR, -1.f, tbufferR); + mech::mul_block(tbufferL, -1.f); + mech::mul_block(tbufferR, -1.f); } // feedback path clipping modes @@ -425,7 +425,7 @@ template inline void Delay::processBlock(float *da // scale width this->applyWidth(tbufferL, tbufferR, width); - mix.fade_2_blocks_to(dataL, tbufferL, dataR, tbufferR, dataL, dataR, this->blockSize_quad); + mix.fade_2_blocks_inplace(dataL, tbufferL, dataR, tbufferR, this->blockSize_quad); wpos += FXConfig::blockSize; wpos = wpos & (max_delay_length - 1); diff --git a/include/sst/effects/Reverb1.h b/include/sst/effects/Reverb1.h index cb7a7a4..f7c0ae3 100644 --- a/include/sst/effects/Reverb1.h +++ b/include/sst/effects/Reverb1.h @@ -309,7 +309,7 @@ inline void Reverb1::processBlock(float *__restrict dataL, float *__re // scale width this->applyWidth(wetL, wetR, width); - mix.fade_2_blocks_to(dataL, wetL, dataR, wetR, dataL, dataR, this->blockSize_quad); + mix.fade_2_blocks_inplace(dataL, wetL, dataR, wetR, this->blockSize_quad); } template inline void Reverb1::loadpreset(int id)