Skip to content

Commit

Permalink
Fix buffer overflow in xcorr_kernel_sse4_1
Browse files Browse the repository at this point in the history
Before, an overflow can occur in the last loop if `len` is not a
multiple of 4 as OP_CVTEPI16_EPI32_M64 tries to load 64 bits, but there
are insufficient bits allocated in `x`.
  • Loading branch information
felicialim committed Dec 15, 2021
1 parent a8e6a77 commit ec64b3c
Showing 1 changed file with 45 additions and 3 deletions.
48 changes: 45 additions & 3 deletions celt/x86/pitch_sse4_1.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32
__m128i sum0, sum1, sum2, sum3, vecSum;
__m128i initSum;

#ifdef OPUS_CHECK_ASM
opus_val32 sum_c[4]={0,0,0,0};
xcorr_kernel_c(x, y, sum_c, len);
#endif

celt_assert(len >= 3);

sum0 = _mm_setzero_si128();
Expand Down Expand Up @@ -177,19 +182,56 @@ void xcorr_kernel_sse4_1(const opus_val16 * x, const opus_val16 * y, opus_val32
vecSum = _mm_add_epi32(vecSum, sum2);
}

for (;j<len;j++)
vecX = OP_CVTEPI16_EPI32_M64(&x[len - 4]);
if (len - j == 3)
{
vecX = OP_CVTEPI16_EPI32_M64(&x[j + 0]);
vecX0 = _mm_shuffle_epi32(vecX, 0x00);
vecX0 = _mm_shuffle_epi32(vecX, 0x55);
vecX1 = _mm_shuffle_epi32(vecX, 0xaa);
vecX2 = _mm_shuffle_epi32(vecX, 0xff);

vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]);
vecY2 = OP_CVTEPI16_EPI32_M64(&y[j + 2]);

sum0 = _mm_mullo_epi32(vecX0, vecY0);
sum1 = _mm_mullo_epi32(vecX1, vecY1);
sum2 = _mm_mullo_epi32(vecX2, vecY2);

vecSum = _mm_add_epi32(vecSum, sum0);
vecSum = _mm_add_epi32(vecSum, sum1);
vecSum = _mm_add_epi32(vecSum, sum2);
}
else if (len - j == 2)
{
vecX0 = _mm_shuffle_epi32(vecX, 0xaa);
vecX1 = _mm_shuffle_epi32(vecX, 0xff);

vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);
vecY1 = OP_CVTEPI16_EPI32_M64(&y[j + 1]);

sum0 = _mm_mullo_epi32(vecX0, vecY0);
sum1 = _mm_mullo_epi32(vecX1, vecY1);

vecSum = _mm_add_epi32(vecSum, sum0);
vecSum = _mm_add_epi32(vecSum, sum1);
}
else if (len - j == 1)
{
vecX0 = _mm_shuffle_epi32(vecX, 0xff);

vecY0 = OP_CVTEPI16_EPI32_M64(&y[j + 0]);

sum0 = _mm_mullo_epi32(vecX0, vecY0);

vecSum = _mm_add_epi32(vecSum, sum0);
}

initSum = _mm_loadu_si128((__m128i *)(&sum[0]));
initSum = _mm_add_epi32(initSum, vecSum);
_mm_storeu_si128((__m128i *)sum, initSum);

#ifdef OPUS_CHECK_ASM
celt_assert(!memcmp(sum_c, sum, sizeof(sum_c)));
#endif
}
#endif

0 comments on commit ec64b3c

Please sign in to comment.