Skip to content

Commit

Permalink
simplified fill algorithms
Browse files Browse the repository at this point in the history
  • Loading branch information
itzpr3d4t0r committed Apr 5, 2024
1 parent 5cda34f commit 43161f7
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 26 deletions.
10 changes: 3 additions & 7 deletions src_c/simd_surface_fill_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,20 @@ _pg_has_avx2()
}

/* Setup for RUN_16BIT_SHUFFLE_OUT.
 * Declares the scratch registers shuff_dst and _shuff16_temp plus a zero
 * register (mm256_zero), and prepares two zero-interleaved copies of
 * mm256_color: mm256_colorA (unpacklo) and mm256_colorB (unpackhi).
 * mm256_color is read but not declared here, so it must already exist at
 * the point where this macro is expanded. */
#define SETUP_SHUFFLE \
__m256i shuff_dst, _shuff16_temp, mm256_colorA, mm256_colorB, mm256_zero; \
mm256_zero = _mm256_setzero_si256(); \
mm256_colorA = _mm256_unpacklo_epi8(mm256_color, mm256_zero); \
mm256_colorB = _mm256_unpackhi_epi8(mm256_color, mm256_zero);
/* Declares shuff_dst, _shuff16_temp and mm256_zero, then overwrites
 * mm256_color in place with its unpacklo interleave against zero bytes.
 * mm256_color is read but not declared here, so it must already exist at
 * the expansion site.
 * NOTE(review): only the unpacklo form is kept, so this presumably relies on
 * every pixel slot of mm256_color holding the same fill color (making the
 * unpackhi result identical) -- confirm against the callers. */
#define SETUP_SHUFFLE \
__m256i shuff_dst, _shuff16_temp, mm256_zero = _mm256_setzero_si256(); \
mm256_color = _mm256_unpacklo_epi8(mm256_color, mm256_zero);

#define RUN_16BIT_SHUFFLE_OUT(FILL_CODE) \
/* ==== shuffle pixels out into two registers each, src */ \
/* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
shuff_dst = _mm256_unpacklo_epi8(mm256_dst, mm256_zero); \
mm256_color = mm256_colorA; \
\
{FILL_CODE} \
\
_shuff16_temp = shuff_dst; \
\
shuff_dst = _mm256_unpackhi_epi8(mm256_dst, mm256_zero); \
mm256_color = mm256_colorB; \
\
{FILL_CODE} \
\
Expand Down
35 changes: 16 additions & 19 deletions src_c/simd_surface_fill_sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,26 +72,23 @@ _pg_HasSSE_NEON()

/* Setup for RUN_16BIT_SHUFFLE_OUT.
 * Declares the scratch registers shuff_dst and _shuff16_temp and widens
 * mm128_color by interleaving its bytes with zero bytes.
 * mm128_color is read but not declared here, so it must already exist at
 * the point where this macro is expanded. */
#define SETUP_SHUFFLE \
__m128i shuff_dst, _shuff16_temp, mm128_colorA, mm128_colorB; \
mm128_colorA = _mm_unpacklo_epi8(mm128_color, _mm_setzero_si128()); \
mm128_colorB = _mm_unpackhi_epi8(mm128_color, _mm_setzero_si128());
__m128i shuff_dst, _shuff16_temp, mm128_zero = _mm_setzero_si128(); \
mm128_color = _mm_unpacklo_epi8(mm128_color, mm128_zero);

/* Runs FILL_CODE twice for one register of destination pixels (mm128_dst):
 * first with shuff_dst holding the unpacklo interleave of mm128_dst with
 * zero, then with the unpackhi interleave. The first result is saved in
 * _shuff16_temp, and both halves are packed back into mm128_dst with
 * _mm_packus_epi16. FILL_CODE is expected to operate on shuff_dst.
 * shuff_dst and _shuff16_temp come from SETUP_SHUFFLE, which must be
 * expanded in the same scope beforehand. */
#define RUN_16BIT_SHUFFLE_OUT(FILL_CODE) \
/* ==== shuffle pixels out into two registers each, src */ \
/* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
shuff_dst = _mm_unpacklo_epi8(mm128_dst, _mm_setzero_si128()); \
mm128_color = mm128_colorA; \
\
{FILL_CODE} \
\
_shuff16_temp = shuff_dst; \
\
shuff_dst = _mm_unpackhi_epi8(mm128_dst, _mm_setzero_si128()); \
mm128_color = mm128_colorB; \
\
{FILL_CODE} \
\
/* ==== recombine A and B pixels ==== */ \
#define RUN_16BIT_SHUFFLE_OUT(FILL_CODE) \
/* ==== shuffle pixels out into two registers each, src */ \
/* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
shuff_dst = _mm_unpacklo_epi8(mm128_dst, mm128_zero); \
\
{FILL_CODE} \
\
_shuff16_temp = shuff_dst; \
\
shuff_dst = _mm_unpackhi_epi8(mm128_dst, mm128_zero); \
\
{FILL_CODE} \
\
/* ==== recombine A and B pixels ==== */ \
mm128_dst = _mm_packus_epi16(_shuff16_temp, shuff_dst);

#define FILLERS(NAME, COLOR_PROCESS_CODE, FILL_CODE) \
Expand Down

0 comments on commit 43161f7

Please sign in to comment.