Skip to content

Commit

Permalink
Use SSE2 in the x86_64 C version of encodeUtf8
Browse files (browse the repository at this point in the history)
  • Loading branch information
ethercrow committed Mar 28, 2021
1 parent aab96e1 commit 0625ea0
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 22 deletions.
3 changes: 2 additions & 1 deletion benchmarks/haskell/Benchmarks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ main = do
, env (DecodeUtf8.initEnv (tf "russian.txt")) (DecodeUtf8.benchmark "russian")
, env (DecodeUtf8.initEnv (tf "japanese.txt")) (DecodeUtf8.benchmark "japanese")
, env (DecodeUtf8.initEnv (tf "ascii.txt")) (DecodeUtf8.benchmarkASCII)
, EncodeUtf8.benchmark "επανάληψη 竺法蘭共譯"
, EncodeUtf8.benchmark "non-ASCII" "επανάληψη 竺法蘭共譯"
, EncodeUtf8.benchmark "ASCII" "lorem ipsum"
, env (Equality.initEnv (tf "japanese.txt")) Equality.benchmark
, FileRead.benchmark (tf "russian.txt")
, FoldLines.benchmark (tf "russian.txt")
Expand Down
8 changes: 4 additions & 4 deletions benchmarks/haskell/Benchmarks/EncodeUtf8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

benchmark :: String -> Benchmark
benchmark string =
benchmark :: String -> String -> Benchmark
benchmark name string =
bgroup "EncodeUtf8"
[ bench "Text" $ whnf (B.length . T.encodeUtf8) text
, bench "LazyText" $ whnf (BL.length . TL.encodeUtf8) lazyText
[ bench ("Text (" ++ name ++ ")") $ whnf (B.length . T.encodeUtf8) text
, bench ("LazyText (" ++ name ++ ")") $ whnf (BL.length . TL.encodeUtf8) lazyText
]
where
-- The string in different formats
Expand Down
41 changes: 24 additions & 17 deletions cbits/cbits.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,29 +276,36 @@ _hs_text_encode_utf8(uint8_t **destp, const uint16_t *src, size_t srcoff,

ascii:
#if defined(__x86_64__)
while (srcend - src >= 4) {
uint64_t w = *((uint64_t *) src);
while (srcend - src >= 8) {
union { uint64_t halves[2]; __m128i whole; } eight_chars;
eight_chars.whole = _mm_loadu_si128((__m128i *) src);

const uint64_t w = eight_chars.halves[0];
if (w & 0xFF80FF80FF80FF80ULL) {
if (!(w & 0x000000000000FF80ULL)) {
*dest++ = w & 0xFFFF;
src++;
if (!(w & 0x00000000FF800000ULL)) {
*dest++ = (w >> 16) & 0xFFFF;
src++;
if (!(w & 0x0000FF8000000000ULL)) {
*dest++ = (w >> 32) & 0xFFFF;
src++;
}
}
*dest++ = w & 0xFFFF;
src++;
if (!(w & 0x00000000FF800000ULL)) {
*dest++ = (w >> 16) & 0xFFFF;
src++;
if (!(w & 0x0000FF8000000000ULL)) {
*dest++ = (w >> 32) & 0xFFFF;
src++;
}
}
}
break;
}
*dest++ = w & 0xFFFF;
*dest++ = (w >> 16) & 0xFFFF;
*dest++ = (w >> 32) & 0xFFFF;
*dest++ = w >> 48;
src += 4;

if (eight_chars.halves[1] & 0xFF80FF80FF80FF80ULL) {
break;
}

const __m128i eight_ascii_chars = _mm_packus_epi16(eight_chars.whole, eight_chars.whole);
_mm_storel_epi64((__m128i *)dest, eight_ascii_chars);

dest += 8;
src += 8;
}
#endif

Expand Down

0 comments on commit 0625ea0

Please sign in to comment.