From 76a4ed9b1b658d220ab951f24df18375bbb8dcf8 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Sun, 3 Jul 2022 21:48:57 +0200 Subject: [PATCH] Speed up Data.ByteString.Short.unpack --- Data/ByteString/Short/Internal.hs | 60 +++---------------------------- bench/BenchShort.hs | 12 +++++++ 2 files changed, 16 insertions(+), 56 deletions(-) diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs index cc18e3a31..ff07b5d1f 100644 --- a/Data/ByteString/Short/Internal.hs +++ b/Data/ByteString/Short/Internal.hs @@ -561,67 +561,15 @@ packLenBytes len ws0 = writeWord8Array mba i w go mba (i+1) ws --- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand --- we would like to write a tight loop that just blats the list into memory, on --- the other hand we want it to be unpacked lazily so we don't end up with a --- massive list data structure in memory. --- --- Our strategy is to combine both: we will unpack lazily in reasonable sized --- chunks, where each chunk is unpacked strictly. --- --- unpackChars does the lazy loop, while unpackAppendBytes and --- unpackAppendChars do the chunks strictly. unpackChars :: ShortByteString -> [Char] -unpackChars sbs = unpackAppendCharsLazy sbs [] +unpackChars sbs = let ix = length sbs - 1 + in List.map (indexCharArray (asBA sbs)) [0..ix] unpackBytes :: ShortByteString -> [Word8] -unpackBytes sbs = unpackAppendBytesLazy sbs [] +unpackBytes sbs = let ix = length sbs - 1 + in List.map (unsafeIndex sbs) [0..ix] --- Why 100 bytes you ask? Because on a 64bit machine the list we allocate --- takes just shy of 4k which seems like a reasonable amount. --- (5 words per list element, 8 bytes per word, 100 elements = 4000 bytes) - -unpackAppendCharsLazy :: ShortByteString -> [Char] -> [Char] -unpackAppendCharsLazy sbs = go 0 (length sbs) - where - sz = 100 - - go off len cs - | len <= sz = unpackAppendCharsStrict sbs off len cs - | otherwise = unpackAppendCharsStrict sbs off sz remainder - where remainder = go (off+sz) (len-sz) cs - -unpackAppendBytesLazy :: ShortByteString -> [Word8] -> [Word8] -unpackAppendBytesLazy sbs = go 0 (length sbs) - where - sz = 100 - - go off len ws - | len <= sz = unpackAppendBytesStrict sbs off len ws - | otherwise = unpackAppendBytesStrict sbs off sz remainder - where remainder = go (off+sz) (len-sz) ws - --- For these unpack functions, since we're unpacking the whole list strictly we --- build up the result list in an accumulator. This means we have to build up --- the list starting at the end. So our traversal starts at the end of the --- buffer and loops down until we hit the sentinal: - -unpackAppendCharsStrict :: ShortByteString -> Int -> Int -> [Char] -> [Char] -unpackAppendCharsStrict !sbs off len = go (off-1) (off-1 + len) - where - go !sentinal !i !acc - | i == sentinal = acc - | otherwise = let !c = indexCharArray (asBA sbs) i - in go sentinal (i-1) (c:acc) - -unpackAppendBytesStrict :: ShortByteString -> Int -> Int -> [Word8] -> [Word8] -unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len) - where - go !sentinal !i !acc - | i == sentinal = acc - | otherwise = let !w = indexWord8Array (asBA sbs) i - in go sentinal (i-1) (w:acc) ------------------------------------------------------------------------ diff --git a/bench/BenchShort.hs b/bench/BenchShort.hs index f6c37662e..88892cfdf 100644 --- a/bench/BenchShort.hs +++ b/bench/BenchShort.hs @@ -231,5 +231,17 @@ benchShort = bgroup "ShortByteString" , bench "FindIndex/inlined" $ nf (S.findIndex (== nl)) absurdlong , bench "FindIndex/non-inlined" $ nf (S.findIndex (nilEq nl)) absurdlong ] + , bgroup "ShortByteString unpack" $ + [ bench "unpack and look at first 100 elements" $ nf (unpackX) absurdlong + , bench "unpackLast" $ nf (unpackLast) absurdlong + ] ] + +unpackX :: ShortByteString -> Bool +unpackX sbs = case S.unpack sbs of + (_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_) -> True + _ -> False + +unpackLast :: ShortByteString -> Word8 +unpackLast sbs = Prelude.last $ S.unpack sbs