From 76a4ed9b1b658d220ab951f24df18375bbb8dcf8 Mon Sep 17 00:00:00 2001
From: Julian Ospald <hasufell@posteo.de>
Date: Sun, 3 Jul 2022 21:48:57 +0200
Subject: [PATCH] Speed up Data.ByteString.Short.unpack

---
 Data/ByteString/Short/Internal.hs | 60 +++----------------------------
 bench/BenchShort.hs               | 12 +++++++
 2 files changed, 16 insertions(+), 56 deletions(-)

diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs
index cc18e3a31..ff07b5d1f 100644
--- a/Data/ByteString/Short/Internal.hs
+++ b/Data/ByteString/Short/Internal.hs
@@ -561,67 +561,15 @@ packLenBytes len ws0 =
       writeWord8Array mba i w
       go mba (i+1) ws
 
--- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand
--- we would like to write a tight loop that just blats the list into memory, on
--- the other hand we want it to be unpacked lazily so we don't end up with a
--- massive list data structure in memory.
---
--- Our strategy is to combine both: we will unpack lazily in reasonable sized
--- chunks, where each chunk is unpacked strictly.
---
--- unpackChars does the lazy loop, while unpackAppendBytes and
--- unpackAppendChars do the chunks strictly.
 
 unpackChars :: ShortByteString -> [Char]
-unpackChars sbs = unpackAppendCharsLazy sbs []
+unpackChars sbs = List.map (indexCharArray (asBA sbs)) [0 .. lastIx]
+  where lastIx = length sbs - 1  -- -1 when sbs is empty, so the range (and result) is []
 
 unpackBytes :: ShortByteString -> [Word8]
-unpackBytes sbs = unpackAppendBytesLazy sbs []
+unpackBytes sbs = List.map (unsafeIndex sbs) [0 .. lastIx]
+  where lastIx = length sbs - 1  -- -1 when sbs is empty, so the range (and result) is []
 
--- Why 100 bytes you ask? Because on a 64bit machine the list we allocate
--- takes just shy of 4k which seems like a reasonable amount.
--- (5 words per list element, 8 bytes per word, 100 elements = 4000 bytes)
-
-unpackAppendCharsLazy :: ShortByteString -> [Char] -> [Char]
-unpackAppendCharsLazy sbs = go 0 (length sbs)
-  where
-    sz = 100
-
-    go off len cs
-      | len <= sz = unpackAppendCharsStrict sbs off len cs
-      | otherwise = unpackAppendCharsStrict sbs off sz  remainder
-                      where remainder = go (off+sz) (len-sz) cs
-
-unpackAppendBytesLazy :: ShortByteString -> [Word8] -> [Word8]
-unpackAppendBytesLazy sbs = go 0 (length sbs)
-  where
-    sz = 100
-
-    go off len ws
-      | len <= sz = unpackAppendBytesStrict sbs off len ws
-      | otherwise = unpackAppendBytesStrict sbs off sz  remainder
-                      where remainder = go (off+sz) (len-sz) ws
-
--- For these unpack functions, since we're unpacking the whole list strictly we
--- build up the result list in an accumulator. This means we have to build up
--- the list starting at the end. So our traversal starts at the end of the
--- buffer and loops down until we hit the sentinal:
-
-unpackAppendCharsStrict :: ShortByteString -> Int -> Int -> [Char] -> [Char]
-unpackAppendCharsStrict !sbs off len = go (off-1) (off-1 + len)
-  where
-    go !sentinal !i !acc
-      | i == sentinal = acc
-      | otherwise     = let !c = indexCharArray (asBA sbs) i
-                        in go sentinal (i-1) (c:acc)
-
-unpackAppendBytesStrict :: ShortByteString -> Int -> Int -> [Word8] -> [Word8]
-unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len)
-  where
-    go !sentinal !i !acc
-      | i == sentinal = acc
-      | otherwise     = let !w = indexWord8Array (asBA sbs) i
-                         in go sentinal (i-1) (w:acc)
 
 
 ------------------------------------------------------------------------
diff --git a/bench/BenchShort.hs b/bench/BenchShort.hs
index f6c37662e..88892cfdf 100644
--- a/bench/BenchShort.hs
+++ b/bench/BenchShort.hs
@@ -231,5 +231,17 @@ benchShort = bgroup "ShortByteString"
         , bench "FindIndex/inlined"       $ nf (S.findIndex      (== nl)) absurdlong
         , bench "FindIndex/non-inlined"   $ nf (S.findIndex   (nilEq nl)) absurdlong
         ]
+    , bgroup "ShortByteString unpack" $
+        [ bench "unpack and look at first 100 elements" $ nf (unpackX) absurdlong
+        , bench "unpackLast"                            $ nf (unpackLast) absurdlong
+        ]
     ]
 
+
+unpackX :: ShortByteString -> Bool  -- benchmark helper: True iff the unpacked list is long enough to match the pattern
+unpackX sbs = case S.unpack sbs of  -- the match inspects only the leading cons cells spelled out below
+                (_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_:_) -> True
+                _                                                                                                       -> False
+
+unpackLast :: ShortByteString -> Word8
+unpackLast = Prelude.last . S.unpack  -- Prelude.last is partial: an input that unpacks to [] would error