Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up Data.ByteString.Short.unpack #526

Merged
merged 5 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 41 additions & 10 deletions Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE Unsafe #-}

{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-}
{-# OPTIONS_HADDOCK not-home #-}

{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should probably stop using -fexpose-all-unfolding. Originally I thought that this flag would have the same effect as marking every definition as INLINEABLE, but it actually exposes the optimized unfoldings which may not interact with RULEs etc. like the unoptimized unfoldings would. The GHC issues https://gitlab.haskell.org/ghc/ghc/-/issues/22202 and https://gitlab.haskell.org/ghc/ghc/-/issues/22203 are relevant in this context.

This doesn't need to happen in this PR though.

-- Not all architectures are forgiving of unaligned accesses; whitelist ones
-- which are known not to trap (either to the kernel for emulation, or crash).
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) \
Expand Down Expand Up @@ -267,7 +266,6 @@ import Prelude

import qualified Data.ByteString.Internal as BS

import qualified Data.Foldable as Foldable
import qualified Data.List as List
import qualified GHC.Exts
import qualified Language.Haskell.TH.Lib as TH
Expand Down Expand Up @@ -338,15 +336,15 @@ instance Read ShortByteString where
instance GHC.Exts.IsList ShortByteString where
type Item ShortByteString = Word8
fromList = packBytes
toList = unpackBytes
toList = unpack

-- | Beware: 'fromString' truncates multi-byte characters to octets.
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ShortByteString where
fromString = packChars

instance Data ShortByteString where
gfoldl f z txt = z packBytes `f` unpackBytes txt
gfoldl f z txt = z packBytes `f` unpack txt
toConstr _ = error "Data.ByteString.Short.ShortByteString.toConstr"
gunfold _ _ = error "Data.ByteString.Short.ShortByteString.gunfold"
dataTypeOf _ = mkNoRepType "Data.ByteString.Short.ShortByteString"
Expand Down Expand Up @@ -543,7 +541,20 @@ pack = packBytes

-- | /O(n)/. Convert a 'ShortByteString' into a list.
unpack :: ShortByteString -> [Word8]
unpack = unpackBytes
unpack sbs = GHC.Exts.build (unpackFoldr sbs)
{-# INLINE unpack #-}

--
-- Have unpack fuse with good list consumers
--
unpackFoldr :: ShortByteString -> (Word8 -> a -> a) -> a -> a
unpackFoldr sbs k z = foldr k z sbs
{-# INLINE [0] unpackFoldr #-}

{-# RULES
"ShortByteString unpack-list" [1] forall bs .
unpackFoldr bs (:) [] = unpackBytes bs
#-}

packChars :: [Char] -> ShortByteString
packChars = \cs -> packLenBytes (List.length cs) (List.map BS.c2w cs)
Expand Down Expand Up @@ -578,6 +589,7 @@ unpackChars sbs = unpackAppendCharsLazy sbs []
unpackBytes :: ShortByteString -> [Word8]
unpackBytes sbs = unpackAppendBytesLazy sbs []


-- Why 100 bytes you ask? Because on a 64bit machine the list we allocate
hasufell marked this conversation as resolved.
Show resolved Hide resolved
-- takes just shy of 4k which seems like a reasonable amount.
-- (5 words per list element, 8 bytes per word, 100 elements = 4000 bytes)
Expand Down Expand Up @@ -610,15 +622,15 @@ unpackAppendBytesLazy sbs = go 0 (length sbs)
unpackAppendCharsStrict :: ShortByteString -> Int -> Int -> [Char] -> [Char]
unpackAppendCharsStrict !sbs off len = go (off-1) (off-1 + len)
where
go !sentinal !i !acc
go !sentinal !i acc
| i == sentinal = acc
| otherwise = let !c = indexCharArray (asBA sbs) i
in go sentinal (i-1) (c:acc)

unpackAppendBytesStrict :: ShortByteString -> Int -> Int -> [Word8] -> [Word8]
unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len)
where
go !sentinal !i !acc
go !sentinal !i acc
| i == sentinal = acc
| otherwise = let !w = indexWord8Array (asBA sbs) i
in go sentinal (i-1) (w:acc)
Expand Down Expand Up @@ -910,13 +922,27 @@ foldl' f v = List.foldl' f v . unpack
--
-- @since 0.11.3.0
foldr :: (Word8 -> a -> a) -> a -> ShortByteString -> a
foldr f v = List.foldr f v . unpack
foldr k v = \sbs ->
let l = length sbs
ba = asBA sbs
w = indexWord8Array ba
go !n | n >= l = v
| otherwise = k (w n) (go (n + 1))
in go 0
{-# INLINE foldr #-}

-- | 'foldr'' is like 'foldr', but strict in the accumulator.
--
-- @since 0.11.3.0
foldr' :: (Word8 -> a -> a) -> a -> ShortByteString -> a
foldr' k v = Foldable.foldr' k v . unpack
foldr' k v = \sbs ->
let l = length sbs
ba = asBA sbs
w = indexWord8Array ba
go !ix !v' | ix < 0 = v'
| otherwise = go (ix - 1) (k (w ix) v')
in go (l - 1) v
{-# INLINE foldr' #-}

-- | 'foldl1' is a variant of 'foldl' that has no starting value
-- argument, and thus must be applied to non-empty 'ShortByteString's.
Expand Down Expand Up @@ -1103,6 +1129,7 @@ breakEnd p = \sbs -> splitAt (findFromEndUntil p sbs) sbs
-- @since 0.11.3.0
break :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
break = \p -> \sbs -> case findIndexOrLength p sbs of n -> (take n sbs, drop n sbs)
{-# INLINE break #-}
hasufell marked this conversation as resolved.
Show resolved Hide resolved

-- | Similar to 'Prelude.span',
-- returns the longest (possibly empty) prefix of elements
Expand Down Expand Up @@ -1296,6 +1323,7 @@ unfoldrN i f = \x0 ->
Just (w, x'') -> do
writeWord8Array mba n' w
go' x'' (n'+1)
{-# INLINE unfoldrN #-}



Expand Down Expand Up @@ -1455,6 +1483,7 @@ filter k = \sbs -> let l = length sbs
go' (br+1) (bw+1)
else
go' (br+1) bw
{-# INLINE filter #-}

-- | /O(n)/ The 'find' function takes a predicate and a ShortByteString,
-- and returns the first element in matching the predicate, or 'Nothing'
Expand All @@ -1467,6 +1496,7 @@ find :: (Word8 -> Bool) -> ShortByteString -> Maybe Word8
find f = \sbs -> case findIndex f sbs of
Just n -> Just (sbs `index` n)
_ -> Nothing
{-# INLINE find #-}

-- | /O(n)/ The 'partition' function takes a predicate a ShortByteString and returns
-- the pair of ShortByteStrings with elements which do and do not satisfy the
Expand Down Expand Up @@ -1548,6 +1578,7 @@ findIndex k = \sbs ->
| k (w n) = Just n
| otherwise = go (n + 1)
in go 0
{-# INLINE findIndex #-}


-- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the
Expand Down
19 changes: 17 additions & 2 deletions bench/BenchShort.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

module BenchShort (benchShort) where

import Control.DeepSeq (force)
import Data.Foldable (foldMap)
import Data.Maybe (listToMaybe)
import Data.Monoid
Expand Down Expand Up @@ -107,6 +108,9 @@ w = fromIntegral
hashWord8 :: Word8 -> Word8
hashWord8 = fromIntegral . hashInt . fromIntegral

foldInputs' :: [[Word8]]
foldInputs' = force (S.unpack <$> foldInputs)

foldInputs :: [S.ShortByteString]
foldInputs = map (\k -> S.pack $ if k <= 6 then take (2 ^ k) [32..95] else concat (replicate (2 ^ (k - 6)) [32..95])) [0..16]

Expand Down Expand Up @@ -188,8 +192,12 @@ benchShort = bgroup "ShortByteString"
]
, bgroup "folds"
[ bgroup "strict"
[ bgroup "foldl'" $ map (\s -> bench (show $ S.length s) $
[ bgroup "foldl" $ map (\s -> bench (show $ S.length s) $
nf (S.foldl (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs
, bgroup "foldl'" $ map (\s -> bench (show $ S.length s) $
nf (S.foldl' (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs
, bgroup "foldr" $ map (\s -> bench (show $ S.length s) $
nf (S.foldr (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs
, bgroup "foldr'" $ map (\s -> bench (show $ S.length s) $
nf (S.foldr' (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs
, bgroup "foldr1'" $ map (\s -> bench (show $ S.length s) $
Expand Down Expand Up @@ -231,5 +239,12 @@ benchShort = bgroup "ShortByteString"
, bench "FindIndex/inlined" $ nf (S.findIndex (== nl)) absurdlong
, bench "FindIndex/non-inlined" $ nf (S.findIndex (nilEq nl)) absurdlong
]
, bgroup "ShortByteString conversions" $
[ bgroup "unpack" $ map (\s -> bench (show $ S.length s) $
nf (\x -> S.unpack x) s) foldInputs
, bgroup "pack" $ map (\s -> bench (show $ length s) $
nf S.pack s) foldInputs'
, bench "unpack and get last element" $ nf (\x -> last . S.unpack $ x) absurdlong
, bench "unpack and get first 120 elements" $ nf (\x -> take 120 . S.unpack $ x) absurdlong
]
]