diff --git a/Data/ByteString.hs b/Data/ByteString.hs index bcab21170..dfb015d89 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -230,7 +230,7 @@ import Prelude hiding (reverse,head,tail,last,init,null import Data.Bits (finiteBitSize, shiftL, (.|.), (.&.)) -import Data.ByteString.Internal +import Data.ByteString.Internal.Type import Data.ByteString.Lazy.Internal (fromStrict, toStrict) import Data.ByteString.Unsafe @@ -267,7 +267,6 @@ import GHC.IO (unsafePerformIO, unsafeDupablePerformIO) import GHC.Foreign (newCStringLen, peekCStringLen) import GHC.Stack.Types (HasCallStack) import Data.Char (ord) -import Foreign.Marshal.Utils (copyBytes) import GHC.Base (build) import GHC.Word hiding (Word8) @@ -375,16 +374,16 @@ infixl 5 `snoc` -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different -- complexity, as it requires making a copy. cons :: Word8 -> ByteString -> ByteString -cons c (BS x l) = unsafeCreate (l+1) $ \p -> unsafeWithForeignPtr x $ \f -> do - poke p c - memcpy (p `plusPtr` 1) f l +cons c (BS x l) = unsafeCreateFp (l+1) $ \p -> do + pokeFp p c + memcpyFp (p `plusForeignPtr` 1) x l {-# INLINE cons #-} -- | /O(n)/ Append a byte to the end of a 'ByteString' snoc :: ByteString -> Word8 -> ByteString -snoc (BS x l) c = unsafeCreate (l+1) $ \p -> unsafeWithForeignPtr x $ \f -> do - memcpy p f l - poke (p `plusPtr` l) c +snoc (BS x l) c = unsafeCreateFp (l+1) $ \p -> do + memcpyFp p x l + pokeFp (p `plusForeignPtr` l) c {-# INLINE snoc #-} -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty. @@ -459,8 +458,7 @@ append = mappend -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each -- element of @xs@. map :: (Word8 -> Word8) -> ByteString -> ByteString -map f (BS fp len) = unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \srcPtr -> - create len $ \dstPtr -> m srcPtr dstPtr +map f (BS srcPtr len) = unsafeCreateFp len $ \dstPtr -> m srcPtr dstPtr where m !p1 !p2 = map_ 0 where @@ -468,15 +466,17 @@ map f (BS fp len) = unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \srcPtr - map_ !n | n >= len = return () | otherwise = do - x <- peekByteOff p1 n - pokeByteOff p2 n (f x) + x <- peekFpByteOff p1 n + pokeFpByteOff p2 n (f x) map_ (n+1) {-# INLINE map #-} -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order. reverse :: ByteString -> ByteString -reverse (BS x l) = unsafeCreate l $ \p -> unsafeWithForeignPtr x $ \f -> - c_reverse p f (fromIntegral l) +reverse (BS x l) = unsafeCreateFp l $ \fp -> + unsafeWithForeignPtr fp $ \p -> + unsafeWithForeignPtr x $ \f -> + c_reverse p f (fromIntegral l) -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a -- 'ByteString' and \`intersperses\' that byte between the elements of @@ -485,8 +485,10 @@ reverse (BS x l) = unsafeCreate l $ \p -> unsafeWithForeignPtr x $ \f -> intersperse :: Word8 -> ByteString -> ByteString intersperse c ps@(BS x l) | length ps < 2 = ps - | otherwise = unsafeCreate (2*l-1) $ \p -> unsafeWithForeignPtr x $ \f -> - c_intersperse p f (fromIntegral l) c + | otherwise = unsafeCreateFp (2*l-1) $ \fp -> + unsafeWithForeignPtr fp $ \p -> + unsafeWithForeignPtr x $ \f -> + c_intersperse p f (fromIntegral l) c -- | The 'transpose' function transposes the rows and columns of its -- 'ByteString' argument. @@ -533,13 +535,13 @@ foldl' f v = \(BS fp len) -> let g ptr = go v ptr where - end = ptr `plusPtr` len + end = ptr `plusForeignPtr` len -- tail recursive; traverses array left to right go !z !p | p == end = return z - | otherwise = do x <- peek p - go (f z x) (p `plusPtr` 1) + | otherwise = do x <- peekFp p + go (f z x) (p `plusForeignPtr` 1) in - accursedUnutterablePerformIO $ unsafeWithForeignPtr fp g + accursedUnutterablePerformIO $ g fp {-# INLINE foldl' #-} -- | 'foldr', applied to a binary operator, a starting value @@ -567,15 +569,15 @@ foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a foldr' k v = \(BS fp len) -> -- see fold inlining let - g ptr = go v (end `plusPtr` len) + g ptr = go v (end `plusForeignPtr` len) where - end = ptr `plusPtr` (-1) + end = ptr `plusForeignPtr` (-1) -- tail recursive; traverses array right to left go !z !p | p == end = return z - | otherwise = do x <- peek p - go (k x z) (p `plusPtr` (-1)) + | otherwise = do x <- peekFp p + go (k x z) (p `plusForeignPtr` (-1)) in - accursedUnutterablePerformIO $ unsafeWithForeignPtr fp g + accursedUnutterablePerformIO $ g fp {-# INLINE foldr' #-} @@ -630,15 +632,15 @@ concatMap f = concat . foldr ((:) . f) [] -- any element of the 'ByteString' satisfies the predicate. any :: (Word8 -> Bool) -> ByteString -> Bool any _ (BS _ 0) = False -any f (BS x len) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x g +any f (BS x len) = accursedUnutterablePerformIO $ g x where g ptr = go ptr where - end = ptr `plusPtr` len + end = ptr `plusForeignPtr` len go !p | p == end = return False - | otherwise = do c <- peek p + | otherwise = do c <- peekFp p if f c then return True - else go (p `plusPtr` 1) + else go (p `plusForeignPtr` 1) {-# INLINE [1] any #-} {-# RULES @@ -659,15 +661,15 @@ anyByte c (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -- if all elements of the 'ByteString' satisfy the predicate. all :: (Word8 -> Bool) -> ByteString -> Bool all _ (BS _ 0) = True -all f (BS x len) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x g +all f (BS x len) = accursedUnutterablePerformIO $ g x where g ptr = go ptr where - end = ptr `plusPtr` len + end = ptr `plusForeignPtr` len go !p | p == end = return True -- end of list - | otherwise = do c <- peek p + | otherwise = do c <- peekFp p if f c - then go (p `plusPtr` 1) + then go (p `plusForeignPtr` 1) else return False {-# INLINE [1] all #-} @@ -705,7 +707,7 @@ minimum xs@(BS x l) -- passing an accumulating parameter from left to right, and returning a -- final value of this accumulator together with the new ByteString. mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) -mapAccumL f acc = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \a -> do +mapAccumL f acc = \(BS a len) -> unsafeDupablePerformIO $ do -- see fold inlining gp <- mallocByteString len let @@ -714,11 +716,11 @@ mapAccumL f acc = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr mapAccumL_ !s !n | n >= len = return s | otherwise = do - x <- peekByteOff src n + x <- peekFpByteOff src n let (s', y) = f s x - pokeByteOff dst n y + pokeFpByteOff dst n y mapAccumL_ s' (n+1) - acc' <- unsafeWithForeignPtr gp (go a) + acc' <- go a gp return (acc', BS gp len) {-# INLINE mapAccumL #-} @@ -727,7 +729,7 @@ mapAccumL f acc = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr -- passing an accumulating parameter from right to left, and returning a -- final value of this accumulator together with the new ByteString. mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString) -mapAccumR f acc = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \a -> do +mapAccumR f acc = \(BS a len) -> unsafeDupablePerformIO $ do -- see fold inlining gp <- mallocByteString len let @@ -735,11 +737,11 @@ mapAccumR f acc = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr where mapAccumR_ !s (-1) = return s mapAccumR_ !s !n = do - x <- peekByteOff src n + x <- peekFpByteOff src n let (s', y) = f s x - pokeByteOff dst n y + pokeFpByteOff dst n y mapAccumR_ s' (n-1) - acc' <- unsafeWithForeignPtr gp (go a) + acc' <- go a gp return (acc', BS gp len) {-# INLINE mapAccumR #-} @@ -765,21 +767,20 @@ scanl -- ^ input of length n -> ByteString -- ^ output of length n+1 -scanl f v = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \a -> +scanl f v = \(BS a len) -> unsafeCreateFp (len+1) $ \q -> do -- see fold inlining - create (len+1) $ \q -> do - poke q v + pokeFp q v let go src dst = scanl_ v 0 where scanl_ !z !n | n >= len = return () | otherwise = do - x <- peekByteOff src n + x <- peekFpByteOff src n let z' = f z x - pokeByteOff dst n z' + pokeFpByteOff dst n z' scanl_ z' (n+1) - go a (q `plusPtr` 1) + go a (q `plusForeignPtr` 1) {-# INLINE scanl #-} -- | 'scanl1' is a variant of 'scanl' that has no starting value argument. @@ -810,19 +811,18 @@ scanr -- ^ input of length n -> ByteString -- ^ output of length n+1 -scanr f v = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \a -> +scanr f v = \(BS a len) -> unsafeCreateFp (len+1) $ \b -> do -- see fold inlining - create (len+1) $ \b -> do - poke (b `plusPtr` len) v + pokeFpByteOff b len v let go p q = scanr_ v (len-1) where scanr_ !z !n | n < 0 = return () | otherwise = do - x <- peekByteOff p n + x <- peekFpByteOff p n let z' = f x z - pokeByteOff q n z' + pokeFpByteOff q n z' scanr_ z' (n-1) go a b {-# INLINE scanr #-} @@ -846,7 +846,8 @@ scanr1 f ps = case unsnoc ps of replicate :: Int -> Word8 -> ByteString replicate w c | w <= 0 = empty - | otherwise = unsafeCreate w $ \ptr -> + | otherwise = unsafeCreateFp w $ \fptr -> + unsafeWithForeignPtr fptr $ \ptr -> void $ memset ptr c (fromIntegral w) {-# INLINE replicate #-} @@ -882,7 +883,7 @@ unfoldr f = concat . unfoldChunk 32 64 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a) unfoldrN i f x0 | i < 0 = (empty, Just x0) - | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0 + | otherwise = unsafePerformIO $ createFpAndTrim' i $ \p -> go p x0 0 where go !p !x !n = go' x n where @@ -890,7 +891,7 @@ unfoldrN i f x0 | n' == i = return (0, n', Just x') | otherwise = case f x' of Nothing -> return (0, n', Nothing) - Just (w,x'') -> do pokeByteOff p n' w + Just (w,x'') -> do pokeFpByteOff p n' w go' x'' (n'+1) {-# INLINE unfoldrN #-} @@ -1130,10 +1131,9 @@ splitWith _ (BS _ 0) = [] splitWith predicate (BS fp len) = splitWith0 0 len fp where splitWith0 !off' !len' !fp' = accursedUnutterablePerformIO $ - unsafeWithForeignPtr fp $ \p -> - splitLoop p 0 off' len' fp' + splitLoop fp 0 off' len' fp' - splitLoop :: Ptr Word8 + splitLoop :: ForeignPtr Word8 -> Int -> Int -> Int -> ForeignPtr Word8 -> IO [ByteString] @@ -1142,7 +1142,7 @@ splitWith predicate (BS fp len) = splitWith0 0 len fp go idx' | idx' >= len' = return [BS (plusForeignPtr fp' off') idx'] | otherwise = do - w <- peekElemOff p (off'+idx') + w <- peekFpByteOff p (off'+idx') if predicate w then return (BS (plusForeignPtr fp' off') idx' : splitWith0 (off'+idx'+1) (len'-idx'-1) fp') @@ -1213,19 +1213,16 @@ groupBy k xs = case uncons xs of intercalate :: ByteString -> [ByteString] -> ByteString intercalate _ [] = mempty intercalate _ [x] = x -- This branch exists for laziness, not speed -intercalate (BS fSepPtr sepLen) (BS fhPtr hLen : t) = - unsafeCreate totalLen $ \dstPtr0 -> - unsafeWithForeignPtr fSepPtr $ \sepPtr -> do - unsafeWithForeignPtr fhPtr $ \hPtr -> - memcpy dstPtr0 hPtr hLen +intercalate (BS sepPtr sepLen) (BS hPtr hLen : t) = + unsafeCreateFp totalLen $ \dstPtr0 -> do + memcpyFp dstPtr0 hPtr hLen let go _ [] = pure () - go dstPtr (BS fChunkPtr chunkLen : chunks) = do - memcpy dstPtr sepPtr sepLen - let destPtr' = dstPtr `plusPtr` sepLen - unsafeWithForeignPtr fChunkPtr $ \chunkPtr -> - memcpy destPtr' chunkPtr chunkLen - go (destPtr' `plusPtr` chunkLen) chunks - go (dstPtr0 `plusPtr` hLen) t + go dstPtr (BS chunkPtr chunkLen : chunks) = do + memcpyFp dstPtr sepPtr sepLen + let destPtr' = dstPtr `plusForeignPtr` sepLen + memcpyFp destPtr' chunkPtr chunkLen + go (destPtr' `plusForeignPtr` chunkLen) chunks + go (dstPtr0 `plusForeignPtr` hLen) t where totalLen = List.foldl' (\acc chunk -> acc +! sepLen +! length chunk) hLen t (+!) = checkedAdd "intercalate" @@ -1317,12 +1314,12 @@ count w (BS x m) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p -> -- returns the index of the first element in the ByteString -- satisfying the predicate. findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int -findIndex k (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x g +findIndex k (BS x l) = accursedUnutterablePerformIO $ g x where g !ptr = go 0 where go !n | n >= l = return Nothing - | otherwise = do w <- peek $ ptr `plusPtr` n + | otherwise = do w <- peekFp $ ptr `plusForeignPtr` n if k w then return (Just n) else go (n+1) @@ -1334,12 +1331,12 @@ findIndex k (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x g -- -- @since 0.10.12.0 findIndexEnd :: (Word8 -> Bool) -> ByteString -> Maybe Int -findIndexEnd k (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x g +findIndexEnd k (BS x l) = accursedUnutterablePerformIO $ g x where g !ptr = go (l-1) where go !n | n < 0 = return Nothing - | otherwise = do w <- peekByteOff ptr n + | otherwise = do w <- peekFpByteOff ptr n if k w then return (Just n) else go (n-1) @@ -1382,24 +1379,25 @@ notElem c ps = not (c `elem` ps) -- returns a ByteString containing those characters that satisfy the -- predicate. filter :: (Word8 -> Bool) -> ByteString -> ByteString -filter k = \ps@(BS x l) -> +filter k = \ps@(BS pIn l) -> -- see fold inlining. if null ps then ps else - unsafePerformIO $ createAndTrim l $ \pOut -> unsafeWithForeignPtr x $ \pIn -> do + unsafeDupablePerformIO $ createFpAndTrim l $ \pOut -> do let go' pf pt = go pf pt where - end = pf `plusPtr` l + end = pf `plusForeignPtr` l go !f !t | f == end = return t | otherwise = do - w <- peek f + w <- peekFp f if k w - then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1) - else go (f `plusPtr` 1) t + then pokeFp t w + >> go (f `plusForeignPtr` 1) (t `plusForeignPtr` 1) + else go (f `plusForeignPtr` 1) t t <- go' pIn pOut - return $! t `minusPtr` pOut -- actual length + return $! t `minusForeignPtr` pOut -- actual length {-# INLINE filter #-} {- @@ -1445,34 +1443,33 @@ find f p = case findIndex f p of -- partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString) partition f s = unsafeDupablePerformIO $ - do fp' <- mallocByteString len - unsafeWithForeignPtr fp' $ \p -> - do let end = p `plusPtr` (len - 1) + do p <- mallocByteString len + let end = p `plusForeignPtr` (len - 1) mid <- sep 0 p end rev mid end - let i = mid `minusPtr` p - return (BS fp' i, - BS (plusForeignPtr fp' i) (len - i)) + let i = mid `minusForeignPtr` p + return (BS p i, + BS (p `plusForeignPtr` i) (len - i)) where len = length s - incr = (`plusPtr` 1) - decr = (`plusPtr` (-1)) + incr = (`plusForeignPtr` 1) + decr = (`plusForeignPtr` (-1)) sep !i !p1 !p2 | i == len = return p1 - | f w = do poke p1 w + | f w = do pokeFp p1 w sep (i + 1) (incr p1) p2 - | otherwise = do poke p2 w + | otherwise = do pokeFp p2 w sep (i + 1) p1 (decr p2) where w = s `unsafeIndex` i - rev !p1 !p2 + rev !p1 !p2 -- fixme: surely there are faster ways to do this | p1 >= p2 = return () - | otherwise = do a <- peek p1 - b <- peek p2 - poke p1 b - poke p2 a + | otherwise = do a <- peekFp p1 + b <- peekFp p2 + pokeFp p1 b + pokeFp p2 a rev (incr p1) (decr p2) -- -------------------------------------------------------------------- @@ -1663,20 +1660,18 @@ zipWith f ps qs = case uncons ps of -- -- @since 0.11.1.0 packZipWith :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString -packZipWith f (BS fp l) (BS fq m) = unsafeDupablePerformIO $ - unsafeWithForeignPtr fp $ \a -> - unsafeWithForeignPtr fq $ \b -> - create len $ go a b +packZipWith f (BS a l) (BS b m) = unsafeDupablePerformIO $ + createFp len $ go a b where go p1 p2 = zipWith_ 0 where - zipWith_ :: Int -> Ptr Word8 -> IO () + zipWith_ :: Int -> ForeignPtr Word8 -> IO () zipWith_ !n !r | n >= len = return () | otherwise = do - x <- peekByteOff p1 n - y <- peekByteOff p2 n - pokeByteOff r n (f x y) + x <- peekFpByteOff p1 n + y <- peekFpByteOff p2 n + pokeFpByteOff r n (f x y) zipWith_ (n+1) r len = min l m @@ -1709,10 +1704,10 @@ tails p | null p = [empty] sort :: ByteString -> ByteString sort (BS input l) -- qsort outperforms counting sort for small arrays - | l <= 20 = unsafeCreate l $ \ptr -> unsafeWithForeignPtr input $ \inp -> do - memcpy ptr inp l - c_sort ptr (fromIntegral l) - | otherwise = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do + | l <= 20 = unsafeCreateFp l $ \destFP -> do + memcpyFp destFP input l + unsafeWithForeignPtr destFP $ \dest -> c_sort dest (fromIntegral l) + | otherwise = unsafeCreateFp l $ \p -> allocaArray 256 $ \arr -> do _ <- memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize))) unsafeWithForeignPtr input (\x -> countOccurrences arr x l) @@ -1721,7 +1716,7 @@ sort (BS input l) go i !ptr = do n <- peekElemOff arr i when (n /= 0) $ void $ memset ptr (fromIntegral i) n go (i + 1) (ptr `plusPtr` fromIntegral n) - go 0 p + unsafeWithForeignPtr p (go 0) where -- Count the number of occurrences of each byte. -- Used by 'sort' @@ -1771,8 +1766,8 @@ packCString cstr = do -- The @ByteString@ is a normal Haskell value and will be managed on the -- Haskell heap. packCStringLen :: CStringLen -> IO ByteString -packCStringLen (cstr, len) | len >= 0 = create len $ \p -> - memcpy p (castPtr cstr) len +packCStringLen (cstr, len) | len >= 0 = createFp len $ \fp -> + unsafeWithForeignPtr fp $ \p -> memcpy p (castPtr cstr) len packCStringLen (_, len) = moduleErrorIO "packCStringLen" ("negative length: " ++ show len) @@ -1785,8 +1780,7 @@ packCStringLen (_, len) = -- is needed in the rest of the program. -- copy :: ByteString -> ByteString -copy (BS x l) = unsafeCreate l $ \p -> unsafeWithForeignPtr x $ \f -> - memcpy p f l +copy (BS x l) = unsafeCreateFp l $ \p -> memcpyFp p x l -- --------------------------------------------------------------------- -- Line IO @@ -1845,8 +1839,7 @@ hGetLine h = mkPS :: RawBuffer Word8 -> Int -> Int -> IO ByteString mkPS buf start end = - create len $ \p -> - withRawBuffer buf $ \pbuf -> copyBytes p (pbuf `plusPtr` start) len + createFp len $ \fp -> memcpyFp fp (buf `plusForeignPtr` start) len where len = end - start @@ -1899,7 +1892,8 @@ putStr = hPut stdout -- hGet :: Handle -> Int -> IO ByteString hGet h i - | i > 0 = createAndTrim i $ \p -> hGetBuf h p i + | i > 0 = createFpAndTrim i $ \fp -> + unsafeWithForeignPtr fp $ \p -> hGetBuf h p i | i == 0 = return empty | otherwise = illegalBufferSize h "hGet" i @@ -1913,7 +1907,8 @@ hGet h i -- hGetNonBlocking :: Handle -> Int -> IO ByteString hGetNonBlocking h i - | i > 0 = createAndTrim i $ \p -> hGetBufNonBlocking h p i + | i > 0 = createFpAndTrim i $ \fp -> + unsafeWithForeignPtr fp $ \p -> hGetBufNonBlocking h p i | i == 0 = return empty | otherwise = illegalBufferSize h "hGetNonBlocking" i @@ -1924,7 +1919,8 @@ hGetNonBlocking h i -- hGetSome :: Handle -> Int -> IO ByteString hGetSome hh i - | i > 0 = createAndTrim i $ \p -> hGetBufSome hh p i + | i > 0 = createFpAndTrim i $ \fp -> + unsafeWithForeignPtr fp $ \p -> hGetBufSome hh p i | i == 0 = return empty | otherwise = illegalBufferSize hh "hGetSome" i diff --git a/Data/ByteString/Builder/Internal.hs b/Data/ByteString/Builder/Internal.hs index 800476154..a05beead9 100644 --- a/Data/ByteString/Builder/Internal.hs +++ b/Data/ByteString/Builder/Internal.hs @@ -133,7 +133,7 @@ import Data.Semigroup (Semigroup((<>))) #endif import qualified Data.ByteString as S -import qualified Data.ByteString.Internal as S +import qualified Data.ByteString.Internal.Type as S import qualified Data.ByteString.Lazy.Internal as L import qualified Data.ByteString.Short.Internal as Sh @@ -1099,8 +1099,8 @@ buildStepToCIOS (AllocationStrategy nextBuffer bufSize trim) = wrapChunk !op' mkCIOS | chunkSize == 0 = mkCIOS True | trim chunkSize size = do - bs <- S.create chunkSize $ \pbuf' -> - copyBytes pbuf' pbuf chunkSize + bs <- S.createFp chunkSize $ \fpbuf' -> + S.memcpyFp fpbuf' fpbuf chunkSize -- FIXME: We could reuse the trimmed buffer here. return $ Yield1 bs (mkCIOS False) | otherwise = diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs index 9b06468d3..1aa70e82f 100644 --- a/Data/ByteString/Char8.hs +++ b/Data/ByteString/Char8.hs @@ -275,7 +275,7 @@ import Data.ByteString (null,length,tail,init,append ,useAsCString,useAsCStringLen ) -import Data.ByteString.Internal +import Data.ByteString.Internal.Type import Data.ByteString.ReadInt import Data.ByteString.ReadNat @@ -975,11 +975,11 @@ unlines = \li -> let (+!) = checkedAdd "Char8.unlines" go [] _ = pure () - go (BS srcFP len : srcs) dest = do - unsafeWithForeignPtr srcFP $ \src -> memcpy dest src len - pokeElemOff dest len (c2w '\n') - go srcs $ dest `plusPtr` (len + 1) - in unsafeCreate totLen (go li) + go (BS src len : srcs) dest = do + memcpyFp dest src len + pokeFpByteOff dest len (c2w '\n') + go srcs $ dest `plusForeignPtr` (len + 1) + in unsafeCreateFp totLen (go li) -- | 'words' breaks a ByteString up into a list of words, which -- were delimited by Chars representing white space. diff --git a/Data/ByteString/Internal.hs b/Data/ByteString/Internal.hs index f8125a2da..b4481a833 100644 --- a/Data/ByteString/Internal.hs +++ b/Data/ByteString/Internal.hs @@ -1,11 +1,3 @@ -{-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-} -{-# LANGUAGE UnliftedFFITypes, MagicHash, - UnboxedTuples, DeriveDataTypeable #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE PatternSynonyms, ViewPatterns #-} -{-# LANGUAGE Unsafe #-} -{-# LANGUAGE TemplateHaskellQuotes #-} {-# OPTIONS_HADDOCK not-home #-} -- | @@ -99,885 +91,4 @@ module Data.ByteString.Internal ( unsafeWithForeignPtr ) where -import Prelude hiding (concat, null) -import qualified Data.List as List - -import Control.Monad (void) - -import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) -import Foreign.Ptr (Ptr, FunPtr, plusPtr, minusPtr) -import Foreign.Storable (Storable(..)) -import Foreign.C.Types (CInt(..), CSize(..)) -import Foreign.C.String (CString) - -#if !MIN_VERSION_base(4,13,0) -import Data.Semigroup (Semigroup ((<>))) -#endif -import Data.Semigroup (Semigroup (sconcat, stimes)) -import Data.List.NonEmpty (NonEmpty ((:|))) - -import Control.DeepSeq (NFData(rnf)) - -import Data.String (IsString(..)) - -import Control.Exception (assert, throw, Exception) - -import Data.Bits ((.&.)) -import Data.Char (ord) -import Data.Word - -import Data.Typeable (Typeable) -import Data.Data (Data(..), mkNoRepType) - -import GHC.Base (nullAddr#,realWorld#,unsafeChr) -import GHC.Exts (IsList(..)) -import GHC.CString (unpackCString#) -import GHC.Prim (Addr#) - -#define TIMES_INT_2_AVAILABLE MIN_VERSION_ghc_prim(0,7,0) -#if TIMES_INT_2_AVAILABLE -import GHC.Prim (timesInt2#) -#else -import GHC.Prim ( timesWord2# - , or# - , uncheckedShiftRL# - , int2Word# - , word2Int# - ) -import Data.Bits (finiteBitSize) -#endif - -import GHC.IO (IO(IO),unsafeDupablePerformIO) -import GHC.ForeignPtr (ForeignPtr(ForeignPtr) -#if __GLASGOW_HASKELL__ < 900 - , newForeignPtr_ -#endif - , mallocPlainForeignPtrBytes) - -#if MIN_VERSION_base(4,10,0) -import GHC.ForeignPtr (plusForeignPtr) -#else -import GHC.Prim (plusAddr#) -#endif - -#if __GLASGOW_HASKELL__ >= 811 -import GHC.CString (cstringLength#) -import GHC.ForeignPtr (ForeignPtrContents(FinalPtr)) -#else -import GHC.Ptr (Ptr(..)) -#endif - -import GHC.Types (Int (..)) - -#if MIN_VERSION_base(4,15,0) -import GHC.ForeignPtr (unsafeWithForeignPtr) -#endif - -import qualified Language.Haskell.TH.Lib as TH -import qualified Language.Haskell.TH.Syntax as TH - -#if !MIN_VERSION_base(4,15,0) -unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b -unsafeWithForeignPtr = withForeignPtr -#endif - --- CFILES stuff is Hugs only -{-# CFILES cbits/fpstring.c #-} - -#if !MIN_VERSION_base(4,10,0) --- |Advances the given address by the given offset in bytes. --- --- The new 'ForeignPtr' shares the finalizer of the original, --- equivalent from a finalization standpoint to just creating another --- reference to the original. That is, the finalizer will not be --- called before the new 'ForeignPtr' is unreachable, nor will it be --- called an additional time due to this call, and the finalizer will --- be called with the same address that it would have had this call --- not happened, *not* the new address. -plusForeignPtr :: ForeignPtr a -> Int -> ForeignPtr b -plusForeignPtr (ForeignPtr addr guts) (I# offset) = ForeignPtr (plusAddr# addr offset) guts -{-# INLINE [0] plusForeignPtr #-} -{-# RULES -"ByteString plusForeignPtr/0" forall fp . - plusForeignPtr fp 0 = fp - #-} -#endif - --- ----------------------------------------------------------------------------- - --- | A space-efficient representation of a 'Word8' vector, supporting many --- efficient operations. --- --- A 'ByteString' contains 8-bit bytes, or by using the operations from --- "Data.ByteString.Char8" it can be interpreted as containing 8-bit --- characters. --- -data ByteString = BS {-# UNPACK #-} !(ForeignPtr Word8) -- payload - {-# UNPACK #-} !Int -- length - -- ^ @since 0.11.0.0 - deriving (Typeable) - --- | Type synonym for the strict flavour of 'ByteString'. --- --- @since 0.11.2.0 -type StrictByteString = ByteString - --- | --- @'PS' foreignPtr offset length@ represents a 'ByteString' with data --- backed by a given @foreignPtr@, starting at a given @offset@ in bytes --- and of a specified @length@. --- --- This pattern is used to emulate the legacy 'ByteString' data --- constructor, so that pre-existing code generally doesn't need to --- change to benefit from the simplified 'BS' constructor and can --- continue to function unchanged. --- --- /Note:/ Matching with this constructor will always be given a 0 offset, --- as the base will be manipulated by 'plusForeignPtr' instead. --- -pattern PS :: ForeignPtr Word8 -> Int -> Int -> ByteString -pattern PS fp zero len <- BS fp ((0,) -> (zero, len)) where - PS fp o len = BS (plusForeignPtr fp o) len -#if __GLASGOW_HASKELL__ >= 802 -{-# COMPLETE PS #-} -#endif - -instance Eq ByteString where - (==) = eq - -instance Ord ByteString where - compare = compareBytes - -instance Semigroup ByteString where - (<>) = append - sconcat (b:|bs) = concat (b:bs) - {-# INLINE stimes #-} - stimes = stimesPolymorphic - -instance Monoid ByteString where - mempty = empty - mappend = (<>) - mconcat = concat - -instance NFData ByteString where - rnf BS{} = () - -instance Show ByteString where - showsPrec p ps r = showsPrec p (unpackChars ps) r - -instance Read ByteString where - readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ] - --- | @since 0.10.12.0 -instance IsList ByteString where - type Item ByteString = Word8 - fromList = packBytes - toList = unpackBytes - --- | Beware: 'fromString' truncates multi-byte characters to octets. --- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n� -instance IsString ByteString where - {-# INLINE fromString #-} - fromString = packChars - -instance Data ByteString where - gfoldl f z txt = z packBytes `f` unpackBytes txt - toConstr _ = error "Data.ByteString.ByteString.toConstr" - gunfold _ _ = error "Data.ByteString.ByteString.gunfold" - dataTypeOf _ = mkNoRepType "Data.ByteString.ByteString" - --- | @since 0.11.2.0 -instance TH.Lift ByteString where -#if MIN_VERSION_template_haskell(2,16,0) - lift (BS ptr len) = [| unsafePackLenLiteral |] - `TH.appE` TH.litE (TH.integerL (fromIntegral len)) - `TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len)) -#else - lift bs@(BS _ len) = [| unsafePackLenLiteral |] - `TH.appE` TH.litE (TH.integerL (fromIntegral len)) - `TH.appE` TH.litE (TH.StringPrimL $ unpackBytes bs) -#endif - -#if MIN_VERSION_template_haskell(2,17,0) - liftTyped = TH.unsafeCodeCoerce . TH.lift -#elif MIN_VERSION_template_haskell(2,16,0) - liftTyped = TH.unsafeTExpCoerce . TH.lift -#endif - ------------------------------------------------------------------------- --- Internal indexing - --- | 'findIndexOrLength' is a variant of findIndex, that returns the length --- of the string if no element is found, rather than Nothing. -findIndexOrLength :: (Word8 -> Bool) -> ByteString -> Int -findIndexOrLength k (BS x l) = - accursedUnutterablePerformIO $ unsafeWithForeignPtr x g - where - g ptr = go 0 - where - go !n | n >= l = return l - | otherwise = do w <- peek $ ptr `plusPtr` n - if k w - then return n - else go (n+1) -{-# INLINE findIndexOrLength #-} - ------------------------------------------------------------------------- --- Packing and unpacking from lists - -packBytes :: [Word8] -> ByteString -packBytes ws = unsafePackLenBytes (List.length ws) ws - -packChars :: [Char] -> ByteString -packChars cs = unsafePackLenChars (List.length cs) cs - -{-# INLINE [0] packChars #-} - -{-# RULES -"ByteString packChars/packAddress" forall s . - packChars (unpackCString# s) = unsafePackLiteral s - #-} - -unsafePackLenBytes :: Int -> [Word8] -> ByteString -unsafePackLenBytes len xs0 = - unsafeCreate len $ \p -> go p xs0 - where - go !_ [] = return () - go !p (x:xs) = poke p x >> go (p `plusPtr` 1) xs - -unsafePackLenChars :: Int -> [Char] -> ByteString -unsafePackLenChars len cs0 = - unsafeCreate len $ \p -> go p cs0 - where - go !_ [] = return () - go !p (c:cs) = poke p (c2w c) >> go (p `plusPtr` 1) cs - - --- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an --- Addr\# (an arbitrary machine address assumed to point outside the --- garbage-collected heap) into a @ByteString@. A much faster way to --- create an 'Addr#' is with an unboxed string literal, than to pack a --- boxed string. A unboxed string literal is compiled to a static @char --- []@ by GHC. Establishing the length of the string requires a call to --- @strlen(3)@, so the 'Addr#' must point to a null-terminated buffer (as --- is the case with @\"string\"\#@ literals in GHC). Use 'Data.ByteString.Unsafe.unsafePackAddressLen' --- if you know the length of the string statically. --- --- An example: --- --- > literalFS = unsafePackAddress "literal"# --- --- This function is /unsafe/. If you modify the buffer pointed to by the --- original 'Addr#' this modification will be reflected in the resulting --- @ByteString@, breaking referential transparency. --- --- Note this also won't work if your 'Addr#' has embedded @\'\\0\'@ characters in --- the string, as @strlen@ will return too short a length. --- -unsafePackAddress :: Addr# -> IO ByteString -unsafePackAddress addr# = do -#if __GLASGOW_HASKELL__ >= 811 - unsafePackLenAddress (I# (cstringLength# addr#)) addr# -#else - l <- c_strlen (Ptr addr#) - unsafePackLenAddress (fromIntegral l) addr# -#endif -{-# INLINE unsafePackAddress #-} - --- | See 'unsafePackAddress'. This function is similar, --- but takes an additional length argument rather then computing --- it with @strlen@. --- Therefore embedding @\'\\0\'@ characters is possible. --- --- @since 0.11.2.0 -unsafePackLenAddress :: Int -> Addr# -> IO ByteString -unsafePackLenAddress len addr# = do -#if __GLASGOW_HASKELL__ >= 811 - return (BS (ForeignPtr addr# FinalPtr) len) -#else - p <- newForeignPtr_ (Ptr addr#) - return $ BS p len -#endif -{-# INLINE unsafePackLenAddress #-} - --- | See 'unsafePackAddress'. This function has similar behavior. Prefer --- this function when the address in known to be an @Addr#@ literal. In --- that context, there is no need for the sequencing guarantees that 'IO' --- provides. On GHC 9.0 and up, this function uses the @FinalPtr@ data --- constructor for @ForeignPtrContents@. --- --- @since 0.11.1.0 -unsafePackLiteral :: Addr# -> ByteString -unsafePackLiteral addr# = -#if __GLASGOW_HASKELL__ >= 811 - unsafePackLenLiteral (I# (cstringLength# addr#)) addr# -#else - let len = accursedUnutterablePerformIO (c_strlen (Ptr addr#)) - in unsafePackLenLiteral (fromIntegral len) addr# -#endif -{-# INLINE unsafePackLiteral #-} - - --- | See 'unsafePackLiteral'. This function is similar, --- but takes an additional length argument rather then computing --- it with @strlen@. --- Therefore embedding @\'\\0\'@ characters is possible. --- --- @since 0.11.2.0 -unsafePackLenLiteral :: Int -> Addr# -> ByteString -unsafePackLenLiteral len addr# = -#if __GLASGOW_HASKELL__ >= 811 - BS (ForeignPtr addr# FinalPtr) len -#else - -- newForeignPtr_ allocates a MutVar# internally. If that MutVar# - -- gets commoned up with the MutVar# of some unrelated ForeignPtr, - -- it may prevent automatic finalization for that other ForeignPtr. - -- So we avoid accursedUnutterablePerformIO here. - BS (unsafeDupablePerformIO (newForeignPtr_ (Ptr addr#))) len -#endif -{-# INLINE unsafePackLenLiteral #-} - -packUptoLenBytes :: Int -> [Word8] -> (ByteString, [Word8]) -packUptoLenBytes len xs0 = - unsafeCreateUptoN' len $ \p0 -> - let p_end = plusPtr p0 len - go !p [] = return (p `minusPtr` p0, []) - go !p xs | p == p_end = return (len, xs) - go !p (x:xs) = poke p x >> go (p `plusPtr` 1) xs - in go p0 xs0 - -packUptoLenChars :: Int -> [Char] -> (ByteString, [Char]) -packUptoLenChars len cs0 = - unsafeCreateUptoN' len $ \p0 -> - let p_end = plusPtr p0 len - go !p [] = return (p `minusPtr` p0, []) - go !p cs | p == p_end = return (len, cs) - go !p (c:cs) = poke p (c2w c) >> go (p `plusPtr` 1) cs - in go p0 cs0 - --- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand --- we would like to write a tight loop that just blasts the list into memory, on --- the other hand we want it to be unpacked lazily so we don't end up with a --- massive list data structure in memory. --- --- Our strategy is to combine both: we will unpack lazily in reasonable sized --- chunks, where each chunk is unpacked strictly. --- --- unpackBytes and unpackChars do the lazy loop, while unpackAppendBytes and --- unpackAppendChars do the chunks strictly. - -unpackBytes :: ByteString -> [Word8] -unpackBytes bs = unpackAppendBytesLazy bs [] - -unpackChars :: ByteString -> [Char] -unpackChars bs = unpackAppendCharsLazy bs [] - -unpackAppendBytesLazy :: ByteString -> [Word8] -> [Word8] -unpackAppendBytesLazy (BS fp len) xs - | len <= 100 = unpackAppendBytesStrict (BS fp len) xs - | otherwise = unpackAppendBytesStrict (BS fp 100) remainder - where - remainder = unpackAppendBytesLazy (BS (plusForeignPtr fp 100) (len-100)) xs - - -- Why 100 bytes you ask? Because on a 64bit machine the list we allocate - -- takes just shy of 4k which seems like a reasonable amount. - -- (5 words per list element, 8 bytes per word, 100 elements = 4000 bytes) - -unpackAppendCharsLazy :: ByteString -> [Char] -> [Char] -unpackAppendCharsLazy (BS fp len) cs - | len <= 100 = unpackAppendCharsStrict (BS fp len) cs - | otherwise = unpackAppendCharsStrict (BS fp 100) remainder - where - remainder = unpackAppendCharsLazy (BS (plusForeignPtr fp 100) (len-100)) cs - --- For these unpack functions, since we're unpacking the whole list strictly we --- build up the result list in an accumulator. This means we have to build up --- the list starting at the end. So our traversal starts at the end of the --- buffer and loops down until we hit the sentinal: - -unpackAppendBytesStrict :: ByteString -> [Word8] -> [Word8] -unpackAppendBytesStrict (BS fp len) xs = - accursedUnutterablePerformIO $ unsafeWithForeignPtr fp $ \base -> - loop (base `plusPtr` (-1)) (base `plusPtr` (-1+len)) xs - where - loop !sentinal !p acc - | p == sentinal = return acc - | otherwise = do x <- peek p - loop sentinal (p `plusPtr` (-1)) (x:acc) - -unpackAppendCharsStrict :: ByteString -> [Char] -> [Char] -unpackAppendCharsStrict (BS fp len) xs = - accursedUnutterablePerformIO $ unsafeWithForeignPtr fp $ \base -> - loop (base `plusPtr` (-1)) (base `plusPtr` (-1+len)) xs - where - loop !sentinal !p acc - | p == sentinal = return acc - | otherwise = do x <- peek p - loop sentinal (p `plusPtr` (-1)) (w2c x:acc) - ------------------------------------------------------------------------- - --- | The 0 pointer. Used to indicate the empty Bytestring. -nullForeignPtr :: ForeignPtr Word8 -#if __GLASGOW_HASKELL__ >= 811 -nullForeignPtr = ForeignPtr nullAddr# FinalPtr -#else -nullForeignPtr = ForeignPtr nullAddr# (error "nullForeignPtr") -#endif - --- --------------------------------------------------------------------- --- Low level constructors - --- | /O(1)/ Build a ByteString from a ForeignPtr. --- --- If you do not need the offset parameter then you should be using --- 'Data.ByteString.Unsafe.unsafePackCStringLen' or --- 'Data.ByteString.Unsafe.unsafePackCStringFinalizer' instead. --- -fromForeignPtr :: ForeignPtr Word8 - -> Int -- ^ Offset - -> Int -- ^ Length - -> ByteString -fromForeignPtr fp o = BS (plusForeignPtr fp o) -{-# INLINE fromForeignPtr #-} - --- | @since 0.11.0.0 -fromForeignPtr0 :: ForeignPtr Word8 - -> Int -- ^ Length - -> ByteString -fromForeignPtr0 = BS -{-# INLINE fromForeignPtr0 #-} - --- | /O(1)/ Deconstruct a ForeignPtr from a ByteString -toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) -- ^ (ptr, offset, length) -toForeignPtr (BS ps l) = (ps, 0, l) -{-# INLINE toForeignPtr #-} - --- | /O(1)/ Deconstruct a ForeignPtr from a ByteString --- --- @since 0.11.0.0 -toForeignPtr0 :: ByteString -> (ForeignPtr Word8, Int) -- ^ (ptr, length) -toForeignPtr0 (BS ps l) = (ps, l) -{-# INLINE toForeignPtr0 #-} - --- | A way of creating ByteStrings outside the IO monad. The @Int@ --- argument gives the final size of the ByteString. -unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString -unsafeCreate l f = unsafeDupablePerformIO (create l f) -{-# INLINE unsafeCreate #-} - --- | Like 'unsafeCreate' but instead of giving the final size of the --- ByteString, it is just an upper bound. The inner action returns --- the actual size. Unlike 'createAndTrim' the ByteString is not --- reallocated if the final size is less than the estimated size. -unsafeCreateUptoN :: Int -> (Ptr Word8 -> IO Int) -> ByteString -unsafeCreateUptoN l f = unsafeDupablePerformIO (createUptoN l f) -{-# INLINE unsafeCreateUptoN #-} - --- | @since 0.10.12.0 -unsafeCreateUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> (ByteString, a) -unsafeCreateUptoN' l f = unsafeDupablePerformIO (createUptoN' l f) -{-# INLINE unsafeCreateUptoN' #-} - --- | Create ByteString of size @l@ and use action @f@ to fill its contents. -create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString -create l action = do - fp <- mallocByteString l - -- Cannot use unsafeWithForeignPtr, because action can diverge - withForeignPtr fp $ \p -> action p - return $! BS fp l -{-# INLINE create #-} - --- | Given a maximum size @l@ and an action @f@ that fills the 'ByteString' --- starting at the given 'Ptr' and returns the actual utilized length, --- @`createUptoN'` l f@ returns the filled 'ByteString'. -createUptoN :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString -createUptoN l action = do - fp <- mallocByteString l - -- Cannot use unsafeWithForeignPtr, because action can diverge - l' <- withForeignPtr fp $ \p -> action p - assert (l' <= l) $ return $! BS fp l' -{-# INLINE createUptoN #-} - --- | Like 'createUptoN', but also returns an additional value created by the --- action. --- --- @since 0.10.12.0 -createUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> IO (ByteString, a) -createUptoN' l action = do - fp <- mallocByteString l - -- Cannot use unsafeWithForeignPtr, because action can diverge - (l', res) <- withForeignPtr fp $ \p -> action p - assert (l' <= l) $ return (BS fp l', res) -{-# INLINE createUptoN' #-} - --- | Given the maximum size needed and a function to make the contents --- of a ByteString, createAndTrim makes the 'ByteString'. The generating --- function is required to return the actual final size (<= the maximum --- size), and the resulting byte array is reallocated to this size. --- --- createAndTrim is the main mechanism for creating custom, efficient --- ByteString functions, using Haskell or C functions to fill the space. --- -createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString -createAndTrim l action = do - fp <- mallocByteString l - -- Cannot use unsafeWithForeignPtr, because action can diverge - withForeignPtr fp $ \p -> do - l' <- action p - if assert (l' <= l) $ l' >= l - then return $! BS fp l - else create l' $ \p' -> memcpy p' p l' -{-# INLINE createAndTrim #-} - -createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a) -createAndTrim' l action = do - fp <- mallocByteString l - -- Cannot use unsafeWithForeignPtr, because action can diverge - withForeignPtr fp $ \p -> do - (off, l', res) <- action p - if assert (l' <= l) $ l' >= l - then return (BS fp l, res) - else do ps <- create l' $ \p' -> - memcpy p' (p `plusPtr` off) l' - return (ps, res) -{-# INLINE createAndTrim' #-} - --- | Wrapper of 'Foreign.ForeignPtr.mallocForeignPtrBytes' with faster implementation for GHC --- -mallocByteString :: Int -> IO (ForeignPtr a) -mallocByteString = mallocPlainForeignPtrBytes -{-# INLINE mallocByteString #-} - ------------------------------------------------------------------------- --- Implementations for Eq, Ord and Monoid instances - -eq :: ByteString -> ByteString -> Bool -eq a@(BS fp len) b@(BS fp' len') - | len /= len' = False -- short cut on length - | fp == fp' = True -- short cut for the same string - | otherwise = compareBytes a b == EQ -{-# INLINE eq #-} --- ^ still needed - -compareBytes :: ByteString -> ByteString -> Ordering -compareBytes (BS _ 0) (BS _ 0) = EQ -- short cut for empty strings -compareBytes (BS fp1 len1) (BS fp2 len2) = - accursedUnutterablePerformIO $ - unsafeWithForeignPtr fp1 $ \p1 -> - unsafeWithForeignPtr fp2 $ \p2 -> do - i <- memcmp p1 p2 (min len1 len2) - return $! case i `compare` 0 of - EQ -> len1 `compare` len2 - x -> x - - --- | /O(1)/ The empty 'ByteString' -empty :: ByteString --- This enables bypassing #457 by not using (polymorphic) mempty in --- any definitions used by the (Monoid ByteString) instance -empty = BS nullForeignPtr 0 - -append :: ByteString -> ByteString -> ByteString -append (BS _ 0) b = b -append a (BS _ 0) = a -append (BS fp1 len1) (BS fp2 len2) = - unsafeCreate (checkedAdd "append" len1 len2) $ \destptr1 -> do - let destptr2 = destptr1 `plusPtr` len1 - unsafeWithForeignPtr fp1 $ \p1 -> memcpy destptr1 p1 len1 - unsafeWithForeignPtr fp2 $ \p2 -> memcpy destptr2 p2 len2 - -concat :: [ByteString] -> ByteString -concat = \bss0 -> goLen0 bss0 bss0 - -- The idea here is we first do a pass over the input list to determine: - -- - -- 1. is a copy necessary? e.g. @concat []@, @concat [mempty, "hello"]@, - -- and @concat ["hello", mempty, mempty]@ can all be handled without - -- copying. - -- 2. if a copy is necessary, how large is the result going to be? - -- - -- If a copy is necessary then we create a buffer of the appropriate size - -- and do another pass over the input list, copying the chunks into the - -- buffer. Also, since foreign calls aren't entirely free we skip over - -- empty chunks while copying. - -- - -- We pass the original [ByteString] (bss0) through as an argument through - -- goLen0, goLen1, and goLen since we will need it again in goCopy. Passing - -- it as an explicit argument avoids capturing it in these functions' - -- closures which would result in unnecessary closure allocation. - where - -- It's still possible that the result is empty - goLen0 _ [] = empty - goLen0 bss0 (BS _ 0 :bss) = goLen0 bss0 bss - goLen0 bss0 (bs :bss) = goLen1 bss0 bs bss - - -- It's still possible that the result is a single chunk - goLen1 _ bs [] = bs - goLen1 bss0 bs (BS _ 0 :bss) = goLen1 bss0 bs bss - goLen1 bss0 bs (BS _ len:bss) = goLen bss0 (checkedAdd "concat" len' len) bss - where BS _ len' = bs - - -- General case, just find the total length we'll need - goLen bss0 !total (BS _ len:bss) = goLen bss0 total' bss - where total' = checkedAdd "concat" total len - goLen bss0 total [] = - unsafeCreate total $ \ptr -> goCopy bss0 ptr - - -- Copy the data - goCopy [] !_ = return () - goCopy (BS _ 0 :bss) !ptr = goCopy bss ptr - goCopy (BS fp len:bss) !ptr = do - unsafeWithForeignPtr fp $ \p -> memcpy ptr p len - goCopy bss (ptr `plusPtr` len) -{-# NOINLINE concat #-} - -{-# RULES -"ByteString concat [] -> empty" - concat [] = empty -"ByteString concat [bs] -> bs" forall x. - concat [x] = x - #-} - --- | Repeats the given ByteString n times. --- Polymorphic wrapper to make sure any generated --- specializations are reasonably small. -stimesPolymorphic :: Integral a => a -> ByteString -> ByteString -{-# INLINABLE stimesPolymorphic #-} -stimesPolymorphic nRaw = \ !bs -> case checkedIntegerToInt n of - Just nInt - | nInt >= 0 -> stimesNonNegativeInt nInt bs - | otherwise -> stimesNegativeErr - Nothing - | n < 0 -> stimesNegativeErr - | BS _ 0 <- bs -> empty - | otherwise -> stimesOverflowErr - where n = toInteger nRaw - -- By exclusively using n instead of nRaw, the semantics are kept simple - -- and the likelihood of potentially dangerous mistakes minimized. - - -stimesNegativeErr :: ByteString -stimesNegativeErr - = error "stimes @ByteString: non-negative multiplier expected" - -stimesOverflowErr :: ByteString --- Although this only appears once, it is extracted here to prevent it --- from being duplicated in specializations of 'stimesPolymorphic' -stimesOverflowErr = overflowError "stimes" - --- | Repeats the given ByteString n times. -stimesNonNegativeInt :: Int -> ByteString -> ByteString -stimesNonNegativeInt n (BS fp len) - | n == 0 = empty - | n == 1 = BS fp len - | len == 0 = empty - | len == 1 = unsafeCreate n $ \destptr -> - unsafeWithForeignPtr fp $ \p -> do - byte <- peek p - void $ memset destptr byte (fromIntegral n) - | otherwise = unsafeCreate size $ \destptr -> - unsafeWithForeignPtr fp $ \p -> do - memcpy destptr p len - fillFrom destptr len - where - size = checkedMultiply "stimes" n len - halfSize = (size - 1) `div` 2 -- subtraction and division won't overflow - - fillFrom :: Ptr Word8 -> Int -> IO () - fillFrom destptr copied - | copied <= halfSize = do - memcpy (destptr `plusPtr` copied) destptr copied - fillFrom destptr (copied * 2) - | otherwise = memcpy (destptr `plusPtr` copied) destptr (size - copied) - - ------------------------------------------------------------------------- - --- | Conversion between 'Word8' and 'Char'. Should compile to a no-op. -w2c :: Word8 -> Char -w2c = unsafeChr . fromIntegral -{-# INLINE w2c #-} - --- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and --- silently truncates to 8 bits Chars > '\255'. It is provided as --- convenience for ByteString construction. -c2w :: Char -> Word8 -c2w = fromIntegral . ord -{-# INLINE c2w #-} - --- | Selects words corresponding to white-space characters in the Latin-1 range -isSpaceWord8 :: Word8 -> Bool -isSpaceWord8 w8 = - -- Avoid the cost of narrowing arithmetic results to Word8, - -- the conversion from Word8 to Word is free. - let w :: Word - !w = fromIntegral w8 - in w .&. 0x50 == 0 -- Quick non-whitespace filter - && w - 0x21 > 0x7e -- Second non-whitespace filter - && ( w == 0x20 -- SP - || w == 0xa0 -- NBSP - || w - 0x09 < 5) -- HT, NL, VT, FF, CR -{-# INLINE isSpaceWord8 #-} - --- | Selects white-space characters in the Latin-1 range -isSpaceChar8 :: Char -> Bool -isSpaceChar8 = isSpaceWord8 . c2w -{-# INLINE isSpaceChar8 #-} - ------------------------------------------------------------------------- - --- | The type of exception raised by 'overflowError' --- and on failure by overflow-checked arithmetic operations. -newtype SizeOverflowException - = SizeOverflowException String - -instance Show SizeOverflowException where - show (SizeOverflowException err) = err - -instance Exception SizeOverflowException - --- | Raises a 'SizeOverflowException', --- with a message using the given function name. -overflowError :: String -> a -overflowError fun = throw $ SizeOverflowException msg - where msg = "Data.ByteString." ++ fun ++ ": size overflow" - --- | Add two non-negative numbers. --- Calls 'overflowError' on overflow. -checkedAdd :: String -> Int -> Int -> Int -{-# INLINE checkedAdd #-} -checkedAdd fun x y - | r >= 0 = r - | otherwise = overflowError fun - where r = assert (min x y >= 0) $ x + y - --- | Multiplies two non-negative numbers. --- Calls 'overflowError' on overflow. -checkedMultiply :: String -> Int -> Int -> Int -{-# INLINE checkedMultiply #-} -checkedMultiply fun !x@(I# x#) !y@(I# y#) = assert (min x y >= 0) $ -#if TIMES_INT_2_AVAILABLE - case timesInt2# x# y# of - (# 0#, _, result #) -> I# result - _ -> overflowError fun -#else - case timesWord2# (int2Word# x#) (int2Word# y#) of - (# hi, lo #) -> case or# hi (uncheckedShiftRL# lo shiftAmt) of - 0## -> I# (word2Int# lo) - _ -> overflowError fun - where !(I# shiftAmt) = finiteBitSize (0 :: Word) - 1 -#endif - - --- | Attempts to convert an 'Integer' value to an 'Int', returning --- 'Nothing' if doing so would result in an overflow. -checkedIntegerToInt :: Integer -> Maybe Int -{-# INLINE checkedIntegerToInt #-} --- We could use Data.Bits.toIntegralSized, but this hand-rolled --- version is currently a bit faster as of GHC 9.2. --- It's even faster to just match on the Integer constructors, but --- we'd still need a fallback implementation for integer-simple. -checkedIntegerToInt x - | x == toInteger res = Just res - | otherwise = Nothing - where res = fromInteger x :: Int - - ------------------------------------------------------------------------- - --- | This \"function\" has a superficial similarity to 'System.IO.Unsafe.unsafePerformIO' but --- it is in fact a malevolent agent of chaos. It unpicks the seams of reality --- (and the 'IO' monad) so that the normal rules no longer apply. It lulls you --- into thinking it is reasonable, but when you are not looking it stabs you --- in the back and aliases all of your mutable buffers. The carcass of many a --- seasoned Haskell programmer lie strewn at its feet. --- --- Witness the trail of destruction: --- --- * --- --- * --- --- * --- --- * --- --- * --- --- * --- --- Do not talk about \"safe\"! You do not know what is safe! --- --- Yield not to its blasphemous call! Flee traveller! Flee or you will be --- corrupted and devoured! --- -{-# INLINE accursedUnutterablePerformIO #-} -accursedUnutterablePerformIO :: IO a -> a -accursedUnutterablePerformIO (IO m) = case m realWorld# of (# _, r #) -> r - --- --------------------------------------------------------------------- --- --- Standard C functions --- - -foreign import ccall unsafe "string.h strlen" c_strlen - :: CString -> IO CSize - -foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer - :: FunPtr (Ptr Word8 -> IO ()) - -foreign import ccall unsafe "string.h memchr" c_memchr - :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) - -memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) -memchr p w = c_memchr p (fromIntegral w) - -foreign import ccall unsafe "string.h memcmp" c_memcmp - :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt - -memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt -memcmp p q s = c_memcmp p q (fromIntegral s) - -foreign import ccall unsafe "string.h memcpy" c_memcpy - :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) - -memcpy :: Ptr Word8 -> Ptr Word8 -> Int -> IO () -memcpy p q s = void $ c_memcpy p q (fromIntegral s) - -{- -foreign import ccall unsafe "string.h memmove" c_memmove - :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) - -memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () -memmove p q s = do c_memmove p q s - return () --} - -foreign import ccall unsafe "string.h memset" c_memset - :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) - -memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) -memset p w = c_memset p (fromIntegral w) - --- --------------------------------------------------------------------- --- --- Uses our C code --- - -foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse - :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () - -foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse - :: Ptr Word8 -> Ptr Word8 -> CSize -> Word8 -> IO () - -foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum - :: Ptr Word8 -> CSize -> IO Word8 - -foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum - :: Ptr Word8 -> CSize -> IO Word8 - -foreign import ccall unsafe "static fpstring.h fps_count" c_count - :: Ptr Word8 -> CSize -> Word8 -> IO CSize - -foreign import ccall unsafe "static fpstring.h fps_sort" c_sort - :: Ptr Word8 -> CSize -> IO () +import Data.ByteString.Internal.Type diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs new file mode 100644 index 000000000..0f69096ff --- /dev/null +++ b/Data/ByteString/Internal/Type.hs @@ -0,0 +1,1065 @@ +{-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-} +{-# LANGUAGE UnliftedFFITypes, MagicHash, + UnboxedTuples, DeriveDataTypeable #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE PatternSynonyms, ViewPatterns #-} +{-# LANGUAGE Unsafe #-} +{-# LANGUAGE TemplateHaskellQuotes #-} +{-# OPTIONS_HADDOCK not-home #-} + +-- | +-- Module : Data.ByteString.Internal.Type +-- Copyright : (c) Don Stewart 2006-2008 +-- (c) Duncan Coutts 2006-2012 +-- License : BSD-style +-- Maintainer : dons00@gmail.com, duncan@community.haskell.org +-- Stability : unstable +-- Portability : non-portable +-- +-- The 'ByteString' type, its instances, and whatever related +-- utilities the bytestring developers see fit to use internally. +-- +module Data.ByteString.Internal.Type ( + + -- * The @ByteString@ type and representation + ByteString + ( BS + , PS -- backwards compatibility shim + ), + + StrictByteString, + + -- * Internal indexing + findIndexOrLength, + + -- * Conversion with lists: packing and unpacking + packBytes, packUptoLenBytes, unsafePackLenBytes, + packChars, packUptoLenChars, unsafePackLenChars, + unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict, + unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict, + unsafePackAddress, unsafePackLenAddress, + unsafePackLiteral, unsafePackLenLiteral, + + -- * Low level imperative construction + empty, + createFp, + createFpUptoN, + createFpUptoN', + createFpAndTrim, + createFpAndTrim', + unsafeCreateFp, + unsafeCreateFpUptoN, + unsafeCreateFpUptoN', + create, + createUptoN, + createUptoN', + createAndTrim, + createAndTrim', + unsafeCreate, + unsafeCreateUptoN, + unsafeCreateUptoN', + mallocByteString, + + -- * Conversion to and from ForeignPtrs + fromForeignPtr, + toForeignPtr, + fromForeignPtr0, + toForeignPtr0, + + -- * Utilities + nullForeignPtr, + peekFp, + pokeFp, + peekFpByteOff, + pokeFpByteOff, + minusForeignPtr, + memcpyFp, + SizeOverflowException, + overflowError, + checkedAdd, + checkedMultiply, + + -- * Standard C Functions + c_strlen, + c_free_finalizer, + + memchr, + memcmp, + memcpy, + memset, + + -- * cbits functions + c_reverse, + c_intersperse, + c_maximum, + c_minimum, + c_count, + c_sort, + + -- * Chars + w2c, c2w, isSpaceWord8, isSpaceChar8, + + -- * Deprecated and unmentionable + accursedUnutterablePerformIO, + + -- * Exported compatibility shim + plusForeignPtr, + unsafeWithForeignPtr + ) where + +import Prelude hiding (concat, null) +import qualified Data.List as List + +import Control.Monad (void) + +import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) +import Foreign.Ptr (Ptr, FunPtr, plusPtr) +import Foreign.Storable (Storable(..)) +import Foreign.C.Types (CInt(..), CSize(..)) +import Foreign.C.String (CString) + +#if !MIN_VERSION_base(4,13,0) +import Data.Semigroup (Semigroup ((<>))) +#endif +import Data.Semigroup (Semigroup (sconcat, stimes)) +import Data.List.NonEmpty (NonEmpty ((:|))) + +import Control.DeepSeq (NFData(rnf)) + +import Data.String (IsString(..)) + +import Control.Exception (assert, throw, Exception) + +import Data.Bits ((.&.)) +import Data.Char (ord) +import Data.Word + +import Data.Typeable (Typeable) +import Data.Data (Data(..), mkNoRepType) + +import GHC.Base (nullAddr#,realWorld#,unsafeChr) +import GHC.Exts (IsList(..)) +import GHC.CString (unpackCString#) +import GHC.Exts (Addr#, minusAddr#) + +#define TIMES_INT_2_AVAILABLE MIN_VERSION_ghc_prim(0,7,0) +#if TIMES_INT_2_AVAILABLE +import GHC.Prim (timesInt2#) +#else +import GHC.Prim ( timesWord2# + , or# + , uncheckedShiftRL# + , int2Word# + , word2Int# + ) +import Data.Bits (finiteBitSize) +#endif + +import GHC.IO (IO(IO),unsafeDupablePerformIO) +import GHC.ForeignPtr (ForeignPtr(ForeignPtr) +#if __GLASGOW_HASKELL__ < 900 + , newForeignPtr_ +#endif + , mallocPlainForeignPtrBytes) + +#if MIN_VERSION_base(4,10,0) +import GHC.ForeignPtr (plusForeignPtr) +#else +import GHC.Prim (plusAddr#) +#endif + +#if __GLASGOW_HASKELL__ >= 811 +import GHC.CString (cstringLength#) +import GHC.ForeignPtr (ForeignPtrContents(FinalPtr)) +#else +import GHC.Ptr (Ptr(..)) +#endif + +import GHC.Types (Int (..)) + +#if MIN_VERSION_base(4,15,0) +import GHC.ForeignPtr (unsafeWithForeignPtr) +#endif + +import qualified Language.Haskell.TH.Lib as TH +import qualified Language.Haskell.TH.Syntax as TH + +#if !MIN_VERSION_base(4,15,0) +unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b +unsafeWithForeignPtr = withForeignPtr +#endif + +-- CFILES stuff is Hugs only +{-# CFILES cbits/fpstring.c #-} + +#if !MIN_VERSION_base(4,10,0) +-- |Advances the given address by the given offset in bytes. +-- +-- The new 'ForeignPtr' shares the finalizer of the original, +-- equivalent from a finalization standpoint to just creating another +-- reference to the original. That is, the finalizer will not be +-- called before the new 'ForeignPtr' is unreachable, nor will it be +-- called an additional time due to this call, and the finalizer will +-- be called with the same address that it would have had this call +-- not happened, *not* the new address. +plusForeignPtr :: ForeignPtr a -> Int -> ForeignPtr b +plusForeignPtr (ForeignPtr addr guts) (I# offset) = ForeignPtr (plusAddr# addr offset) guts +{-# INLINE [0] plusForeignPtr #-} +{-# RULES +"ByteString plusForeignPtr/0" forall fp . + plusForeignPtr fp 0 = fp + #-} +#endif + +minusForeignPtr :: ForeignPtr a -> ForeignPtr b -> Int +minusForeignPtr (ForeignPtr addr1 _) (ForeignPtr addr2 _) + = I# (minusAddr# addr1 addr2) + +peekFp :: Storable a => ForeignPtr a -> IO a +peekFp fp = unsafeWithForeignPtr fp peek + +pokeFp :: Storable a => ForeignPtr a -> a -> IO () +pokeFp fp val = unsafeWithForeignPtr fp $ \p -> poke p val + +peekFpByteOff :: Storable a => ForeignPtr a -> Int -> IO a +peekFpByteOff fp off = unsafeWithForeignPtr fp $ \p -> + peekByteOff p off + +pokeFpByteOff :: Storable a => ForeignPtr b -> Int -> a -> IO () +pokeFpByteOff fp off val = unsafeWithForeignPtr fp $ \p -> + pokeByteOff p off val + +-- ----------------------------------------------------------------------------- + +-- | A space-efficient representation of a 'Word8' vector, supporting many +-- efficient operations. +-- +-- A 'ByteString' contains 8-bit bytes, or by using the operations from +-- "Data.ByteString.Char8" it can be interpreted as containing 8-bit +-- characters. +-- +data ByteString = BS {-# UNPACK #-} !(ForeignPtr Word8) -- payload + {-# UNPACK #-} !Int -- length + -- ^ @since 0.11.0.0 + deriving (Typeable) + +-- | Type synonym for the strict flavour of 'ByteString'. +-- +-- @since 0.11.2.0 +type StrictByteString = ByteString + +-- | +-- @'PS' foreignPtr offset length@ represents a 'ByteString' with data +-- backed by a given @foreignPtr@, starting at a given @offset@ in bytes +-- and of a specified @length@. +-- +-- This pattern is used to emulate the legacy 'ByteString' data +-- constructor, so that pre-existing code generally doesn't need to +-- change to benefit from the simplified 'BS' constructor and can +-- continue to function unchanged. +-- +-- /Note:/ Matching with this constructor will always be given a 0 offset, +-- as the base will be manipulated by 'plusForeignPtr' instead. +-- +pattern PS :: ForeignPtr Word8 -> Int -> Int -> ByteString +pattern PS fp zero len <- BS fp ((0,) -> (zero, len)) where + PS fp o len = BS (plusForeignPtr fp o) len +#if __GLASGOW_HASKELL__ >= 802 +{-# COMPLETE PS #-} +#endif + +instance Eq ByteString where + (==) = eq + +instance Ord ByteString where + compare = compareBytes + +instance Semigroup ByteString where + (<>) = append + sconcat (b:|bs) = concat (b:bs) + {-# INLINE stimes #-} + stimes = stimesPolymorphic + +instance Monoid ByteString where + mempty = empty + mappend = (<>) + mconcat = concat + +instance NFData ByteString where + rnf BS{} = () + +instance Show ByteString where + showsPrec p ps r = showsPrec p (unpackChars ps) r + +instance Read ByteString where + readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ] + +-- | @since 0.10.12.0 +instance IsList ByteString where + type Item ByteString = Word8 + fromList = packBytes + toList = unpackBytes + +-- | Beware: 'fromString' truncates multi-byte characters to octets. +-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n� +instance IsString ByteString where + {-# INLINE fromString #-} + fromString = packChars + +instance Data ByteString where + gfoldl f z txt = z packBytes `f` unpackBytes txt + toConstr _ = error "Data.ByteString.ByteString.toConstr" + gunfold _ _ = error "Data.ByteString.ByteString.gunfold" + dataTypeOf _ = mkNoRepType "Data.ByteString.ByteString" + +-- | @since 0.11.2.0 +instance TH.Lift ByteString where +#if MIN_VERSION_template_haskell(2,16,0) + lift (BS ptr len) = [| unsafePackLenLiteral |] + `TH.appE` TH.litE (TH.integerL (fromIntegral len)) + `TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len)) +#else + lift bs@(BS _ len) = [| unsafePackLenLiteral |] + `TH.appE` TH.litE (TH.integerL (fromIntegral len)) + `TH.appE` TH.litE (TH.StringPrimL $ unpackBytes bs) +#endif + +#if MIN_VERSION_template_haskell(2,17,0) + liftTyped = TH.unsafeCodeCoerce . TH.lift +#elif MIN_VERSION_template_haskell(2,16,0) + liftTyped = TH.unsafeTExpCoerce . TH.lift +#endif + +------------------------------------------------------------------------ +-- Internal indexing + +-- | 'findIndexOrLength' is a variant of findIndex, that returns the length +-- of the string if no element is found, rather than Nothing. +findIndexOrLength :: (Word8 -> Bool) -> ByteString -> Int +findIndexOrLength k (BS x l) = + accursedUnutterablePerformIO $ g x + where + g ptr = go 0 + where + go !n | n >= l = return l + | otherwise = do w <- peekFp $ ptr `plusForeignPtr` n + if k w + then return n + else go (n+1) +{-# INLINE findIndexOrLength #-} + +------------------------------------------------------------------------ +-- Packing and unpacking from lists + +packBytes :: [Word8] -> ByteString +packBytes ws = unsafePackLenBytes (List.length ws) ws + +packChars :: [Char] -> ByteString +packChars cs = unsafePackLenChars (List.length cs) cs + +{-# INLINE [0] packChars #-} + +{-# RULES +"ByteString packChars/packAddress" forall s . + packChars (unpackCString# s) = unsafePackLiteral s + #-} + +unsafePackLenBytes :: Int -> [Word8] -> ByteString +unsafePackLenBytes len xs0 = + unsafeCreateFp len $ \p -> go p xs0 + where + go !_ [] = return () + go !p (x:xs) = pokeFp p x >> go (p `plusForeignPtr` 1) xs + +unsafePackLenChars :: Int -> [Char] -> ByteString +unsafePackLenChars len cs0 = + unsafeCreateFp len $ \p -> go p cs0 + where + go !_ [] = return () + go !p (c:cs) = pokeFp p (c2w c) >> go (p `plusForeignPtr` 1) cs + + +-- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an +-- Addr\# (an arbitrary machine address assumed to point outside the +-- garbage-collected heap) into a @ByteString@. A much faster way to +-- create an 'Addr#' is with an unboxed string literal, than to pack a +-- boxed string. A unboxed string literal is compiled to a static @char +-- []@ by GHC. Establishing the length of the string requires a call to +-- @strlen(3)@, so the 'Addr#' must point to a null-terminated buffer (as +-- is the case with @\"string\"\#@ literals in GHC). Use 'Data.ByteString.Unsafe.unsafePackAddressLen' +-- if you know the length of the string statically. +-- +-- An example: +-- +-- > literalFS = unsafePackAddress "literal"# +-- +-- This function is /unsafe/. If you modify the buffer pointed to by the +-- original 'Addr#' this modification will be reflected in the resulting +-- @ByteString@, breaking referential transparency. +-- +-- Note this also won't work if your 'Addr#' has embedded @\'\\0\'@ characters in +-- the string, as @strlen@ will return too short a length. +-- +unsafePackAddress :: Addr# -> IO ByteString +unsafePackAddress addr# = do +#if __GLASGOW_HASKELL__ >= 811 + unsafePackLenAddress (I# (cstringLength# addr#)) addr# +#else + l <- c_strlen (Ptr addr#) + unsafePackLenAddress (fromIntegral l) addr# +#endif +{-# INLINE unsafePackAddress #-} + +-- | See 'unsafePackAddress'. This function is similar, +-- but takes an additional length argument rather then computing +-- it with @strlen@. +-- Therefore embedding @\'\\0\'@ characters is possible. +-- +-- @since 0.11.2.0 +unsafePackLenAddress :: Int -> Addr# -> IO ByteString +unsafePackLenAddress len addr# = do +#if __GLASGOW_HASKELL__ >= 811 + return (BS (ForeignPtr addr# FinalPtr) len) +#else + p <- newForeignPtr_ (Ptr addr#) + return $ BS p len +#endif +{-# INLINE unsafePackLenAddress #-} + +-- | See 'unsafePackAddress'. This function has similar behavior. Prefer +-- this function when the address in known to be an @Addr#@ literal. In +-- that context, there is no need for the sequencing guarantees that 'IO' +-- provides. On GHC 9.0 and up, this function uses the @FinalPtr@ data +-- constructor for @ForeignPtrContents@. +-- +-- @since 0.11.1.0 +unsafePackLiteral :: Addr# -> ByteString +unsafePackLiteral addr# = +#if __GLASGOW_HASKELL__ >= 811 + unsafePackLenLiteral (I# (cstringLength# addr#)) addr# +#else + let len = accursedUnutterablePerformIO (c_strlen (Ptr addr#)) + in unsafePackLenLiteral (fromIntegral len) addr# +#endif +{-# INLINE unsafePackLiteral #-} + + +-- | See 'unsafePackLiteral'. This function is similar, +-- but takes an additional length argument rather then computing +-- it with @strlen@. +-- Therefore embedding @\'\\0\'@ characters is possible. +-- +-- @since 0.11.2.0 +unsafePackLenLiteral :: Int -> Addr# -> ByteString +unsafePackLenLiteral len addr# = +#if __GLASGOW_HASKELL__ >= 811 + BS (ForeignPtr addr# FinalPtr) len +#else + -- newForeignPtr_ allocates a MutVar# internally. If that MutVar# + -- gets commoned up with the MutVar# of some unrelated ForeignPtr, + -- it may prevent automatic finalization for that other ForeignPtr. + -- So we avoid accursedUnutterablePerformIO here. + BS (unsafeDupablePerformIO (newForeignPtr_ (Ptr addr#))) len +#endif +{-# INLINE unsafePackLenLiteral #-} + +packUptoLenBytes :: Int -> [Word8] -> (ByteString, [Word8]) +packUptoLenBytes len xs0 = + unsafeCreateFpUptoN' len $ \p0 -> + let p_end = plusForeignPtr p0 len + go !p [] = return (p `minusForeignPtr` p0, []) + go !p xs | p == p_end = return (len, xs) + go !p (x:xs) = pokeFp p x >> go (p `plusForeignPtr` 1) xs + in go p0 xs0 + +packUptoLenChars :: Int -> [Char] -> (ByteString, [Char]) +packUptoLenChars len cs0 = + unsafeCreateFpUptoN' len $ \p0 -> + let p_end = plusForeignPtr p0 len + go !p [] = return (p `minusForeignPtr` p0, []) + go !p cs | p == p_end = return (len, cs) + go !p (c:cs) = pokeFp p (c2w c) >> go (p `plusForeignPtr` 1) cs + in go p0 cs0 + +-- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand +-- we would like to write a tight loop that just blasts the list into memory, on +-- the other hand we want it to be unpacked lazily so we don't end up with a +-- massive list data structure in memory. +-- +-- Our strategy is to combine both: we will unpack lazily in reasonable sized +-- chunks, where each chunk is unpacked strictly. +-- +-- unpackBytes and unpackChars do the lazy loop, while unpackAppendBytes and +-- unpackAppendChars do the chunks strictly. + +unpackBytes :: ByteString -> [Word8] +unpackBytes bs = unpackAppendBytesLazy bs [] + +unpackChars :: ByteString -> [Char] +unpackChars bs = unpackAppendCharsLazy bs [] + +unpackAppendBytesLazy :: ByteString -> [Word8] -> [Word8] +unpackAppendBytesLazy (BS fp len) xs + | len <= 100 = unpackAppendBytesStrict (BS fp len) xs + | otherwise = unpackAppendBytesStrict (BS fp 100) remainder + where + remainder = unpackAppendBytesLazy (BS (plusForeignPtr fp 100) (len-100)) xs + + -- Why 100 bytes you ask? Because on a 64bit machine the list we allocate + -- takes just shy of 4k which seems like a reasonable amount. + -- (5 words per list element, 8 bytes per word, 100 elements = 4000 bytes) + +unpackAppendCharsLazy :: ByteString -> [Char] -> [Char] +unpackAppendCharsLazy (BS fp len) cs + | len <= 100 = unpackAppendCharsStrict (BS fp len) cs + | otherwise = unpackAppendCharsStrict (BS fp 100) remainder + where + remainder = unpackAppendCharsLazy (BS (plusForeignPtr fp 100) (len-100)) cs + +-- For these unpack functions, since we're unpacking the whole list strictly we +-- build up the result list in an accumulator. This means we have to build up +-- the list starting at the end. So our traversal starts at the end of the +-- buffer and loops down until we hit the sentinal: + +unpackAppendBytesStrict :: ByteString -> [Word8] -> [Word8] +unpackAppendBytesStrict (BS fp len) xs = + accursedUnutterablePerformIO $ unsafeWithForeignPtr fp $ \base -> + loop (base `plusPtr` (-1)) (base `plusPtr` (-1+len)) xs + where + loop !sentinal !p acc + | p == sentinal = return acc + | otherwise = do x <- peek p + loop sentinal (p `plusPtr` (-1)) (x:acc) + +unpackAppendCharsStrict :: ByteString -> [Char] -> [Char] +unpackAppendCharsStrict (BS fp len) xs = + accursedUnutterablePerformIO $ unsafeWithForeignPtr fp $ \base -> + loop (base `plusPtr` (-1)) (base `plusPtr` (-1+len)) xs + where + loop !sentinal !p acc + | p == sentinal = return acc + | otherwise = do x <- peek p + loop sentinal (p `plusPtr` (-1)) (w2c x:acc) + +------------------------------------------------------------------------ + +-- | The 0 pointer. Used to indicate the empty Bytestring. +nullForeignPtr :: ForeignPtr Word8 +#if __GLASGOW_HASKELL__ >= 811 +nullForeignPtr = ForeignPtr nullAddr# FinalPtr +#else +nullForeignPtr = ForeignPtr nullAddr# (error "nullForeignPtr") +#endif + +-- --------------------------------------------------------------------- +-- Low level constructors + +-- | /O(1)/ Build a ByteString from a ForeignPtr. +-- +-- If you do not need the offset parameter then you should be using +-- 'Data.ByteString.Unsafe.unsafePackCStringLen' or +-- 'Data.ByteString.Unsafe.unsafePackCStringFinalizer' instead. +-- +fromForeignPtr :: ForeignPtr Word8 + -> Int -- ^ Offset + -> Int -- ^ Length + -> ByteString +fromForeignPtr fp o = BS (plusForeignPtr fp o) +{-# INLINE fromForeignPtr #-} + +-- | @since 0.11.0.0 +fromForeignPtr0 :: ForeignPtr Word8 + -> Int -- ^ Length + -> ByteString +fromForeignPtr0 = BS +{-# INLINE fromForeignPtr0 #-} + +-- | /O(1)/ Deconstruct a ForeignPtr from a ByteString +toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) -- ^ (ptr, offset, length) +toForeignPtr (BS ps l) = (ps, 0, l) +{-# INLINE toForeignPtr #-} + +-- | /O(1)/ Deconstruct a ForeignPtr from a ByteString +-- +-- @since 0.11.0.0 +toForeignPtr0 :: ByteString -> (ForeignPtr Word8, Int) -- ^ (ptr, length) +toForeignPtr0 (BS ps l) = (ps, l) +{-# INLINE toForeignPtr0 #-} + +-- | A way of creating ByteStrings outside the IO monad. The @Int@ +-- argument gives the final size of the ByteString. +unsafeCreateFp :: Int -> (ForeignPtr Word8 -> IO ()) -> ByteString +unsafeCreateFp l f = unsafeDupablePerformIO (createFp l f) +{-# INLINE unsafeCreateFp #-} + +-- | Like 'unsafeCreateFp' but instead of giving the final size of the +-- ByteString, it is just an upper bound. The inner action returns +-- the actual size. Unlike 'createFpAndTrim' the ByteString is not +-- reallocated if the final size is less than the estimated size. +unsafeCreateFpUptoN :: Int -> (ForeignPtr Word8 -> IO Int) -> ByteString +unsafeCreateFpUptoN l f = unsafeDupablePerformIO (createFpUptoN l f) +{-# INLINE unsafeCreateFpUptoN #-} + +unsafeCreateFpUptoN' + :: Int -> (ForeignPtr Word8 -> IO (Int, a)) -> (ByteString, a) +unsafeCreateFpUptoN' l f = unsafeDupablePerformIO (createFpUptoN' l f) +{-# INLINE unsafeCreateFpUptoN' #-} + +-- | Create ByteString of size @l@ and use action @f@ to fill its contents. +createFp :: Int -> (ForeignPtr Word8 -> IO ()) -> IO ByteString +createFp l action = do + fp <- mallocByteString l + action fp + return $! BS fp l +{-# INLINE createFp #-} + +-- | Given a maximum size @l@ and an action @f@ that fills the 'ByteString' +-- starting at the given 'Ptr' and returns the actual utilized length, +-- @`createFpUptoN'` l f@ returns the filled 'ByteString'. +createFpUptoN :: Int -> (ForeignPtr Word8 -> IO Int) -> IO ByteString +createFpUptoN l action = do + fp <- mallocByteString l + l' <- action fp + assert (l' <= l) $ return $! BS fp l' +{-# INLINE createFpUptoN #-} + +-- | Like 'createFpUptoN', but also returns an additional value created by the +-- action. +createFpUptoN' :: Int -> (ForeignPtr Word8 -> IO (Int, a)) -> IO (ByteString, a) +createFpUptoN' l action = do + fp <- mallocByteString l + (l', res) <- action fp + assert (l' <= l) $ return (BS fp l', res) +{-# INLINE createFpUptoN' #-} + +-- | Given the maximum size needed and a function to make the contents +-- of a ByteString, createFpAndTrim makes the 'ByteString'. The generating +-- function is required to return the actual final size (<= the maximum +-- size), and the resulting byte array is reallocated to this size. +-- +-- createFpAndTrim is the main mechanism for creating custom, efficient +-- ByteString functions, using Haskell or C functions to fill the space. +-- +createFpAndTrim :: Int -> (ForeignPtr Word8 -> IO Int) -> IO ByteString +createFpAndTrim l action = do + fp <- mallocByteString l + l' <- action fp + if assert (0 <= l' && l' <= l) $ l' >= l + then return $! BS fp l + else createFp l' $ \fp' -> memcpyFp fp' fp l' +{-# INLINE createFpAndTrim #-} + +createFpAndTrim' :: Int -> (ForeignPtr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a) +createFpAndTrim' l action = do + fp <- mallocByteString l + (off, l', res) <- action fp + if assert (0 <= l' && l' <= l) $ l' >= l + then return (BS fp l, res) + else do ps <- createFp l' $ \fp' -> + memcpyFp fp' (fp `plusForeignPtr` off) l' + return (ps, res) +{-# INLINE createFpAndTrim' #-} + + +wrapAction :: (Ptr Word8 -> IO res) -> ForeignPtr Word8 -> IO res +wrapAction = flip withForeignPtr + -- Cannot use unsafeWithForeignPtr, because action can diverge + +-- | A way of creating ByteStrings outside the IO monad. The @Int@ +-- argument gives the final size of the ByteString. +unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString +unsafeCreate l f = unsafeCreateFp l (wrapAction f) +{-# INLINE unsafeCreate #-} + +-- | Like 'unsafeCreate' but instead of giving the final size of the +-- ByteString, it is just an upper bound. The inner action returns +-- the actual size. Unlike 'createAndTrim' the ByteString is not +-- reallocated if the final size is less than the estimated size. +unsafeCreateUptoN :: Int -> (Ptr Word8 -> IO Int) -> ByteString +unsafeCreateUptoN l f = unsafeCreateFpUptoN l (wrapAction f) +{-# INLINE unsafeCreateUptoN #-} + +-- | @since 0.10.12.0 +unsafeCreateUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> (ByteString, a) +unsafeCreateUptoN' l f = unsafeCreateFpUptoN' l (wrapAction f) +{-# INLINE unsafeCreateUptoN' #-} + +-- | Create ByteString of size @l@ and use action @f@ to fill its contents. +create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString +create l action = createFp l (wrapAction action) +{-# INLINE create #-} + +-- | Given a maximum size @l@ and an action @f@ that fills the 'ByteString' +-- starting at the given 'Ptr' and returns the actual utilized length, +-- @`createUptoN'` l f@ returns the filled 'ByteString'. +createUptoN :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString +createUptoN l action = createFpUptoN l (wrapAction action) +{-# INLINE createUptoN #-} + +-- | Like 'createUptoN', but also returns an additional value created by the +-- action. +-- +-- @since 0.10.12.0 +createUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> IO (ByteString, a) +createUptoN' l action = createFpUptoN' l (wrapAction action) +{-# INLINE createUptoN' #-} + +-- | Given the maximum size needed and a function to make the contents +-- of a ByteString, createAndTrim makes the 'ByteString'. The generating +-- function is required to return the actual final size (<= the maximum +-- size), and the resulting byte array is reallocated to this size. +-- +-- createAndTrim is the main mechanism for creating custom, efficient +-- ByteString functions, using Haskell or C functions to fill the space. +-- +createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString +createAndTrim l action = createFpAndTrim l (wrapAction action) +{-# INLINE createAndTrim #-} + +createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a) +createAndTrim' l action = createFpAndTrim' l (wrapAction action) +{-# INLINE createAndTrim' #-} + + +-- | Wrapper of 'Foreign.ForeignPtr.mallocForeignPtrBytes' with faster implementation for GHC +-- +mallocByteString :: Int -> IO (ForeignPtr a) +mallocByteString = mallocPlainForeignPtrBytes +{-# INLINE mallocByteString #-} + +------------------------------------------------------------------------ +-- Implementations for Eq, Ord and Monoid instances + +eq :: ByteString -> ByteString -> Bool +eq a@(BS fp len) b@(BS fp' len') + | len /= len' = False -- short cut on length + | fp == fp' = True -- short cut for the same string + | otherwise = compareBytes a b == EQ +{-# INLINE eq #-} +-- ^ still needed + +compareBytes :: ByteString -> ByteString -> Ordering +compareBytes (BS _ 0) (BS _ 0) = EQ -- short cut for empty strings +compareBytes (BS fp1 len1) (BS fp2 len2) = + accursedUnutterablePerformIO $ + unsafeWithForeignPtr fp1 $ \p1 -> + unsafeWithForeignPtr fp2 $ \p2 -> do + i <- memcmp p1 p2 (min len1 len2) + return $! case i `compare` 0 of + EQ -> len1 `compare` len2 + x -> x + + +-- | /O(1)/ The empty 'ByteString' +empty :: ByteString +-- This enables bypassing #457 by not using (polymorphic) mempty in +-- any definitions used by the (Monoid ByteString) instance +empty = BS nullForeignPtr 0 + +append :: ByteString -> ByteString -> ByteString +append (BS _ 0) b = b +append a (BS _ 0) = a +append (BS fp1 len1) (BS fp2 len2) = + unsafeCreateFp (checkedAdd "append" len1 len2) $ \destptr1 -> do + let destptr2 = destptr1 `plusForeignPtr` len1 + memcpyFp destptr1 fp1 len1 + memcpyFp destptr2 fp2 len2 + +concat :: [ByteString] -> ByteString +concat = \bss0 -> goLen0 bss0 bss0 + -- The idea here is we first do a pass over the input list to determine: + -- + -- 1. is a copy necessary? e.g. @concat []@, @concat [mempty, "hello"]@, + -- and @concat ["hello", mempty, mempty]@ can all be handled without + -- copying. + -- 2. if a copy is necessary, how large is the result going to be? + -- + -- If a copy is necessary then we create a buffer of the appropriate size + -- and do another pass over the input list, copying the chunks into the + -- buffer. Also, since foreign calls aren't entirely free we skip over + -- empty chunks while copying. + -- + -- We pass the original [ByteString] (bss0) through as an argument through + -- goLen0, goLen1, and goLen since we will need it again in goCopy. Passing + -- it as an explicit argument avoids capturing it in these functions' + -- closures which would result in unnecessary closure allocation. + where + -- It's still possible that the result is empty + goLen0 _ [] = empty + goLen0 bss0 (BS _ 0 :bss) = goLen0 bss0 bss + goLen0 bss0 (bs :bss) = goLen1 bss0 bs bss + + -- It's still possible that the result is a single chunk + goLen1 _ bs [] = bs + goLen1 bss0 bs (BS _ 0 :bss) = goLen1 bss0 bs bss + goLen1 bss0 bs (BS _ len:bss) = goLen bss0 (checkedAdd "concat" len' len) bss + where BS _ len' = bs + + -- General case, just find the total length we'll need + goLen bss0 !total (BS _ len:bss) = goLen bss0 total' bss + where total' = checkedAdd "concat" total len + goLen bss0 total [] = + unsafeCreateFp total $ \ptr -> goCopy bss0 ptr + + -- Copy the data + goCopy [] !_ = return () + goCopy (BS _ 0 :bss) !ptr = goCopy bss ptr + goCopy (BS fp len:bss) !ptr = do + memcpyFp ptr fp len + goCopy bss (ptr `plusForeignPtr` len) +{-# NOINLINE concat #-} + +{-# RULES +"ByteString concat [] -> empty" + concat [] = empty +"ByteString concat [bs] -> bs" forall x. + concat [x] = x + #-} + +-- | Repeats the given ByteString n times. +-- Polymorphic wrapper to make sure any generated +-- specializations are reasonably small. +stimesPolymorphic :: Integral a => a -> ByteString -> ByteString +{-# INLINABLE stimesPolymorphic #-} +stimesPolymorphic nRaw = \ !bs -> case checkedIntegerToInt n of + Just nInt + | nInt >= 0 -> stimesNonNegativeInt nInt bs + | otherwise -> stimesNegativeErr + Nothing + | n < 0 -> stimesNegativeErr + | BS _ 0 <- bs -> empty + | otherwise -> stimesOverflowErr + where n = toInteger nRaw + -- By exclusively using n instead of nRaw, the semantics are kept simple + -- and the likelihood of potentially dangerous mistakes minimized. + + +stimesNegativeErr :: ByteString +stimesNegativeErr + = error "stimes @ByteString: non-negative multiplier expected" + +stimesOverflowErr :: ByteString +-- Although this only appears once, it is extracted here to prevent it +-- from being duplicated in specializations of 'stimesPolymorphic' +stimesOverflowErr = overflowError "stimes" + +-- | Repeats the given ByteString n times. +stimesNonNegativeInt :: Int -> ByteString -> ByteString +stimesNonNegativeInt n (BS fp len) + | n == 0 = empty + | n == 1 = BS fp len + | len == 0 = empty + | len == 1 = unsafeCreateFp n $ \destfptr -> do + byte <- peekFp fp + void $ unsafeWithForeignPtr destfptr $ \destptr -> + memset destptr byte (fromIntegral n) + | otherwise = unsafeCreateFp size $ \destptr -> do + memcpyFp destptr fp len + fillFrom destptr len + where + size = checkedMultiply "stimes" n len + halfSize = (size - 1) `div` 2 -- subtraction and division won't overflow + + fillFrom :: ForeignPtr Word8 -> Int -> IO () + fillFrom destptr copied + | copied <= halfSize = do + memcpyFp (destptr `plusForeignPtr` copied) destptr copied + fillFrom destptr (copied * 2) + | otherwise = memcpyFp (destptr `plusForeignPtr` copied) destptr (size - copied) + + +------------------------------------------------------------------------ + +-- | Conversion between 'Word8' and 'Char'. Should compile to a no-op. +w2c :: Word8 -> Char +w2c = unsafeChr . fromIntegral +{-# INLINE w2c #-} + +-- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and +-- silently truncates to 8 bits Chars > '\255'. It is provided as +-- convenience for ByteString construction. +c2w :: Char -> Word8 +c2w = fromIntegral . ord +{-# INLINE c2w #-} + +-- | Selects words corresponding to white-space characters in the Latin-1 range +isSpaceWord8 :: Word8 -> Bool +isSpaceWord8 w8 = + -- Avoid the cost of narrowing arithmetic results to Word8, + -- the conversion from Word8 to Word is free. + let w :: Word + !w = fromIntegral w8 + in w .&. 0x50 == 0 -- Quick non-whitespace filter + && w - 0x21 > 0x7e -- Second non-whitespace filter + && ( w == 0x20 -- SP + || w == 0xa0 -- NBSP + || w - 0x09 < 5) -- HT, NL, VT, FF, CR +{-# INLINE isSpaceWord8 #-} + +-- | Selects white-space characters in the Latin-1 range +isSpaceChar8 :: Char -> Bool +isSpaceChar8 = isSpaceWord8 . c2w +{-# INLINE isSpaceChar8 #-} + +------------------------------------------------------------------------ + +-- | The type of exception raised by 'overflowError' +-- and on failure by overflow-checked arithmetic operations. +newtype SizeOverflowException + = SizeOverflowException String + +instance Show SizeOverflowException where + show (SizeOverflowException err) = err + +instance Exception SizeOverflowException + +-- | Raises a 'SizeOverflowException', +-- with a message using the given function name. +overflowError :: String -> a +overflowError fun = throw $ SizeOverflowException msg + where msg = "Data.ByteString." ++ fun ++ ": size overflow" + +-- | Add two non-negative numbers. +-- Calls 'overflowError' on overflow. +checkedAdd :: String -> Int -> Int -> Int +{-# INLINE checkedAdd #-} +checkedAdd fun x y + | r >= 0 = r + | otherwise = overflowError fun + where r = assert (min x y >= 0) $ x + y + +-- | Multiplies two non-negative numbers. +-- Calls 'overflowError' on overflow. +checkedMultiply :: String -> Int -> Int -> Int +{-# INLINE checkedMultiply #-} +checkedMultiply fun !x@(I# x#) !y@(I# y#) = assert (min x y >= 0) $ +#if TIMES_INT_2_AVAILABLE + case timesInt2# x# y# of + (# 0#, _, result #) -> I# result + _ -> overflowError fun +#else + case timesWord2# (int2Word# x#) (int2Word# y#) of + (# hi, lo #) -> case or# hi (uncheckedShiftRL# lo shiftAmt) of + 0## -> I# (word2Int# lo) + _ -> overflowError fun + where !(I# shiftAmt) = finiteBitSize (0 :: Word) - 1 +#endif + + +-- | Attempts to convert an 'Integer' value to an 'Int', returning +-- 'Nothing' if doing so would result in an overflow. +checkedIntegerToInt :: Integer -> Maybe Int +{-# INLINE checkedIntegerToInt #-} +-- We could use Data.Bits.toIntegralSized, but this hand-rolled +-- version is currently a bit faster as of GHC 9.2. +-- It's even faster to just match on the Integer constructors, but +-- we'd still need a fallback implementation for integer-simple. +checkedIntegerToInt x + | x == toInteger res = Just res + | otherwise = Nothing + where res = fromInteger x :: Int + + +------------------------------------------------------------------------ + +-- | This \"function\" has a superficial similarity to 'System.IO.Unsafe.unsafePerformIO' but +-- it is in fact a malevolent agent of chaos. It unpicks the seams of reality +-- (and the 'IO' monad) so that the normal rules no longer apply. It lulls you +-- into thinking it is reasonable, but when you are not looking it stabs you +-- in the back and aliases all of your mutable buffers. The carcass of many a +-- seasoned Haskell programmer lie strewn at its feet. +-- +-- Witness the trail of destruction: +-- +-- * +-- +-- * +-- +-- * +-- +-- * +-- +-- * +-- +-- * +-- +-- Do not talk about \"safe\"! You do not know what is safe! +-- +-- Yield not to its blasphemous call! Flee traveller! Flee or you will be +-- corrupted and devoured! +-- +{-# INLINE accursedUnutterablePerformIO #-} +accursedUnutterablePerformIO :: IO a -> a +accursedUnutterablePerformIO (IO m) = case m realWorld# of (# _, r #) -> r + +-- --------------------------------------------------------------------- +-- +-- Standard C functions +-- + +foreign import ccall unsafe "string.h strlen" c_strlen + :: CString -> IO CSize + +foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer + :: FunPtr (Ptr Word8 -> IO ()) + +foreign import ccall unsafe "string.h memchr" c_memchr + :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) + +memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) +memchr p w = c_memchr p (fromIntegral w) + +foreign import ccall unsafe "string.h memcmp" c_memcmp + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt + +memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt +memcmp p q s = c_memcmp p q (fromIntegral s) + +foreign import ccall unsafe "string.h memcpy" c_memcpy + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) + +memcpy :: Ptr Word8 -> Ptr Word8 -> Int -> IO () +memcpy p q s = void $ c_memcpy p q (fromIntegral s) + +memcpyFp :: ForeignPtr Word8 -> ForeignPtr Word8 -> Int -> IO () +memcpyFp fp fq s = unsafeWithForeignPtr fp $ \p -> + unsafeWithForeignPtr fq $ \q -> memcpy p q s + +{- +foreign import ccall unsafe "string.h memmove" c_memmove + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8) + +memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () +memmove p q s = do c_memmove p q s + return () +-} + +foreign import ccall unsafe "string.h memset" c_memset + :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8) + +memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8) +memset p w = c_memset p (fromIntegral w) + +-- --------------------------------------------------------------------- +-- +-- Uses our C code +-- + +foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse + :: Ptr Word8 -> Ptr Word8 -> CSize -> IO () + +foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse + :: Ptr Word8 -> Ptr Word8 -> CSize -> Word8 -> IO () + +foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum + :: Ptr Word8 -> CSize -> IO Word8 + +foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum + :: Ptr Word8 -> CSize -> IO Word8 + +foreign import ccall unsafe "static fpstring.h fps_count" c_count + :: Ptr Word8 -> CSize -> Word8 -> IO CSize + +foreign import ccall unsafe "static fpstring.h fps_sort" c_sort + :: Ptr Word8 -> CSize -> IO () diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs index f046d61e5..d9ae76acc 100644 --- a/Data/ByteString/Lazy.hs +++ b/Data/ByteString/Lazy.hs @@ -231,7 +231,7 @@ import qualified Data.List as List import qualified Data.Bifunctor as BF import qualified Data.ByteString as P (ByteString) -- type name only import qualified Data.ByteString as S -- S for strict (hmm...) -import qualified Data.ByteString.Internal as S +import qualified Data.ByteString.Internal.Type as S import qualified Data.ByteString.Unsafe as S import qualified Data.ByteString.Lazy.Internal.Deque as D import Data.ByteString.Lazy.Internal @@ -434,9 +434,11 @@ intersperse w (Chunk c cs) = Chunk (S.intersperse w c) (foldrChunks (Chunk . intersperse') Empty cs) where intersperse' :: P.ByteString -> P.ByteString intersperse' (S.BS fp l) = - S.unsafeCreate (2*l) $ \p' -> S.unsafeWithForeignPtr fp $ \p -> do - poke p' w - S.c_intersperse (p' `plusPtr` 1) p (fromIntegral l) w + S.unsafeCreateFp (2*l) $ \fp' -> + S.unsafeWithForeignPtr fp' $ \p' -> + S.unsafeWithForeignPtr fp $ \p -> do + poke p' w + S.c_intersperse (p' `plusPtr` 1) p (fromIntegral l) w -- | The 'transpose' function transposes the rows and columns of its -- 'ByteString' argument. diff --git a/Data/ByteString/Lazy/Internal.hs b/Data/ByteString/Lazy/Internal.hs index 63dc3f670..5962e5ae8 100644 --- a/Data/ByteString/Lazy/Internal.hs +++ b/Data/ByteString/Lazy/Internal.hs @@ -49,10 +49,9 @@ module Data.ByteString.Lazy.Internal ( import Prelude hiding (concat) -import qualified Data.ByteString.Internal as S +import qualified Data.ByteString.Internal.Type as S import Data.Word (Word8) -import Foreign.Ptr (plusPtr) import Foreign.Storable (Storable(sizeOf)) #if MIN_VERSION_base(4,13,0) @@ -313,14 +312,13 @@ toStrict = \cs -> goLen0 cs cs goLen cs0 !total (Chunk (S.BS _ cl) cs) = goLen cs0 (S.checkedAdd "Lazy.toStrict" total cl) cs goLen cs0 total Empty = - S.unsafeCreate total $ \ptr -> goCopy cs0 ptr + S.unsafeCreateFp total $ \ptr -> goCopy cs0 ptr -- Copy the data goCopy Empty !_ = return () goCopy (Chunk (S.BS _ 0 ) cs) !ptr = goCopy cs ptr - goCopy (Chunk (S.BS fp len) cs) !ptr = - S.unsafeWithForeignPtr fp $ \p -> do - S.memcpy ptr p len - goCopy cs (ptr `plusPtr` len) + goCopy (Chunk (S.BS fp len) cs) !ptr = do + S.memcpyFp ptr fp len + goCopy cs (ptr `S.plusForeignPtr` len) -- See the comment on Data.ByteString.Internal.concat for some background on -- this implementation. diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs index 4813af803..7b50ebe33 100644 --- a/Data/ByteString/Short/Internal.hs +++ b/Data/ByteString/Short/Internal.hs @@ -407,6 +407,7 @@ indexError sbs i = -- | @since 0.11.2.0 unsafePackLenLiteral :: Int -> Addr# -> ShortByteString unsafePackLenLiteral len addr# = + -- createFromPtr allocates, so accursedUnutterablePerformIO is wrong unsafeDupablePerformIO $ createFromPtr (Ptr addr#) len ------------------------------------------------------------------------ diff --git a/bytestring.cabal b/bytestring.cabal index 9e3dea0a3..fd5656fc4 100644 --- a/bytestring.cabal +++ b/bytestring.cabal @@ -101,6 +101,7 @@ library Data.ByteString.Builder.RealFloat.D2S Data.ByteString.Builder.RealFloat.Internal Data.ByteString.Builder.RealFloat.TableGenerator + Data.ByteString.Internal.Type Data.ByteString.Lazy.Internal.Deque Data.ByteString.Lazy.ReadInt Data.ByteString.Lazy.ReadNat