From e4f91906bedd05429d8a582825237da8a3962bb9 Mon Sep 17 00:00:00 2001 From: Andrew Martin Date: Wed, 13 Nov 2019 15:32:58 -0500 Subject: [PATCH] Compute length at compile time for literal strings Add unsafePackLiteral to Data.ByteString.Internal. With GHC-8.10+, use known-key variant of C `strlen` from `GHC.CString` that supports constant folding. Also in GHC 8.10, another data constructor of ForeignPtrContents becomes available: LiteralPtr. For string literals, this is now used. It saves space when there are lots of literals, and it improves opportunities for case-of-known data constructor optimizations when a function scrutinizes the length of a ByteString. --- Changelog.md | 3 +++ Data/ByteString.hs | 1 + Data/ByteString/Internal.hs | 39 ++++++++++++++++++++++++++++++++++--- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/Changelog.md b/Changelog.md index c4cc2f52a..7e1df641f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -8,6 +8,9 @@ * Fix benchmark builds * Add GHC 8.10 to the CI matrix * Improve the performance of `sconcat` for lazy and strict bytestrings + * Add `unsafePackLiteral` to `Data.ByteString.Internal`. Where possible, + use known-key variant of C `strlen` from `GHC.CString` that supports + constant folding. 0.10.10.0 July 2019 July 2019 diff --git a/Data/ByteString.hs b/Data/ByteString.hs index 6564e961f..8d80d262d 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -752,6 +752,7 @@ scanr1 f ps -- -- This implemenation uses @memset(3)@ replicate :: Int -> Word8 -> ByteString +{-# inline replicate #-} replicate w c | w <= 0 = empty | otherwise = unsafeCreate w $ \ptr -> diff --git a/Data/ByteString/Internal.hs b/Data/ByteString/Internal.hs index 30e913f6d..4e1e19b21 100644 --- a/Data/ByteString/Internal.hs +++ b/Data/ByteString/Internal.hs @@ -33,7 +33,7 @@ module Data.ByteString.Internal ( packChars, packUptoLenChars, unsafePackLenChars, unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict, unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict, - unsafePackAddress, + unsafePackAddress, unsafePackLiteral, -- * Low level imperative construction create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString @@ -135,6 +135,12 @@ import GHC.ForeignPtr (ForeignPtr(ForeignPtr) ,newForeignPtr_, mallocPlainForeignPtrBytes) import GHC.Ptr (Ptr(..), castPtr) +#if __GLASGOW_HASKELL__ >= 811 +import GHC.CString (cstringLength#) +import GHC.Exts (Int(I#)) +import GHC.ForeignPtr (ForeignPtrContents(FinalPtr)) +#endif + -- CFILES stuff is Hugs only {-# CFILES cbits/fpstring.c #-} @@ -183,6 +189,7 @@ instance Read ByteString where readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ] instance IsString ByteString where + {-# INLINE fromString #-} fromString = packChars instance Data ByteString where @@ -204,7 +211,7 @@ packChars cs = unsafePackLenChars (List.length cs) cs {-# RULES "ByteString packChars/packAddress" forall s . - packChars (unpackCString# s) = accursedUnutterablePerformIO (unsafePackAddress s) + packChars (unpackCString# s) = unsafePackLiteral s #-} unsafePackLenBytes :: Int -> [Word8] -> ByteString @@ -245,14 +252,40 @@ unsafePackLenChars len cs0 = -- unsafePackAddress :: Addr# -> IO ByteString unsafePackAddress addr# = do +#if __GLASGOW_HASKELL__ >= 811 + return $ PS + (accursedUnutterablePerformIO (newForeignPtr_ (Ptr addr#))) + 0 + (I# (cstringLength# addr#)) +#else p <- newForeignPtr_ (castPtr cstr) l <- c_strlen cstr - return $ PS p 0 (fromIntegral l) + let len = fromIntegral l + return $ PS p 0 len where cstr :: CString cstr = Ptr addr# +#endif {-# INLINE unsafePackAddress #-} +-- | See 'unsafePackAddress'. This function has similar behavior. Prefer +-- this function when the address in known to be an @Addr#@ literal. In +-- that context, there is no need for the sequencing guarantees that 'IO' +-- provides. On GHC 8.10 and up, this function uses the @FinalPtr@ data +-- constructor for @ForeignPtrContents@. Do not attempt to add a finalizer +-- to the resulting @ByteString@. Although the bytestrings produced by +-- 'unsafePackAddress' allow finalizers to be added, the bytestrings provided +-- by this function do not. +unsafePackLiteral :: Addr# -> ByteString +unsafePackLiteral addr# = +#if __GLASGOW_HASKELL__ >= 811 + PS (ForeignPtr addr# FinalPtr) 0 (I# (cstringLength# addr#)) +#else + let len = accursedUnutterablePerformIO (c_strlen (Ptr addr#)) + in PS (accursedUnutterablePerformIO (newForeignPtr_ (Ptr addr#))) 0 (fromIntegral len) +#endif +{-# INLINE unsafePackLiteral #-} + packUptoLenBytes :: Int -> [Word8] -> (ByteString, [Word8]) packUptoLenBytes len xs0 =