Skip to content

Commit

Permalink
Compute length at compile time for literal strings
Browse files Browse the repository at this point in the history
Add unsafePackLiteral to Data.ByteString.Internal. With GHC-8.10+,
use known-key variant of C `strlen` from `GHC.CString` that supports
constant folding. Also in GHC 8.10, another data constructor of
ForeignPtrContents becomes available: LiteralPtr. For string literals,
this is now used. It saves space when there are lots of literals, and
it improves opportunities for case-of-known data constructor optimizations
when a function scrutinizes the length of a ByteString.
  • Loading branch information
andrewthad committed May 23, 2020
1 parent bac4225 commit 3940285
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 3 deletions.
6 changes: 6 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
0.10.12.0

* Add `unsafePackLiteral` to `Data.ByteString.Internal`. Where possible,
use known-key variant of C `strlen` from `GHC.CString` that supports
constant folding.

0.10.10.1 <[email protected]> May 2020

* Fix off-by-one infinite loop in primMapByteStringBounded.
Expand Down
1 change: 1 addition & 0 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,7 @@ scanr1 f ps
--
-- This implemenation uses @memset(3)@
replicate :: Int -> Word8 -> ByteString
{-# inline replicate #-}
replicate w c
| w <= 0 = empty
| otherwise = unsafeCreate w $ \ptr ->
Expand Down
39 changes: 36 additions & 3 deletions Data/ByteString/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ module Data.ByteString.Internal (
packChars, packUptoLenChars, unsafePackLenChars,
unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict,
unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
unsafePackAddress,
unsafePackAddress, unsafePackLiteral,

-- * Low level imperative construction
create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
Expand Down Expand Up @@ -135,6 +135,12 @@ import GHC.ForeignPtr (ForeignPtr(ForeignPtr)
,newForeignPtr_, mallocPlainForeignPtrBytes)
import GHC.Ptr (Ptr(..), castPtr)

#if __GLASGOW_HASKELL__ >= 809
import GHC.CString (cstringLength#)
import GHC.Exts (Int(I#))
import GHC.ForeignPtr (ForeignPtrContents(LiteralPtr))
#endif

-- CFILES stuff is Hugs only
{-# CFILES cbits/fpstring.c #-}

Expand Down Expand Up @@ -183,6 +189,7 @@ instance Read ByteString where
readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ]

instance IsString ByteString where
{-# INLINE fromString #-}
fromString = packChars

instance Data ByteString where
Expand All @@ -204,7 +211,7 @@ packChars cs = unsafePackLenChars (List.length cs) cs

{-# RULES
"ByteString packChars/packAddress" forall s .
packChars (unpackCString# s) = accursedUnutterablePerformIO (unsafePackAddress s)
packChars (unpackCString# s) = unsafePackLiteral s
#-}

unsafePackLenBytes :: Int -> [Word8] -> ByteString
Expand Down Expand Up @@ -245,14 +252,40 @@ unsafePackLenChars len cs0 =
--
unsafePackAddress :: Addr# -> IO ByteString
unsafePackAddress addr# = do
#if __GLASGOW_HASKELL__ >= 809
return $ PS
(accursedUnutterablePerformIO (newForeignPtr_ (Ptr addr#)))
0
(I# (cstringLength# addr#))
#else
p <- newForeignPtr_ (castPtr cstr)
l <- c_strlen cstr
return $ PS p 0 (fromIntegral l)
let len = fromIntegral l
return $ PS p 0 len
where
cstr :: CString
cstr = Ptr addr#
#endif
{-# INLINE unsafePackAddress #-}

-- | See 'unsafePackAddress'. This function has similar behavior. Prefer
-- this function when the address in known to be an @Addr#@ literal. In
-- that context, there is no need for the sequencing guarantees that 'IO'
-- provides. On GHC 8.10 and up, this function uses the @LiteralPtr@ data
-- constructor for @ForeignPtrContents@. Do not attempt to add a finalizer
-- to the resulting @ByteString@. Although the bytestrings produced by
-- 'unsafePackAddress' allow finalizers to be added, the bytestrings provided
-- by this function do not.
unsafePackLiteral :: Addr# -> ByteString
unsafePackLiteral addr# =
#if __GLASGOW_HASKELL__ >= 809
PS (ForeignPtr addr# LiteralPtr) 0 (I# (cstringLength# addr#))
#else
let len = accursedUnutterablePerformIO (c_strlen (Ptr addr#))
in PS (accursedUnutterablePerformIO (newForeignPtr_ (Ptr addr#))) 0 (fromIntegral len)
#endif
{-# INLINE unsafePackLiteral #-}


packUptoLenBytes :: Int -> [Word8] -> (ByteString, [Word8])
packUptoLenBytes len xs0 =
Expand Down

0 comments on commit 3940285

Please sign in to comment.