From 0a4f471d4c545c2ebc4040bec54e003c22852b75 Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Sat, 7 Aug 2021 23:49:38 +0100 Subject: [PATCH] Switch Data.ByteString.Short to reuse Data.Array.Byte --- Data/ByteString/Short.hs | 2 + Data/ByteString/Short/Internal.hs | 96 ++++++------------------------- bytestring.cabal | 3 + 3 files changed, 22 insertions(+), 79 deletions(-) diff --git a/Data/ByteString/Short.hs b/Data/ByteString/Short.hs index e1339e970..3668b5401 100644 --- a/Data/ByteString/Short.hs +++ b/Data/ByteString/Short.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE Trustworthy #-} -- | @@ -28,6 +29,7 @@ module Data.ByteString.Short ( -- * The @ShortByteString@ type ShortByteString(..), + pattern SBS, -- ** Memory overhead -- | With GHC, the memory overheads are as follows, expressed in words and diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs index 8639a3e36..819d1e213 100644 --- a/Data/ByteString/Short/Internal.hs +++ b/Data/ByteString/Short/Internal.hs @@ -1,10 +1,13 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveLift #-} {-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskellQuotes #-} @@ -12,7 +15,9 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UnboxedTuples #-} {-# LANGUAGE UnliftedFFITypes #-} +{-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE Unsafe #-} + {-# OPTIONS_HADDOCK not-home #-} {-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-} @@ -41,6 +46,7 @@ module Data.ByteString.Short.Internal ( -- * The @ShortByteString@ type and representation ShortByteString(..), + pattern SBS, -- * Introducing and eliminating 'ShortByteString's empty, @@ -162,6 +168,8 @@ import Data.ByteString.Internal , checkedAdd ) +import Data.Array.Byte + ( ByteArray(..) ) import Data.Bits ( FiniteBits (finiteBitSize) , shiftL @@ -172,21 +180,17 @@ import Data.Bits , (.|.) ) import Data.Data - ( Data(..) - , mkNoRepType - ) + ( Data(..) ) import Data.Monoid ( Monoid(..) ) import Data.Semigroup - ( Semigroup((<>)) ) + ( Semigroup ) import Data.String ( IsString(..) ) -import Data.Typeable - ( Typeable ) import Control.Applicative ( pure ) import Control.DeepSeq - ( NFData(..) ) + ( NFData ) import Control.Exception ( assert ) import Control.Monad @@ -269,7 +273,6 @@ import qualified Data.ByteString.Internal as BS import qualified Data.List as List import qualified GHC.Exts -import qualified Language.Haskell.TH.Lib as TH import qualified Language.Haskell.TH.Syntax as TH -- | A compact representation of a 'Word8' vector. @@ -279,77 +282,28 @@ import qualified Language.Haskell.TH.Syntax as TH -- 'ByteString' (at the cost of copying the string data). It supports very few -- other operations. -- -data ShortByteString = SBS ByteArray# - deriving Typeable - --- | @since 0.11.2.0 -instance TH.Lift ShortByteString where -#if MIN_VERSION_template_haskell(2,16,0) - lift sbs = [| unsafePackLenLiteral |] - `TH.appE` TH.litE (TH.integerL (fromIntegral len)) - `TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len)) - where - BS ptr len = fromShort sbs -#else - lift sbs = [| unsafePackLenLiteral |] - `TH.appE` TH.litE (TH.integerL (fromIntegral len)) - `TH.appE` TH.litE (TH.StringPrimL $ BS.unpackBytes bs) - where - bs@(BS _ len) = fromShort sbs -#endif - -#if MIN_VERSION_template_haskell(2,17,0) - liftTyped = TH.unsafeCodeCoerce . TH.lift -#elif MIN_VERSION_template_haskell(2,16,0) - liftTyped = TH.unsafeTExpCoerce . TH.lift -#endif - --- The ByteArray# representation is always word sized and aligned but with a --- known byte length. Our representation choice for ShortByteString is to leave --- the 0--3 trailing bytes undefined. This means we can use word-sized writes, --- but we have to be careful with reads, see equateBytes and compareBytes below. - +newtype ShortByteString = ShortByteString { unShortByteString :: ByteArray } + deriving (Eq, Semigroup, Monoid, TH.Lift, GHC.Exts.IsList, Data, NFData) -instance Eq ShortByteString where - (==) = equateBytes +pattern SBS :: ByteArray# -> ShortByteString +pattern SBS x = ShortByteString (ByteArray x) +{-# COMPLETE SBS #-} +-- | Lexicographic order. instance Ord ShortByteString where compare = compareBytes -instance Semigroup ShortByteString where - (<>) = append - -instance Monoid ShortByteString where - mempty = empty - mappend = (<>) - mconcat = concat - -instance NFData ShortByteString where - rnf SBS{} = () - instance Show ShortByteString where showsPrec p ps r = showsPrec p (unpackChars ps) r instance Read ShortByteString where readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ] --- | @since 0.10.12.0 -instance GHC.Exts.IsList ShortByteString where - type Item ShortByteString = Word8 - fromList = packBytes - toList = unpack - -- | Beware: 'fromString' truncates multi-byte characters to octets. -- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n� instance IsString ShortByteString where fromString = packChars -instance Data ShortByteString where - gfoldl f z txt = z packBytes `f` unpack txt - toConstr _ = error "Data.ByteString.Short.ShortByteString.toConstr" - gunfold _ _ = error "Data.ByteString.Short.ShortByteString.gunfold" - dataTypeOf _ = mkNoRepType "Data.ByteString.Short.ShortByteString" - ------------------------------------------------------------------------ -- Simple operations @@ -402,12 +356,6 @@ indexError sbs i = moduleError "index" $ "error in array index: " ++ show i ++ " not in range [0.." ++ show (length sbs) ++ "]" --- | @since 0.11.2.0 -unsafePackLenLiteral :: Int -> Addr# -> ShortByteString -unsafePackLenLiteral len addr# = - -- createFromPtr allocates, so accursedUnutterablePerformIO is wrong - unsafeDupablePerformIO $ createFromPtr (Ptr addr#) len - ------------------------------------------------------------------------ -- Internal utils @@ -642,13 +590,6 @@ unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len) ------------------------------------------------------------------------ -- Eq and Ord implementations -equateBytes :: ShortByteString -> ShortByteString -> Bool -equateBytes sbs1 sbs2 = - let !len1 = length sbs1 - !len2 = length sbs2 - in len1 == len2 - && 0 == compareByteArrays (asBA sbs1) (asBA sbs2) len1 - compareBytes :: ShortByteString -> ShortByteString -> Ordering compareBytes sbs1 sbs2 = let !len1 = length sbs1 @@ -661,7 +602,6 @@ compareBytes sbs1 sbs2 = | len2 < len1 -> GT | otherwise -> EQ - ------------------------------------------------------------------------ -- Appending and concatenation @@ -1597,8 +1537,6 @@ findIndices k = \sbs -> | otherwise = go (n + 1) in go 0 - - ------------------------------------------------------------------------ -- Exported low level operations diff --git a/bytestring.cabal b/bytestring.cabal index fd5656fc4..bd47cfc9a 100644 --- a/bytestring.cabal +++ b/bytestring.cabal @@ -74,6 +74,9 @@ source-repository head library build-depends: base >= 4.9 && < 5, ghc-prim, deepseq, template-haskell + if impl(ghc < 9.4) + build-depends: data-array-byte >= 0.1 && < 0.2 + exposed-modules: Data.ByteString Data.ByteString.Char8 Data.ByteString.Unsafe