From c87ccfba6ade658ca3a21667d7836c00aef339e4 Mon Sep 17 00:00:00 2001 From: Adam Gundry Date: Thu, 18 May 2023 10:05:14 +0100 Subject: [PATCH] WIP: experimentally use dupIO to fix #318 This adds a new token type TkDup for use when encoding, that duplicates the contained closure. This means that encodings can explicitly duplicate thunks that may otherwise end up in the old generation. It isn't obvious to me whether this is enough or whether calling dupIO every time would be better (and whether that might have an unreasonable performance cost). --- cborg/cborg.cabal | 1 + cborg/src/Codec/CBOR/Encoding.hs | 2 ++ cborg/src/Codec/CBOR/FlatTerm.hs | 1 + cborg/src/Codec/CBOR/Pretty.hs | 3 +++ cborg/src/Codec/CBOR/Write.hs | 4 ++++ 5 files changed, 11 insertions(+) diff --git a/cborg/cborg.cabal b/cborg/cborg.cabal index 32fd8ef8..38a4bcf4 100644 --- a/cborg/cborg.cabal +++ b/cborg/cborg.cabal @@ -94,6 +94,7 @@ library bytestring >= 0.10.4 && < 0.12, containers >= 0.5 && < 0.7, deepseq >= 1.0 && < 1.5, + dupIO, ghc-prim >= 0.3.1.0 && < 0.11, half >= 0.2.2.3 && < 0.4, primitive >= 0.5 && < 0.9, diff --git a/cborg/src/Codec/CBOR/Encoding.hs b/cborg/src/Codec/CBOR/Encoding.hs index c6e46686..7e969dd1 100644 --- a/cborg/src/Codec/CBOR/Encoding.hs +++ b/cborg/src/Codec/CBOR/Encoding.hs @@ -131,6 +131,8 @@ data Tokens = -- Special | TkEncoded {-# UNPACK #-} !B.ByteString Tokens + | TkDup Tokens + | TkEnd deriving (Show,Eq) diff --git a/cborg/src/Codec/CBOR/FlatTerm.hs b/cborg/src/Codec/CBOR/FlatTerm.hs index a630624c..00434124 100644 --- a/cborg/src/Codec/CBOR/FlatTerm.hs +++ b/cborg/src/Codec/CBOR/FlatTerm.hs @@ -153,6 +153,7 @@ convFlatTerm (Enc.TkFloat64 f ts) = TkFloat64 f : convFlatTerm ts convFlatTerm (Enc.TkBreak ts) = TkBreak : convFlatTerm ts convFlatTerm (Enc.TkEncoded bs ts) = decodePreEncoded bs ++ convFlatTerm ts +convFlatTerm (Enc.TkDup ts) = convFlatTerm ts convFlatTerm Enc.TkEnd = [] -------------------------------------------------------------------------------- diff --git a/cborg/src/Codec/CBOR/Pretty.hs b/cborg/src/Codec/CBOR/Pretty.hs index 7a4eb93d..e5dfb371 100644 --- a/cborg/src/Codec/CBOR/Pretty.hs +++ b/cborg/src/Codec/CBOR/Pretty.hs @@ -215,6 +215,8 @@ pprint = do TkFloat64 _ _ -> termFailure term TkEncoded _ TkEnd -> ppTkEncoded TkEncoded _ _ -> termFailure term + TkDup TkEnd -> str "# Dup" + TkDup _ -> termFailure term TkEnd -> str "# End of input" where termFailure t = fail $ unwords ["pprint: Unexpected token:", show t] @@ -337,6 +339,7 @@ ppTkFloat64 f = str "# float64" >> parens (shown f) unconsToken :: Tokens -> Maybe (Tokens, Tokens) unconsToken TkEnd = Nothing +unconsToken (TkDup tks) = Just (TkDup TkEnd,tks) unconsToken (TkWord w tks) = Just (TkWord w TkEnd,tks) unconsToken (TkWord64 w tks) = Just (TkWord64 w TkEnd,tks) unconsToken (TkInt i tks) = Just (TkInt i TkEnd,tks) diff --git a/cborg/src/Codec/CBOR/Write.hs b/cborg/src/Codec/CBOR/Write.hs index 977685ec..f4a74b35 100644 --- a/cborg/src/Codec/CBOR/Write.hs +++ b/cborg/src/Codec/CBOR/Write.hs @@ -75,6 +75,8 @@ import qualified Codec.CBOR.ByteArray.Sliced as BAS import Codec.CBOR.Encoding import Codec.CBOR.Magic +import qualified Data.Dup as Dup + -------------------------------------------------------------------------------- -- | Turn an 'Encoding' into a lazy 'L.ByteString' in CBOR binary @@ -192,6 +194,8 @@ buildStep vs1 k (BI.BufferRange op0 ope0) = (B.byteString x) (buildStep vs' k) (BI.BufferRange op ope0) + TkDup vs' -> flip go op =<< Dup.dupIO vs' + TkEnd -> k (BI.BufferRange op ope0) | otherwise = return $ BI.bufferFull bound op (buildStep vs k)