From 50d46197bdce61f77f33be4d2382e15ee8f2fab4 Mon Sep 17 00:00:00 2001 From: Andrew Martin Date: Thu, 19 Sep 2019 13:23:36 -0400 Subject: [PATCH] add delimit --- CHANGELOG.md | 2 ++ src/Data/Bytes/Parser.hs | 31 ++++++++++++++++++++++++++++++- test/Main.hs | 13 +++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 870c8c5..f21f9ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ live in their own module. * Add a lot more functions and attempt to make naming somewhat consistent. +* Add `delimit`. +* Add `annotate` and its infix synonym ``. ## 0.1.0.0 -- 2019-08-22 diff --git a/src/Data/Bytes/Parser.hs b/src/Data/Bytes/Parser.hs index 9166753..859da00 100644 --- a/src/Data/Bytes/Parser.hs +++ b/src/Data/Bytes/Parser.hs @@ -44,6 +44,7 @@ module Data.Bytes.Parser , annotate , () -- * Subparsing + , delimit , measure -- * Lift Effects , effect @@ -80,7 +81,7 @@ import Data.Kind (Type) import GHC.ST (ST(..),runST) import GHC.Exts (Word(W#),Word#,TYPE,State#,Int#,ByteArray#) import GHC.Exts (Int(I#),Char(C#),chr#,RuntimeRep) -import GHC.Exts (Char#,(+#),(-#),(<#),(>#),word2Int#) +import GHC.Exts (Char#,(+#),(-#),(<#),(>#),(>=#),word2Int#) import GHC.Exts (indexCharArray#,indexWord8Array#,ord#) import GHC.Exts (timesWord#,plusWord#) import Data.Bytes.Types (Bytes(..)) @@ -381,3 +382,31 @@ measure (Parser f) = Parser (# e | #) -> (# s1, (# e | #) #) (# | (# y, post, c #) #) -> (# s1, (# | (# (I# (post -# pre), y),post,c #) #) #) ) + +-- | Run a parser in a delimited context, failing if the requested number +-- of bytes are not available or if the delimited parser does not +-- consume all input. This combinator can be understood as a composition +-- of 'take', 'effect', 'parseBytesST', and 'endOfInput'. It is provided as +-- a single combinator because for convenience and because it is easy +-- make mistakes when manually assembling the aforementioned parsers. +-- The pattern of prefixing an encoding with its length is common. +-- This is discussed more in +-- . +-- +-- > delimit e1 e2 n remaining === take e1 n +delimit :: + e -- ^ Error message when not enough bytes are present + -> e -- ^ Error message when delimited parser does not consume all input + -> Int -- ^ Exact number of bytes delimited parser is expected to consume + -> Parser e s a -- ^ Parser to execute in delimited context + -> Parser e s a +delimit esz eleftovers (I# n) (Parser f) = Parser + ( \(# arr, off, len #) s0 -> case len >=# n of + 1# -> case f (# arr, off, n #) s0 of + (# s1, r #) -> case r of + (# e | #) -> (# s1, (# e | #) #) + (# | (# a, newOff, leftovers #) #) -> case leftovers of + 0# -> (# s1, (# | (# a, newOff, len -# n #) #) #) + _ -> (# s1, (# eleftovers | #) #) + _ -> (# s0, (# esz | #) #) + ) diff --git a/test/Main.hs b/test/Main.hs index 070481c..8eee4eb 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -32,6 +32,19 @@ tests :: TestTree tests = testGroup "Parser" [ testProperty "decStandardInt" $ \i -> P.parseBytes (Latin.decStandardInt ()) (bytes (show i)) === P.Success i 0 + , testCase "delimit" $ + P.Success (167,14625) 0 + @=? + P.parseBytes + (do len <- Latin.decUnsignedInt () + Latin.char () ',' + r <- P.delimit () () len $ (,) + <$> Latin.decUnsignedInt () + <* Latin.char () '*' + <*> Latin.decUnsignedInt () + Latin.char () '0' + pure r + ) (bytes "9,167*146250") , testGroup "decUnsignedInt" [ testCase "A" $ P.Failure ()