Skip to content

Commit

Permalink
add delimit
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewthad committed Sep 19, 2019
1 parent b3a9af3 commit 50d4619
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
live in their own module.
* Add a lot more functions and attempt to make naming somewhat
consistent.
* Add `delimit`.
* Add `annotate` and its infix synonym `<?>`.

## 0.1.0.0 -- 2019-08-22

Expand Down
31 changes: 30 additions & 1 deletion src/Data/Bytes/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ module Data.Bytes.Parser
, annotate
, (<?>)
-- * Subparsing
, delimit
, measure
-- * Lift Effects
, effect
Expand Down Expand Up @@ -80,7 +81,7 @@ import Data.Kind (Type)
import GHC.ST (ST(..),runST)
import GHC.Exts (Word(W#),Word#,TYPE,State#,Int#,ByteArray#)
import GHC.Exts (Int(I#),Char(C#),chr#,RuntimeRep)
import GHC.Exts (Char#,(+#),(-#),(<#),(>#),word2Int#)
import GHC.Exts (Char#,(+#),(-#),(<#),(>#),(>=#),word2Int#)
import GHC.Exts (indexCharArray#,indexWord8Array#,ord#)
import GHC.Exts (timesWord#,plusWord#)
import Data.Bytes.Types (Bytes(..))
Expand Down Expand Up @@ -381,3 +382,31 @@ measure (Parser f) = Parser
(# e | #) -> (# s1, (# e | #) #)
(# | (# y, post, c #) #) -> (# s1, (# | (# (I# (post -# pre), y),post,c #) #) #)
)

-- | Run a parser in a delimited context, failing if the requested number
-- of bytes are not available or if the delimited parser does not
-- consume all input. This combinator can be understood as a composition
-- of 'take', 'effect', 'parseBytesST', and 'endOfInput'. It is provided as
-- a single combinator because for convenience and because it is easy
-- make mistakes when manually assembling the aforementioned parsers.
-- The pattern of prefixing an encoding with its length is common.
-- This is discussed more in
-- <https://github.com/bos/attoparsec/issues/129 attoparsec issue #129>.
--
-- > delimit e1 e2 n remaining === take e1 n
delimit ::
e -- ^ Error message when not enough bytes are present
-> e -- ^ Error message when delimited parser does not consume all input
-> Int -- ^ Exact number of bytes delimited parser is expected to consume
-> Parser e s a -- ^ Parser to execute in delimited context
-> Parser e s a
delimit esz eleftovers (I# n) (Parser f) = Parser
( \(# arr, off, len #) s0 -> case len >=# n of
1# -> case f (# arr, off, n #) s0 of
(# s1, r #) -> case r of
(# e | #) -> (# s1, (# e | #) #)
(# | (# a, newOff, leftovers #) #) -> case leftovers of
0# -> (# s1, (# | (# a, newOff, len -# n #) #) #)
_ -> (# s1, (# eleftovers | #) #)
_ -> (# s0, (# esz | #) #)
)
13 changes: 13 additions & 0 deletions test/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ tests :: TestTree
tests = testGroup "Parser"
[ testProperty "decStandardInt" $ \i ->
P.parseBytes (Latin.decStandardInt ()) (bytes (show i)) === P.Success i 0
, testCase "delimit" $
P.Success (167,14625) 0
@=?
P.parseBytes
(do len <- Latin.decUnsignedInt ()
Latin.char () ','
r <- P.delimit () () len $ (,)
<$> Latin.decUnsignedInt ()
<* Latin.char () '*'
<*> Latin.decUnsignedInt ()
Latin.char () '0'
pure r
) (bytes "9,167*146250")
, testGroup "decUnsignedInt"
[ testCase "A" $
P.Failure ()
Expand Down

0 comments on commit 50d4619

Please sign in to comment.