From 50d46197bdce61f77f33be4d2382e15ee8f2fab4 Mon Sep 17 00:00:00 2001
From: Andrew Martin <andrew.thaddeus@gmail.com>
Date: Thu, 19 Sep 2019 13:23:36 -0400
Subject: [PATCH] add delimit

---
 CHANGELOG.md             |  2 ++
 src/Data/Bytes/Parser.hs | 31 ++++++++++++++++++++++++++++++-
 test/Main.hs             | 13 +++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 870c8c5..f21f9ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@
   live in their own module.
 * Add a lot more functions and attempt to make naming somewhat
   consistent.
+* Add `delimit`.
+* Add `annotate` and its infix synonym `<?>`.
 
 ## 0.1.0.0 -- 2019-08-22
 
diff --git a/src/Data/Bytes/Parser.hs b/src/Data/Bytes/Parser.hs
index 9166753..859da00 100644
--- a/src/Data/Bytes/Parser.hs
+++ b/src/Data/Bytes/Parser.hs
@@ -44,6 +44,7 @@ module Data.Bytes.Parser
   , annotate
   , (<?>)
     -- * Subparsing
+  , delimit
   , measure
     -- * Lift Effects
   , effect
@@ -80,7 +81,7 @@ import Data.Kind (Type)
 import GHC.ST (ST(..),runST)
 import GHC.Exts (Word(W#),Word#,TYPE,State#,Int#,ByteArray#)
 import GHC.Exts (Int(I#),Char(C#),chr#,RuntimeRep)
-import GHC.Exts (Char#,(+#),(-#),(<#),(>#),word2Int#)
+import GHC.Exts (Char#,(+#),(-#),(<#),(>#),(>=#),word2Int#)
 import GHC.Exts (indexCharArray#,indexWord8Array#,ord#)
 import GHC.Exts (timesWord#,plusWord#)
 import Data.Bytes.Types (Bytes(..))
@@ -381,3 +382,31 @@ measure (Parser f) = Parser
       (# e | #) -> (# s1, (# e | #) #)
       (# | (# y, post, c #) #) -> (# s1, (# | (# (I# (post -# pre), y),post,c #) #) #)
   )
+
+-- | Run a parser in a delimited context, failing if the requested number
+-- of bytes are not available or if the delimited parser does not
+-- consume all input. This combinator can be understood as a composition
+-- of 'take', 'effect', 'parseBytesST', and 'endOfInput'. It is provided as
+-- a single combinator because for convenience and because it is easy
+-- make mistakes when manually assembling the aforementioned parsers.
+-- The pattern of prefixing an encoding with its length is common.
+-- This is discussed more in
+-- <https://github.com/bos/attoparsec/issues/129 attoparsec issue #129>.
+--
+-- > delimit e1 e2 n remaining === take e1 n
+delimit ::
+     e -- ^ Error message when not enough bytes are present
+  -> e -- ^ Error message when delimited parser does not consume all input
+  -> Int -- ^ Exact number of bytes delimited parser is expected to consume
+  -> Parser e s a -- ^ Parser to execute in delimited context
+  -> Parser e s a
+delimit esz eleftovers (I# n) (Parser f) = Parser
+  ( \(# arr, off, len #) s0 -> case len >=# n of
+    1# -> case f (# arr, off, n #) s0 of
+      (# s1, r #) -> case r of
+        (# e | #) -> (# s1, (# e | #) #)
+        (# | (# a, newOff, leftovers #) #) -> case leftovers of
+          0# -> (# s1, (# | (# a, newOff, len -# n #) #) #)
+          _ -> (# s1, (# eleftovers | #) #)
+    _ -> (# s0, (# esz | #) #)
+  )
diff --git a/test/Main.hs b/test/Main.hs
index 070481c..8eee4eb 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -32,6 +32,19 @@ tests :: TestTree
 tests = testGroup "Parser"
   [ testProperty "decStandardInt" $ \i ->
       P.parseBytes (Latin.decStandardInt ()) (bytes (show i)) === P.Success i 0
+  , testCase "delimit" $
+      P.Success (167,14625) 0
+      @=?
+      P.parseBytes
+        (do len <- Latin.decUnsignedInt ()
+            Latin.char () ','
+            r <- P.delimit () () len $ (,)
+              <$> Latin.decUnsignedInt ()
+              <*  Latin.char () '*'
+              <*> Latin.decUnsignedInt ()
+            Latin.char () '0'
+            pure r
+        ) (bytes "9,167*146250")
   , testGroup "decUnsignedInt"
     [ testCase "A" $
         P.Failure ()