Skip to content

Commit

Permalink
Compatibility with haskell/text#365 (needs a close review)
Browse files Browse the repository at this point in the history
copyI semantics changed
renaming for 16 -> 8
use iter* functions from Text directly
  • Loading branch information
jberryman committed Aug 26, 2021
1 parent 610892e commit eeba289
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 34 deletions.
1 change: 1 addition & 0 deletions Data/Attoparsec/Internal/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import Prelude hiding (succ)
import qualified Data.Attoparsec.ByteString.Buffer as B
import qualified Data.Attoparsec.Text.Buffer as T

-- | Position(?) in code units (afaict)
newtype Pos = Pos { fromPos :: Int }
deriving (Eq, Ord, Show, Num)

Expand Down
33 changes: 12 additions & 21 deletions Data/Attoparsec/Text/Buffer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ module Data.Attoparsec.Text.Buffer
, iter
, iter_
, substring
, dropWord16
, dropWord8
) where

import Control.Exception (assert)
Expand All @@ -44,9 +44,7 @@ import Data.Monoid as Mon (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.Text ()
import Data.Text.Internal (Text(..))
import Data.Text.Internal.Encoding.Utf16 (chr2)
import Data.Text.Internal.Unsafe.Char (unsafeChr)
import Data.Text.Unsafe (Iter(..))
import Data.Text.Unsafe (Iter(..), iterArray)
import Foreign.Storable (sizeOf)
import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
import GHC.ST (ST(..), runST)
Expand Down Expand Up @@ -100,24 +98,24 @@ pappend buf (Text arr off len) = append buf arr off len

append :: Buffer -> A.Array -> Int -> Int -> Buffer
append (Buf arr0 off0 len0 cap0 gen0) !arr1 !off1 !len1 = runST $ do
let woff = sizeOf (0::Int) `shiftR` 1
let woff = sizeOf (0::Int)
newlen = len0 + len1
!gen = if gen0 == 0 then 0 else readGen arr0
if gen == gen0 && newlen <= cap0
then do
let newgen = gen + 1
marr <- unsafeThaw arr0
writeGen marr newgen
A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
A.copyI marr (off0+len0) arr1 off1 len1
arr2 <- A.unsafeFreeze marr
return (Buf arr2 off0 newlen cap0 newgen)
else do
let newcap = newlen * 2
newgen = 1
marr <- A.new (newcap + woff)
writeGen marr newgen
A.copyI marr woff arr0 off0 (woff+len0)
A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
A.copyI marr woff arr0 off0 len0
A.copyI marr (woff+len0) arr1 off1 len1
arr2 <- A.unsafeFreeze marr
return (Buf arr2 woff newlen newcap newgen)

Expand All @@ -132,31 +130,24 @@ substring s l (Buf arr off len _ _) =
Text arr (off+s) l
{-# INLINE substring #-}

dropWord16 :: Int -> Buffer -> Text
dropWord16 s (Buf arr off len _ _) =
dropWord8 :: Int -> Buffer -> Text
dropWord8 s (Buf arr off len _ _) =
assert (s >= 0 && s <= len) $
Text arr (off+s) (len-s)
{-# INLINE dropWord16 #-}
{-# INLINE dropWord8 #-}

-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
-- array, returning the current character and the delta to add to give
-- the next offset to iterate at.
iter :: Buffer -> Int -> Iter
iter (Buf arr off _ _ _) i
| m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
| otherwise = Iter (chr2 m n) 2
where m = A.unsafeIndex arr j
n = A.unsafeIndex arr k
j = off + i
k = j + 1
iter (Buf arr off _ _ _) i = iterArray arr (off+i)
{-# INLINE iter #-}

-- | /O(1)/ Iterate one step through a UTF-16 array, returning the
-- delta to add to give the next offset to iterate at.
iter_ :: Buffer -> Int -> Int
iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
| otherwise = 2
where m = A.unsafeIndex arr (off+i)
iter_ b i = case iter b i of
Iter _ l -> l
{-# INLINE iter_ #-}

unsafeThaw :: A.Array -> ST s (A.MArray s)
Expand Down
10 changes: 5 additions & 5 deletions Data/Attoparsec/Text/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ string_ suspended f s0 = T.Parser $ \t pos more lose succ ->
| T.null ft -> suspended s s t pos more lose succ
| otherwise -> lose t pos more [] "string"
Just (pfx,ssfx,tsfx)
| T.null ssfx -> let l = Pos (T.lengthWord16 pfx)
| T.null ssfx -> let l = Pos (T.lengthWord8 pfx)
in succ t (pos + l) more (substring pos l t)
| not (T.null tsfx) -> lose t pos more [] "string"
| otherwise -> suspended s ssfx t pos more lose succ
Expand All @@ -195,7 +195,7 @@ stringSuspended f s000 s0 t0 pos0 more0 lose0 succ0 =
in case T.commonPrefixes s0 s of
Nothing -> lose t pos more [] "string"
Just (_pfx,ssfx,tsfx)
| T.null ssfx -> let l = Pos (T.lengthWord16 s000)
| T.null ssfx -> let l = Pos (T.lengthWord8 s000)
in succ t (pos + l) more (substring pos l t)
| T.null tsfx -> stringSuspended f s000 ssfx t pos more lose succ
| otherwise -> lose t pos more [] "string"
Expand Down Expand Up @@ -445,12 +445,12 @@ endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ())

-- | Terminal failure continuation.
failK :: Failure a
failK t (Pos pos) _more stack msg = Fail (Buf.dropWord16 pos t) stack msg
failK t (Pos pos) _more stack msg = Fail (Buf.dropWord8 pos t) stack msg
{-# INLINE failK #-}

-- | Terminal success continuation.
successK :: Success a a
successK t (Pos pos) _more a = Done (Buf.dropWord16 pos t) a
successK t (Pos pos) _more a = Done (Buf.dropWord8 pos t) a
{-# INLINE successK #-}

-- | Run a parser.
Expand All @@ -477,7 +477,7 @@ parseOnly m s = case runParser m (buffer s) 0 Complete failK successK of

get :: Parser Text
get = T.Parser $ \t pos more _lose succ ->
succ t pos more (Buf.dropWord16 (fromPos pos) t)
succ t pos more (Buf.dropWord8 (fromPos pos) t)
{-# INLINE get #-}

endOfChunk :: Parser Bool
Expand Down
16 changes: 8 additions & 8 deletions tests/QC/Buffer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ b_length :: BPB -> Property
b_length (BP _ts t buf) = B.length t === BB.length buf

t_length :: BPT -> Property
t_length (BP _ts t buf) = T.lengthWord16 t === BT.length buf
t_length (BP _ts t buf) = T.lengthWord8 t === BT.length buf

b_unsafeIndex :: BPB -> Gen Property
b_unsafeIndex (BP _ts t buf) = do
Expand All @@ -61,14 +61,14 @@ b_unsafeIndex (BP _ts t buf) = do

t_iter :: BPT -> Gen Property
t_iter (BP _ts t buf) = do
let l = T.lengthWord16 t
let l = T.lengthWord8 t
i <- choose (0,l-1)
let it (T.Iter c q) = (c,q)
return $ l === 0 .||. it (T.iter t i) === it (BT.iter buf i)

t_iter_ :: BPT -> Gen Property
t_iter_ (BP _ts t buf) = do
let l = T.lengthWord16 t
let l = T.lengthWord8 t
i <- choose (0,l-1)
return $ l === 0 .||. T.iter_ t i === BT.iter_ buf i

Expand All @@ -77,10 +77,10 @@ b_unsafeDrop (BP _ts t buf) = do
i <- choose (0, B.length t)
return $ B.unsafeDrop i t === BB.unsafeDrop i buf

t_dropWord16 :: BPT -> Gen Property
t_dropWord16 (BP _ts t buf) = do
i <- choose (0, T.lengthWord16 t)
return $ T.dropWord16 i t === BT.dropWord16 i buf
t_dropWord8 :: BPT -> Gen Property
t_dropWord8 (BP _ts t buf) = do
i <- choose (0, T.lengthWord8 t)
return $ T.dropWord8 i t === BT.dropWord8 i buf

tests :: [TestTree]
tests = [
Expand All @@ -92,5 +92,5 @@ tests = [
, testProperty "t_iter" t_iter
, testProperty "t_iter_" t_iter_
, testProperty "b_unsafeDrop" b_unsafeDrop
, testProperty "t_dropWord16" t_dropWord16
, testProperty "t_dropWord8" t_dropWord8
]

0 comments on commit eeba289

Please sign in to comment.