diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..f6c0b22 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +@byteverse/l3c diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..f002eb5 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,11 @@ +name: build +on: + pull_request: + branches: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/build-matrix.yaml@main + with: + cabal-file: bytesmith.cabal diff --git a/.github/workflows/haskell-ci.yml b/.github/workflows/haskell-ci.yml deleted file mode 100644 index b61ed6c..0000000 --- a/.github/workflows/haskell-ci.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: CI -on: - push: - branches: - - master - pull_request: - types: - - opened - - synchronize -jobs: - build: - runs-on: ${{ matrix.os }} - strategy: - matrix: - ghc: [ - # "8.6.5", "8.8.4", # dependency issues - "8.10.7", "9.0.2", "9.2.4", "9.4.2", "latest" - ] - os: ["ubuntu-latest", "windows-latest", "macos-latest"] - env: - CONFIG: "--enable-tests" - steps: - - uses: actions/checkout@v3 - - uses: haskell/actions/setup@v2 - id: setup-haskell-cabal - with: - ghc-version: ${{ matrix.ghc }} - cabal-version: "latest" - - run: cabal v2-update - - run: cabal v2-freeze $CONFIG - - uses: actions/cache@v3 - with: - path: | - ${{ steps.setup-haskell-cabal.outputs.cabal-store }} - dist-newstyle - key: ${{ runner.os }}-${{ matrix.ghc }}-${{ hashFiles('cabal.project.freeze') }} - restore-keys: | - ${{ runner.os }}-${{ matrix.ghc }}-${{ hashFiles('cabal.project.freeze') }} - ${{ runner.os }}-${{ matrix.ghc }}- - - run: cabal v2-build --disable-optimization -j $CONFIG - - run: cabal v2-test --disable-optimization -j $CONFIG - - run: cabal v2-haddock -j $CONFIG - - run: cabal v2-sdist diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..9411962 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,10 @@ +name: release +on: + push: + tags: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/release.yaml@main + secrets: inherit diff --git a/.gitignore b/.gitignore index 28d589b..cde1485 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode/ dist dist-* cabal-dev diff --git a/CHANGELOG.md b/CHANGELOG.md index cecd774..39b287a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Revision history for bytesmith +## 0.3.11.1 -- 2024-02-13 + +* Update package metadata. + ## 0.3.11.0 -- 2024-01-05 * Add `Data.Bytes.Parser.Latin.hexWord32`. diff --git a/Setup.hs b/Setup.hs deleted file mode 100644 index 9a994af..0000000 --- a/Setup.hs +++ /dev/null @@ -1,2 +0,0 @@ -import Distribution.Simple -main = defaultMain diff --git a/bench/Main.hs b/bench/Main.hs index 373de1a..69e9fe0 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -4,24 +4,28 @@ import Data.Char (ord) import Data.Primitive (ByteArray) import Data.Word (Word8) -import Gauge.Main (defaultMain,bench,whnf) +import Gauge.Main (bench, defaultMain, whnf) import qualified Data.Bytes.Parser as P import qualified Data.Bytes.Parser.Latin as Latin import qualified GHC.Exts as Exts main :: IO () -main = defaultMain - [ bench "decPositiveInteger" $ whnf - (\x -> P.parseByteArray (Latin.decUnsignedInteger ()) x) - encodedBigNumber - ] +main = + defaultMain + [ bench "decPositiveInteger" $ + whnf + (\x -> P.parseByteArray (Latin.decUnsignedInteger ()) x) + encodedBigNumber + ] encodedBigNumber :: ByteArray -encodedBigNumber = stringToByteArray $ show $ id @Integer $ - 246246357264327645234627753190240202405243024304504230544 - * - 732345623640035232405249305932503920593209520932095234651 +encodedBigNumber = + stringToByteArray $ + show $ + id @Integer $ + 246246357264327645234627753190240202405243024304504230544 + * 732345623640035232405249305932503920593209520932095234651 stringToByteArray :: String -> ByteArray stringToByteArray = diff --git a/bytesmith.cabal b/bytesmith.cabal index 1736ede..caa1fc5 100644 --- a/bytesmith.cabal +++ b/bytesmith.cabal @@ -1,79 +1,89 @@ -cabal-version: 2.2 -name: bytesmith -version: 0.3.11.0 -synopsis: Nonresumable byte parser +cabal-version: 2.2 +name: bytesmith +version: 0.3.11.1 +synopsis: Nonresumable byte parser description: Parse bytes as fast as possible. This is a nonresumable parser that aggresively uses `UnboxedSums` to avoid performing any allocations. -homepage: https://github.com/andrewthad/bytesmith -bug-reports: https://github.com/andrewthad/bytesmith/issues -license: BSD-3-Clause -license-file: LICENSE -author: Andrew Martin -maintainer: andrew.thaddeus@gmail.com -copyright: 2019 Andrew Martin -category: Data -extra-source-files: CHANGELOG.md -tested-with: GHC == 8.6.5 || == 8.8.4 || == 8.10.7 || == 9.0.2 || == 9.2.5 || == 9.4.3 + +homepage: https://github.com/byteverse/bytesmith +bug-reports: https://github.com/byteverse/bytesmith/issues +license: BSD-3-Clause +license-file: LICENSE +author: Andrew Martin +maintainer: amartin@layer3com.com +copyright: 2019 Andrew Martin +category: Data +extra-doc-files: CHANGELOG.md +tested-with: GHC ==9.4.8 || ==9.6.3 || ==9.8.1 + +common build-settings + default-language: Haskell2010 + ghc-options: -Wall -Wunused-packages library + import: build-settings exposed-modules: Data.Bytes.Parser - Data.Bytes.Parser.BigEndian - Data.Bytes.Parser.LittleEndian Data.Bytes.Parser.Ascii Data.Bytes.Parser.Base128 + Data.Bytes.Parser.BigEndian Data.Bytes.Parser.Latin Data.Bytes.Parser.Leb128 + Data.Bytes.Parser.LittleEndian Data.Bytes.Parser.Rebindable Data.Bytes.Parser.Unsafe Data.Bytes.Parser.Utf8 + other-modules: Data.Bytes.Parser.Internal Data.Bytes.Parser.Types + build-depends: - , base >=4.12 && <5 - , byteslice >=0.2.6 && <0.3 - , bytestring >=0.10.8 && <0.13 - , contiguous >= 0.6 && < 0.7 - , natural-arithmetic >=0.1.3 - , primitive >=0.7 && <0.10 - , run-st >=0.1 && <0.2 - , text-short >=0.1.3 && <0.2 - , wide-word >=0.1.0.9 && <0.2 - hs-source-dirs: src - ghc-options: -O2 -Wall - default-language: Haskell2010 + , base >=4.12 && <5 + , byteslice >=0.2.6 && <0.3 + , bytestring >=0.10.8 && <0.13 + , contiguous >=0.6 && <0.7 + , natural-arithmetic >=0.1.3 + , primitive >=0.7 && <0.10 + , run-st >=0.1 && <0.2 + , text-short >=0.1.3 && <0.2 + , wide-word >=0.1.0.9 && <0.2 + + hs-source-dirs: src + ghc-options: -O2 test-suite test - default-language: Haskell2010 - type: exitcode-stdio-1.0 + import: build-settings + type: exitcode-stdio-1.0 hs-source-dirs: test - main-is: Main.hs - ghc-options: -Wall -O2 + main-is: Main.hs build-depends: - , base >=4.12.0.0 && <5 + , base >=4.12.0.0 && <5 + , byte-order + , byteslice , bytesmith , primitive - , byteslice - , tasty-hunit , tasty + , tasty-hunit , tasty-quickcheck - , byte-order , text-short , wide-word benchmark bench - type: exitcode-stdio-1.0 + import: build-settings + type: exitcode-stdio-1.0 build-depends: , base - , byteslice - , bytestring + , bytesmith , gauge , primitive - , bytesmith - ghc-options: -Wall -O2 - default-language: Haskell2010 + + ghc-options: -O2 hs-source-dirs: bench - main-is: Main.hs + main-is: Main.hs + +source-repository head + type: git + location: git://github.com/byteverse/bytesmith.git diff --git a/fourmolu.yaml b/fourmolu.yaml new file mode 100644 index 0000000..40cd005 --- /dev/null +++ b/fourmolu.yaml @@ -0,0 +1,51 @@ +# Number of spaces per indentation step +indentation: 2 + +# Max line length for automatic line breaking +column-limit: 200 + +# Styling of arrows in type signatures (choices: trailing, leading, or leading-args) +function-arrows: trailing + +# How to place commas in multi-line lists, records, etc. (choices: leading or trailing) +comma-style: leading + +# Styling of import/export lists (choices: leading, trailing, or diff-friendly) +import-export-style: leading + +# Whether to full-indent or half-indent 'where' bindings past the preceding body +indent-wheres: false + +# Whether to leave a space before an opening record brace +record-brace-space: true + +# Number of spaces between top-level declarations +newlines-between-decls: 1 + +# How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact) +haddock-style: multi-line + +# How to print module docstring +haddock-style-module: null + +# Styling of let blocks (choices: auto, inline, newline, or mixed) +let-style: auto + +# How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space) +in-style: right-align + +# Whether to put parentheses around a single constraint (choices: auto, always, or never) +single-constraint-parens: always + +# Output Unicode syntax (choices: detect, always, or never) +unicode: never + +# Give the programmer more choice on where to insert blank lines +respectful: true + +# Fixity information for operators +fixities: [] + +# Module reexports Fourmolu should know about +reexports: [] + diff --git a/src/Data/Bytes/Parser.hs b/src/Data/Bytes/Parser.hs index 39e5561..5cee4df 100644 --- a/src/Data/Bytes/Parser.hs +++ b/src/Data/Bytes/Parser.hs @@ -1,19 +1,15 @@ {-# language BangPatterns #-} {-# language BinaryLiterals #-} {-# language DataKinds #-} -{-# language DeriveFunctor #-} {-# language DerivingStrategies #-} {-# language GADTSyntax #-} -{-# language KindSignatures #-} {-# language LambdaCase #-} {-# language MagicHash #-} {-# language MultiWayIf #-} {-# language PolyKinds #-} {-# language RankNTypes #-} {-# language ScopedTypeVariables #-} -{-# language StandaloneDeriving #-} {-# language TypeApplications #-} -{-# language UnboxedSums #-} {-# language UnboxedTuples #-} {-# language CPP #-} diff --git a/src/Data/Bytes/Parser/Ascii.hs b/src/Data/Bytes/Parser/Ascii.hs index 9a117d0..f87c6fe 100644 --- a/src/Data/Bytes/Parser/Ascii.hs +++ b/src/Data/Bytes/Parser/Ascii.hs @@ -1,41 +1,40 @@ -{-# language BangPatterns #-} -{-# language BinaryLiterals #-} -{-# language DataKinds #-} -{-# language DeriveFunctor #-} -{-# language DerivingStrategies #-} -{-# language GADTSyntax #-} -{-# language KindSignatures #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language MultiWayIf #-} -{-# language PolyKinds #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language StandaloneDeriving #-} -{-# language TypeApplications #-} -{-# language UnboxedSums #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GADTSyntax #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedTuples #-} --- | Parse input as ASCII-encoded text. Some parsers in this module, --- like 'any' and 'peek', fail if they encounter a byte above @0x7F@. --- Others, like numeric parsers and skipping parsers, leave the cursor --- at the position of the offending byte without failing. +{- | Parse input as ASCII-encoded text. Some parsers in this module, +like 'any' and 'peek', fail if they encounter a byte above @0x7F@. +Others, like numeric parsers and skipping parsers, leave the cursor +at the position of the offending byte without failing. +-} module Data.Bytes.Parser.Ascii ( -- * Matching Latin.char , Latin.char2 , Latin.char3 , Latin.char4 + -- * Case-Insensitive Matching , charInsensitive + -- * Get Character , any , any# , peek , opt + -- * Match Many , shortTrailedBy , takeShortWhile + -- * Skip , Latin.skipDigits , Latin.skipDigits1 @@ -45,6 +44,7 @@ module Data.Bytes.Parser.Ascii , skipAlpha1 , skipTrailedBy , skipWhile + -- * Numbers , Latin.decWord , Latin.decWord8 @@ -52,47 +52,49 @@ module Data.Bytes.Parser.Ascii , Latin.decWord32 ) where -import Prelude hiding (length,any,fail,takeWhile) +import Prelude hiding (any, fail, length, takeWhile) +import Control.Monad.ST.Run (runByteArrayST) import Data.Bits (clearBit) -import Data.Bytes.Types (Bytes(..)) -import Data.Bytes.Parser.Internal (Parser(..),uneffectful,Result#,uneffectful#) -import Data.Bytes.Parser.Internal (Result(..),indexLatinCharArray,upcastUnitSuccess) +import Data.Bytes.Parser.Internal (Parser (..), Result (..), Result#, indexLatinCharArray, uneffectful, uneffectful#, upcastUnitSuccess) +import Data.Bytes.Types (Bytes (..)) import Data.Char (ord) -import Data.Word (Word8) import Data.Text.Short (ShortText) -import Control.Monad.ST.Run (runByteArrayST) -import GHC.Exts (Int(I#),Char(C#),Int#,Char#,(-#),(+#),(<#),ord#,indexCharArray#,chr#) -import GHC.Exts (gtChar#) +import Data.Word (Word8) +import GHC.Exts (Char (C#), Char#, Int (I#), Int#, chr#, gtChar#, indexCharArray#, ord#, (+#), (-#), (<#)) import qualified Data.ByteString.Short.Internal as BSS -import qualified Data.Text.Short.Unsafe as TS import qualified Data.Bytes as Bytes import qualified Data.Bytes.Parser.Latin as Latin import qualified Data.Bytes.Parser.Unsafe as Unsafe import qualified Data.Primitive as PM +import qualified Data.Text.Short.Unsafe as TS + +{- | Consume the next character, failing if it does not match the expected +value or if there is no more input. This check for equality is case +insensitive. --- | Consume the next character, failing if it does not match the expected --- value or if there is no more input. This check for equality is case --- insensitive. --- --- Precondition: The argument must be a letter (@[a-zA-Z]@). Behavior is --- undefined if it is not. +Precondition: The argument must be a letter (@[a-zA-Z]@). Behavior is +undefined if it is not. +-} charInsensitive :: e -> Char -> Parser e s () -{-# inline charInsensitive #-} -charInsensitive e !c = uneffectful $ \chunk -> if length chunk > 0 - then if clearBit (PM.indexByteArray (array chunk) (offset chunk) :: Word8) 5 == w - then Success () (offset chunk + 1) (length chunk - 1) +{-# INLINE charInsensitive #-} +charInsensitive e !c = uneffectful $ \chunk -> + if length chunk > 0 + then + if clearBit (PM.indexByteArray (array chunk) (offset chunk) :: Word8) 5 == w + then Success () (offset chunk + 1) (length chunk - 1) + else Failure e else Failure e - else Failure e - where + where w = clearBit (fromIntegral @Int @Word8 (ord c)) 5 --- | Consume input until the trailer is found. Then, consume --- the trailer as well. This fails if the trailer is not --- found or if any non-ASCII characters are encountered. +{- | Consume input until the trailer is found. Then, consume +the trailer as well. This fails if the trailer is not +found or if any non-ASCII characters are encountered. +-} skipTrailedBy :: e -> Char -> Parser e s () -{-# inline skipTrailedBy #-} +{-# INLINE skipTrailedBy #-} skipTrailedBy e !c = do let go = do !d <- any e @@ -101,11 +103,12 @@ skipTrailedBy e !c = do else go go --- | Consume characters matching the predicate. The stops when it --- encounters a non-matching character or when it encounters a byte --- above @0x7F@. This never fails. +{- | Consume characters matching the predicate. The stops when it +encounters a non-matching character or when it encounters a byte +above @0x7F@. This never fails. +-} takeShortWhile :: (Char -> Bool) -> Parser e s ShortText -{-# inline takeShortWhile #-} +{-# INLINE takeShortWhile #-} takeShortWhile p = do !start <- Unsafe.cursor skipWhile p @@ -116,15 +119,16 @@ takeShortWhile p = do marr <- PM.newByteArray len PM.copyByteArray marr 0 src start len PM.unsafeFreezeByteArray marr - pure - $ TS.fromShortByteStringUnsafe - $ byteArrayToShortByteString - $ r + pure $ + TS.fromShortByteStringUnsafe $ + byteArrayToShortByteString $ + r --- | Consume input through the next occurrence of the target --- character and return the consumed input, excluding the --- target character, as a 'ShortText'. This fails if it --- encounters any bytes above @0x7F@. +{- | Consume input through the next occurrence of the target +character and return the consumed input, excluding the +target character, as a 'ShortText'. This fails if it +encounters any bytes above @0x7F@. +-} shortTrailedBy :: e -> Char -> Parser e s ShortText shortTrailedBy e !c = do !start <- Unsafe.cursor @@ -136,129 +140,143 @@ shortTrailedBy e !c = do marr <- PM.newByteArray len PM.copyByteArray marr 0 src start len PM.unsafeFreezeByteArray marr - pure - $ TS.fromShortByteStringUnsafe - $ byteArrayToShortByteString - $ r - + pure $ + TS.fromShortByteStringUnsafe $ + byteArrayToShortByteString $ + r -- | Consumes and returns the next character in the input. any :: e -> Parser e s Char -{-# inline any #-} -any e = uneffectful $ \chunk -> if length chunk > 0 - then - let c = indexLatinCharArray (array chunk) (offset chunk) - in if c < '\128' - then Success c (offset chunk + 1) (length chunk - 1) - else Failure e - else Failure e +{-# INLINE any #-} +any e = uneffectful $ \chunk -> + if length chunk > 0 + then + let c = indexLatinCharArray (array chunk) (offset chunk) + in if c < '\128' + then Success c (offset chunk + 1) (length chunk - 1) + else Failure e + else Failure e -- | Variant of 'any' with unboxed result. any# :: e -> Parser e s Char# -{-# inline any# #-} -any# e = Parser - (\(# arr, off, len #) s0 -> case len of - 0# -> (# s0, (# e | #) #) - _ -> - let !w = indexCharArray# arr off - in case ord# w <# 128# of - 1# -> (# s0, (# | (# w, off +# 1#, len -# 1# #) #) #) - _ -> (# s0, (# e | #) #) - ) +{-# INLINE any# #-} +any# e = + Parser + ( \(# arr, off, len #) s0 -> case len of + 0# -> (# s0, (# e | #) #) + _ -> + let !w = indexCharArray# arr off + in case ord# w <# 128# of + 1# -> (# s0, (# | (# w, off +# 1#, len -# 1# #) #) #) + _ -> (# s0, (# e | #) #) + ) unI :: Int -> Int# -{-# inline unI #-} +{-# INLINE unI #-} unI (I# w) = w --- | Examine the next byte without consuming it, interpret it as an --- ASCII-encoded character. This fails if the byte is above @0x7F@ or --- if the end of input has been reached. +{- | Examine the next byte without consuming it, interpret it as an +ASCII-encoded character. This fails if the byte is above @0x7F@ or +if the end of input has been reached. +-} peek :: e -> Parser e s Char -{-# inline peek #-} -peek e = uneffectful $ \chunk -> if length chunk > 0 - then - let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 - in if w < 128 - then Success - (C# (chr# (unI (fromIntegral w)))) - (offset chunk) - (length chunk) - else Failure e - else Failure e +{-# INLINE peek #-} +peek e = uneffectful $ \chunk -> + if length chunk > 0 + then + let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 + in if w < 128 + then + Success + (C# (chr# (unI (fromIntegral w)))) + (offset chunk) + (length chunk) + else Failure e + else Failure e --- | Consume the next byte, interpreting it as an ASCII-encoded character. --- Fails if the byte is above @0x7F@. Returns @Nothing@ if the --- end of the input has been reached. +{- | Consume the next byte, interpreting it as an ASCII-encoded character. +Fails if the byte is above @0x7F@. Returns @Nothing@ if the +end of the input has been reached. +-} opt :: e -> Parser e s (Maybe Char) -{-# inline opt #-} -opt e = uneffectful $ \chunk -> if length chunk > 0 - then - let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 - in if w < 128 - then Success - (Just (C# (chr# (unI (fromIntegral w))))) - (offset chunk + 1) - (length chunk - 1) - else Failure e - else Success Nothing (offset chunk) (length chunk) +{-# INLINE opt #-} +opt e = uneffectful $ \chunk -> + if length chunk > 0 + then + let w = PM.indexByteArray (array chunk) (offset chunk) :: Word8 + in if w < 128 + then + Success + (Just (C# (chr# (unI (fromIntegral w))))) + (offset chunk + 1) + (length chunk - 1) + else Failure e + else Success Nothing (offset chunk) (length chunk) --- | Consume characters matching the predicate. The stops when it --- encounters a non-matching character or when it encounters a byte --- above @0x7F@. This never fails. +{- | Consume characters matching the predicate. The stops when it +encounters a non-matching character or when it encounters a byte +above @0x7F@. This never fails. +-} skipWhile :: (Char -> Bool) -> Parser e s () -{-# inline skipWhile #-} -skipWhile p = Parser - ( \(# arr, off0, len0 #) s0 -> - let go off len = case len of - 0# -> (# (), off, 0# #) - _ -> let c = indexCharArray# arr off in - case p (C# c) of - True -> case gtChar# c '\x7F'# of - 1# -> (# (), off, len #) - _ -> go (off +# 1# ) (len -# 1# ) - False -> (# (), off, len #) - in (# s0, (# | go off0 len0 #) #) - ) +{-# INLINE skipWhile #-} +skipWhile p = + Parser + ( \(# arr, off0, len0 #) s0 -> + let go off len = case len of + 0# -> (# (), off, 0# #) + _ -> + let c = indexCharArray# arr off + in case p (C# c) of + True -> case gtChar# c '\x7F'# of + 1# -> (# (), off, len #) + _ -> go (off +# 1#) (len -# 1#) + False -> (# (), off, len #) + in (# s0, (# | go off0 len0 #) #) + ) --- | Skip uppercase and lowercase letters until a non-alpha --- character is encountered. +{- | Skip uppercase and lowercase letters until a non-alpha +character is encountered. +-} skipAlpha :: Parser e s () -{-# inline skipAlpha #-} +{-# INLINE skipAlpha #-} skipAlpha = uneffectful# $ \c -> upcastUnitSuccess (skipAlphaAsciiLoop c) --- | Skip uppercase and lowercase letters until a non-alpha --- character is encountered. +{- | Skip uppercase and lowercase letters until a non-alpha +character is encountered. +-} skipAlpha1 :: e -> Parser e s () -{-# inline skipAlpha1 #-} +{-# INLINE skipAlpha1 #-} skipAlpha1 e = uneffectful# $ \c -> skipAlphaAsciiLoop1Start e c skipAlphaAsciiLoop :: - Bytes -- Chunk - -> (# Int#, Int# #) -{-# inline skipAlphaAsciiLoop #-} -skipAlphaAsciiLoop !c = if length c > 0 - then - let w = indexLatinCharArray (array c) (offset c) - in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z') - then skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c) - else (# unI (offset c), unI (length c) #) - else (# unI (offset c), unI (length c) #) + Bytes -> -- Chunk + (# Int#, Int# #) +{-# INLINE skipAlphaAsciiLoop #-} +skipAlphaAsciiLoop !c = + if length c > 0 + then + let w = indexLatinCharArray (array c) (offset c) + in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z') + then skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c) + else (# unI (offset c), unI (length c) #) + else (# unI (offset c), unI (length c) #) skipAlphaAsciiLoop1Start :: - e - -> Bytes -- chunk - -> Result# e () -{-# inline skipAlphaAsciiLoop1Start #-} -skipAlphaAsciiLoop1Start e !c = if length c > 0 - then - let w = indexLatinCharArray (array c) (offset c) - in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z') - then upcastUnitSuccess (skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c)) - else (# e | #) - else (# e | #) + e -> + Bytes -> -- chunk + Result# e () +{-# INLINE skipAlphaAsciiLoop1Start #-} +skipAlphaAsciiLoop1Start e !c = + if length c > 0 + then + let w = indexLatinCharArray (array c) (offset c) + in if (w >= 'a' && w <= 'z') || (w >= 'A' && w <= 'Z') + then upcastUnitSuccess (skipAlphaAsciiLoop (Bytes.unsafeDrop 1 c)) + else (# e | #) + else (# e | #) byteArrayToShortByteString :: PM.ByteArray -> BSS.ShortByteString -{-# inline byteArrayToShortByteString #-} +{-# INLINE byteArrayToShortByteString #-} byteArrayToShortByteString (PM.ByteArray x) = BSS.SBS x diff --git a/src/Data/Bytes/Parser/Base128.hs b/src/Data/Bytes/Parser/Base128.hs index c9dbd05..725bd6a 100644 --- a/src/Data/Bytes/Parser/Base128.hs +++ b/src/Data/Bytes/Parser/Base128.hs @@ -1,5 +1,5 @@ -{-# language BangPatterns #-} -{-# language TypeApplications #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE TypeApplications #-} module Data.Bytes.Parser.Base128 ( -- * Unsigned @@ -9,9 +9,9 @@ module Data.Bytes.Parser.Base128 ) where import Control.Monad (when) -import Data.Bits (testBit,unsafeShiftL,(.|.),bit,clearBit) +import Data.Bits (bit, clearBit, testBit, unsafeShiftL, (.|.)) import Data.Bytes.Parser (Parser) -import Data.Word (Word8,Word16,Word32,Word64) +import Data.Word (Word16, Word32, Word64, Word8) import qualified Data.Bytes.Parser as P diff --git a/src/Data/Bytes/Parser/BigEndian.hs b/src/Data/Bytes/Parser/BigEndian.hs index 9e158de..d24bea9 100644 --- a/src/Data/Bytes/Parser/BigEndian.hs +++ b/src/Data/Bytes/Parser/BigEndian.hs @@ -1,20 +1,14 @@ -{-# language BangPatterns #-} -{-# language BinaryLiterals #-} -{-# language DataKinds #-} -{-# language DeriveFunctor #-} -{-# language DerivingStrategies #-} -{-# language GADTSyntax #-} -{-# language KindSignatures #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language MultiWayIf #-} -{-# language PolyKinds #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language StandaloneDeriving #-} -{-# language TypeApplications #-} -{-# language UnboxedSums #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GADTSyntax #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedSums #-} -- | Big-endian fixed-width numbers. module Data.Bytes.Parser.BigEndian @@ -25,12 +19,15 @@ module Data.Bytes.Parser.BigEndian , word64 , word128 , word256 + -- * Signed , int8 , int16 , int32 , int64 + -- * Many + -- ** Unsigned , word16Array , word32Array @@ -39,20 +36,20 @@ module Data.Bytes.Parser.BigEndian , word256Array ) where -import Prelude hiding (length,any,fail,takeWhile) +import Prelude hiding (any, fail, length, takeWhile) +#if MIN_VERSION_base(4,18,0) +#else import Control.Applicative (liftA2) -import Data.Bits ((.|.),unsafeShiftL) -import Data.Bytes.Types (Bytes(..)) -import Data.Bytes.Parser.Internal (Parser,uneffectful) -import Data.Bytes.Parser.Internal (Result(..)) -import Data.Bytes.Parser.Internal (swapArray16,swapArray32,swapArray64,swapArray256) -import Data.Bytes.Parser.Internal (swapArray128) -import Data.Word (Word8,Word16,Word32,Word64) -import Data.Int (Int8,Int16,Int32,Int64) -import Data.Primitive (ByteArray(..),PrimArray(..)) -import Data.WideWord (Word128(Word128),Word256(Word256)) -import GHC.ByteOrder (ByteOrder(LittleEndian,BigEndian),targetByteOrder) +#endif +import Data.Bits (unsafeShiftL, (.|.)) +import Data.Bytes.Parser.Internal (Parser, Result (..), swapArray128, swapArray16, swapArray256, swapArray32, swapArray64, uneffectful) +import Data.Bytes.Types (Bytes (..)) +import Data.Int (Int16, Int32, Int64, Int8) +import Data.Primitive (ByteArray (..), PrimArray (..)) +import Data.WideWord (Word128 (Word128), Word256 (Word256)) +import Data.Word (Word16, Word32, Word64, Word8) +import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian), targetByteOrder) import qualified Data.Bytes as Bytes import qualified Data.Bytes.Parser as P @@ -62,14 +59,18 @@ import qualified Data.Primitive as PM word8 :: e -> Parser e s Word8 word8 = P.any --- | Parse an array of big-endian unsigned 16-bit words. If the host is --- big-endian, the implementation is optimized to simply @memcpy@ bytes --- into the result array. The result array always has elements in --- native-endian byte order. +{- | Parse an array of big-endian unsigned 16-bit words. If the host is +big-endian, the implementation is optimized to simply @memcpy@ bytes +into the result array. The result array always has elements in +native-endian byte order. +-} word16Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of big-endian 16-bit words to expect - -> Parser e s (PrimArray Word16) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of big-endian 16-bit words to expect + Int -> + -- | Native-endian elements + Parser e s (PrimArray Word16) word16Array e !n = case targetByteOrder of BigEndian -> fmap (asWord16s . Bytes.toByteArrayClone) (P.take e (n * 2)) LittleEndian -> do @@ -79,9 +80,12 @@ word16Array e !n = case targetByteOrder of -- | Parse an array of big-endian unsigned 32-bit words. word32Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of big-endian 32-bit words to expect - -> Parser e s (PrimArray Word32) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of big-endian 32-bit words to expect + Int -> + -- | Native-endian elements + Parser e s (PrimArray Word32) word32Array e !n = case targetByteOrder of BigEndian -> fmap (asWord32s . Bytes.toByteArrayClone) (P.take e (n * 4)) LittleEndian -> do @@ -91,9 +95,12 @@ word32Array e !n = case targetByteOrder of -- | Parse an array of big-endian unsigned 64-bit words. word64Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of big-endian 64-bit words to consume - -> Parser e s (PrimArray Word64) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of big-endian 64-bit words to consume + Int -> + -- | Native-endian elements + Parser e s (PrimArray Word64) word64Array e !n = case targetByteOrder of BigEndian -> fmap (asWord64s . Bytes.toByteArrayClone) (P.take e (n * 8)) LittleEndian -> do @@ -103,9 +110,12 @@ word64Array e !n = case targetByteOrder of -- | Parse an array of big-endian unsigned 256-bit words. word256Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of big-endian 256-bit words to consume - -> Parser e s (PrimArray Word256) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of big-endian 256-bit words to consume + Int -> + -- | Native-endian elements + Parser e s (PrimArray Word256) word256Array e !n = case targetByteOrder of BigEndian -> fmap (asWord256s . Bytes.toByteArrayClone) (P.take e (n * 32)) LittleEndian -> do @@ -115,9 +125,12 @@ word256Array e !n = case targetByteOrder of -- | Parse an array of big-endian unsigned 128-bit words. word128Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of big-endian 128-bit words to consume - -> Parser e s (PrimArray Word128) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of big-endian 128-bit words to consume + Int -> + -- | Native-endian elements + Parser e s (PrimArray Word128) word128Array e !n = case targetByteOrder of BigEndian -> fmap (asWord128s . Bytes.toByteArrayClone) (P.take e (n * 16)) LittleEndian -> do @@ -142,58 +155,64 @@ asWord256s (ByteArray x) = PrimArray x -- | Unsigned 16-bit word. word16 :: e -> Parser e s Word16 -word16 e = uneffectful $ \chunk -> if length chunk >= 2 - then - let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 - wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 - in Success - (fromIntegral @Word @Word16 (unsafeShiftL (fromIntegral wa) 8 .|. fromIntegral wb)) - (offset chunk + 2) (length chunk - 2) - else Failure e +word16 e = uneffectful $ \chunk -> + if length chunk >= 2 + then + let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 + wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 + in Success + (fromIntegral @Word @Word16 (unsafeShiftL (fromIntegral wa) 8 .|. fromIntegral wb)) + (offset chunk + 2) + (length chunk - 2) + else Failure e -- | Unsigned 32-bit word. word32 :: e -> Parser e s Word32 -word32 e = uneffectful $ \chunk -> if length chunk >= 4 - then - let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 - wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 - wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 - wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 - in Success - (fromIntegral @Word @Word32 - ( unsafeShiftL (fromIntegral wa) 24 .|. - unsafeShiftL (fromIntegral wb) 16 .|. - unsafeShiftL (fromIntegral wc) 8 .|. - fromIntegral wd +word32 e = uneffectful $ \chunk -> + if length chunk >= 4 + then + let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 + wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 + wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 + wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 + in Success + ( fromIntegral @Word @Word32 + ( unsafeShiftL (fromIntegral wa) 24 + .|. unsafeShiftL (fromIntegral wb) 16 + .|. unsafeShiftL (fromIntegral wc) 8 + .|. fromIntegral wd + ) ) - ) - (offset chunk + 4) (length chunk - 4) - else Failure e + (offset chunk + 4) + (length chunk - 4) + else Failure e -- | Unsigned 64-bit word. word64 :: e -> Parser e s Word64 -word64 e = uneffectful $ \chunk -> if length chunk >= 8 - then - let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 - wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 - wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 - wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 - we = PM.indexByteArray (array chunk) (offset chunk + 4) :: Word8 - wf = PM.indexByteArray (array chunk) (offset chunk + 5) :: Word8 - wg = PM.indexByteArray (array chunk) (offset chunk + 6) :: Word8 - wh = PM.indexByteArray (array chunk) (offset chunk + 7) :: Word8 - in Success - ( unsafeShiftL (fromIntegral wa) 56 .|. - unsafeShiftL (fromIntegral wb) 48 .|. - unsafeShiftL (fromIntegral wc) 40 .|. - unsafeShiftL (fromIntegral wd) 32 .|. - unsafeShiftL (fromIntegral we) 24 .|. - unsafeShiftL (fromIntegral wf) 16 .|. - unsafeShiftL (fromIntegral wg) 8 .|. - fromIntegral wh - ) - (offset chunk + 8) (length chunk - 8) - else Failure e +word64 e = uneffectful $ \chunk -> + if length chunk >= 8 + then + let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 + wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 + wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 + wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 + we = PM.indexByteArray (array chunk) (offset chunk + 4) :: Word8 + wf = PM.indexByteArray (array chunk) (offset chunk + 5) :: Word8 + wg = PM.indexByteArray (array chunk) (offset chunk + 6) :: Word8 + wh = PM.indexByteArray (array chunk) (offset chunk + 7) :: Word8 + in Success + ( unsafeShiftL (fromIntegral wa) 56 + .|. unsafeShiftL (fromIntegral wb) 48 + .|. unsafeShiftL (fromIntegral wc) 40 + .|. unsafeShiftL (fromIntegral wd) 32 + .|. unsafeShiftL (fromIntegral we) 24 + .|. unsafeShiftL (fromIntegral wf) 16 + .|. unsafeShiftL (fromIntegral wg) 8 + .|. fromIntegral wh + ) + (offset chunk + 8) + (length chunk - 8) + else Failure e -- | Unsigned 128-bit word. word128 :: e -> Parser e s Word128 diff --git a/src/Data/Bytes/Parser/Internal.hs b/src/Data/Bytes/Parser/Internal.hs index de5b779..dc4c4f9 100644 --- a/src/Data/Bytes/Parser/Internal.hs +++ b/src/Data/Bytes/Parser/Internal.hs @@ -1,26 +1,19 @@ -{-# language BangPatterns #-} -{-# language BinaryLiterals #-} -{-# language DataKinds #-} -{-# language DeriveFunctor #-} -{-# language DerivingStrategies #-} -{-# language GADTSyntax #-} -{-# language KindSignatures #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language MultiWayIf #-} -{-# language NamedFieldPuns #-} -{-# language PolyKinds #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language StandaloneDeriving #-} -{-# language TypeApplications #-} -{-# language UnboxedSums #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GADTSyntax #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE UnboxedTuples #-} module Data.Bytes.Parser.Internal - ( Parser(..) - , Result(..) - , InternalStep(..) + ( Parser (..) + , Result (..) + , InternalStep (..) , Bytes# , ST# , Result# @@ -34,7 +27,7 @@ module Data.Bytes.Parser.Internal , fail , indexLatinCharArray , upcastUnitSuccess - -- Swapping + -- Swapping , swapArray16 , swapArray32 , swapArray64 @@ -42,15 +35,15 @@ module Data.Bytes.Parser.Internal , swapArray256 ) where -import Prelude hiding (length,any,fail,takeWhile) +import Prelude hiding (any, fail, length, takeWhile) import Control.Applicative (Alternative) import Control.Monad.ST.Run (runByteArrayST) -import Data.Primitive (ByteArray(ByteArray)) -import Data.Bytes.Types (Bytes(..)) +import Data.Bytes.Types (Bytes (..)) import Data.Kind (Type) +import Data.Primitive (ByteArray (ByteArray)) import Data.Word (Word8) -import GHC.Exts (TYPE,RuntimeRep,Int(I#),Int#,State#,ByteArray#,Char(C#)) +import GHC.Exts (ByteArray#, Char (C#), Int (I#), Int#, RuntimeRep, State#, TYPE) import qualified Control.Applicative import qualified Control.Monad @@ -59,71 +52,81 @@ import qualified GHC.Exts as Exts -- | A non-resumable parser. newtype Parser :: forall (r :: RuntimeRep). Type -> Type -> TYPE r -> Type where - Parser :: forall (r :: RuntimeRep) (e :: Type) (s :: Type) (a :: TYPE r). - { runParser :: (# ByteArray#, Int#, Int# #) -> ST# s (Result# e a) } -> Parser e s a + Parser :: + forall (r :: RuntimeRep) (e :: Type) (s :: Type) (a :: TYPE r). + {runParser :: (# ByteArray#, Int#, Int# #) -> ST# s (Result# e a)} -> + Parser e s a -- The result of running a parser. Used internally. data Result e a = Failure e - -- An error message indicating what went wrong. - | Success !a !Int !Int - -- The parsed value, the offset after the last consumed byte, and the - -- number of bytes remaining in parsed slice. + | -- An error message indicating what went wrong. + Success !a !Int !Int + +-- The parsed value, the offset after the last consumed byte, and the +-- number of bytes remaining in parsed slice. data InternalStep a = InternalStep !a !Int !Int uneffectful :: (Bytes -> Result e a) -> Parser e s a -{-# inline uneffectful #-} -uneffectful f = Parser - ( \b s0 -> (# s0, unboxResult (f (boxBytes b)) #) ) +{-# INLINE uneffectful #-} +uneffectful f = + Parser + (\b s0 -> (# s0, unboxResult (f (boxBytes b)) #)) -- This is like uneffectful but for parsers that always succeed. -- These combinators typically have names that begin with @try@. unfailing :: (Bytes -> InternalStep a) -> Parser e s a -{-# inline unfailing #-} -unfailing f = Parser - ( \b s0 -> (# s0, case f (boxBytes b) of { InternalStep a (I# off) (I# len) -> (# | (# a, off, len #) #) } #) ) +{-# INLINE unfailing #-} +unfailing f = + Parser + (\b s0 -> (# s0, case f (boxBytes b) of InternalStep a (I# off) (I# len) -> (# | (# a, off, len #) #) #)) boxBytes :: Bytes# -> Bytes -{-# inline boxBytes #-} +{-# INLINE boxBytes #-} boxBytes (# a, b, c #) = Bytes (ByteArray a) (I# b) (I# c) unboxBytes :: Bytes -> Bytes# -{-# inline unboxBytes #-} -unboxBytes (Bytes (ByteArray a) (I# b) (I# c)) = (# a,b,c #) +{-# INLINE unboxBytes #-} +unboxBytes (Bytes (ByteArray a) (I# b) (I# c)) = (# a, b, c #) type Bytes# = (# ByteArray#, Int#, Int# #) type ST# s (a :: TYPE r) = State# s -> (# State# s, a #) type Result# e (a :: TYPE r) = - (# e - | (# a, Int#, Int# #) #) -- ints are offset and length + (# + e | + (# a, Int#, Int# #) -- ints are offset and length + #) unboxResult :: Result e a -> Result# e a -{-# inline unboxResult #-} +{-# INLINE unboxResult #-} unboxResult (Success a (I# b) (I# c)) = (# | (# a, b, c #) #) unboxResult (Failure e) = (# e | #) --- | Combines the error messages using '<>' when both --- parsers fail. -instance Monoid e => Alternative (Parser e s) where - {-# inline empty #-} - {-# inline (<|>) #-} +{- | Combines the error messages using '<>' when both +parsers fail. +-} +instance (Monoid e) => Alternative (Parser e s) where + {-# INLINE empty #-} + {-# INLINE (<|>) #-} empty = fail mempty - Parser f <|> Parser g = Parser - (\x s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# eRight | #) -> case g x s1 of - (# s2, r1 #) -> case r1 of - (# eLeft | #) -> (# s2, (# eRight <> eLeft | #) #) - (# | r #) -> (# s2, (# | r #) #) - (# | r #) -> (# s1, (# | r #) #) - ) + Parser f <|> Parser g = + Parser + ( \x s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# eRight | #) -> case g x s1 of + (# s2, r1 #) -> case r1 of + (# eLeft | #) -> (# s2, (# eRight <> eLeft | #) #) + (# | r #) -> (# s2, (# | r #) #) + (# | r #) -> (# s1, (# | r #) #) + ) -- | Fail with the provided error message. fail :: - e -- ^ Error message - -> Parser e s a -{-# inline fail #-} + -- | Error message + e -> + Parser e s a +{-# INLINE fail #-} fail e = uneffectful $ \_ -> Failure e instance Applicative (Parser e s) where @@ -131,165 +134,175 @@ instance Applicative (Parser e s) where (<*>) = Control.Monad.ap instance Monad (Parser e s) where - {-# inline (>>=) #-} + {-# INLINE (>>=) #-} (>>=) = bindParser instance Functor (Parser e s) where - {-# inline fmap #-} - fmap f (Parser g) = Parser - (\x s0 -> case g x s0 of - (# s1, r #) -> case r of - (# e | #) -> (# s1, (# e | #) #) - (# | (# a, b, c #) #) -> (# s1, (# | (# f a, b, c #) #) #) - ) + {-# INLINE fmap #-} + fmap f (Parser g) = + Parser + ( \x s0 -> case g x s0 of + (# s1, r #) -> case r of + (# e | #) -> (# s1, (# e | #) #) + (# | (# a, b, c #) #) -> (# s1, (# | (# f a, b, c #) #) #) + ) indexLatinCharArray :: ByteArray -> Int -> Char -{-# inline indexLatinCharArray #-} +{-# INLINE indexLatinCharArray #-} indexLatinCharArray (ByteArray arr) (I# off) = C# (Exts.indexCharArray# arr off) uneffectful# :: (Bytes -> Result# e a) -> Parser e s a -{-# inline uneffectful# #-} -uneffectful# f = Parser - ( \b s0 -> (# s0, (f (boxBytes b)) #) ) +{-# INLINE uneffectful# #-} +uneffectful# f = + Parser + (\b s0 -> (# s0, (f (boxBytes b)) #)) -uneffectfulInt# :: (Bytes -> Result# e Int# ) -> Parser e s Int# -{-# inline uneffectfulInt# #-} -uneffectfulInt# f = Parser - ( \b s0 -> (# s0, (f (boxBytes b)) #) ) +uneffectfulInt# :: (Bytes -> Result# e Int#) -> Parser e s Int# +{-# INLINE uneffectfulInt# #-} +uneffectfulInt# f = + Parser + (\b s0 -> (# s0, (f (boxBytes b)) #)) upcastUnitSuccess :: (# Int#, Int# #) -> Result# e () -{-# inline upcastUnitSuccess #-} +{-# INLINE upcastUnitSuccess #-} upcastUnitSuccess (# b, c #) = (# | (# (), b, c #) #) swapArray16 :: Bytes -> ByteArray -swapArray16 (Bytes{array,offset,length}) = runByteArrayST $ do +swapArray16 (Bytes {array, offset, length}) = runByteArrayST $ do dst <- PM.newByteArray length - let go !ixSrc !ixDst !len = if len > 0 - then do - let v0 = PM.indexByteArray array ixSrc :: Word8 - v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 - PM.writeByteArray dst ixDst v1 - PM.writeByteArray dst (ixDst + 1) v0 - go (ixSrc + 2) (ixDst + 2) (len - 2) - else pure () + let go !ixSrc !ixDst !len = + if len > 0 + then do + let v0 = PM.indexByteArray array ixSrc :: Word8 + v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 + PM.writeByteArray dst ixDst v1 + PM.writeByteArray dst (ixDst + 1) v0 + go (ixSrc + 2) (ixDst + 2) (len - 2) + else pure () go offset 0 length PM.unsafeFreezeByteArray dst swapArray32 :: Bytes -> ByteArray -swapArray32 (Bytes{array,offset,length}) = runByteArrayST $ do +swapArray32 (Bytes {array, offset, length}) = runByteArrayST $ do dst <- PM.newByteArray length - let go !ixSrc !ixDst !len = if len > 0 - then do - let v0 = PM.indexByteArray array ixSrc :: Word8 - v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 - v2 = PM.indexByteArray array (ixSrc + 2) :: Word8 - v3 = PM.indexByteArray array (ixSrc + 3) :: Word8 - PM.writeByteArray dst ixDst v3 - PM.writeByteArray dst (ixDst + 1) v2 - PM.writeByteArray dst (ixDst + 2) v1 - PM.writeByteArray dst (ixDst + 3) v0 - go (ixSrc + 4) (ixDst + 4) (len - 4) - else pure () + let go !ixSrc !ixDst !len = + if len > 0 + then do + let v0 = PM.indexByteArray array ixSrc :: Word8 + v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 + v2 = PM.indexByteArray array (ixSrc + 2) :: Word8 + v3 = PM.indexByteArray array (ixSrc + 3) :: Word8 + PM.writeByteArray dst ixDst v3 + PM.writeByteArray dst (ixDst + 1) v2 + PM.writeByteArray dst (ixDst + 2) v1 + PM.writeByteArray dst (ixDst + 3) v0 + go (ixSrc + 4) (ixDst + 4) (len - 4) + else pure () go offset 0 length PM.unsafeFreezeByteArray dst swapArray64 :: Bytes -> ByteArray -swapArray64 (Bytes{array,offset,length}) = runByteArrayST $ do +swapArray64 (Bytes {array, offset, length}) = runByteArrayST $ do dst <- PM.newByteArray length - let go !ixSrc !ixDst !len = if len > 0 - then do - let v0 = PM.indexByteArray array ixSrc :: Word8 - v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 - v2 = PM.indexByteArray array (ixSrc + 2) :: Word8 - v3 = PM.indexByteArray array (ixSrc + 3) :: Word8 - v4 = PM.indexByteArray array (ixSrc + 4) :: Word8 - v5 = PM.indexByteArray array (ixSrc + 5) :: Word8 - v6 = PM.indexByteArray array (ixSrc + 6) :: Word8 - v7 = PM.indexByteArray array (ixSrc + 7) :: Word8 - PM.writeByteArray dst ixDst v7 - PM.writeByteArray dst (ixDst + 1) v6 - PM.writeByteArray dst (ixDst + 2) v5 - PM.writeByteArray dst (ixDst + 3) v4 - PM.writeByteArray dst (ixDst + 4) v3 - PM.writeByteArray dst (ixDst + 5) v2 - PM.writeByteArray dst (ixDst + 6) v1 - PM.writeByteArray dst (ixDst + 7) v0 - go (ixSrc + 8) (ixDst + 8) (len - 8) - else pure () + let go !ixSrc !ixDst !len = + if len > 0 + then do + let v0 = PM.indexByteArray array ixSrc :: Word8 + v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 + v2 = PM.indexByteArray array (ixSrc + 2) :: Word8 + v3 = PM.indexByteArray array (ixSrc + 3) :: Word8 + v4 = PM.indexByteArray array (ixSrc + 4) :: Word8 + v5 = PM.indexByteArray array (ixSrc + 5) :: Word8 + v6 = PM.indexByteArray array (ixSrc + 6) :: Word8 + v7 = PM.indexByteArray array (ixSrc + 7) :: Word8 + PM.writeByteArray dst ixDst v7 + PM.writeByteArray dst (ixDst + 1) v6 + PM.writeByteArray dst (ixDst + 2) v5 + PM.writeByteArray dst (ixDst + 3) v4 + PM.writeByteArray dst (ixDst + 4) v3 + PM.writeByteArray dst (ixDst + 5) v2 + PM.writeByteArray dst (ixDst + 6) v1 + PM.writeByteArray dst (ixDst + 7) v0 + go (ixSrc + 8) (ixDst + 8) (len - 8) + else pure () go offset 0 length PM.unsafeFreezeByteArray dst swapArray128 :: Bytes -> ByteArray -swapArray128 (Bytes{array,offset,length}) = runByteArrayST $ do +swapArray128 (Bytes {array, offset, length}) = runByteArrayST $ do dst <- PM.newByteArray length - let go !ixSrc !ixDst !len = if len > 0 - then do - let v0 = PM.indexByteArray array ixSrc :: Word8 - v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 - v2 = PM.indexByteArray array (ixSrc + 2) :: Word8 - v3 = PM.indexByteArray array (ixSrc + 3) :: Word8 - v4 = PM.indexByteArray array (ixSrc + 4) :: Word8 - v5 = PM.indexByteArray array (ixSrc + 5) :: Word8 - v6 = PM.indexByteArray array (ixSrc + 6) :: Word8 - v7 = PM.indexByteArray array (ixSrc + 7) :: Word8 - v8 = PM.indexByteArray array (ixSrc + 8) :: Word8 - v9 = PM.indexByteArray array (ixSrc + 9) :: Word8 - v10 = PM.indexByteArray array (ixSrc + 10) :: Word8 - v11 = PM.indexByteArray array (ixSrc + 11) :: Word8 - v12 = PM.indexByteArray array (ixSrc + 12) :: Word8 - v13 = PM.indexByteArray array (ixSrc + 13) :: Word8 - v14 = PM.indexByteArray array (ixSrc + 14) :: Word8 - v15 = PM.indexByteArray array (ixSrc + 15) :: Word8 - PM.writeByteArray dst ixDst v15 - PM.writeByteArray dst (ixDst + 1) v14 - PM.writeByteArray dst (ixDst + 2) v13 - PM.writeByteArray dst (ixDst + 3) v12 - PM.writeByteArray dst (ixDst + 4) v11 - PM.writeByteArray dst (ixDst + 5) v10 - PM.writeByteArray dst (ixDst + 6) v9 - PM.writeByteArray dst (ixDst + 7) v8 - PM.writeByteArray dst (ixDst + 8) v7 - PM.writeByteArray dst (ixDst + 9) v6 - PM.writeByteArray dst (ixDst + 10) v5 - PM.writeByteArray dst (ixDst + 11) v4 - PM.writeByteArray dst (ixDst + 12) v3 - PM.writeByteArray dst (ixDst + 13) v2 - PM.writeByteArray dst (ixDst + 14) v1 - PM.writeByteArray dst (ixDst + 15) v0 - go (ixSrc + 16) (ixDst + 16) (len - 16) - else pure () + let go !ixSrc !ixDst !len = + if len > 0 + then do + let v0 = PM.indexByteArray array ixSrc :: Word8 + v1 = PM.indexByteArray array (ixSrc + 1) :: Word8 + v2 = PM.indexByteArray array (ixSrc + 2) :: Word8 + v3 = PM.indexByteArray array (ixSrc + 3) :: Word8 + v4 = PM.indexByteArray array (ixSrc + 4) :: Word8 + v5 = PM.indexByteArray array (ixSrc + 5) :: Word8 + v6 = PM.indexByteArray array (ixSrc + 6) :: Word8 + v7 = PM.indexByteArray array (ixSrc + 7) :: Word8 + v8 = PM.indexByteArray array (ixSrc + 8) :: Word8 + v9 = PM.indexByteArray array (ixSrc + 9) :: Word8 + v10 = PM.indexByteArray array (ixSrc + 10) :: Word8 + v11 = PM.indexByteArray array (ixSrc + 11) :: Word8 + v12 = PM.indexByteArray array (ixSrc + 12) :: Word8 + v13 = PM.indexByteArray array (ixSrc + 13) :: Word8 + v14 = PM.indexByteArray array (ixSrc + 14) :: Word8 + v15 = PM.indexByteArray array (ixSrc + 15) :: Word8 + PM.writeByteArray dst ixDst v15 + PM.writeByteArray dst (ixDst + 1) v14 + PM.writeByteArray dst (ixDst + 2) v13 + PM.writeByteArray dst (ixDst + 3) v12 + PM.writeByteArray dst (ixDst + 4) v11 + PM.writeByteArray dst (ixDst + 5) v10 + PM.writeByteArray dst (ixDst + 6) v9 + PM.writeByteArray dst (ixDst + 7) v8 + PM.writeByteArray dst (ixDst + 8) v7 + PM.writeByteArray dst (ixDst + 9) v6 + PM.writeByteArray dst (ixDst + 10) v5 + PM.writeByteArray dst (ixDst + 11) v4 + PM.writeByteArray dst (ixDst + 12) v3 + PM.writeByteArray dst (ixDst + 13) v2 + PM.writeByteArray dst (ixDst + 14) v1 + PM.writeByteArray dst (ixDst + 15) v0 + go (ixSrc + 16) (ixDst + 16) (len - 16) + else pure () go offset 0 length PM.unsafeFreezeByteArray dst swapArray256 :: Bytes -> ByteArray -swapArray256 (Bytes{array,offset,length}) = runByteArrayST $ do +swapArray256 (Bytes {array, offset, length}) = runByteArrayST $ do dst <- PM.newByteArray length - let go !ixSrc !ixDst !len = if len > 0 - then do - let loop !i - | i < 32 = do - let v = PM.indexByteArray array (ixSrc + i) :: Word8 - PM.writeByteArray dst (ixDst + (31 - i)) v - loop (i + 1) - | otherwise = pure () - loop 0 - go (ixSrc + 32) (ixDst + 32) (len - 32) - else pure () + let go !ixSrc !ixDst !len = + if len > 0 + then do + let loop !i + | i < 32 = do + let v = PM.indexByteArray array (ixSrc + i) :: Word8 + PM.writeByteArray dst (ixDst + (31 - i)) v + loop (i + 1) + | otherwise = pure () + loop 0 + go (ixSrc + 32) (ixDst + 32) (len - 32) + else pure () go offset 0 length PM.unsafeFreezeByteArray dst pureParser :: a -> Parser e s a -{-# inline pureParser #-} -pureParser a = Parser - (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) +{-# INLINE pureParser #-} +pureParser a = + Parser + (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) bindParser :: Parser e s a -> (a -> Parser e s b) -> Parser e s b -{-# inline bindParser #-} -bindParser (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) +{-# INLINE bindParser #-} +bindParser (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) diff --git a/src/Data/Bytes/Parser/Leb128.hs b/src/Data/Bytes/Parser/Leb128.hs index a7156b6..04f4867 100644 --- a/src/Data/Bytes/Parser/Leb128.hs +++ b/src/Data/Bytes/Parser/Leb128.hs @@ -1,66 +1,73 @@ -{-# language BangPatterns #-} -{-# language BinaryLiterals #-} -{-# language TypeApplications #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE TypeApplications #-} --- | Parse numbers that have been encoded with . --- LEB-128 allows arbitrarily large numbers to be encoded. Parsers in this --- module will fail if the number they attempt to parse is outside the --- range of what their target type can handle. The parsers for signed --- numbers assume that the numbers have been --- . +{- | Parse numbers that have been encoded with . +LEB-128 allows arbitrarily large numbers to be encoded. Parsers in this +module will fail if the number they attempt to parse is outside the +range of what their target type can handle. The parsers for signed +numbers assume that the numbers have been +. +-} module Data.Bytes.Parser.Leb128 ( -- * Unsigned word16 , word32 , word64 + -- * Signed (Zig-zag) , int16 , int32 , int64 ) where -import Data.Bits (testBit,(.&.),unsafeShiftR,xor,complement) -import Data.Bits (unsafeShiftL,(.|.)) +import Data.Bits (complement, testBit, unsafeShiftL, unsafeShiftR, xor, (.&.), (.|.)) import Data.Bytes.Parser (Parser) -import Data.Int (Int16,Int32,Int64) -import Data.Word (Word8,Word16,Word32,Word64) +import Data.Int (Int16, Int32, Int64) +import Data.Word (Word16, Word32, Word64, Word8) import qualified Data.Bytes.Parser as P --- | Parse a LEB-128-encoded number. If the number is larger --- than @0xFFFF@, fails with the provided error. +{- | Parse a LEB-128-encoded number. If the number is larger +than @0xFFFF@, fails with the provided error. +-} word16 :: e -> Parser e s Word16 word16 e = do w <- stepBoundedWord e 16 0 0 pure (fromIntegral @Word64 @Word16 w) --- | Parse a LEB-128-encoded number. If the number is larger --- than @0xFFFFFFFF@, fails with the provided error. +{- | Parse a LEB-128-encoded number. If the number is larger +than @0xFFFFFFFF@, fails with the provided error. +-} word32 :: e -> Parser e s Word32 word32 e = do w <- stepBoundedWord e 32 0 0 pure (fromIntegral @Word64 @Word32 w) --- | Parse a LEB-128-encoded number. If the number is larger --- than @0xFFFFFFFFFFFFFFFF@, fails with the provided error. +{- | Parse a LEB-128-encoded number. If the number is larger +than @0xFFFFFFFFFFFFFFFF@, fails with the provided error. +-} word64 :: e -> Parser e s Word64 word64 e = stepBoundedWord e 64 0 0 --- | Parse a LEB-128-zigzag-encoded signed number. If the encoded --- number is outside the range @[-32768,32767]@, this fails with --- the provided error. +{- | Parse a LEB-128-zigzag-encoded signed number. If the encoded +number is outside the range @[-32768,32767]@, this fails with +the provided error. +-} int16 :: e -> Parser e s Int16 int16 = fmap zigzagDecode16 . word16 --- | Parse a LEB-128-zigzag-encoded signed number. If the encoded --- number is outside the range @[-2147483648,2147483647]@, this --- fails with the provided error. +{- | Parse a LEB-128-zigzag-encoded signed number. If the encoded +number is outside the range @[-2147483648,2147483647]@, this +fails with the provided error. +-} int32 :: e -> Parser e s Int32 int32 = fmap zigzagDecode32 . word32 --- | Parse a LEB-128-zigzag-encoded signed number. If the encoded --- number is outside the range @[-9223372036854775808,9223372036854775807]@, --- this fails with the provided error. +{- | Parse a LEB-128-zigzag-encoded signed number. If the encoded +number is outside the range @[-9223372036854775808,9223372036854775807]@, +this fails with the provided error. +-} int64 :: e -> Parser e s Int64 int64 = fmap zigzagDecode64 . word64 @@ -72,16 +79,19 @@ stepBoundedWord :: e -> Int -> Word64 -> Int -> Parser e s Word64 stepBoundedWord e !bitLimit !acc0 !accShift = do raw <- P.any e let number = raw .&. 0x7F - acc1 = acc0 .|. - unsafeShiftL (fromIntegral @Word8 @Word64 number) accShift + acc1 = + acc0 + .|. unsafeShiftL (fromIntegral @Word8 @Word64 number) accShift accShift' = accShift + 7 if accShift' <= bitLimit - then if testBit raw 7 - then stepBoundedWord e bitLimit acc1 accShift' - else pure acc1 - else if fromIntegral @Word8 @Word raw < twoExp (bitLimit - accShift) - then pure acc1 -- TODO: no need to mask upper bit in number - else P.fail e + then + if testBit raw 7 + then stepBoundedWord e bitLimit acc1 accShift' + else pure acc1 + else + if fromIntegral @Word8 @Word raw < twoExp (bitLimit - accShift) + then pure acc1 -- TODO: no need to mask upper bit in number + else P.fail e twoExp :: Int -> Word twoExp x = unsafeShiftL 1 x diff --git a/src/Data/Bytes/Parser/LittleEndian.hs b/src/Data/Bytes/Parser/LittleEndian.hs index ac8b5cd..24040cb 100644 --- a/src/Data/Bytes/Parser/LittleEndian.hs +++ b/src/Data/Bytes/Parser/LittleEndian.hs @@ -1,24 +1,19 @@ -{-# language BangPatterns #-} -{-# language BinaryLiterals #-} -{-# language DataKinds #-} -{-# language DeriveFunctor #-} -{-# language DerivingStrategies #-} -{-# language GADTSyntax #-} -{-# language KindSignatures #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language MultiWayIf #-} -{-# language PolyKinds #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language StandaloneDeriving #-} -{-# language TypeApplications #-} -{-# language UnboxedSums #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GADTSyntax #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedSums #-} -- | Little-endian fixed-width numbers. module Data.Bytes.Parser.LittleEndian ( -- * One + -- ** Unsigned word8 , word16 @@ -26,36 +21,40 @@ module Data.Bytes.Parser.LittleEndian , word64 , word128 , word256 + -- ** Signed , int8 , int16 , int32 , int64 + -- * Many + -- ** Unsigned , word16Array , word32Array , word64Array , word128Array , word256Array + -- ** Unsigned , int64Array ) where -import Prelude hiding (length,any,fail,takeWhile) +import Prelude hiding (any, fail, length, takeWhile) +#if MIN_VERSION_base(4,18,0) +#else import Control.Applicative (liftA2) -import Data.Bits ((.|.),unsafeShiftL) -import Data.Primitive (ByteArray(..),PrimArray(..)) -import Data.Bytes.Types (Bytes(..)) -import Data.Bytes.Parser.Internal (Parser,uneffectful) -import Data.Bytes.Parser.Internal (Result(..)) -import Data.Bytes.Parser.Internal (swapArray16,swapArray32) -import Data.Bytes.Parser.Internal (swapArray64,swapArray128,swapArray256) -import Data.Word (Word8,Word16,Word32,Word64) -import Data.Int (Int8,Int16,Int32,Int64) -import Data.WideWord (Word128(Word128),Word256(Word256)) -import GHC.ByteOrder (ByteOrder(LittleEndian,BigEndian),targetByteOrder) +#endif +import Data.Bits (unsafeShiftL, (.|.)) +import Data.Bytes.Parser.Internal (Parser, Result (..), swapArray128, swapArray16, swapArray256, swapArray32, swapArray64, uneffectful) +import Data.Bytes.Types (Bytes (..)) +import Data.Int (Int16, Int32, Int64, Int8) +import Data.Primitive (ByteArray (..), PrimArray (..)) +import Data.WideWord (Word128 (Word128), Word256 (Word256)) +import Data.Word (Word16, Word32, Word64, Word8) +import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian), targetByteOrder) import qualified Data.Bytes as Bytes import qualified Data.Bytes.Parser as P @@ -65,14 +64,18 @@ import qualified Data.Primitive as PM word8 :: e -> Parser e s Word8 word8 = P.any --- | Array of little-endian unsigned 16-bit words. If the host is --- little-endian, the implementation is optimized to simply @memcpy@ --- bytes into the result array. The result array always has elements --- in native-endian byte order. +{- | Array of little-endian unsigned 16-bit words. If the host is +little-endian, the implementation is optimized to simply @memcpy@ +bytes into the result array. The result array always has elements +in native-endian byte order. +-} word16Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of little-endian 16-bit words to expect - -> Parser e s (PrimArray Word16) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of little-endian 16-bit words to expect + Int -> + -- | Native-endian elements + Parser e s (PrimArray Word16) word16Array e !n = case targetByteOrder of LittleEndian -> fmap (asWord16s . Bytes.toByteArrayClone) (P.take e (n * 2)) BigEndian -> do @@ -82,9 +85,12 @@ word16Array e !n = case targetByteOrder of -- | Parse an array of little-endian unsigned 32-bit words. word32Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of little-endian 32-bit words to consume - -> Parser e s (PrimArray Word32) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of little-endian 32-bit words to consume + Int -> + -- | Native-endian elements + Parser e s (PrimArray Word32) word32Array e !n = case targetByteOrder of LittleEndian -> fmap (asWord32s . Bytes.toByteArrayClone) (P.take e (n * 4)) BigEndian -> do @@ -94,9 +100,12 @@ word32Array e !n = case targetByteOrder of -- | Parse an array of little-endian unsigned 64-bit words. word64Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of little-endian 64-bit words to consume - -> Parser e s (PrimArray Word64) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of little-endian 64-bit words to consume + Int -> + -- | Native-endian elements + Parser e s (PrimArray Word64) word64Array e !n = case targetByteOrder of LittleEndian -> fmap (asWord64s . Bytes.toByteArrayClone) (P.take e (n * 8)) BigEndian -> do @@ -106,9 +115,12 @@ word64Array e !n = case targetByteOrder of -- | Parse an array of little-endian unsigned 128-bit words. word128Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of little-endian 128-bit words to consume - -> Parser e s (PrimArray Word128) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of little-endian 128-bit words to consume + Int -> + -- | Native-endian elements + Parser e s (PrimArray Word128) word128Array e !n = case targetByteOrder of LittleEndian -> fmap (asWord128s . Bytes.toByteArrayClone) (P.take e (n * 16)) BigEndian -> do @@ -118,9 +130,12 @@ word128Array e !n = case targetByteOrder of -- | Parse an array of little-endian unsigned 256-bit words. word256Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of little-endian 256-bit words to consume - -> Parser e s (PrimArray Word256) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of little-endian 256-bit words to consume + Int -> + -- | Native-endian elements + Parser e s (PrimArray Word256) word256Array e !n = case targetByteOrder of LittleEndian -> fmap (asWord256s . Bytes.toByteArrayClone) (P.take e (n * 32)) BigEndian -> do @@ -130,9 +145,12 @@ word256Array e !n = case targetByteOrder of -- | Parse an array of little-endian signed 64-bit words. int64Array :: - e -- ^ Error message if not enough bytes are present - -> Int -- ^ Number of little-endian 64-bit words to expect - -> Parser e s (PrimArray Int64) -- ^ Native-endian elements + -- | Error message if not enough bytes are present + e -> + -- | Number of little-endian 64-bit words to expect + Int -> + -- | Native-endian elements + Parser e s (PrimArray Int64) int64Array e !n = do PrimArray x <- word64Array e n pure (PrimArray x) @@ -154,58 +172,64 @@ asWord256s (ByteArray x) = PrimArray x -- | Unsigned 16-bit word. word16 :: e -> Parser e s Word16 -word16 e = uneffectful $ \chunk -> if length chunk >= 2 - then - let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 - wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 - in Success - (fromIntegral @Word @Word16 (unsafeShiftL (fromIntegral wb) 8 .|. fromIntegral wa)) - (offset chunk + 2) (length chunk - 2) - else Failure e +word16 e = uneffectful $ \chunk -> + if length chunk >= 2 + then + let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 + wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 + in Success + (fromIntegral @Word @Word16 (unsafeShiftL (fromIntegral wb) 8 .|. fromIntegral wa)) + (offset chunk + 2) + (length chunk - 2) + else Failure e -- | Unsigned 32-bit word. word32 :: e -> Parser e s Word32 -word32 e = uneffectful $ \chunk -> if length chunk >= 4 - then - let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 - wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 - wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 - wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 - in Success - (fromIntegral @Word @Word32 - ( unsafeShiftL (fromIntegral wd) 24 .|. - unsafeShiftL (fromIntegral wc) 16 .|. - unsafeShiftL (fromIntegral wb) 8 .|. - fromIntegral wa +word32 e = uneffectful $ \chunk -> + if length chunk >= 4 + then + let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 + wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 + wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 + wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 + in Success + ( fromIntegral @Word @Word32 + ( unsafeShiftL (fromIntegral wd) 24 + .|. unsafeShiftL (fromIntegral wc) 16 + .|. unsafeShiftL (fromIntegral wb) 8 + .|. fromIntegral wa + ) ) - ) - (offset chunk + 4) (length chunk - 4) - else Failure e + (offset chunk + 4) + (length chunk - 4) + else Failure e -- | Unsigned 64-bit word. word64 :: e -> Parser e s Word64 -word64 e = uneffectful $ \chunk -> if length chunk >= 8 - then - let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 - wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 - wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 - wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 - we = PM.indexByteArray (array chunk) (offset chunk + 4) :: Word8 - wf = PM.indexByteArray (array chunk) (offset chunk + 5) :: Word8 - wg = PM.indexByteArray (array chunk) (offset chunk + 6) :: Word8 - wh = PM.indexByteArray (array chunk) (offset chunk + 7) :: Word8 - in Success - ( unsafeShiftL (fromIntegral wh) 56 .|. - unsafeShiftL (fromIntegral wg) 48 .|. - unsafeShiftL (fromIntegral wf) 40 .|. - unsafeShiftL (fromIntegral we) 32 .|. - unsafeShiftL (fromIntegral wd) 24 .|. - unsafeShiftL (fromIntegral wc) 16 .|. - unsafeShiftL (fromIntegral wb) 8 .|. - fromIntegral wa - ) - (offset chunk + 8) (length chunk - 8) - else Failure e +word64 e = uneffectful $ \chunk -> + if length chunk >= 8 + then + let wa = PM.indexByteArray (array chunk) (offset chunk) :: Word8 + wb = PM.indexByteArray (array chunk) (offset chunk + 1) :: Word8 + wc = PM.indexByteArray (array chunk) (offset chunk + 2) :: Word8 + wd = PM.indexByteArray (array chunk) (offset chunk + 3) :: Word8 + we = PM.indexByteArray (array chunk) (offset chunk + 4) :: Word8 + wf = PM.indexByteArray (array chunk) (offset chunk + 5) :: Word8 + wg = PM.indexByteArray (array chunk) (offset chunk + 6) :: Word8 + wh = PM.indexByteArray (array chunk) (offset chunk + 7) :: Word8 + in Success + ( unsafeShiftL (fromIntegral wh) 56 + .|. unsafeShiftL (fromIntegral wg) 48 + .|. unsafeShiftL (fromIntegral wf) 40 + .|. unsafeShiftL (fromIntegral we) 32 + .|. unsafeShiftL (fromIntegral wd) 24 + .|. unsafeShiftL (fromIntegral wc) 16 + .|. unsafeShiftL (fromIntegral wb) 8 + .|. fromIntegral wa + ) + (offset chunk + 8) + (length chunk - 8) + else Failure e -- | Unsigned 256-bit word. word256 :: e -> Parser e s Word256 diff --git a/src/Data/Bytes/Parser/Rebindable.hs b/src/Data/Bytes/Parser/Rebindable.hs index ac29782..22edd43 100644 --- a/src/Data/Bytes/Parser/Rebindable.hs +++ b/src/Data/Bytes/Parser/Rebindable.hs @@ -1,31 +1,30 @@ -{-# language CPP #-} -{-# language DataKinds #-} -{-# language FlexibleInstances #-} -{-# language MagicHash #-} -{-# language MultiParamTypeClasses #-} -{-# language PolyKinds #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language UnboxedSums #-} -{-# language UnboxedTuples #-} - --- | Provides levity-polymorphic variants of @>>=@, @>>@, and @pure@ --- used to assemble parsers whose result types are unlifted. This --- cannot be used with the @RebindableSyntax@ extension because that --- extension disallows representations other than @LiftedRep@. Consequently, --- users of this module must manually desugar do notation. See the --- @url-bytes@ library for an example of this module in action. --- --- Only resort to the functions in this module after checking that --- GHC is unable to optimize away @I#@ and friends in your code. +{-# LANGUAGE CPP #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE UnboxedTuples #-} + +{- | Provides levity-polymorphic variants of @>>=@, @>>@, and @pure@ +used to assemble parsers whose result types are unlifted. This +cannot be used with the @RebindableSyntax@ extension because that +extension disallows representations other than @LiftedRep@. Consequently, +users of this module must manually desugar do notation. See the +@url-bytes@ library for an example of this module in action. + +Only resort to the functions in this module after checking that +GHC is unable to optimize away @I#@ and friends in your code. +-} module Data.Bytes.Parser.Rebindable - ( Bind(..) - , Pure(..) + ( Bind (..) + , Pure (..) ) where +import Data.Bytes.Parser.Internal (Parser (..)) +import GHC.Exts (RuntimeRep (..), TYPE) import Prelude () -import GHC.Exts (TYPE,RuntimeRep(..)) -import Data.Bytes.Parser.Internal (Parser(..)) #if MIN_VERSION_base(4,16,0) import GHC.Exts (LiftedRep) @@ -34,411 +33,506 @@ type LiftedRep = 'LiftedRep #endif class Bind (ra :: RuntimeRep) (rb :: RuntimeRep) where - (>>=) :: forall e s (a :: TYPE ra) (b :: TYPE rb). - Parser e s a -> (a -> Parser e s b) -> Parser e s b - (>>) :: forall e s (a :: TYPE ra) (b :: TYPE rb). - Parser e s a -> Parser e s b -> Parser e s b + (>>=) :: + forall e s (a :: TYPE ra) (b :: TYPE rb). + Parser e s a -> + (a -> Parser e s b) -> + Parser e s b + (>>) :: + forall e s (a :: TYPE ra) (b :: TYPE rb). + Parser e s a -> + Parser e s b -> + Parser e s b class Pure (ra :: RuntimeRep) where pure :: forall e s (a :: TYPE ra). a -> Parser e s a pureParser :: a -> Parser e s a -{-# inline pureParser #-} -pureParser a = Parser - (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) +{-# INLINE pureParser #-} +pureParser a = + Parser + (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) bindParser :: Parser e s a -> (a -> Parser e s b) -> Parser e s b -{-# inline bindParser #-} -bindParser (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) +{-# INLINE bindParser #-} +bindParser (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) sequenceParser :: Parser e s a -> Parser e s b -> Parser e s b -{-# inline sequenceParser #-} -sequenceParser (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) - -pureIntParser :: forall (a :: TYPE 'IntRep) e s. - a -> Parser e s a -{-# inline pureIntParser #-} -pureIntParser a = Parser - (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) - -bindIntParser :: forall (a :: TYPE 'IntRep) e s b. - Parser e s a -> (a -> Parser e s b) -> Parser e s b -{-# inline bindIntParser #-} -bindIntParser (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) - -bindWordParser :: forall (a :: TYPE 'WordRep) e s b. - Parser e s a -> (a -> Parser e s b) -> Parser e s b -{-# inline bindWordParser #-} -bindWordParser (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) - -sequenceIntParser :: forall (a :: TYPE 'IntRep) e s b. - Parser e s a -> Parser e s b -> Parser e s b -{-# inline sequenceIntParser #-} -sequenceIntParser (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) - -sequenceWordParser :: forall (a :: TYPE 'WordRep) e s b. - Parser e s a -> Parser e s b -> Parser e s b -{-# inline sequenceWordParser #-} -sequenceWordParser (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) - -pureIntPairParser :: forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s. - a -> Parser e s a -{-# inline pureIntPairParser #-} -pureIntPairParser a = Parser - (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) - -bindIntPairParser :: forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s b. - Parser e s a -> (a -> Parser e s b) -> Parser e s b -{-# inline bindIntPairParser #-} -bindIntPairParser (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) - -pureInt5Parser :: forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s. - a -> Parser e s a -{-# inline pureInt5Parser #-} -pureInt5Parser a = Parser - (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) - -bindInt5Parser :: forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s b. - Parser e s a -> (a -> Parser e s b) -> Parser e s b -{-# inline bindInt5Parser #-} -bindInt5Parser (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) - -sequenceInt5Parser :: forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s b. - Parser e s a -> Parser e s b -> Parser e s b -{-# inline sequenceInt5Parser #-} -sequenceInt5Parser (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) - -sequenceIntPairParser :: forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s b. - Parser e s a -> Parser e s b -> Parser e s b -{-# inline sequenceIntPairParser #-} -sequenceIntPairParser (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) - -bindInt2to5Parser :: forall - (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) - (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) - e s. - Parser e s a -> (a -> Parser e s b) -> Parser e s b -{-# inline bindInt2to5Parser #-} -bindInt2to5Parser (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) - -sequenceInt2to5Parser :: forall - (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) - (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) - e s. - Parser e s a -> Parser e s b -> Parser e s b -{-# inline sequenceInt2to5Parser #-} -sequenceInt2to5Parser (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) +{-# INLINE sequenceParser #-} +sequenceParser (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) + +pureIntParser :: + forall (a :: TYPE 'IntRep) e s. + a -> + Parser e s a +{-# INLINE pureIntParser #-} +pureIntParser a = + Parser + (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) + +bindIntParser :: + forall (a :: TYPE 'IntRep) e s b. + Parser e s a -> + (a -> Parser e s b) -> + Parser e s b +{-# INLINE bindIntParser #-} +bindIntParser (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) + +bindWordParser :: + forall (a :: TYPE 'WordRep) e s b. + Parser e s a -> + (a -> Parser e s b) -> + Parser e s b +{-# INLINE bindWordParser #-} +bindWordParser (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) + +sequenceIntParser :: + forall (a :: TYPE 'IntRep) e s b. + Parser e s a -> + Parser e s b -> + Parser e s b +{-# INLINE sequenceIntParser #-} +sequenceIntParser (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) + +sequenceWordParser :: + forall (a :: TYPE 'WordRep) e s b. + Parser e s a -> + Parser e s b -> + Parser e s b +{-# INLINE sequenceWordParser #-} +sequenceWordParser (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) + +pureIntPairParser :: + forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s. + a -> + Parser e s a +{-# INLINE pureIntPairParser #-} +pureIntPairParser a = + Parser + (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) + +bindIntPairParser :: + forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s b. + Parser e s a -> + (a -> Parser e s b) -> + Parser e s b +{-# INLINE bindIntPairParser #-} +bindIntPairParser (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) + +pureInt5Parser :: + forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s. + a -> + Parser e s a +{-# INLINE pureInt5Parser #-} +pureInt5Parser a = + Parser + (\(# _, b, c #) s -> (# s, (# | (# a, b, c #) #) #)) + +bindInt5Parser :: + forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s b. + Parser e s a -> + (a -> Parser e s b) -> + Parser e s b +{-# INLINE bindInt5Parser #-} +bindInt5Parser (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) + +sequenceInt5Parser :: + forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) e s b. + Parser e s a -> + Parser e s b -> + Parser e s b +{-# INLINE sequenceInt5Parser #-} +sequenceInt5Parser (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) + +sequenceIntPairParser :: + forall (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) e s b. + Parser e s a -> + Parser e s b -> + Parser e s b +{-# INLINE sequenceIntPairParser #-} +sequenceIntPairParser (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) + +bindInt2to5Parser :: + forall + (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) + (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) + e + s. + Parser e s a -> + (a -> Parser e s b) -> + Parser e s b +{-# INLINE bindInt2to5Parser #-} +bindInt2to5Parser (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) + +sequenceInt2to5Parser :: + forall + (a :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])) + (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])) + e + s. + Parser e s a -> + Parser e s b -> + Parser e s b +{-# INLINE sequenceInt2to5Parser #-} +sequenceInt2to5Parser (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) instance Bind LiftedRep LiftedRep where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindParser (>>) = sequenceParser instance Bind 'WordRep LiftedRep where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindWordParser (>>) = sequenceWordParser instance Bind 'IntRep LiftedRep where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindIntParser (>>) = sequenceIntParser instance Bind ('TupleRep '[ 'IntRep, 'IntRep]) LiftedRep where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindIntPairParser (>>) = sequenceIntPairParser - -instance Bind ('TupleRep '[ 'IntRep, 'IntRep]) - ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) +instance + Bind + ('TupleRep '[ 'IntRep, 'IntRep]) + ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindInt2to5Parser (>>) = sequenceInt2to5Parser -instance Bind ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) - LiftedRep +instance + Bind + ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) + LiftedRep where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindInt5Parser (>>) = sequenceInt5Parser - -instance Bind 'IntRep - ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) +instance + Bind + 'IntRep + ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindFromIntToInt5 (>>) = sequenceIntToInt5 instance Bind LiftedRep ('TupleRep '[ 'IntRep, 'IntRep]) where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindFromLiftedToIntPair (>>) = sequenceLiftedToIntPair -instance Bind LiftedRep - ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) +instance + Bind + LiftedRep + ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindFromLiftedToInt5 (>>) = sequenceLiftedToInt5 instance Bind 'IntRep ('TupleRep '[ 'IntRep, 'IntRep]) where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindFromIntToIntPair (>>) = sequenceIntToIntPair instance Bind LiftedRep 'IntRep where - {-# inline (>>=) #-} - {-# inline (>>) #-} + {-# INLINE (>>=) #-} + {-# INLINE (>>) #-} (>>=) = bindFromLiftedToInt (>>) = sequenceLiftedToInt instance Pure LiftedRep where - {-# inline pure #-} + {-# INLINE pure #-} pure = pureParser instance Pure 'IntRep where - {-# inline pure #-} + {-# INLINE pure #-} pure = pureIntParser instance Pure ('TupleRep '[ 'IntRep, 'IntRep]) where - {-# inline pure #-} + {-# INLINE pure #-} pure = pureIntPairParser instance Pure ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep]) where - {-# inline pure #-} + {-# INLINE pure #-} pure = pureInt5Parser bindFromIntToIntPair :: - forall s e - (a :: TYPE 'IntRep) - (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep ])). - Parser s e a - -> (a -> Parser s e b) - -> Parser s e b -{-# inline bindFromIntToIntPair #-} -bindFromIntToIntPair (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) + forall + s + e + (a :: TYPE 'IntRep) + (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])). + Parser s e a -> + (a -> Parser s e b) -> + Parser s e b +{-# INLINE bindFromIntToIntPair #-} +bindFromIntToIntPair (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) sequenceIntToIntPair :: - forall s e - (a :: TYPE 'IntRep) - (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep ])). - Parser s e a - -> Parser s e b - -> Parser s e b -{-# inline sequenceIntToIntPair #-} -sequenceIntToIntPair (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) + forall + s + e + (a :: TYPE 'IntRep) + (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])). + Parser s e a -> + Parser s e b -> + Parser s e b +{-# INLINE sequenceIntToIntPair #-} +sequenceIntToIntPair (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) bindFromIntToInt5 :: - forall s e - (a :: TYPE 'IntRep) - (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep ])). - Parser s e a - -> (a -> Parser s e b) - -> Parser s e b -{-# inline bindFromIntToInt5 #-} -bindFromIntToInt5 (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) + forall + s + e + (a :: TYPE 'IntRep) + (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])). + Parser s e a -> + (a -> Parser s e b) -> + Parser s e b +{-# INLINE bindFromIntToInt5 #-} +bindFromIntToInt5 (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) sequenceIntToInt5 :: - forall s e - (a :: TYPE 'IntRep) - (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep ])). - Parser s e a - -> Parser s e b - -> Parser s e b -{-# inline sequenceIntToInt5 #-} -sequenceIntToInt5 (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) + forall + s + e + (a :: TYPE 'IntRep) + (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])). + Parser s e a -> + Parser s e b -> + Parser s e b +{-# INLINE sequenceIntToInt5 #-} +sequenceIntToInt5 (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) bindFromLiftedToIntPair :: - forall s e - (a :: TYPE LiftedRep) - (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep ])). - Parser s e a - -> (a -> Parser s e b) - -> Parser s e b -{-# inline bindFromLiftedToIntPair #-} -bindFromLiftedToIntPair (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) + forall + s + e + (a :: TYPE LiftedRep) + (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])). + Parser s e a -> + (a -> Parser s e b) -> + Parser s e b +{-# INLINE bindFromLiftedToIntPair #-} +bindFromLiftedToIntPair (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) sequenceLiftedToIntPair :: - forall s e - (a :: TYPE LiftedRep) - (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep ])). - Parser s e a - -> Parser s e b - -> Parser s e b -{-# inline sequenceLiftedToIntPair #-} -sequenceLiftedToIntPair (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) - + forall + s + e + (a :: TYPE LiftedRep) + (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep])). + Parser s e a -> + Parser s e b -> + Parser s e b +{-# INLINE sequenceLiftedToIntPair #-} +sequenceLiftedToIntPair (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) bindFromLiftedToInt5 :: - forall s e - (a :: TYPE LiftedRep) - (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])). - Parser s e a - -> (a -> Parser s e b) - -> Parser s e b -{-# inline bindFromLiftedToInt5 #-} -bindFromLiftedToInt5 (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) + forall + s + e + (a :: TYPE LiftedRep) + (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])). + Parser s e a -> + (a -> Parser s e b) -> + Parser s e b +{-# INLINE bindFromLiftedToInt5 #-} +bindFromLiftedToInt5 (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) sequenceLiftedToInt5 :: - forall s e - (a :: TYPE LiftedRep) - (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep ])). - Parser s e a - -> Parser s e b - -> Parser s e b -{-# inline sequenceLiftedToInt5 #-} -sequenceLiftedToInt5 (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) + forall + s + e + (a :: TYPE LiftedRep) + (b :: TYPE ('TupleRep '[ 'IntRep, 'IntRep, 'IntRep, 'IntRep, 'IntRep])). + Parser s e a -> + Parser s e b -> + Parser s e b +{-# INLINE sequenceLiftedToInt5 #-} +sequenceLiftedToInt5 (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) bindFromLiftedToInt :: - forall s e - (a :: TYPE LiftedRep) - (b :: TYPE 'IntRep). - Parser s e a - -> (a -> Parser s e b) - -> Parser s e b -{-# inline bindFromLiftedToInt #-} -bindFromLiftedToInt (Parser f) g = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# y, b, c #) #) -> - runParser (g y) (# arr, b, c #) s1 - ) + forall + s + e + (a :: TYPE LiftedRep) + (b :: TYPE 'IntRep). + Parser s e a -> + (a -> Parser s e b) -> + Parser s e b +{-# INLINE bindFromLiftedToInt #-} +bindFromLiftedToInt (Parser f) g = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# y, b, c #) #) -> + runParser (g y) (# arr, b, c #) s1 + ) sequenceLiftedToInt :: - forall s e - (a :: TYPE LiftedRep) - (b :: TYPE 'IntRep). - Parser s e a - -> Parser s e b - -> Parser s e b -{-# inline sequenceLiftedToInt #-} -sequenceLiftedToInt (Parser f) (Parser g) = Parser - (\x@(# arr, _, _ #) s0 -> case f x s0 of - (# s1, r0 #) -> case r0 of - (# e | #) -> (# s1, (# e | #) #) - (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 - ) + forall + s + e + (a :: TYPE LiftedRep) + (b :: TYPE 'IntRep). + Parser s e a -> + Parser s e b -> + Parser s e b +{-# INLINE sequenceLiftedToInt #-} +sequenceLiftedToInt (Parser f) (Parser g) = + Parser + ( \x@(# arr, _, _ #) s0 -> case f x s0 of + (# s1, r0 #) -> case r0 of + (# e | #) -> (# s1, (# e | #) #) + (# | (# _, b, c #) #) -> g (# arr, b, c #) s1 + ) diff --git a/src/Data/Bytes/Parser/Types.hs b/src/Data/Bytes/Parser/Types.hs index 6e5850b..319a873 100644 --- a/src/Data/Bytes/Parser/Types.hs +++ b/src/Data/Bytes/Parser/Types.hs @@ -1,41 +1,43 @@ -{-# language DeriveFunctor #-} -{-# language DeriveFoldable #-} -{-# language DerivingStrategies #-} +{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE DerivingStrategies #-} module Data.Bytes.Parser.Types ( Parser - , Result(..) - , Slice(..) + , Result (..) + , Slice (..) ) where -import Data.Bytes.Parser.Internal (Parser(..)) +import Data.Bytes.Parser.Internal (Parser (..)) -- | The result of running a parser. data Result e a - = Failure e - -- ^ An error message indicating what went wrong. - | Success {-# UNPACK #-} !(Slice a) - -- ^ The parsed value and the number of bytes + = -- | An error message indicating what went wrong. + Failure e + | -- | The parsed value and the number of bytes -- remaining in parsed slice. - deriving stock (Eq,Show,Foldable,Functor) + Success {-# UNPACK #-} !(Slice a) + deriving stock (Eq, Show, Foldable, Functor) --- | Slicing metadata (an offset and a length) accompanied --- by a value. This does not represent a slice into the --- value. This type is intended to be used as the result --- of an executed parser. In this context the slicing metadata --- describe a slice into to the array (or byte array) that --- from which the value was parsed. --- --- It is often useful to check the @length@ when a parser --- succeeds since a non-zero length indicates that there --- was additional unconsumed input. The @offset@ is only --- ever needed to construct a new slice (via @Bytes@ or --- @SmallVector@) from the remaining input. +{- | Slicing metadata (an offset and a length) accompanied +by a value. This does not represent a slice into the +value. This type is intended to be used as the result +of an executed parser. In this context the slicing metadata +describe a slice into to the array (or byte array) that +from which the value was parsed. + +It is often useful to check the @length@ when a parser +succeeds since a non-zero length indicates that there +was additional unconsumed input. The @offset@ is only +ever needed to construct a new slice (via @Bytes@ or +@SmallVector@) from the remaining input. +-} data Slice a = Slice { offset :: {-# UNPACK #-} !Int - -- ^ Offset into the array. + -- ^ Offset into the array. , length :: {-# UNPACK #-} !Int - -- ^ Length of the slice. + -- ^ Length of the slice. , value :: a - -- ^ The structured data that was successfully parsed. - } deriving stock (Eq,Show,Foldable,Functor) + -- ^ The structured data that was successfully parsed. + } + deriving stock (Eq, Show, Foldable, Functor) diff --git a/src/Data/Bytes/Parser/Unsafe.hs b/src/Data/Bytes/Parser/Unsafe.hs index f629380..c5aa098 100644 --- a/src/Data/Bytes/Parser/Unsafe.hs +++ b/src/Data/Bytes/Parser/Unsafe.hs @@ -1,28 +1,22 @@ -{-# language BangPatterns #-} -{-# language BinaryLiterals #-} -{-# language DataKinds #-} -{-# language DeriveFunctor #-} -{-# language DerivingStrategies #-} -{-# language DuplicateRecordFields #-} -{-# language GADTSyntax #-} -{-# language KindSignatures #-} -{-# language LambdaCase #-} -{-# language MagicHash #-} -{-# language MultiWayIf #-} -{-# language NamedFieldPuns #-} -{-# language PolyKinds #-} -{-# language RankNTypes #-} -{-# language ScopedTypeVariables #-} -{-# language StandaloneDeriving #-} -{-# language TypeApplications #-} -{-# language UnboxedSums #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE GADTSyntax #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE UnboxedTuples #-} --- | Everything in this module is unsafe and can lead to --- nondeterministic output or segfaults if used incorrectly. +{- | Everything in this module is unsafe and can lead to +nondeterministic output or segfaults if used incorrectly. +-} module Data.Bytes.Parser.Unsafe ( -- * Types - Parser(..) + Parser (..) + -- * Functions , cursor , cursor# @@ -34,41 +28,43 @@ module Data.Bytes.Parser.Unsafe import Prelude hiding (length) -import Data.Bytes.Parser.Internal (Parser(..),uneffectful,uneffectfulInt#) -import Data.Bytes.Parser.Internal (Result(..)) -import Data.Bytes.Types (Bytes(..)) +import Data.Bytes.Parser.Internal (Parser (..), Result (..), uneffectful, uneffectfulInt#) +import Data.Bytes.Types (Bytes (..)) import Data.Primitive (ByteArray) -import GHC.Exts (Int#,Int(I#)) - +import GHC.Exts (Int (I#), Int#) --- | Get the current offset into the chunk. Using this makes --- it possible to observe the internal difference between 'Bytes' --- that refer to equivalent slices. Be careful. +{- | Get the current offset into the chunk. Using this makes +it possible to observe the internal difference between 'Bytes' +that refer to equivalent slices. Be careful. +-} cursor :: Parser e s Int -cursor = uneffectful $ \Bytes{offset,length} -> +cursor = uneffectful $ \Bytes {offset, length} -> Success offset offset length -- | Variant of 'cursor' with unboxed result. cursor# :: Parser e s Int# -cursor# = uneffectfulInt# $ \Bytes{offset=I# off,length=I# len} -> (# | (# off, off, len #) #) +cursor# = uneffectfulInt# $ \Bytes {offset = I# off, length = I# len} -> (# | (# off, off, len #) #) --- | Return the byte array being parsed. This includes bytes --- that preceed the current offset and may include bytes that --- go beyond the length. This is somewhat dangerous, so only --- use this is you know what you're doing. +{- | Return the byte array being parsed. This includes bytes +that preceed the current offset and may include bytes that +go beyond the length. This is somewhat dangerous, so only +use this is you know what you're doing. +-} expose :: Parser e s ByteArray -expose = uneffectful $ \Bytes{length,offset,array} -> +expose = uneffectful $ \Bytes {length, offset, array} -> Success array offset length --- | Move the cursor back by @n@ bytes. Precondition: you --- must have previously consumed at least @n@ bytes. +{- | Move the cursor back by @n@ bytes. Precondition: you +must have previously consumed at least @n@ bytes. +-} unconsume :: Int -> Parser e s () -unconsume n = uneffectful $ \Bytes{length,offset} -> +unconsume n = uneffectful $ \Bytes {length, offset} -> Success () (offset - n) (length + n) --- | Set the position to the given index. Precondition: the index --- must be valid. It should be the result of an earlier call to --- 'cursor'. +{- | Set the position to the given index. Precondition: the index +must be valid. It should be the result of an earlier call to +'cursor'. +-} jump :: Int -> Parser e s () -jump ix = uneffectful $ \(Bytes{length,offset}) -> +jump ix = uneffectful $ \(Bytes {length, offset}) -> Success () ix (length + (offset - ix)) diff --git a/src/Data/Bytes/Parser/Utf8.hs b/src/Data/Bytes/Parser/Utf8.hs index 9bfbb72..d182636 100644 --- a/src/Data/Bytes/Parser/Utf8.hs +++ b/src/Data/Bytes/Parser/Utf8.hs @@ -1,19 +1,14 @@ {-# language BangPatterns #-} {-# language BinaryLiterals #-} {-# language DataKinds #-} -{-# language DeriveFunctor #-} {-# language DerivingStrategies #-} {-# language GADTSyntax #-} -{-# language KindSignatures #-} -{-# language LambdaCase #-} {-# language MagicHash #-} {-# language MultiWayIf #-} {-# language PolyKinds #-} {-# language RankNTypes #-} {-# language ScopedTypeVariables #-} -{-# language StandaloneDeriving #-} {-# language TypeApplications #-} -{-# language UnboxedSums #-} {-# language UnboxedTuples #-} {-# language CPP #-} diff --git a/test/Main.hs b/test/Main.hs index b8f65d2..70fd90c 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,31 +1,32 @@ -{-# language BangPatterns #-} -{-# language DataKinds #-} -{-# language MagicHash #-} -{-# language MultiWayIf #-} -{-# language NumDecimals #-} -{-# language OverloadedStrings #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} - +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} {-# OPTIONS_GHC -fno-warn-orphans #-} +#if MIN_VERSION_base(4,18,0) +#else import Control.Applicative (liftA2) +#endif import Control.Monad (replicateM) import Control.Monad.ST (runST) -import Data.Bytes.Parser (Slice(Slice)) -import Data.Bytes.Types (Bytes(Bytes)) +import Data.Bytes.Parser (Slice (Slice)) +import Data.Bytes.Types (Bytes (Bytes)) import Data.Char (ord) import Data.Coerce (coerce) -import Data.Int (Int16,Int32) -import Data.Primitive (ByteArray(..),PrimArray(..)) +import Data.Int (Int16, Int32) +import Data.Primitive (ByteArray (..), PrimArray (..)) import Data.Text.Short (ShortText) -import Data.WideWord (Word128(Word128)) -import Data.Word (Word8,Word64,Word16,Word32) +import Data.WideWord (Word128 (Word128)) +import Data.Word (Word16, Word32, Word64, Word8) import Numeric.Natural (Natural) -import System.ByteOrder (Fixed(..),ByteOrder(BigEndian,LittleEndian)) -import Test.Tasty (defaultMain,testGroup,TestTree) -import Test.Tasty.HUnit ((@=?),testCase) -import Test.Tasty.QuickCheck ((===),testProperty) +import System.ByteOrder (ByteOrder (BigEndian, LittleEndian), Fixed (..)) +import Test.Tasty (TestTree, defaultMain, testGroup) +import Test.Tasty.HUnit (testCase, (@=?)) +import Test.Tasty.QuickCheck (testProperty, (===)) import qualified Data.Bits as Bits import qualified Data.Bytes as Bytes @@ -45,404 +46,413 @@ main :: IO () main = defaultMain tests tests :: TestTree -tests = testGroup "Parser" - [ testProperty "decStandardInt" $ \i -> - withSz (show i) $ \str len -> - P.parseBytes (Latin.decStandardInt ()) str - === - P.Success (Slice len 0 i) - , testProperty "big-endian-word16-array" $ \(xs :: [Word16]) -> - let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word16]) - res = Exts.fromList xs :: PrimArray Word16 - sz = length xs * 2 - bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz - in - P.Success (Slice (sz + 1) 0 res) - === - P.parseBytes (BigEndian.word16Array () (length xs)) bs - , testProperty "big-endian-word32-array" $ \(xs :: [Word32]) -> - let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word32]) - res = Exts.fromList xs :: PrimArray Word32 - sz = length xs * 4 - bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz - in - P.Success (Slice (sz + 1) 0 res) - === - P.parseBytes (BigEndian.word32Array () (length xs)) bs - , testProperty "little-endian-word32-array" $ \(xs :: [Word32]) -> - let src = Exts.fromList (coerce xs :: [Fixed 'LittleEndian Word32]) - res = Exts.fromList xs :: PrimArray Word32 - sz = length xs * 4 - bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz - in - P.Success (Slice (sz + 1) 0 res) - === - P.parseBytes (LittleEndian.word32Array () (length xs)) bs - , testProperty "big-endian-word64-array" $ \(xs :: [Word64]) -> - let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word64]) - res = Exts.fromList xs :: PrimArray Word64 - sz = length xs * 8 - bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz - in - P.Success (Slice (sz + 1) 0 res) - === - P.parseBytes (BigEndian.word64Array () (length xs)) bs - , testProperty "little-endian-word64-array" $ \(xs :: [Word64]) -> - let src = Exts.fromList (coerce xs :: [Fixed 'LittleEndian Word64]) - res = Exts.fromList xs :: PrimArray Word64 - sz = length xs * 8 - bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz - in - P.Success (Slice (sz + 1) 0 res) - === - P.parseBytes (LittleEndian.word64Array () (length xs)) bs - , testProperty "little-endian-word128-array" $ \(xs :: [Word128]) -> - let src = Exts.fromList xs - sz = length xs * 16 - bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz - in - P.parseBytes (replicateM (length xs) (LittleEndian.word128 ())) bs - === - P.parseBytes (fmap Exts.toList (LittleEndian.word128Array () (length xs))) bs - , testProperty "big-endian-word128-array" $ \(xs :: [Word128]) -> - let src = Exts.fromList xs - sz = length xs * 16 - bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz - in - P.parseBytes (replicateM (length xs) (BigEndian.word128 ())) bs - === - P.parseBytes (fmap Exts.toList (BigEndian.word128Array () (length xs))) bs - , testProperty "cstring" $ \(xs :: [Word8]) -> - let ys = Exts.fromList xs - bs = Bytes.singleton 0x31 <> ys - in - P.parseBytes (P.cstring () (Exts.Ptr "1"# ) *> P.bytes () ys *> pure 42) bs - === - (P.Success (Slice (Bytes.length ys + 1) 0 42) :: P.Result () Integer) - , testCase "big-endian-word256" $ - P.parseBytesMaybe (BigEndian.word256Array () 1) (Exts.fromList [ - 0x12, 0x34, 0x56, 0x78, 0x90, - 0x12, 0x34, 0x56, 0x78, 0x90, - 0x12, 0x34, 0x56, 0x78, 0x90, - 0x12, - 0x12, 0x34, 0x56, 0x78, 0x90, - 0x12, 0x34, 0x56, 0x78, 0x90, - 0x12, 0x34, 0x56, 0x78, 0x90, - 0x12 - ]) - @=? - Just (Exts.fromList [0x1234567890123456789012345678901212345678901234567890123456789012]) - , testProperty "big-endian-word64" bigEndianWord64 - , testProperty "big-endian-word32" bigEndianWord32 - , testProperty "little-endian-word32" littleEndianWord32 - , testCase "delimit" $ - P.Success (Slice 13 0 (167,14625)) - @=? - P.parseBytes - (do len <- Latin.decUnsignedInt () - Latin.char () ',' - r <- P.delimit () () len $ (,) - <$> Latin.decUnsignedInt () - <* Latin.char () '*' - <*> Latin.decUnsignedInt () - Latin.char () '0' - pure r - ) (bytes "9,167*146250") - , testGroup "decUnsignedInt" - [ testCase "A" $ - P.Failure () - @=? - P.parseBytes (Latin.decUnsignedInt ()) - (bytes "742493495120739103935542") - , testCase "B" $ - P.Success (Slice 8 3 4654667) - @=? - P.parseBytes (Latin.decUnsignedInt ()) - (bytes "4654667,55") - , testCase "C" $ - P.Failure () - @=? - P.parseBytes (Latin.decUnsignedInt ()) - (bytes ('1' : show (maxBound :: Int))) - , testCase "D" $ - P.Failure () - @=? - P.parseBytes (Latin.decUnsignedInt ()) - (bytes "2481030337885070917891") - , testCase "E" $ - P.Failure () - @=? - P.parseBytes (Latin.decUnsignedInt ()) - (bytes (show (fromIntegral @Int @Word maxBound + 1))) - , testCase "F" $ withSz (show (maxBound :: Int)) $ \str len -> - P.Success (Slice len 0 maxBound) - @=? - P.parseBytes (Latin.decUnsignedInt ()) str - , testProperty "property" $ \(QC.NonNegative i) -> - withSz (show i) $ \str len -> - P.parseBytes (Latin.decUnsignedInt ()) str - === - P.Success (Slice len 0 i) - ] - , testGroup "hexNibbleLower" - [ testCase "A" $ - P.parseBytes (Latin.hexNibbleLower ()) (bytes "Ab") @=? P.Failure () - , testCase "B" $ - P.parseBytes (Latin.hexNibbleLower ()) (bytes "bA") @=? P.Success (Slice 2 1 0xb) - , testCase "C" $ - P.parseBytes (Latin.hexNibbleLower ()) (bytes "") @=? P.Failure () - ] - , testGroup "tryHexNibbleLower" - [ testCase "A" $ - P.Success @() (Slice 1 2 Nothing) - @=? - P.parseBytes Latin.tryHexNibbleLower (bytes "Ab") - , testCase "B" $ - P.Success @() (Slice 2 1 (Just 0xb)) - @=? - P.parseBytes Latin.tryHexNibbleLower (bytes "bA") - , testCase "C" $ - P.Success @() (Slice 1 0 Nothing) - @=? - P.parseBytes Latin.tryHexNibbleLower (bytes "") - ] - , testGroup "decPositiveInteger" - [ testCase "A" $ - P.parseBytes (Latin.decUnsignedInteger ()) - (bytes "5469999463123462573426452736423546373235260") - @=? - P.Success - (Slice 44 0 5469999463123462573426452736423546373235260) - , testProperty "property" $ \(LargeInteger i) -> +tests = + testGroup + "Parser" + [ testProperty "decStandardInt" $ \i -> withSz (show i) $ \str len -> - i >= 0 - QC.==> - P.parseBytes (Latin.decUnsignedInteger ()) str - === - P.Success (Slice len 0 i) - ] - , testGroup "decTrailingInteger" - [ testProperty "property" $ \(LargeInteger i) -> - withSz (show i) $ \str sz -> - i >= 0 - QC.==> - P.parseBytes (Latin.decTrailingInteger 2) str - === - (P.Success (Slice sz 0 (read ('2' : show i) :: Integer)) :: P.Result () Integer) - ] - , testGroup "decSignedInteger" - [ testCase "A" $ - P.parseBytes (Latin.decSignedInteger ()) - (bytes "-54699994631234625734264527364235463732352601") - @=? - P.Success - ( Slice 46 0 - (-54699994631234625734264527364235463732352601) + P.parseBytes (Latin.decStandardInt ()) str + === P.Success (Slice len 0 i) + , testProperty "big-endian-word16-array" $ \(xs :: [Word16]) -> + let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word16]) + res = Exts.fromList xs :: PrimArray Word16 + sz = length xs * 2 + bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz + in P.Success (Slice (sz + 1) 0 res) + === P.parseBytes (BigEndian.word16Array () (length xs)) bs + , testProperty "big-endian-word32-array" $ \(xs :: [Word32]) -> + let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word32]) + res = Exts.fromList xs :: PrimArray Word32 + sz = length xs * 4 + bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz + in P.Success (Slice (sz + 1) 0 res) + === P.parseBytes (BigEndian.word32Array () (length xs)) bs + , testProperty "little-endian-word32-array" $ \(xs :: [Word32]) -> + let src = Exts.fromList (coerce xs :: [Fixed 'LittleEndian Word32]) + res = Exts.fromList xs :: PrimArray Word32 + sz = length xs * 4 + bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz + in P.Success (Slice (sz + 1) 0 res) + === P.parseBytes (LittleEndian.word32Array () (length xs)) bs + , testProperty "big-endian-word64-array" $ \(xs :: [Word64]) -> + let src = Exts.fromList (coerce xs :: [Fixed 'BigEndian Word64]) + res = Exts.fromList xs :: PrimArray Word64 + sz = length xs * 8 + bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz + in P.Success (Slice (sz + 1) 0 res) + === P.parseBytes (BigEndian.word64Array () (length xs)) bs + , testProperty "little-endian-word64-array" $ \(xs :: [Word64]) -> + let src = Exts.fromList (coerce xs :: [Fixed 'LittleEndian Word64]) + res = Exts.fromList xs :: PrimArray Word64 + sz = length xs * 8 + bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz + in P.Success (Slice (sz + 1) 0 res) + === P.parseBytes (LittleEndian.word64Array () (length xs)) bs + , testProperty "little-endian-word128-array" $ \(xs :: [Word128]) -> + let src = Exts.fromList xs + sz = length xs * 16 + bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz + in P.parseBytes (replicateM (length xs) (LittleEndian.word128 ())) bs + === P.parseBytes (fmap Exts.toList (LittleEndian.word128Array () (length xs))) bs + , testProperty "big-endian-word128-array" $ \(xs :: [Word128]) -> + let src = Exts.fromList xs + sz = length xs * 16 + bs = Bytes (Exts.fromList [0x42 :: Word8] <> untype src) 1 sz + in P.parseBytes (replicateM (length xs) (BigEndian.word128 ())) bs + === P.parseBytes (fmap Exts.toList (BigEndian.word128Array () (length xs))) bs + , testProperty "cstring" $ \(xs :: [Word8]) -> + let ys = Exts.fromList xs + bs = Bytes.singleton 0x31 <> ys + in P.parseBytes (P.cstring () (Exts.Ptr "1"#) *> P.bytes () ys *> pure 42) bs + === (P.Success (Slice (Bytes.length ys + 1) 0 42) :: P.Result () Integer) + , testCase "big-endian-word256" $ + P.parseBytesMaybe + (BigEndian.word256Array () 1) + ( Exts.fromList + [ 0x12 + , 0x34 + , 0x56 + , 0x78 + , 0x90 + , 0x12 + , 0x34 + , 0x56 + , 0x78 + , 0x90 + , 0x12 + , 0x34 + , 0x56 + , 0x78 + , 0x90 + , 0x12 + , 0x12 + , 0x34 + , 0x56 + , 0x78 + , 0x90 + , 0x12 + , 0x34 + , 0x56 + , 0x78 + , 0x90 + , 0x12 + , 0x34 + , 0x56 + , 0x78 + , 0x90 + , 0x12 + ] ) - , testCase "B" $ - P.Success (Slice 25 0 (3,(-206173954435705292503))) - @=? - P.parseBytes - ( pure (,) - <*> Latin.decSignedInteger () - <* Latin.char () 'e' - <*> Latin.decSignedInteger () - ) (bytes "3e-206173954435705292503") - , testProperty "property" $ \(LargeInteger i) -> - withSz (show i) $ \str len -> - P.parseBytes (Latin.decSignedInteger ()) str - === - P.Success (Slice len 0 i) - ] - , testGroup "decSignedInt" - [ testProperty "A" $ \i -> withSz (show i) $ \str len -> - P.parseBytes (Latin.decSignedInt ()) str - === - P.Success (Slice len 0 i) - , testProperty "B" $ \i -> - let s = (if i >= 0 then "+" else "") ++ show i in - withSz s $ \str len -> - P.parseBytes (Latin.decSignedInt ()) str - === - P.Success (Slice len 0 i) - , testCase "C" $ - P.Failure () - @=? - P.parseBytes (Latin.decSignedInt ()) - (bytes ('1' : show (maxBound :: Int))) - , testCase "D" $ - P.Failure () - @=? - P.parseBytes (Latin.decSignedInt ()) - (bytes ('-' : '3' : show (maxBound :: Int))) - , testCase "E" $ - P.Failure () - @=? - P.parseBytes (Latin.decSignedInt ()) - (bytes "2481030337885070917891") - , testCase "F" $ - P.Failure () - @=? - P.parseBytes (Latin.decSignedInt ()) - (bytes "-4305030950553840988981") - , testCase "G" $ withSz (show (minBound :: Int)) $ \str len -> - P.Success (Slice len 0 minBound) - @=? - P.parseBytes (Latin.decSignedInt ()) str - , testCase "H" $ withSz (show (maxBound :: Int)) $ \str len -> - P.Success (Slice len 0 maxBound) - @=? - P.parseBytes (Latin.decSignedInt ()) str - , testCase "I" $ - P.Failure () - @=? - P.parseBytes (Latin.decSignedInt ()) - (bytes (show (fromIntegral @Int @Word maxBound + 1))) - , testCase "J" $ - -- This is one number lower than the minimum bound for - -- a signed 64-bit number, but this test will pass on - -- 32-bit architectures as well. - P.Failure () - @=? - P.parseBytes (Latin.decSignedInt ()) - (bytes "-9223372036854775809") - ] - , testGroup "decWord64" - [ testCase "A" $ - P.Failure () - @=? - P.parseBytes (Latin.decWord64 ()) - (bytes "2481030337885070917891") - ] - , testCase "decWord-composition" $ - P.Success (Slice 6 0 (42,8)) - @=? - P.parseBytes - ( pure (,) - <*> Ascii.decWord () - <* Ascii.char () '.' - <*> Ascii.decWord () - <* Ascii.char () '.' - ) (bytes "42.8.") - , testCase "decWord-replicate" $ - P.Success (Slice 7 0 (Exts.fromList [42,93] :: PrimArray Word)) - @=? - P.parseBytes - (P.replicate 2 (Ascii.decWord () <* Ascii.char () '.')) - (bytes "42.93.") - , testCase "ascii-takeShortWhile" $ - P.Success (Slice 11 0 (Exts.fromList ["the","world"] :: PM.Array ShortText)) - @=? - P.parseBytes - (P.replicate 2 (Ascii.takeShortWhile (/=',') <* Ascii.char () ',')) - (bytes "the,world,") - , testGroup "hexFixedWord8" - [ testCase "A" $ - P.parseBytes (Latin.hexFixedWord8 ()) (bytes "A") @=? P.Failure () - , testCase "B" $ - P.parseBytes (Latin.hexFixedWord8 ()) (bytes "0A") @=? P.Success (Slice 3 0 0x0A) - , testCase "C" $ - P.parseBytes (Latin.hexFixedWord8 ()) (bytes "") @=? P.Failure () - , testCase "D" $ - P.parseBytes (Latin.hexFixedWord8 ()) (bytes "A!") @=? P.Failure () - ] - , testGroup "hexFixedWord16" - [ testCase "A" $ - P.parseBytes (Latin.hexFixedWord16 ()) (bytes "A") @=? P.Failure () - , testCase "B" $ - P.parseBytes (Latin.hexFixedWord16 ()) (bytes "0A0A") @=? P.Success (Slice 5 0 0x0A0A) - , testCase "C" $ - P.parseBytes (Latin.hexFixedWord16 ()) (bytes "") @=? P.Failure () - , testCase "D" $ - P.parseBytes (Latin.hexFixedWord16 ()) (bytes "A!A!") @=? P.Failure () - ] - , testGroup "hexFixedWord32" - [ testCase "A" $ - P.parseBytes (Latin.hexFixedWord32 ()) (bytes "A") @=? P.Failure () - , testCase "B" $ - P.parseBytes (Latin.hexFixedWord32 ()) (bytes "0A0A0A0A") @=? P.Success (Slice 9 0 0x0A0A0A0A) - , testCase "C" $ - P.parseBytes (Latin.hexFixedWord32 ()) (bytes "") @=? P.Failure () - , testCase "D" $ - P.parseBytes (Latin.hexFixedWord32 ()) (bytes "A!A0A0A0") @=? P.Failure () - ] - , testGroup "hexFixedWord64" - [ testCase "A" $ - P.parseBytes (Latin.hexFixedWord64 ()) (bytes "ABCD01235678BCDE") - @=? P.Success - (Slice 17 0 0xABCD01235678BCDE) - ] - , testGroup "base128-w32" - [ testCase "A" $ - P.Success (Slice 2 0 0x7E) - @=? - P.parseBytes (Base128.word32 ()) (bytes "\x7E") - , testCase "B" $ - P.Success (Slice 5 0 0x200000) - @=? - P.parseBytes (Base128.word32 ()) (bytes "\x81\x80\x80\x00") - , testCase "C" $ - P.Success (Slice 4 0 1656614) - @=? - P.parseBytes (Base128.word32 ()) (bytes "\xE5\x8E\x26") - -- , testProperty "iso" $ \w -> -- TODO - -- P.parseBytesMaybe (Base.word32 ()) (encodeBase128 (fromIntegral w)) - -- === - -- Just w - ] - , testGroup "leb128-w32" - [ testCase "A" $ - P.Success (Slice 2 0 0x7E) - @=? - P.parseBytes (Leb128.word32 ()) (bytes "\x7E") - , testCase "B" $ - P.Success (Slice 5 0 0x200000) - @=? - P.parseBytes (Leb128.word32 ()) (bytes "\x80\x80\x80\x01") - , testCase "C" $ - P.Success (Slice 4 0 624485) - @=? - P.parseBytes (Leb128.word32 ()) (bytes "\xE5\x8E\x26") - , testProperty "iso" $ \w -> - P.parseBytesMaybe (Leb128.word32 ()) (encodeLeb128 (fromIntegral w)) - === - Just w - ] - , testGroup "leb128-w16" - [ testCase "A" $ - P.Failure () - @=? - P.parseBytes (Leb128.word16 ()) (bytes "\x80\x80\x04") - , testCase "B" $ - P.Success (Slice 4 0 0xFFFF) - @=? - P.parseBytes (Leb128.word16 ()) (bytes "\xFF\xFF\x03") - , testProperty "iso" $ \w -> - P.parseBytesMaybe (Leb128.word16 ()) (encodeLeb128 (fromIntegral w)) - === - Just w - ] - , testGroup "leb128-i16" - [ testProperty "iso" $ \(w :: Int16) -> - P.parseBytesMaybe (Leb128.int16 ()) - (encodeLeb128 (fromIntegral @Word16 @Natural (zigzag16 w))) - === - Just w - ] - , testGroup "leb128-i32" - [ testProperty "iso" $ \(w :: Int32) -> - P.parseBytesMaybe (Leb128.int32 ()) - (encodeLeb128 (fromIntegral @Word32 @Natural (zigzag32 w))) - === - Just w - ] - , testGroup "satisfy" - [ testCase "A" $ - P.Success (Slice 2 0 0x20) - @=? - P.parseBytes (P.satisfy () (== 0x20)) (bytes "\x20") + @=? Just (Exts.fromList [0x1234567890123456789012345678901212345678901234567890123456789012]) + , testProperty "big-endian-word64" bigEndianWord64 + , testProperty "big-endian-word32" bigEndianWord32 + , testProperty "little-endian-word32" littleEndianWord32 + , testCase "delimit" $ + P.Success (Slice 13 0 (167, 14625)) + @=? P.parseBytes + ( do + len <- Latin.decUnsignedInt () + Latin.char () ',' + r <- + P.delimit () () len $ + (,) + <$> Latin.decUnsignedInt () + <* Latin.char () '*' + <*> Latin.decUnsignedInt () + Latin.char () '0' + pure r + ) + (bytes "9,167*146250") + , testGroup + "decUnsignedInt" + [ testCase "A" $ + P.Failure () + @=? P.parseBytes + (Latin.decUnsignedInt ()) + (bytes "742493495120739103935542") + , testCase "B" $ + P.Success (Slice 8 3 4654667) + @=? P.parseBytes + (Latin.decUnsignedInt ()) + (bytes "4654667,55") + , testCase "C" $ + P.Failure () + @=? P.parseBytes + (Latin.decUnsignedInt ()) + (bytes ('1' : show (maxBound :: Int))) + , testCase "D" $ + P.Failure () + @=? P.parseBytes + (Latin.decUnsignedInt ()) + (bytes "2481030337885070917891") + , testCase "E" $ + P.Failure () + @=? P.parseBytes + (Latin.decUnsignedInt ()) + (bytes (show (fromIntegral @Int @Word maxBound + 1))) + , testCase "F" $ withSz (show (maxBound :: Int)) $ \str len -> + P.Success (Slice len 0 maxBound) + @=? P.parseBytes (Latin.decUnsignedInt ()) str + , testProperty "property" $ \(QC.NonNegative i) -> + withSz (show i) $ \str len -> + P.parseBytes (Latin.decUnsignedInt ()) str + === P.Success (Slice len 0 i) + ] + , testGroup + "hexNibbleLower" + [ testCase "A" $ + P.parseBytes (Latin.hexNibbleLower ()) (bytes "Ab") @=? P.Failure () + , testCase "B" $ + P.parseBytes (Latin.hexNibbleLower ()) (bytes "bA") @=? P.Success (Slice 2 1 0xb) + , testCase "C" $ + P.parseBytes (Latin.hexNibbleLower ()) (bytes "") @=? P.Failure () + ] + , testGroup + "tryHexNibbleLower" + [ testCase "A" $ + P.Success @() (Slice 1 2 Nothing) + @=? P.parseBytes Latin.tryHexNibbleLower (bytes "Ab") + , testCase "B" $ + P.Success @() (Slice 2 1 (Just 0xb)) + @=? P.parseBytes Latin.tryHexNibbleLower (bytes "bA") + , testCase "C" $ + P.Success @() (Slice 1 0 Nothing) + @=? P.parseBytes Latin.tryHexNibbleLower (bytes "") + ] + , testGroup + "decPositiveInteger" + [ testCase "A" $ + P.parseBytes + (Latin.decUnsignedInteger ()) + (bytes "5469999463123462573426452736423546373235260") + @=? P.Success + (Slice 44 0 5469999463123462573426452736423546373235260) + , testProperty "property" $ \(LargeInteger i) -> + withSz (show i) $ \str len -> + i + >= 0 + QC.==> P.parseBytes (Latin.decUnsignedInteger ()) str + === P.Success (Slice len 0 i) + ] + , testGroup + "decTrailingInteger" + [ testProperty "property" $ \(LargeInteger i) -> + withSz (show i) $ \str sz -> + i + >= 0 + QC.==> P.parseBytes (Latin.decTrailingInteger 2) str + === (P.Success (Slice sz 0 (read ('2' : show i) :: Integer)) :: P.Result () Integer) + ] + , testGroup + "decSignedInteger" + [ testCase "A" $ + P.parseBytes + (Latin.decSignedInteger ()) + (bytes "-54699994631234625734264527364235463732352601") + @=? P.Success + ( Slice + 46 + 0 + (-54699994631234625734264527364235463732352601) + ) + , testCase "B" $ + P.Success (Slice 25 0 (3, (-206173954435705292503))) + @=? P.parseBytes + ( pure (,) + <*> Latin.decSignedInteger () + <* Latin.char () 'e' + <*> Latin.decSignedInteger () + ) + (bytes "3e-206173954435705292503") + , testProperty "property" $ \(LargeInteger i) -> + withSz (show i) $ \str len -> + P.parseBytes (Latin.decSignedInteger ()) str + === P.Success (Slice len 0 i) + ] + , testGroup + "decSignedInt" + [ testProperty "A" $ \i -> withSz (show i) $ \str len -> + P.parseBytes (Latin.decSignedInt ()) str + === P.Success (Slice len 0 i) + , testProperty "B" $ \i -> + let s = (if i >= 0 then "+" else "") ++ show i + in withSz s $ \str len -> + P.parseBytes (Latin.decSignedInt ()) str + === P.Success (Slice len 0 i) + , testCase "C" $ + P.Failure () + @=? P.parseBytes + (Latin.decSignedInt ()) + (bytes ('1' : show (maxBound :: Int))) + , testCase "D" $ + P.Failure () + @=? P.parseBytes + (Latin.decSignedInt ()) + (bytes ('-' : '3' : show (maxBound :: Int))) + , testCase "E" $ + P.Failure () + @=? P.parseBytes + (Latin.decSignedInt ()) + (bytes "2481030337885070917891") + , testCase "F" $ + P.Failure () + @=? P.parseBytes + (Latin.decSignedInt ()) + (bytes "-4305030950553840988981") + , testCase "G" $ withSz (show (minBound :: Int)) $ \str len -> + P.Success (Slice len 0 minBound) + @=? P.parseBytes (Latin.decSignedInt ()) str + , testCase "H" $ withSz (show (maxBound :: Int)) $ \str len -> + P.Success (Slice len 0 maxBound) + @=? P.parseBytes (Latin.decSignedInt ()) str + , testCase "I" $ + P.Failure () + @=? P.parseBytes + (Latin.decSignedInt ()) + (bytes (show (fromIntegral @Int @Word maxBound + 1))) + , testCase "J" $ + -- This is one number lower than the minimum bound for + -- a signed 64-bit number, but this test will pass on + -- 32-bit architectures as well. + P.Failure () + @=? P.parseBytes + (Latin.decSignedInt ()) + (bytes "-9223372036854775809") + ] + , testGroup + "decWord64" + [ testCase "A" $ + P.Failure () + @=? P.parseBytes + (Latin.decWord64 ()) + (bytes "2481030337885070917891") + ] + , testCase "decWord-composition" $ + P.Success (Slice 6 0 (42, 8)) + @=? P.parseBytes + ( pure (,) + <*> Ascii.decWord () + <* Ascii.char () '.' + <*> Ascii.decWord () + <* Ascii.char () '.' + ) + (bytes "42.8.") + , testCase "decWord-replicate" $ + P.Success (Slice 7 0 (Exts.fromList [42, 93] :: PrimArray Word)) + @=? P.parseBytes + (P.replicate 2 (Ascii.decWord () <* Ascii.char () '.')) + (bytes "42.93.") + , testCase "ascii-takeShortWhile" $ + P.Success (Slice 11 0 (Exts.fromList ["the", "world"] :: PM.Array ShortText)) + @=? P.parseBytes + (P.replicate 2 (Ascii.takeShortWhile (/= ',') <* Ascii.char () ',')) + (bytes "the,world,") + , testGroup + "hexFixedWord8" + [ testCase "A" $ + P.parseBytes (Latin.hexFixedWord8 ()) (bytes "A") @=? P.Failure () + , testCase "B" $ + P.parseBytes (Latin.hexFixedWord8 ()) (bytes "0A") @=? P.Success (Slice 3 0 0x0A) + , testCase "C" $ + P.parseBytes (Latin.hexFixedWord8 ()) (bytes "") @=? P.Failure () + , testCase "D" $ + P.parseBytes (Latin.hexFixedWord8 ()) (bytes "A!") @=? P.Failure () + ] + , testGroup + "hexFixedWord16" + [ testCase "A" $ + P.parseBytes (Latin.hexFixedWord16 ()) (bytes "A") @=? P.Failure () + , testCase "B" $ + P.parseBytes (Latin.hexFixedWord16 ()) (bytes "0A0A") @=? P.Success (Slice 5 0 0x0A0A) + , testCase "C" $ + P.parseBytes (Latin.hexFixedWord16 ()) (bytes "") @=? P.Failure () + , testCase "D" $ + P.parseBytes (Latin.hexFixedWord16 ()) (bytes "A!A!") @=? P.Failure () + ] + , testGroup + "hexFixedWord32" + [ testCase "A" $ + P.parseBytes (Latin.hexFixedWord32 ()) (bytes "A") @=? P.Failure () + , testCase "B" $ + P.parseBytes (Latin.hexFixedWord32 ()) (bytes "0A0A0A0A") @=? P.Success (Slice 9 0 0x0A0A0A0A) + , testCase "C" $ + P.parseBytes (Latin.hexFixedWord32 ()) (bytes "") @=? P.Failure () + , testCase "D" $ + P.parseBytes (Latin.hexFixedWord32 ()) (bytes "A!A0A0A0") @=? P.Failure () + ] + , testGroup + "hexFixedWord64" + [ testCase "A" $ + P.parseBytes (Latin.hexFixedWord64 ()) (bytes "ABCD01235678BCDE") + @=? P.Success + (Slice 17 0 0xABCD01235678BCDE) + ] + , testGroup + "base128-w32" + [ testCase "A" $ + P.Success (Slice 2 0 0x7E) + @=? P.parseBytes (Base128.word32 ()) (bytes "\x7E") + , testCase "B" $ + P.Success (Slice 5 0 0x200000) + @=? P.parseBytes (Base128.word32 ()) (bytes "\x81\x80\x80\x00") + , testCase "C" $ + P.Success (Slice 4 0 1656614) + @=? P.parseBytes (Base128.word32 ()) (bytes "\xE5\x8E\x26") + -- , testProperty "iso" $ \w -> -- TODO + -- P.parseBytesMaybe (Base.word32 ()) (encodeBase128 (fromIntegral w)) + -- === + -- Just w + ] + , testGroup + "leb128-w32" + [ testCase "A" $ + P.Success (Slice 2 0 0x7E) + @=? P.parseBytes (Leb128.word32 ()) (bytes "\x7E") + , testCase "B" $ + P.Success (Slice 5 0 0x200000) + @=? P.parseBytes (Leb128.word32 ()) (bytes "\x80\x80\x80\x01") + , testCase "C" $ + P.Success (Slice 4 0 624485) + @=? P.parseBytes (Leb128.word32 ()) (bytes "\xE5\x8E\x26") + , testProperty "iso" $ \w -> + P.parseBytesMaybe (Leb128.word32 ()) (encodeLeb128 (fromIntegral w)) + === Just w + ] + , testGroup + "leb128-w16" + [ testCase "A" $ + P.Failure () + @=? P.parseBytes (Leb128.word16 ()) (bytes "\x80\x80\x04") + , testCase "B" $ + P.Success (Slice 4 0 0xFFFF) + @=? P.parseBytes (Leb128.word16 ()) (bytes "\xFF\xFF\x03") + , testProperty "iso" $ \w -> + P.parseBytesMaybe (Leb128.word16 ()) (encodeLeb128 (fromIntegral w)) + === Just w + ] + , testGroup + "leb128-i16" + [ testProperty "iso" $ \(w :: Int16) -> + P.parseBytesMaybe + (Leb128.int16 ()) + (encodeLeb128 (fromIntegral @Word16 @Natural (zigzag16 w))) + === Just w + ] + , testGroup + "leb128-i32" + [ testProperty "iso" $ \(w :: Int32) -> + P.parseBytesMaybe + (Leb128.int32 ()) + (encodeLeb128 (fromIntegral @Word32 @Natural (zigzag32 w))) + === Just w + ] + , testGroup + "satisfy" + [ testCase "A" $ + P.Success (Slice 2 0 0x20) + @=? P.parseBytes (P.satisfy () (== 0x20)) (bytes "\x20") + ] ] - ] bytes :: String -> Bytes bytes s = let b = pack ('x' : s) in Bytes b 1 (PM.sizeofByteArray b - 1) @@ -451,9 +461,15 @@ pack :: String -> ByteArray pack = Exts.fromList . map (fromIntegral @Int @Word8 . ord) bigEndianWord64 :: - Word8 -> Word8 -> Word8 -> Word8 - -> Word8 -> Word8 -> Word8 -> Word8 - -> QC.Property + Word8 -> + Word8 -> + Word8 -> + Word8 -> + Word8 -> + Word8 -> + Word8 -> + Word8 -> + QC.Property bigEndianWord64 a b c d e f g h = let arr = runST $ do m <- PM.newByteArray 11 @@ -469,22 +485,25 @@ bigEndianWord64 a b c d e f g h = PM.writeByteArray m 9 (h :: Word8) PM.writeByteArray m 10 (0xEE :: Word8) PM.unsafeFreezeByteArray m - expected = (0 :: Word64) - + fromIntegral a * 256 ^ (7 :: Integer) - + fromIntegral b * 256 ^ (6 :: Integer) - + fromIntegral c * 256 ^ (5 :: Integer) - + fromIntegral d * 256 ^ (4 :: Integer) - + fromIntegral e * 256 ^ (3 :: Integer) - + fromIntegral f * 256 ^ (2 :: Integer) - + fromIntegral g * 256 ^ (1 :: Integer) - + fromIntegral h * 256 ^ (0 :: Integer) + expected = + (0 :: Word64) + + fromIntegral a * 256 ^ (7 :: Integer) + + fromIntegral b * 256 ^ (6 :: Integer) + + fromIntegral c * 256 ^ (5 :: Integer) + + fromIntegral d * 256 ^ (4 :: Integer) + + fromIntegral e * 256 ^ (3 :: Integer) + + fromIntegral f * 256 ^ (2 :: Integer) + + fromIntegral g * 256 ^ (1 :: Integer) + + fromIntegral h * 256 ^ (0 :: Integer) in P.parseBytes (BigEndian.word64 ()) (Bytes arr 2 9) - === - P.Success (Slice 10 1 expected) + === P.Success (Slice 10 1 expected) bigEndianWord32 :: - Word8 -> Word8 -> Word8 -> Word8 - -> QC.Property + Word8 -> + Word8 -> + Word8 -> + Word8 -> + QC.Property bigEndianWord32 a b c d = let arr = runST $ do m <- PM.newByteArray 7 @@ -496,18 +515,21 @@ bigEndianWord32 a b c d = PM.writeByteArray m 5 (d :: Word8) PM.writeByteArray m 6 (0xEE :: Word8) PM.unsafeFreezeByteArray m - expected = (0 :: Word32) - + fromIntegral a * 256 ^ (3 :: Integer) - + fromIntegral b * 256 ^ (2 :: Integer) - + fromIntegral c * 256 ^ (1 :: Integer) - + fromIntegral d * 256 ^ (0 :: Integer) + expected = + (0 :: Word32) + + fromIntegral a * 256 ^ (3 :: Integer) + + fromIntegral b * 256 ^ (2 :: Integer) + + fromIntegral c * 256 ^ (1 :: Integer) + + fromIntegral d * 256 ^ (0 :: Integer) in P.parseBytes (BigEndian.word32 ()) (Bytes arr 2 5) - === - P.Success (Slice 6 1 expected) + === P.Success (Slice 6 1 expected) littleEndianWord32 :: - Word8 -> Word8 -> Word8 -> Word8 - -> QC.Property + Word8 -> + Word8 -> + Word8 -> + Word8 -> + QC.Property littleEndianWord32 a b c d = let arr = runST $ do m <- PM.newByteArray 7 @@ -519,33 +541,34 @@ littleEndianWord32 a b c d = PM.writeByteArray m 5 (d :: Word8) PM.writeByteArray m 6 (0xEE :: Word8) PM.unsafeFreezeByteArray m - expected = (0 :: Word32) - + fromIntegral a * 256 ^ (0 :: Integer) - + fromIntegral b * 256 ^ (1 :: Integer) - + fromIntegral c * 256 ^ (2 :: Integer) - + fromIntegral d * 256 ^ (3 :: Integer) + expected = + (0 :: Word32) + + fromIntegral a * 256 ^ (0 :: Integer) + + fromIntegral b * 256 ^ (1 :: Integer) + + fromIntegral c * 256 ^ (2 :: Integer) + + fromIntegral d * 256 ^ (3 :: Integer) in P.parseBytes (LittleEndian.word32 ()) (Bytes arr 2 5) - === - P.Success (Slice 6 1 expected) + === P.Success (Slice 6 1 expected) -- The Arbitrary instance for Integer that comes with -- QuickCheck only generates small numbers. newtype LargeInteger = LargeInteger Integer - deriving (Eq,Show) + deriving (Eq, Show) instance QC.Arbitrary Word128 where arbitrary = liftA2 Word128 QC.arbitrary QC.arbitrary instance QC.Arbitrary LargeInteger where arbitrary = do - n <- QC.choose (1, 27) - sign <- QC.arbitrary - r <- (if sign then negate else id) . foldr f 0 + n <- QC.choose (1, 27) + sign <- QC.arbitrary + r <- + (if sign then negate else id) . foldr f 0 <$> replicateM n QC.arbitrary - pure (LargeInteger r) - where - f :: Word8 -> Integer -> Integer - f w acc = (acc `Bits.shiftL` 8) + fromIntegral w + pure (LargeInteger r) + where + f :: Word8 -> Integer -> Integer + f w acc = (acc `Bits.shiftL` 8) + fromIntegral w -- We add an extra 1 since bytes gives us a slice that -- starts at that offset. @@ -556,15 +579,17 @@ untype :: PrimArray a -> ByteArray untype (PrimArray x) = ByteArray x encodeLeb128 :: Natural -> Bytes -encodeLeb128 x = Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x)) where +encodeLeb128 x = Bytes.unsafeDrop 1 (Exts.fromList (0xFF : go [] x)) + where go !xs !n = - let (q,r) = quotRem n 128 + let (q, r) = quotRem n 128 r' = fromIntegral @Natural @Word8 r - w = if q == 0 - then r' - else Bits.setBit r' 7 + w = + if q == 0 + then r' + else Bits.setBit r' 7 xs' = w : xs - in if q == 0 + in if q == 0 then List.reverse xs' else go xs' q