Skip to content

Commit

Permalink
Merge pull request #6594 from phadej/parser-benchmark
Browse files Browse the repository at this point in the history
Parser benchmark
  • Loading branch information
phadej authored Mar 20, 2020
2 parents dde0d9c + e3dff2a commit f8ad7bb
Show file tree
Hide file tree
Showing 43 changed files with 524 additions and 393 deletions.
11 changes: 11 additions & 0 deletions Cabal/Cabal-quickcheck/src/Test/QuickCheck/Instances/Cabal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import Distribution.Types.LibraryName
import Distribution.Types.PackageName
import Distribution.Types.SourceRepo
import Distribution.Types.UnqualComponentName
import Distribution.ModuleName
import Distribution.Types.VersionRange.Internal
import Distribution.Verbosity
import Distribution.Version
Expand Down Expand Up @@ -129,6 +130,16 @@ instance Arbitrary VersionIntervals where
instance Arbitrary Bound where
arbitrary = elements [ExclusiveBound, InclusiveBound]

-------------------------------------------------------------------------------
-- ModuleName
-------------------------------------------------------------------------------

instance Arbitrary ModuleName where
arbitrary = fromString . intercalate "." <$> shortListOf1 4 comp where
comp = (:) <$> elements upper <*> shortListOf1 10 (elements moduleChar)
upper = ['A'..'Z']
moduleChar = [ c | c <- ['\0' .. '\255'], isAlphaNum c || c `elem` "_'" ]

-------------------------------------------------------------------------------
-- Dependency
-------------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions Cabal/Distribution/Backpack.hs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ instance Pretty OpenUnitId where
--Right (DefiniteUnitId (DefUnitId {unDefUnitId = UnitId "foobar"}))
--
-- >>> eitherParsec "foo[Str=text-1.2.3:Data.Text.Text]" :: Either String OpenUnitId
-- Right (IndefFullUnitId (ComponentId "foo") (fromList [(ModuleName ["Str"],OpenModule (DefiniteUnitId (DefUnitId {unDefUnitId = UnitId "text-1.2.3"})) (ModuleName ["Data","Text","Text"]))]))
-- Right (IndefFullUnitId (ComponentId "foo") (fromList [(ModuleName "Str",OpenModule (DefiniteUnitId (DefUnitId {unDefUnitId = UnitId "text-1.2.3"})) (ModuleName "Data.Text.Text"))]))
--
instance Parsec OpenUnitId where
parsec = P.try parseOpenUnitId <|> fmap DefiniteUnitId parsec
Expand Down Expand Up @@ -180,7 +180,7 @@ instance Pretty OpenModule where
-- |
--
-- >>> eitherParsec "Includes2-0.1.0.0-inplace-mysql:Database.MySQL" :: Either String OpenModule
-- Right (OpenModule (DefiniteUnitId (DefUnitId {unDefUnitId = UnitId "Includes2-0.1.0.0-inplace-mysql"})) (ModuleName ["Database","MySQL"]))
-- Right (OpenModule (DefiniteUnitId (DefUnitId {unDefUnitId = UnitId "Includes2-0.1.0.0-inplace-mysql"})) (ModuleName "Database.MySQL"))
--
instance Parsec OpenModule where
parsec = parsecModuleVar <|> parsecOpenModule
Expand Down
8 changes: 6 additions & 2 deletions Cabal/Distribution/Compat/DList.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
module Distribution.Compat.DList (
DList,
runDList,
empty,
singleton,
fromList,
toList,
snoc,
) where

import Prelude ()
import Distribution.Compat.Prelude hiding (toList)
import Distribution.Compat.Prelude hiding (toList, empty)

-- | Difference list.
newtype DList a = DList ([a] -> [a])
Expand All @@ -31,6 +32,9 @@ runDList (DList run) = run []
singleton :: a -> DList a
singleton a = DList (a:)

empty :: DList a
empty = DList id

fromList :: [a] -> DList a
fromList as = DList (as ++)

Expand All @@ -41,7 +45,7 @@ snoc :: DList a -> a -> DList a
snoc xs x = xs <> singleton x

instance Monoid (DList a) where
mempty = DList id
mempty = empty
mappend = (<>)

instance Semigroup (DList a) where
Expand Down
4 changes: 4 additions & 0 deletions Cabal/Distribution/FieldGrammar/Described.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ module Distribution.FieldGrammar.Described (
-- * Character Sets
csChar,
csAlphaNum,
csUpper,
csNotSpace,
csNotSpaceOrComma,
) where
Expand Down Expand Up @@ -128,6 +129,9 @@ csChar = CS.singleton
csAlphaNum :: CS.CharSet
csAlphaNum = CS.alphanum

csUpper :: CS.CharSet
csUpper = CS.upper

csNotSpace :: CS.CharSet
csNotSpace = CS.difference CS.universe $ CS.singleton ' '

Expand Down
122 changes: 53 additions & 69 deletions Cabal/Distribution/ModuleName.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ScopedTypeVariables #-}
-----------------------------------------------------------------------------
-- |
-- Module : Distribution.ModuleName
Expand All @@ -13,7 +14,7 @@
-- Data type for Haskell module names.

module Distribution.ModuleName (
ModuleName (..), -- TODO: move Parsec instance here, don't export constructor
ModuleName,
fromString,
fromComponents,
components,
Expand All @@ -33,41 +34,65 @@ import Distribution.Utils.ShortText (ShortText, fromShortText, toShortTex
import System.FilePath (pathSeparator)

import qualified Distribution.Compat.CharParsing as P
import qualified Distribution.Compat.DList as DList
import qualified Text.PrettyPrint as Disp

-- | A valid Haskell module name.
--
newtype ModuleName = ModuleName ShortTextLst
newtype ModuleName = ModuleName ShortText
deriving (Eq, Generic, Ord, Read, Show, Typeable, Data)

unModuleName :: ModuleName -> String
unModuleName (ModuleName s) = fromShortText s

instance Binary ModuleName
instance Structured ModuleName

instance NFData ModuleName where
rnf (ModuleName ms) = rnf ms

instance Pretty ModuleName where
pretty (ModuleName ms) =
Disp.hcat (intersperse (Disp.char '.') (map Disp.text $ stlToStrings ms))
pretty = Disp.text . unModuleName

instance Parsec ModuleName where
parsec = fromComponents <$> toList <$> P.sepByNonEmpty component (P.char '.')
where
component = do
c <- P.satisfy isUpper
cs <- P.munch validModuleChar
return (c:cs)
parsec = parsecModuleName

parsecModuleName :: forall m. CabalParsing m => m ModuleName
parsecModuleName = state0 DList.empty where
upper :: m Char
!upper = P.satisfy isUpper

ch :: m Char
!ch = P.satisfy (\c -> validModuleChar c || c == '.')

alt :: m ModuleName -> m ModuleName -> m ModuleName
!alt = (<|>)

state0 :: DList.DList Char -> m ModuleName
state0 acc = do
c <- upper
state1 (DList.snoc acc c)

state1 :: DList.DList Char -> m ModuleName
state1 acc = state1' acc `alt` return (fromString (DList.toList acc))

state1' :: DList.DList Char -> m ModuleName
state1' acc = do
c <- ch
case c of
'.' -> state0 (DList.snoc acc c)
_ -> state1 (DList.snoc acc c)

instance Described ModuleName where
describe _ = RETodo
describe _ = REMunch1 (reChar '.') component where
component = RECharSet csUpper <> reMunchCS (csAlphaNum <> fromString "_'")

validModuleChar :: Char -> Bool
validModuleChar c = isAlphaNum c || c == '_' || c == '\''

validModuleComponent :: String -> Bool
validModuleComponent [] = False
validModuleComponent (c:cs) = isUpper c
&& all validModuleChar cs
validModuleComponent (c:cs) = isUpper c && all validModuleChar cs

-- | Construct a 'ModuleName' from a valid module name 'String'.
--
Expand All @@ -76,77 +101,36 @@ validModuleComponent (c:cs) = isUpper c
-- are parsing user input then use 'Distribution.Text.simpleParse' instead.
--
instance IsString ModuleName where
fromString string = fromComponents (split string)
where
split cs = case break (=='.') cs of
(chunk,[]) -> chunk : []
(chunk,_:rest) -> chunk : split rest
fromString = ModuleName . toShortText

-- | Construct a 'ModuleName' from valid module components, i.e. parts
-- separated by dots.
fromComponents :: [String] -> ModuleName
fromComponents components'
| null components' = error zeroComponents
| all validModuleComponent components' = ModuleName (stlFromStrings components')
| otherwise = error badName
where
zeroComponents = "ModuleName.fromComponents: zero components"
badName = "ModuleName.fromComponents: invalid components " ++ show components'
fromComponents comps = fromString (intercalate "." comps)
{-# DEPRECATED fromComponents "Exists for cabal-install only" #-}

-- | The module name @Main@.
--
main :: ModuleName
main = ModuleName (stlFromStrings ["Main"])
main = ModuleName (fromString "Main")

-- | The individual components of a hierarchical module name. For example
--
-- > components (fromString "A.B.C") = ["A", "B", "C"]
--
components :: ModuleName -> [String]
components (ModuleName ms) = stlToStrings ms
components mn = split (unModuleName mn)
where
split cs = case break (=='.') cs of
(chunk,[]) -> chunk : []
(chunk,_:rest) -> chunk : split rest

-- | Convert a module name to a file path, but without any file extension.
-- For example:
--
-- > toFilePath (fromString "A.B.C") = "A/B/C"
--
toFilePath :: ModuleName -> FilePath
toFilePath = intercalate [pathSeparator] . components

----------------------------------------------------------------------------
-- internal helper

-- | Strict/unpacked representation of @[ShortText]@
data ShortTextLst = STLNil
| STLCons !ShortText !ShortTextLst
deriving (Eq, Generic, Ord, Typeable, Data)

instance NFData ShortTextLst where
rnf = flip seq ()

instance Show ShortTextLst where
showsPrec p = showsPrec p . stlToList


instance Read ShortTextLst where
readsPrec p = map (first stlFromList) . readsPrec p

instance Binary ShortTextLst where
put = put . stlToList
get = stlFromList <$> get

instance Structured ShortTextLst

stlToList :: ShortTextLst -> [ShortText]
stlToList STLNil = []
stlToList (STLCons st next) = st : stlToList next

stlToStrings :: ShortTextLst -> [String]
stlToStrings = map fromShortText . stlToList

stlFromList :: [ShortText] -> ShortTextLst
stlFromList [] = STLNil
stlFromList (x:xs) = STLCons x (stlFromList xs)

stlFromStrings :: [String] -> ShortTextLst
stlFromStrings = stlFromList . map toShortText
toFilePath = map f . unModuleName where
f '.' = pathSeparator
f c = c
18 changes: 13 additions & 5 deletions Cabal/Distribution/PackageDescription/Parsec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ import Distribution.PackageDescription
import Distribution.PackageDescription.Configuration (freeVars)
import Distribution.PackageDescription.FieldGrammar
import Distribution.PackageDescription.Quirks (patchQuirks)
import Distribution.Parsec (parsec, simpleParsec)
import Distribution.Parsec (parsec, simpleParsecBS)
import Distribution.Parsec.FieldLineStream (fieldLineStreamFromBS)
import Distribution.Parsec.Newtypes (CommaFSep, List, SpecVersion (..), Token)
import Distribution.Parsec.Position (Position (..), zeroPos)
import Distribution.Parsec.Warning (PWarnType (..))
import Distribution.Pretty (prettyShow)
import Distribution.Simple.Utils (fromUTF8BS)
import Distribution.Simple.Utils (fromUTF8BS, toUTF8BS)
import Distribution.Types.CondTree
import Distribution.Types.Dependency (Dependency)
import Distribution.Types.ForeignLib
Expand Down Expand Up @@ -109,12 +109,12 @@ parseGenericPackageDescription bs = do
"Unsupported cabal-version. See https://github.com/haskell/cabal/issues/4899."
_ -> pure ()

case readFields' bs' of
case readFields' bs'' of
Right (fs, lexWarnings) -> do
when patched $
parseWarning zeroPos PWTQuirkyCabalFile "Legacy cabal file"
-- UTF8 is validated in a prepass step, afterwards parsing is lenient.
parseGenericPackageDescription' ver lexWarnings (validateUTF8 bs') fs
parseGenericPackageDescription' ver lexWarnings invalidUtf8 fs
-- TODO: better marshalling of errors
Left perr -> parseFatalFailure pos (show perr) where
ppos = P.errorPos perr
Expand All @@ -123,6 +123,14 @@ parseGenericPackageDescription bs = do
(patched, bs') = patchQuirks bs
ver = scanSpecVersion bs'

invalidUtf8 = validateUTF8 bs'

-- if there are invalid utf8 characters, we make the bytestring valid.
bs'' = case invalidUtf8 of
Nothing -> bs'
Just _ -> toUTF8BS (fromUTF8BS bs')


-- | 'Maybe' variant of 'parseGenericPackageDescription'
parseGenericPackageDescriptionMaybe :: BS.ByteString -> Maybe GenericPackageDescription
parseGenericPackageDescriptionMaybe =
Expand Down Expand Up @@ -851,7 +859,7 @@ scanSpecVersion bs = do
--
-- This is currently more tolerant regarding leading 0 digits.
--
ver <- simpleParsec (BS8.unpack vers)
ver <- simpleParsecBS vers
guard $ case versionNumbers ver of
[_,_] -> True
[_,_,_] -> True
Expand Down
Loading

0 comments on commit f8ad7bb

Please sign in to comment.