diff --git a/IHP/Test/CssQuery.hs b/IHP/Test/CssQuery.hs new file mode 100644 index 000000000..6166d12aa --- /dev/null +++ b/IHP/Test/CssQuery.hs @@ -0,0 +1,97 @@ +module IHP.Test.CssQuery where + +import IHP.Prelude hiding (takeWhile) +import Data.Text as Text (Text) +import Data.Attoparsec.Text +import Control.Applicative +import Data.Char + +data SelectorGroup + = DirectChildren [Selector] + | DeepChildren [Selector] + deriving (Show, Eq) + +data Selector + = ById Text + | ByClass Text + | ByTagName Text + | ByAttrExists Text + | ByAttrEquals Text Text + | ByAttrContains Text Text + | ByAttrStarts Text Text + | ByAttrEnds Text Text + deriving (Show, Eq) + + +-- The official syntax specification for CSS2 can be found here: +-- http://www.w3.org/TR/CSS2/syndata.html +-- but that spec is tricky to fully support. Instead we do the minimal and we +-- can extend it as needed. + + +-- | Parses a query into an intermediate format which is easy to feed to HXT +-- +-- * The top-level lists represent the top level comma separated queries. +-- +-- * SelectorGroup is a group of qualifiers which are separated +-- with spaces or > like these three: /table.main.odd tr.even > td.big/ +-- +-- * A SelectorGroup as a list of Selector items, following the above example +-- the selectors in the group are: /table/, /.main/ and /.odd/ +parseQuery :: Text -> Either String [[SelectorGroup]] +parseQuery = parseOnly cssQuery + +-- Below this line is the Parsec parser for css queries. +cssQuery :: Parser [[SelectorGroup]] +cssQuery = many (char ' ') >> sepBy rules (char ',' >> many (char ' ')) + +rules :: Parser [SelectorGroup] +rules = many $ directChildren <|> deepChildren + +directChildren :: Parser SelectorGroup +directChildren = + string "> " >> (many (char ' ')) >> DirectChildren <$> pOptionalTrailingSpace parseSelectors + +deepChildren :: Parser SelectorGroup +deepChildren = pOptionalTrailingSpace $ DeepChildren <$> parseSelectors + +parseSelectors :: Parser [Selector] +parseSelectors = many1 $ + parseId <|> parseClass <|> parseTag <|> parseAttr + +parseId :: Parser Selector +parseId = char '#' >> ById <$> pIdent + +parseClass :: Parser Selector +parseClass = char '.' >> ByClass <$> pIdent + +parseTag :: Parser Selector +parseTag = ByTagName <$> pIdent + +parseAttr :: Parser Selector +parseAttr = pSquare $ choice + [ ByAttrEquals <$> pIdent <*> (string "=" *> pAttrValue) + , ByAttrContains <$> pIdent <*> (string "*=" *> pAttrValue) + , ByAttrStarts <$> pIdent <*> (string "^=" *> pAttrValue) + , ByAttrEnds <$> pIdent <*> (string "$=" *> pAttrValue) + , ByAttrExists <$> pIdent + ] + +-- | pIdent : Parse an identifier (not yet supporting escapes and unicode as +-- part of the identifier). Basically the regex: [-]?[_a-zA-Z][_a-zA-Z0-9]* +pIdent :: Parser Text +pIdent = do + leadingMinus <- string "-" <|> pure "" + nmstart <- singleton <$> satisfy (\c -> isAlpha c || c == '_') + nmchar <- takeWhile (\c -> isAlphaNum c || c == '_' || c == '-') + return $ mconcat [ leadingMinus, cs nmstart, nmchar ] + + +pAttrValue :: Parser Text +pAttrValue = takeWhile (/= ']') + +pSquare :: Parser a -> Parser a +pSquare p = char '[' *> p <* char ']' + +pOptionalTrailingSpace :: Parser a -> Parser a +pOptionalTrailingSpace p = p <* many (char ' ') \ No newline at end of file diff --git a/IHP/Test/TransversingCSS.hs b/IHP/Test/TransversingCSS.hs new file mode 100644 index 000000000..febb6e76d --- /dev/null +++ b/IHP/Test/TransversingCSS.hs @@ -0,0 +1,79 @@ +module IHP.Test.TransversingCSS where + +import IHP.Test.CssQuery +import qualified Data.Text as Text +import qualified Control.Applicative +import Text.XML +import Text.XML.Cursor +import qualified Data.ByteString.Lazy as LBS +import qualified Text.HTML.DOM as HD +import Text.Blaze.Html (toHtml) +import Text.Blaze.Html.Renderer.String (renderHtml) + +type Query = Text.Text +type HtmlLBS = LBS.ByteString + +-- | Perform a css 'Query' on 'Html'. Returns Either +-- +-- * Left: Query parse error. +-- +-- * Right: List of matching Html fragments. +findBySelector :: HtmlLBS -> Query -> Either String [String] +findBySelector html query = + map (renderHtml . toHtml . node) Control.Applicative.<$> findCursorsBySelector html query + +-- | Perform a css 'Query' on 'Html'. Returns Either +-- +-- * Left: Query parse error. +-- +-- * Right: List of matching Cursors +findCursorsBySelector :: HtmlLBS -> Query -> Either String [Cursor] +findCursorsBySelector html query = + runQuery (fromDocument $ HD.parseLBS html) + Control.Applicative.<$> parseQuery query + +-- | Perform a css 'Query' on 'Html'. Returns Either +-- +-- * Left: Query parse error. +-- +-- * Right: List of matching Cursors +findAttributeBySelector :: HtmlLBS -> Query -> Text.Text -> Either String [[Text.Text]] +findAttributeBySelector html query attr = + map (laxAttribute attr) Control.Applicative.<$> findCursorsBySelector html query + + +-- Run a compiled query on Html, returning a list of matching Html fragments. +runQuery :: Cursor -> [[SelectorGroup]] -> [Cursor] +runQuery html query = concatMap (runGroup html) query + +runGroup :: Cursor -> [SelectorGroup] -> [Cursor] +runGroup c [] = [c] +runGroup c (DirectChildren s:gs) = concatMap (flip runGroup gs) $ c $/ selectors s +runGroup c (DeepChildren s:gs) = concatMap (flip runGroup gs) $ c $// selectors s + +selectors :: [Selector] -> Cursor -> [Cursor] +selectors ss c + | all (selector c) ss = [c] + | otherwise = [] + +selector :: Cursor -> Selector -> Bool +selector c (ById x) = not $ null $ attributeIs "id" x c +selector c (ByClass x) = + case attribute "class" c of + t:_ -> x `elem` Text.words t + [] -> False +selector c (ByTagName t) = not $ null $ element (Name t Nothing Nothing) c +selector c (ByAttrExists t) = not $ null $ hasAttribute (Name t Nothing Nothing) c +selector c (ByAttrEquals t v) = not $ null $ attributeIs (Name t Nothing Nothing) v c +selector c (ByAttrContains n v) = + case attribute (Name n Nothing Nothing) c of + t:_ -> v `Text.isInfixOf` t + [] -> False +selector c (ByAttrStarts n v) = + case attribute (Name n Nothing Nothing) c of + t:_ -> v `Text.isPrefixOf` t + [] -> False +selector c (ByAttrEnds n v) = + case attribute (Name n Nothing Nothing) c of + t:_ -> v `Text.isSuffixOf` t + [] -> False \ No newline at end of file diff --git a/ihp.cabal b/ihp.cabal index 881fefce7..d933cccc2 100644 --- a/ihp.cabal +++ b/ihp.cabal @@ -94,6 +94,7 @@ common shared-properties , minio-hs , temporary , conduit-extra + , html-conduit , wai-cors , lens , random @@ -247,6 +248,8 @@ library , IHP.Telemetry , IHP.Test.Mocking , IHP.Test.Database + , IHP.Test.TransversingCSS + , IHP.Test.CssQuery , IHP.Version , Paths_ihp , IHP.Job.Queue diff --git a/ihp.nix b/ihp.nix index 2de496ec4..4b5ff7ced 100644 --- a/ihp.nix +++ b/ihp.nix @@ -58,6 +58,7 @@ , lens , random , hspec +, html-conduit , cereal-text , neat-interpolation , unagi-chan