Skip to content

Commit

Permalink
T.P.URI: pBase64DataURI now returns mime + bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
jgm committed Dec 19, 2024
1 parent 3900791 commit 449824c
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/Text/Pandoc/Readers/Markdown.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1844,7 +1844,7 @@ source = do
base64DataURI :: PandocMonad m => ParsecT Sources s m Text
base64DataURI = do
Sources ((pos, txt):rest) <- getInput
let r = A.parse pBase64DataURI txt
let r = A.parse (fst <$> A.match pBase64DataURI) txt
case r of
A.Done remaining consumed -> do
let pos' = incSourceColumn pos (T.length consumed)
Expand Down
38 changes: 24 additions & 14 deletions src/Text/Pandoc/URI.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@ module Text.Pandoc.URI ( urlEncode
, pBase64DataURI
) where
import qualified Network.HTTP.Types as HTTP
import Data.ByteString.Base64 (decodeLenient)
import Text.Pandoc.MIME (MimeType)
import qualified Data.ByteString as B
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.Text as T
import qualified Data.Set as Set
import Data.Char (isSpace, isAscii)
import Network.URI (URI (uriScheme), parseURI, escapeURIString)
import qualified Data.Attoparsec.Text as A
import Data.Text.Encoding (encodeUtf8)
import Control.Applicative (many)

urlEncode :: T.Text -> T.Text
Expand Down Expand Up @@ -117,24 +121,30 @@ uriPathToPath (T.unpack -> path) =
path
#endif

pBase64DataURI :: A.Parser T.Text
pBase64DataURI = fst <$> A.match base64uri
pBase64DataURI :: A.Parser (B.ByteString, MimeType)
pBase64DataURI = base64uri
where
base64uri = do
A.string "data:"
restrictedName
A.char '/'
restrictedName
A.char ';'
many mediaParam
A.string "base64,"
A.skipWhile (A.inClass "A-Za-z0-9+/")
mime <- do
n1 <- restrictedName
A.char '/'
n2 <- restrictedName
mps <- many mediaParam
pure $ n1 <> "/" <> n2 <> mconcat mps
A.string ";base64,"
b64 <- A.takeWhile (A.inClass "A-Za-z0-9+/")
A.skipWhile (== '=')
-- this decode should be lazy:
pure (decodeLenient (encodeUtf8 b64), mime)
restrictedName = do
A.satisfy (A.inClass "A-Za-z0-9")
A.skipWhile (A.inClass "A-Za-z0-9!#$&^_.+-")
c <- A.satisfy (A.inClass "A-Za-z0-9")
rest <- A.takeWhile (A.inClass "A-Za-z0-9!#$&^_.+-")
pure $ T.singleton c <> rest
mediaParam = do
restrictedName
A.char '='
A.skipWhile (/=';')
A.char ';'
A.skipWhile isSpace
k <- restrictedName
A.char '='
v <- A.takeWhile (/=';')
pure $ ";" <> k <> "=" <> v

0 comments on commit 449824c

Please sign in to comment.