From fbe7891a8876b33bcd1457b89b09dcfb43d8eb89 Mon Sep 17 00:00:00 2001 From: mb21 Date: Fri, 30 Mar 2018 21:48:14 +0200 Subject: [PATCH] introduce --metadata-file option, closes #1960 --- src/Text/Pandoc/App.hs | 26 +++++++ src/Text/Pandoc/Readers/Markdown.hs | 115 ++++++++++++---------------- 2 files changed, 75 insertions(+), 66 deletions(-) diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 9a3e00c9fc1a4..1b030ddac669a 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -90,7 +90,9 @@ import Text.Pandoc.BCP47 (Lang (..), parseBCP47) import Text.Pandoc.Builder (setMeta, deleteMeta) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.Highlighting (highlightingStyles) +import Text.Pandoc.Parsing (defaultParserState, readWithM, runF) import Text.Pandoc.PDF (makePDF) +import Text.Pandoc.Readers.Markdown (yamlBsToMeta) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, ordNub, safeRead, tabFilter) @@ -399,6 +401,18 @@ convertWithOpts opts = do ("application/xml", jatsCSL) return $ ("csl", jatsEncoded) : optMetadata opts else return $ optMetadata opts + metadataFromFile <- + case optMetadataFile opts of + Nothing -> return mempty + Just file -> do + bs <- readFileStrict file + let parser = do + meta <- yamlBsToMeta bs + return $ runF meta defaultParserState + parsed <- readWithM parser def "" + case parsed of + Right result -> return result + Left e -> throwError e case lookup "lang" (optMetadata opts) of Just l -> case parseBCP47 l of @@ -491,6 +505,7 @@ convertWithOpts opts = do ( (if isJust (optExtractMedia opts) then fillMediaBag else return) + >=> return . addNonPresentMetadata metadataFromFile >=> return . addMetadata metadata >=> applyTransforms transforms >=> applyFilters readerOpts filters' [format] @@ -556,6 +571,7 @@ data Opt = Opt , optTemplate :: Maybe FilePath -- ^ Custom template , optVariables :: [(String,String)] -- ^ Template variables to set , optMetadata :: [(String, String)] -- ^ Metadata fields to set + , optMetadataFile :: Maybe FilePath -- ^ Name of YAML metadata file , optOutputFile :: Maybe FilePath -- ^ Name of output file , optInputFiles :: [FilePath] -- ^ Names of input files , optNumberSections :: Bool -- ^ Number sections in LaTeX @@ -628,6 +644,7 @@ defaultOpts = Opt , optTemplate = Nothing , optVariables = [] , optMetadata = [] + , optMetadataFile = Nothing , optOutputFile = Nothing , optInputFiles = [] , optNumberSections = False @@ -687,6 +704,9 @@ defaultOpts = Opt , optStripComments = False } +addNonPresentMetadata :: Text.Pandoc.Meta -> Pandoc -> Pandoc +addNonPresentMetadata newmeta (Pandoc meta bs) = Pandoc (meta <> newmeta) bs + addMetadata :: [(String, String)] -> Pandoc -> Pandoc addMetadata kvs pdc = foldr addMeta (removeMetaKeys kvs pdc) kvs @@ -962,6 +982,12 @@ options = "KEY[:VALUE]") "" + , Option "" ["metadata-file"] + (ReqArg + (\arg opt -> return opt{ optMetadataFile = Just arg }) + "FILE") + "" + , Option "V" ["variable"] (ReqArg (\arg opt -> do diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 156b2b62231be..066c2a1d8156c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,11 +31,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} -module Text.Pandoc.Readers.Markdown ( readMarkdown ) where +module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlBsToMeta ) where import Prelude import Control.Monad import Control.Monad.Except (throwError) +import qualified Data.ByteString as BS import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower) import qualified Data.HashMap.Strict as H import Data.List (intercalate, sortBy, transpose, elemIndex) @@ -238,7 +239,6 @@ pandocTitleBlock = try $ do yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks) yamlMetaBlock = try $ do guardEnabled Ext_yaml_metadata_block - pos <- getPosition string "---" blankline notFollowedBy blankline -- if --- is followed by a blank it's an HRULE @@ -246,49 +246,39 @@ yamlMetaBlock = try $ do -- by including --- and ..., we allow yaml blocks with just comments: let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - case Yaml.decodeEither' $ UTF8.fromString rawYaml of - Right (Yaml.Object hashmap) -> do - let alist = H.toList hashmap - mapM_ (\(k, v) -> - if ignorable k - then return () - else do - v' <- yamlToMeta v - let k' = T.unpack k - updateState $ \st -> st{ stateMeta' = - do m <- stateMeta' st - -- if there's already a value, leave it unchanged - case lookupMeta k' m of - Just _ -> return m - Nothing -> do - v'' <- v' - return $ B.setMeta (T.unpack k) v'' m} - ) alist - Right Yaml.Null -> return () - Right _ -> do - logMessage $ - CouldNotParseYamlMetadata "not an object" - pos - return () - Left err' -> do - case err' of - InvalidYaml (Just YamlParseException{ - yamlProblem = problem - , yamlContext = _ctxt - , yamlProblemMark = Yaml.YamlMark { - yamlLine = yline - , yamlColumn = ycol - }}) -> - logMessage $ CouldNotParseYamlMetadata - problem (setSourceLine - (setSourceColumn pos - (sourceColumn pos + ycol)) - (sourceLine pos + 1 + yline)) - _ -> logMessage $ CouldNotParseYamlMetadata - (show err') pos - return () + newMetaF <- yamlBsToMeta $ UTF8.fromString rawYaml + updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF } return mempty +yamlBsToMeta :: PandocMonad m => BS.ByteString -> MarkdownParser m (F Meta) +yamlBsToMeta bs = do + pos <- getPosition + case Yaml.decodeEither' bs of + Right (Yaml.Object o) -> (fmap Meta) <$> objectToMap o + Right Yaml.Null -> return . return $ mempty + Right _ -> do + logMessage $ + CouldNotParseYamlMetadata "not an object" + pos + return . return $ mempty + Left err' -> do + case err' of + InvalidYaml (Just YamlParseException{ + yamlProblem = problem + , yamlContext = _ctxt + , yamlProblemMark = Yaml.YamlMark { + yamlLine = yline + , yamlColumn = ycol + }}) -> + logMessage $ CouldNotParseYamlMetadata + problem (setSourceLine + (setSourceColumn pos + (sourceColumn pos + ycol)) + (sourceLine pos + 1 + yline)) + _ -> logMessage $ CouldNotParseYamlMetadata + (show err') pos + return . return $ mempty + -- ignore fields ending with _ ignorable :: Text -> Bool ignorable t = (T.pack "_") `T.isSuffixOf` t @@ -317,29 +307,22 @@ yamlToMeta :: PandocMonad m yamlToMeta (Yaml.String t) = toMetaValue t yamlToMeta (Yaml.Number n) -- avoid decimal points for numbers that don't need them: - | base10Exponent n >= 0 = return $ return $ MetaString $ show - $ coefficient n * (10 ^ base10Exponent n) - | otherwise = return $ return $ MetaString $ show n -yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b -yamlToMeta (Yaml.Array xs) = do - xs' <- mapM yamlToMeta (V.toList xs) - return $ do - xs'' <- sequence xs' - return $ B.toMetaValue xs'' -yamlToMeta (Yaml.Object o) = do - let alist = H.toList o - foldM (\m (k,v) -> - if ignorable k - then return m - else do - v' <- yamlToMeta v - return $ do - MetaMap m' <- m - v'' <- v' - return (MetaMap $ M.insert (T.unpack k) v'' m')) - (return $ MetaMap M.empty) - alist -yamlToMeta _ = return $ return $ MetaString "" + | base10Exponent n >= 0 = return . return $ MetaString $ show $ coefficient n * (10 ^ base10Exponent n) + | otherwise = return . return $ MetaString $ show n +yamlToMeta (Yaml.Bool b) = return . return $ MetaBool b +yamlToMeta (Yaml.Array xs) = fmap B.toMetaValue . sequence <$> mapM yamlToMeta (V.toList xs) +yamlToMeta (Yaml.Object o) = fmap B.toMetaValue <$> objectToMap o +yamlToMeta _ = return . return $ MetaString "" + +objectToMap :: PandocMonad m => Yaml.Object -> MarkdownParser m (F (M.Map String MetaValue)) +objectToMap o = + let kvs = filter (not . ignorable . fst) $ H.toList o + toMeta (k, v) = do + fv <- yamlToMeta v + return $ do + v' <- fv + return (T.unpack k, v') + in (fmap M.fromList . sequence) <$> mapM toMeta kvs stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()