diff --git a/MANUAL.txt b/MANUAL.txt index 596ba73f2c00..7e0bdfcddb48 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -2078,12 +2078,12 @@ But Markdown also allows a "lazy" format: list item. * and my second. -### The four-space rule ### +### Block content in list items ### A list item may contain multiple paragraphs and other block-level content. However, subsequent paragraphs must be preceded by a blank line -and indented four spaces or a tab. The list will look better if the first -paragraph is aligned with the rest: +and indented to line up with the first non-space content after +the list marker. * First paragraph. @@ -2094,19 +2094,29 @@ paragraph is aligned with the rest: { code } +Exception: if the list marker is followed by an indented code +block, which must begin 5 spaces after the list marker, then +subsequent paragraphs must begin two columns after the last +character of the list marker: + + * code + + continuation paragraph + List items may include other lists. In this case the preceding blank -line is optional. The nested list must be indented four spaces or -one tab: +line is optional. The nested list must be indented to line up with +the first non-space character after the list marker of the +containing list item. * fruits - + apples - - macintosh - - red delicious - + pears - + peaches + + apples + - macintosh + - red delicious + + pears + + peaches * vegetables - + broccoli - + chard + + broccoli + + chard As noted above, Markdown allows you to write list items "lazily," instead of indenting continuation lines. However, if there are multiple paragraphs or @@ -2121,21 +2131,6 @@ other blocks in a list item, the first line of each must be indented. Second paragraph of second list item. -**Note:** Although the four-space rule for continuation paragraphs -comes from the official [Markdown syntax guide], the reference implementation, -`Markdown.pl`, does not follow it. So pandoc will give different results than -`Markdown.pl` when authors have indented continuation paragraphs fewer than -four spaces. - -The [Markdown syntax guide] is not explicit whether the four-space -rule applies to *all* block-level content in a list item; it only -mentions paragraphs and code blocks. But it implies that the rule -applies to all block-level content (including nested lists), and -pandoc interprets it that way. - - [Markdown syntax guide]: - http://daringfireball.net/projects/markdown/syntax#list - ### Ordered lists ### Ordered lists work just like bulleted lists, except that the items @@ -3606,6 +3601,12 @@ implied by pandoc's default `all_symbols_escapable`. Allow a list to occur right after a paragraph, with no intervening blank space. +#### Extension: `four_space_rule` #### + +Selects the pandoc <= 2.0 behavior for parsing lists, so that +four spaces indent are needed for list item continuation +paragraphs. + #### Extension: `spaced_reference_links` #### Allow whitespace between the two components of a reference link, diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index e6a3ca044eb4..95e59063be4e 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -111,6 +111,7 @@ data Extension = | Ext_autolink_bare_uris -- ^ Make all absolute URIs into links | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank + | Ext_four_space_rule -- ^ Require 4-space indent for list contents | Ext_startnum -- ^ Make start number of ordered list significant | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php | Ext_compact_definition_lists -- ^ Definition lists without diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 37a0b53b4680..9ed18d4e0840 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -50,6 +50,7 @@ module Text.Pandoc.Parsing ( takeWhileP, blankline, blanklines, gobbleSpaces, + gobbleAtMostSpaces, enclosed, stringAnyCase, parseFromString, @@ -380,14 +381,33 @@ blanklines = many1 blankline -- | Gobble n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. -gobbleSpaces :: Monad m => ReaderOptions -> Int -> ParserT [Char] st m () -gobbleSpaces _ 0 = return () -gobbleSpaces opts n = try $ do - char ' ' <|> do char '\t' - inp <- getInput - setInput $ replicate (readerTabStop opts - 1) ' ' ++ inp - return ' ' - gobbleSpaces opts (n - 1) +gobbleSpaces :: (HasReaderOptions st, Monad m) + => Int -> ParserT [Char] st m () +gobbleSpaces 0 = return () +gobbleSpaces n + | n < 0 = error "gobbleSpaces called with negative number" + | otherwise = try $ do + char ' ' <|> eatOneSpaceOfTab + gobbleSpaces (n - 1) + +eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Char +eatOneSpaceOfTab = do + char '\t' + tabstop <- getOption readerTabStop + inp <- getInput + setInput $ replicate (tabstop - 1) ' ' ++ inp + return ' ' + +-- | Gobble up to n spaces; if tabs are encountered, expand them +-- and gobble some or all of their spaces, leaving the rest. +gobbleAtMostSpaces :: (HasReaderOptions st, Monad m) + => Int -> ParserT [Char] st m Int +gobbleAtMostSpaces 0 = return 0 +gobbleAtMostSpaces n + | n < 0 = error "gobbleAtMostSpaces called with negative number" + | otherwise = option 0 $ do + char ' ' <|> eatOneSpaceOfTab + (+ 1) <$> gobbleAtMostSpaces (n - 1) -- | Parses material enclosed between start and end parsers. enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 26263d674e20..664691c8c959 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -138,12 +138,7 @@ nonindentSpaces = do skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int skipNonindentSpaces = do tabStop <- getOption readerTabStop - atMostSpaces (tabStop - 1) <* notFollowedBy spaceChar - -atMostSpaces :: PandocMonad m => Int -> MarkdownParser m Int -atMostSpaces n - | n > 0 = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0 - | otherwise = return 0 + gobbleAtMostSpaces (tabStop - 1) <* notFollowedBy spaceChar litChar :: PandocMonad m => MarkdownParser m Char litChar = escapedChar' @@ -809,49 +804,51 @@ blockQuote = do bulletListStart :: PandocMonad m => MarkdownParser m () bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context - startpos <- sourceColumn <$> getPosition skipNonindentSpaces notFollowedBy' (() <$ hrule) -- because hrules start out just like lists satisfy isBulletListMarker - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - () <$ atMostSpaces (tabStop - (endpos - startpos)) + gobbleSpaces 1 <|> () <$ lookAhead newline + try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) <|> return () -anyOrderedListStart :: PandocMonad m => MarkdownParser m (Int, ListNumberStyle, ListNumberDelim) -anyOrderedListStart = try $ do +orderedListStart :: PandocMonad m + => Maybe (ListNumberStyle, ListNumberDelim) + -> MarkdownParser m (Int, ListNumberStyle, ListNumberDelim) +orderedListStart mbstydelim = try $ do optional newline -- if preceded by a Plain block in a list context - startpos <- sourceColumn <$> getPosition skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number - res <- do guardDisabled Ext_fancy_lists - start <- many1 digit >>= safeRead - char '.' - return (start, DefaultStyle, DefaultDelim) - <|> do (num, style, delim) <- anyOrderedListMarker - -- if it could be an abbreviated first name, - -- insist on more than one space - when (delim == Period && (style == UpperAlpha || - (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $ - () <$ spaceChar - return (num, style, delim) - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - atMostSpaces (tabStop - (endpos - startpos)) - return res + (do guardDisabled Ext_fancy_lists + start <- many1 digit >>= safeRead + char '.' + gobbleSpaces 1 <|> () <$ lookAhead newline + optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) + return (start, DefaultStyle, DefaultDelim)) + <|> + (do (num, style, delim) <- maybe + anyOrderedListMarker + (\(sty,delim) -> (\start -> (start,sty,delim)) <$> + orderedListMarker sty delim) + mbstydelim + gobbleSpaces 1 <|> () <$ lookAhead newline + -- if it could be an abbreviated first name, + -- insist on more than one space + when (delim == Period && (style == UpperAlpha || + (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $ + () <$ lookAhead (newline <|> spaceChar) + optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) + return (num, style, delim)) listStart :: PandocMonad m => MarkdownParser m () -listStart = bulletListStart <|> (anyOrderedListStart >> return ()) +listStart = bulletListStart <|> (orderedListStart Nothing >> return ()) -listLine :: PandocMonad m => MarkdownParser m String -listLine = try $ do - notFollowedBy' (do indentSpaces - many spaceChar +listLine :: PandocMonad m => Int -> MarkdownParser m String +listLine continuationIndent = try $ do + notFollowedBy' (do gobbleSpaces continuationIndent + skipMany spaceChar listStart) notFollowedByHtmlCloser - optional (() <$ indentSpaces) + optional (() <$ gobbleSpaces continuationIndent) listLineCommon listLineCommon :: PandocMonad m => MarkdownParser m String @@ -864,26 +861,39 @@ listLineCommon = concat <$> manyTill -- parse raw text for one list item, excluding start marker and continuations rawListItem :: PandocMonad m => MarkdownParser m a - -> MarkdownParser m String + -> MarkdownParser m (String, Int) rawListItem start = try $ do + pos1 <- getPosition start + pos2 <- getPosition + continuationIndent <- (4 <$ guardEnabled Ext_four_space_rule) + <|> return (sourceColumn pos2 - sourceColumn pos1) first <- listLineCommon rest <- many (do notFollowedBy listStart notFollowedBy (() <$ codeBlockFenced) notFollowedBy blankline - listLine) + listLine continuationIndent) blanks <- many blankline - return $ unlines (first:rest) ++ blanks + let result = unlines (first:rest) ++ blanks + return (result, continuationIndent) -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: PandocMonad m => MarkdownParser m String -listContinuation = try $ do - lookAhead indentSpaces - result <- many1 listContinuationLine +listContinuation :: PandocMonad m => Int -> MarkdownParser m String +listContinuation continuationIndent = try $ do + x <- try $ do + notFollowedBy blankline + notFollowedByHtmlCloser + gobbleSpaces continuationIndent + anyLineNewline + xs <- many $ try $ do + notFollowedBy blankline + notFollowedByHtmlCloser + gobbleSpaces continuationIndent <|> notFollowedBy' listStart + anyLineNewline blanks <- many blankline - return $ concat result ++ blanks + return $ concat (x:xs) ++ blanks notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m () notFollowedByHtmlCloser = do @@ -892,20 +902,12 @@ notFollowedByHtmlCloser = do Just t -> notFollowedBy' $ htmlTag (~== TagClose t) Nothing -> return () -listContinuationLine :: PandocMonad m => MarkdownParser m String -listContinuationLine = try $ do - notFollowedBy blankline - notFollowedBy' listStart - notFollowedByHtmlCloser - optional indentSpaces - anyLineNewline - listItem :: PandocMonad m => MarkdownParser m a -> MarkdownParser m (F Blocks) listItem start = try $ do - first <- rawListItem start - continuations <- many listContinuation + (first, continuationIndent) <- rawListItem start + continuations <- many (listContinuation continuationIndent) -- parsing with ListItemState forces markers at beginning of lines to -- count as list item markers, even if not separated by blank space. -- see definition of "endline" @@ -920,23 +922,14 @@ listItem start = try $ do orderedList :: PandocMonad m => MarkdownParser m (F Blocks) orderedList = try $ do - (start, style, delim) <- lookAhead anyOrderedListStart + (start, style, delim) <- lookAhead (orderedListStart Nothing) unless (style `elem` [DefaultStyle, Decimal, Example] && delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists items <- fmap sequence $ many1 $ listItem - ( try $ do - optional newline -- if preceded by Plain block in a list - startpos <- sourceColumn <$> getPosition - skipNonindentSpaces - res <- orderedListMarker style delim - endpos <- sourceColumn <$> getPosition - tabStop <- getOption readerTabStop - lookAhead (newline <|> spaceChar) - atMostSpaces (tabStop - (endpos - startpos)) - return res ) - start' <- option 1 $ guardEnabled Ext_startnum >> return start + (orderedListStart (Just (style, delim))) + start' <- (start <$ guardEnabled Ext_startnum) <|> return 1 return $ B.orderedListWith (start', style, delim) <$> fmap compactify items bulletList :: PandocMonad m => MarkdownParser m (F Blocks) @@ -1122,7 +1115,7 @@ rawHtmlBlocks = do updateState $ \st -> st{ stateInHtmlBlock = Just tagtype } let closer = htmlTag (\x -> x ~== TagClose tagtype) let block' = do notFollowedBy' closer - atMostSpaces indentlevel + gobbleAtMostSpaces indentlevel block contents <- mconcat <$> many block' result <- diff --git a/test/command/2434.md b/test/command/2434.md index aa03e5fc3648..4f12b6f56b47 100644 --- a/test/command/2434.md +++ b/test/command/2434.md @@ -31,7 +31,7 @@ ``` % pandoc -t opendocument -(@) text +(@) text some text diff --git a/test/command/3511.md b/test/command/3511.md new file mode 100644 index 000000000000..b8bcedbb0f39 --- /dev/null +++ b/test/command/3511.md @@ -0,0 +1,46 @@ +``` +% pandoc -t native +- a + - b + - c + +- code + +1000. one + + not continuation +^D +[BulletList + [[Plain [Str "a"] + ,BulletList + [[Plain [Str "b"] + ,BulletList + [[Plain [Str "c"]]]]]] + ,[CodeBlock ("",[],[]) "code"]] +,OrderedList (1000,Decimal,Period) + [[Plain [Str "one"]]] +,CodeBlock ("",[],[]) "not continuation"] +``` + +``` +% pandoc -t native -f markdown+four_space_rule +- a + - b + - c + +- not code + +1000. one + + continuation +^D +[BulletList + [[Plain [Str "a"]] + ,[Plain [Str "b"] + ,BulletList + [[Plain [Str "c"]]]] + ,[CodeBlock ("",[],[]) "not code"]] +,OrderedList (1000,Decimal,Period) + [[Para [Str "one"] + ,Para [Str "continuation"]]]] +``` diff --git a/test/markdown-reader-more.native b/test/markdown-reader-more.native index a24417ffea3a..2e55dbb183ba 100644 --- a/test/markdown-reader-more.native +++ b/test/markdown-reader-more.native @@ -29,13 +29,13 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S ,[Plain [Str "three"]]] ,Header 2 ("indented-code-at-beginning-of-list",[],[]) [Str "Indented",Space,Str "code",Space,Str "at",Space,Str "beginning",Space,Str "of",Space,Str "list"] ,BulletList - [[CodeBlock ("",[],[]) "code\ncode"]] -,OrderedList (1,Decimal,Period) - [[CodeBlock ("",[],[]) "code\ncode"] - ,[CodeBlock ("",[],[]) "code\ncode"]] -,BulletList - [[CodeBlock ("",[],[]) "code\ncode"] - ,[Plain [Str "no",Space,Str "code"]]] + [[CodeBlock ("",[],[]) "code\ncode" + ,OrderedList (1,Decimal,Period) + [[CodeBlock ("",[],[]) "code\ncode"] + ,[CodeBlock ("",[],[]) "code\ncode"]] + ,BulletList + [[CodeBlock ("",[],[]) "code\ncode"] + ,[Plain [Str "no",Space,Str "code"]]]]] ,Header 2 ("backslash-newline",[],[]) [Str "Backslash",Space,Str "newline"] ,Para [Str "hi",LineBreak,Str "there"] ,Header 2 ("code-spans",[],[]) [Str "Code",Space,Str "spans"] diff --git a/test/markdown-reader-more.txt b/test/markdown-reader-more.txt index 73c9500a0ba2..0480e41ccc29 100644 --- a/test/markdown-reader-more.txt +++ b/test/markdown-reader-more.txt @@ -84,14 +84,14 @@ $PATH 90 $PATH ## Indented code at beginning of list -- code - code +- code + code - 1. code - code + 1. code + code - 12345678. code - code + 12345678. code + code - code code