Skip to content

Commit

Permalink
Support EndOfBlock and EndOfSection in HTML writer
Browse files Browse the repository at this point in the history
This works similarly to the markdown writer, but with special care
taken to skipping section divs with what regards to the block level.

The change also takes care to not modify the output if `EndOfDocument`
is used.
  • Loading branch information
bitonic committed Aug 19, 2021
1 parent 1f1a30b commit 5db0a59
Show file tree
Hide file tree
Showing 8 changed files with 169 additions and 39 deletions.
2 changes: 1 addition & 1 deletion MANUAL.txt
Original file line number Diff line number Diff line change
Expand Up @@ -961,7 +961,7 @@ header when requesting a document from a URL:
: Specify whether footnotes (and references, if `reference-links` is
set) are placed at the end of the current (top-level) block, the
current section, or the document. The default is
`document`. Currently only affects the markdown writer.
`document`. Currently only affects the markdown and HTML writers.

`--markdown-headings=setext`|`atx`

Expand Down
2 changes: 1 addition & 1 deletion man/pandoc.1
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,7 @@ Specify whether footnotes (and references, if \f[C]reference-links\f[R]
is set) are placed at the end of the current (top-level) block, the
current section, or the document.
The default is \f[C]document\f[R].
Currently only affects the markdown writer.
Currently only affects the markdown and HTML writers.
.TP
\f[B]\f[CB]--markdown-headings=setext\f[B]\f[R]|\f[B]\f[CB]atx\f[B]\f[R]
Specify whether to use ATX-style (\f[C]#\f[R]-prefixed) or Setext-style
Expand Down
121 changes: 89 additions & 32 deletions src/Text/Pandoc/Writers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,11 @@ import Text.TeXMath
import Text.XML.Light (elChildren, unode, unqual)
import qualified Text.XML.Light as XML
import Text.XML.Light.Output
import Data.String (fromString)

data WriterState = WriterState
{ stNotes :: [Html] -- ^ List of notes
, stEmittedNotes :: Int -- ^ How many notes we've already pushed out to the HTML
, stMath :: Bool -- ^ Math is used in document
, stQuotes :: Bool -- ^ <q> tag is used
, stHighlighting :: Bool -- ^ Syntax highlighting is used
Expand All @@ -88,10 +90,11 @@ data WriterState = WriterState
, stCodeBlockNum :: Int -- ^ Number of code block
, stCsl :: Bool -- ^ Has CSL references
, stCslEntrySpacing :: Maybe Int -- ^ CSL entry spacing
, stBlockLevel :: Int -- ^ Current block depth, excluding section divs
}

defaultWriterState :: WriterState
defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False,
defaultWriterState = WriterState {stNotes= [], stEmittedNotes = 0, stMath = False, stQuotes = False,
stHighlighting = False,
stHtml5 = False,
stEPUBVersion = Nothing,
Expand All @@ -100,7 +103,8 @@ defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False,
stInSection = False,
stCodeBlockNum = 0,
stCsl = False,
stCslEntrySpacing = Nothing}
stCslEntrySpacing = Nothing,
stBlockLevel = 0}

-- Helpers to render HTML with the appropriate function.

Expand Down Expand Up @@ -266,8 +270,16 @@ pandocToHtml opts (Pandoc meta blocks) = do
then fmap renderHtml' <$> tableOfContents opts sects
else return Nothing
blocks' <- blockListToHtml opts sects
notes <- do
-- make the st private just to be safe, since we modify it right afterwards
st <- get
if null (stNotes st)
then return mempty
else do
notes <- footnoteSection opts EndOfDocument (stEmittedNotes st + 1) (reverse (stNotes st))
modify (\st' -> st'{ stNotes = mempty, stEmittedNotes = stEmittedNotes st' + length (stNotes st') })
return notes
st <- get
notes <- footnoteSection opts (reverse (stNotes st))
let thebody = blocks' >> notes
let math = case writerHTMLMathMethod opts of
MathJax url
Expand Down Expand Up @@ -490,28 +502,43 @@ tableOfContents opts sects = do

-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
footnoteSection :: PandocMonad m
=> WriterOptions -> [Html] -> StateT WriterState m Html
footnoteSection opts notes = do
footnoteSection ::
PandocMonad m => WriterOptions -> ReferenceLocation -> Int -> [Html] -> StateT WriterState m Html
footnoteSection opts refLocation startCounter notes = do
html5 <- gets stHtml5
slideVariant <- gets stSlideVariant
let hrtag = if html5 then H5.hr else H.hr
let hrtag = if refLocation /= EndOfBlock then (if html5 then H5.hr else H.hr) else mempty
let additionalClassName = case refLocation of
EndOfBlock -> "footnotes-end-of-block"
EndOfDocument -> "footnotes-end-of-document"
EndOfSection -> "footnotes-end-of-section"
let className = "footnotes " <> additionalClassName
epubVersion <- gets stEPUBVersion
let container x
| html5
, epubVersion == Just EPUB3
= H5.section ! A.class_ "footnotes"
= H5.section ! A.class_ className
! customAttribute "epub:type" "footnotes" $ x
| html5 = H5.section ! A.class_ "footnotes"
| html5 = H5.section ! A.class_ className
! customAttribute "role" "doc-endnotes"
$ x
| slideVariant /= NoSlides = H.div ! A.class_ "footnotes slide" $ x
| otherwise = H.div ! A.class_ "footnotes" $ x
| otherwise = H.div ! A.class_ className $ x
return $
if null notes
then mempty
else nl opts >> container (nl opts >> hrtag >> nl opts >>
H.ol (mconcat notes >> nl opts) >> nl opts)
else do
nl opts
container $ do
nl opts
hrtag
nl opts
-- Keep the previous output exactly the same if we don't
-- have multiple notes sections
if startCounter == 1
then H.ol $ mconcat notes >> nl opts
else H.ol ! A.start (fromString (show startCounter)) $ mconcat notes >> nl opts
nl opts

-- | Parse a mailto link; return Just (name, domain) or Nothing.
parseMailto :: Text -> Maybe (Text, Text)
Expand Down Expand Up @@ -702,11 +729,10 @@ adjustNumbers opts doc =
fixnum x = x
showSecNum = T.intercalate "." . map tshow

-- | Convert Pandoc block element to HTML.
blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html
blockToHtml _ Null = return mempty
blockToHtml opts (Plain lst) = inlineListToHtml opts lst
blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)])
blockToHtmlInner :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html
blockToHtmlInner _ Null = return mempty
blockToHtmlInner opts (Plain lst) = inlineListToHtml opts lst
blockToHtmlInner opts (Para [Image attr@(_,classes,_) txt (src,tit)])
| "stretch" `elem` classes = do
slideVariant <- gets stSlideVariant
case slideVariant of
Expand All @@ -716,20 +742,20 @@ blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)])
inlineToHtml opts (Image attr txt (src, tit))
_ -> figure opts attr txt (src, tit)
-- title beginning with fig: indicates that the image is a figure
blockToHtml opts (Para [Image attr txt (s,T.stripPrefix "fig:" -> Just tit)]) =
blockToHtmlInner opts (Para [Image attr txt (s,T.stripPrefix "fig:" -> Just tit)]) =
figure opts attr txt (s,tit)
blockToHtml opts (Para lst) = do
blockToHtmlInner opts (Para lst) = do
contents <- inlineListToHtml opts lst
case contents of
Empty _ | not (isEnabled Ext_empty_paragraphs opts) -> return mempty
_ -> return $ H.p contents
blockToHtml opts (LineBlock lns) =
blockToHtmlInner opts (LineBlock lns) =
if writerWrapText opts == WrapNone
then blockToHtml opts $ linesToPara lns
else do
htmlLines <- inlineListToHtml opts $ intercalate [LineBreak] lns
return $ H.div ! A.class_ "line-block" $ htmlLines
blockToHtml opts (Div (ident, "section":dclasses, dkvs)
blockToHtmlInner opts (Div (ident, "section":dclasses, dkvs)
(Header level
hattr@(hident,hclasses,hkvs) ils : xs)) = do
slideVariant <- gets stSlideVariant
Expand Down Expand Up @@ -810,7 +836,7 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs)
if null innerSecs
then mempty
else nl opts <> innerContents
blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
blockToHtmlInner opts (Div attr@(ident, classes, kvs') bs) = do
html5 <- gets stHtml5
slideVariant <- gets stSlideVariant
let isCslBibBody = ident == "refs" || "csl-bib-body" `elem` classes
Expand Down Expand Up @@ -864,7 +890,7 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
_ -> return mempty
else addAttrs opts (ident, classes'', kvs) $
divtag contents'
blockToHtml opts (RawBlock f str) = do
blockToHtmlInner opts (RawBlock f str) = do
ishtml <- isRawHtml f
if ishtml
then return $ preEscapedText str
Expand All @@ -875,10 +901,10 @@ blockToHtml opts (RawBlock f str) = do
else do
report $ BlockNotRendered (RawBlock f str)
return mempty
blockToHtml _ HorizontalRule = do
blockToHtmlInner _ HorizontalRule = do
html5 <- gets stHtml5
return $ if html5 then H5.hr else H.hr
blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
blockToHtmlInner opts (CodeBlock (id',classes,keyvals) rawCode) = do
id'' <- if T.null id'
then do
modify $ \st -> st{ stCodeBlockNum = stCodeBlockNum st + 1 }
Expand Down Expand Up @@ -910,7 +936,7 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
-- we set writerIdentifierPrefix to "" since id'' already
-- includes it:
addAttrs opts{writerIdentifierPrefix = ""} (id'',[],keyvals) h
blockToHtml opts (BlockQuote blocks) = do
blockToHtmlInner opts (BlockQuote blocks) = do
-- in S5, treat list in blockquote specially
-- if default is incremental, make it nonincremental;
-- otherwise incremental
Expand All @@ -932,7 +958,7 @@ blockToHtml opts (BlockQuote blocks) = do
else do
contents <- blockListToHtml opts blocks
return $ H.blockquote $ nl opts >> contents >> nl opts
blockToHtml opts (Header level (ident,classes,kvs) lst) = do
blockToHtmlInner opts (Header level (ident,classes,kvs) lst) = do
contents <- inlineListToHtml opts lst
let secnum = fromMaybe mempty $ lookup "number" kvs
let contents' = if writerNumberSections opts && not (T.null secnum)
Expand All @@ -955,12 +981,12 @@ blockToHtml opts (Header level (ident,classes,kvs) lst) = do
5 -> H.h5 contents'
6 -> H.h6 contents'
_ -> H.p ! A.class_ "heading" $ contents'
blockToHtml opts (BulletList lst) = do
blockToHtmlInner opts (BulletList lst) = do
contents <- mapM (listItemToHtml opts) lst
let isTaskList = not (null lst) && all isTaskListItem lst
(if isTaskList then (! A.class_ "task-list") else id) <$>
unordList opts contents
blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
blockToHtmlInner opts (OrderedList (startnum, numstyle, _) lst) = do
contents <- mapM (listItemToHtml opts) lst
html5 <- gets stHtml5
let numstyle' = case numstyle of
Expand All @@ -983,17 +1009,47 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
else [])
l <- ordList opts contents
return $ foldl' (!) l attribs
blockToHtml opts (DefinitionList lst) = do
blockToHtmlInner opts (DefinitionList lst) = do
contents <- mapM (\(term, defs) ->
do term' <- liftM H.dt $ inlineListToHtml opts term
defs' <- mapM (liftM (\x -> H.dd (x >> nl opts)) .
blockListToHtml opts) defs
return $ mconcat $ nl opts : term' : nl opts :
intersperse (nl opts) defs') lst
defList opts contents
blockToHtml opts (Table attr caption colspecs thead tbody tfoot) =
blockToHtmlInner opts (Table attr caption colspecs thead tbody tfoot) =
tableToHtml opts (Ann.toTable attr caption colspecs thead tbody tfoot)

-- | Convert Pandoc block element to HTML. All the legwork is done by
-- 'blockToHtmlInner', this just takes care of emitting the notes after
-- the block if necessary.
blockToHtml :: PandocMonad m => WriterOptions -> Block -> StateT WriterState m Html
blockToHtml opts block = do
-- Ignore inserted section divs -- they are not blocks as they came from
-- the document itself (at least not when coming from markdown)
let isSection = case block of
Div (_, classes, _) _ | "section" `elem` classes -> True
_ -> False
let increaseLevel = not isSection
when increaseLevel $
modify (\st -> st{ stBlockLevel = stBlockLevel st + 1 })
doc <- blockToHtmlInner opts block
st <- get
let emitNotes =
(writerReferenceLocation opts == EndOfBlock && stBlockLevel st == 1) ||
(writerReferenceLocation opts == EndOfSection && isSection)
res <- if emitNotes
then do
notes <- if null (stNotes st)
then return mempty
else footnoteSection opts (writerReferenceLocation opts) (stEmittedNotes st + 1) (reverse (stNotes st))
modify (\st' -> st'{ stNotes = mempty, stEmittedNotes = stEmittedNotes st' + length (stNotes st') })
return (doc <> notes)
else return doc
when increaseLevel $
modify (\st' -> st'{ stBlockLevel = stBlockLevel st' - 1 })
return res

tableToHtml :: PandocMonad m
=> WriterOptions
-> Ann.Table
Expand Down Expand Up @@ -1468,7 +1524,8 @@ inlineToHtml opts inline = do
-- note: null title included, as in Markdown.pl
(Note contents) -> do
notes <- gets stNotes
let number = length notes + 1
emittedNotes <- gets stEmittedNotes
let number = emittedNotes + length notes + 1
let ref = tshow number
htmlContents <- blockListToNote opts ref contents
epubVersion <- gets stEPUBVersion
Expand Down
75 changes: 74 additions & 1 deletion test/Tests/Writers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ import Text.Pandoc
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder

htmlWithOpts :: (ToPandoc a) => WriterOptions -> a -> String
htmlWithOpts opts = unpack . purely (writeHtml4String opts{ writerWrapText = WrapNone }) . toPandoc

html :: (ToPandoc a) => a -> String
html = unpack . purely (writeHtml4String def{ writerWrapText = WrapNone }) . toPandoc
html = htmlWithOpts def

htmlQTags :: (ToPandoc a) => a -> String
htmlQTags = unpack
Expand All @@ -33,6 +36,21 @@ infix 4 =:
=> String -> (a, String) -> TestTree
(=:) = test html

noteTestDoc :: Blocks
noteTestDoc =
header 1 "Page title" <>
header 2 "First section" <>
para ("This is a footnote." <>
note (para "Down here.") <>
" And this is a " <>
link "https://www.google.com" "" "link" <>
".") <>
blockQuote (para ("A note inside a block quote." <>
note (para "The second note.")) <>
para "A second paragraph.") <>
header 2 "Second section" <>
para "Some more text."

tests :: [TestTree]
tests =
[ testGroup "inline code"
Expand Down Expand Up @@ -86,6 +104,61 @@ tests =
=?> ("<var><code class=\"sourceCode haskell\">" ++
"<span class=\"op\">&gt;&gt;=</span></code></var>")
]
, testGroup "footnotes"
[ test (htmlWithOpts def{writerReferenceLocation=EndOfDocument})
"at the end of a document" $
noteTestDoc =?>
concat
[ "<h1>Page title</h1>"
, "<h2>First section</h2>"
, "<p>This is a footnote.<a href=\"#fn1\" class=\"footnote-ref\" id=\"fnref1\"><sup>1</sup></a> And this is a <a href=\"https://www.google.com\">link</a>.</p>"
, "<blockquote><p>A note inside a block quote.<a href=\"#fn2\" class=\"footnote-ref\" id=\"fnref2\"><sup>2</sup></a></p><p>A second paragraph.</p></blockquote>"
, "<h2>Second section</h2>"
, "<p>Some more text.</p>"
, "<div class=\"footnotes footnotes-end-of-document\"><hr /><ol><li id=\"fn1\"><p>Down here.<a href=\"#fnref1\" class=\"footnote-back\">↩︎</a></p></li><li id=\"fn2\"><p>The second note.<a href=\"#fnref2\" class=\"footnote-back\">↩︎</a></p></li></ol></div>"
]
, test (htmlWithOpts def{writerReferenceLocation=EndOfBlock})
"at the end of a block" $
noteTestDoc =?>
concat
[ "<h1>Page title</h1>"
, "<h2>First section</h2>"
, "<p>This is a footnote.<a href=\"#fn1\" class=\"footnote-ref\" id=\"fnref1\"><sup>1</sup></a> And this is a <a href=\"https://www.google.com\">link</a>.</p>"
, "<div class=\"footnotes footnotes-end-of-block\"><ol><li id=\"fn1\"><p>Down here.<a href=\"#fnref1\" class=\"footnote-back\">↩︎</a></p></li></ol></div>"
, "<blockquote><p>A note inside a block quote.<a href=\"#fn2\" class=\"footnote-ref\" id=\"fnref2\"><sup>2</sup></a></p><p>A second paragraph.</p></blockquote>"
, "<div class=\"footnotes footnotes-end-of-block\"><ol start=\"2\"><li id=\"fn2\"><p>The second note.<a href=\"#fnref2\" class=\"footnote-back\">↩︎</a></p></li></ol></div>"
, "<h2>Second section</h2>"
, "<p>Some more text.</p>"
]
, test (htmlWithOpts def{writerReferenceLocation=EndOfSection})
"at the end of a section" $
noteTestDoc =?>
concat
[ "<h1>Page title</h1>"
, "<h2>First section</h2>"
, "<p>This is a footnote.<a href=\"#fn1\" class=\"footnote-ref\" id=\"fnref1\"><sup>1</sup></a> And this is a <a href=\"https://www.google.com\">link</a>.</p>"
, "<blockquote><p>A note inside a block quote.<a href=\"#fn2\" class=\"footnote-ref\" id=\"fnref2\"><sup>2</sup></a></p><p>A second paragraph.</p></blockquote>"
, "<div class=\"footnotes footnotes-end-of-section\"><hr /><ol><li id=\"fn1\"><p>Down here.<a href=\"#fnref1\" class=\"footnote-back\">↩︎</a></p></li><li id=\"fn2\"><p>The second note.<a href=\"#fnref2\" class=\"footnote-back\">↩︎</a></p></li></ol></div>"
, "<h2>Second section</h2>"
, "<p>Some more text.</p>"
]
, test (htmlWithOpts def{writerReferenceLocation=EndOfSection, writerSectionDivs=True})
"at the end of a section, with section divs" $
noteTestDoc =?>
-- Footnotes are rendered _after_ their section (in this case after the level2 section
-- that contains it).
concat
[ "<div class=\"section level1\">"
, "<h1>Page title</h1>"
, "<div class=\"section level2\">"
, "<h2>First section</h2>"
, "<p>This is a footnote.<a href=\"#fn1\" class=\"footnote-ref\" id=\"fnref1\"><sup>1</sup></a> And this is a <a href=\"https://www.google.com\">link</a>.</p><blockquote><p>A note inside a block quote.<a href=\"#fn2\" class=\"footnote-ref\" id=\"fnref2\"><sup>2</sup></a></p><p>A second paragraph.</p></blockquote>"
, "</div>"
, "<div class=\"footnotes footnotes-end-of-section\"><hr /><ol><li id=\"fn1\"><p>Down here.<a href=\"#fnref1\" class=\"footnote-back\">↩︎</a></p></li><li id=\"fn2\"><p>The second note.<a href=\"#fnref2\" class=\"footnote-back\">↩︎</a></p></li></ol></div>"
, "<div class=\"section level2\"><h2>Second section</h2><p>Some more text.</p></div>"
, "</div>"
]
]
]
where
tQ :: (ToString a, ToPandoc a)
Expand Down
2 changes: 1 addition & 1 deletion test/command/4235.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
This.^[Has a footnote.]
^D
<p>This.<a href="#foofn1" class="footnote-ref" id="foofnref1" role="doc-noteref"><sup>1</sup></a></p>
<section class="footnotes" role="doc-endnotes">
<section class="footnotes footnotes-end-of-document" role="doc-endnotes">
<hr />
<ol>
<li id="foofn1" role="doc-endnote"><p>Has a footnote.<a href="#foofnref1" class="footnote-back" role="doc-backlink">↩︎</a></p></li>
Expand Down
Loading

0 comments on commit 5db0a59

Please sign in to comment.