-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathdownlit-html.R
96 lines (82 loc) · 3.04 KB
/
downlit-html.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#' Syntax highlight and link an HTML page
#'
#' @description
#' * Code blocks, identified by `<pre>` tags with class `sourceCode r`
#' or any `<pre>` tag inside of `<div class='downlit'>`, are
#' processed with [highlight()].
#'
#' * Inline code, identified by `<code>` tags that contain only text
#' (and don't have a header tag (e.g. `<h1>`) or `<a>` as an ancestor)
#' are processed processed with [autolink()].
#'
#' Use `downlit_html_path()` to process an `.html` file on disk;
#' use `downlit_html_node()` to process an in-memory `xml_node` as part of a
#' larger pipeline.
#'
#' @param in_path,out_path Input and output paths for HTML file
#' @inheritParams highlight
#' @param x An `xml2::xml_node`
#' @return `downlit_html_path()` invisibly returns `output_path`;
#' `downlit_html_node()` modifies `x` in place and returns nothing.
#' @export
#' @examples
#' node <- xml2::read_xml("<p><code>base::t()</code></p>")
#' node
#'
#' # node is modified in place
#' downlit_html_node(node)
#' node
downlit_html_path <- function(in_path, out_path, classes = classes_pandoc()) {
if (!is_installed("xml2")) {
abort("xml2 package required .html transformation")
}
html <- xml2::read_html(in_path, encoding = "UTF-8")
downlit_html_node(html, classes = classes)
xml2::write_html(html, out_path, format = FALSE)
invisible(out_path)
}
#' @export
#' @rdname downlit_html_path
downlit_html_node <- function(x, classes = classes_pandoc()) {
stopifnot(inherits(x, "xml_node"))
xpath <- c(
# Usual block generated by pandoc (after syntax highlighting)
".//pre[contains(@class, 'sourceCode r')]",
# Special case that allows us to override usually rules if needed
".//div[contains(@class, 'downlit')]//pre"
)
xpath_block <- paste(xpath, collapse = "|")
tweak_children(x, xpath_block, highlight,
pre_class = "downlit sourceCode r",
classes = classes,
replace = "node",
code = TRUE
)
# Identify <code> containing only text (i.e. no children)
# that are not descendants of an element where links are undesirable
bad_ancestor <- c("h1", "h2", "h3", "h4", "h5", "a", "summary")
bad_ancestor <- paste0("ancestor::", bad_ancestor, collapse = "|")
xpath_inline <- paste0(".//code[count(*) = 0 and not(", bad_ancestor, ")]")
# replace inline code "{packagename}" with linked text if possible
tweak_children(x, xpath_inline, autolink_curly, replace = "node")
# handle remaining inline code
tweak_children(x, xpath_inline, autolink, replace = "contents")
invisible()
}
tweak_children <- function(node, xpath, fun, ..., replace = c("node", "contents")) {
replace <- arg_match(replace)
nodes <- xml2::xml_find_all(node, xpath)
text <- xml2::xml_text(nodes)
replacement <- map_chr(text, fun, ...)
to_update <- !is.na(replacement)
old <- nodes[to_update]
if (replace == "contents") {
old <- xml2::xml_contents(old)
}
new <- lapply(replacement[to_update], as_xml)
xml2::xml_replace(old, new, .copy = FALSE)
invisible()
}
as_xml <- function(x) {
xml2::xml_contents(xml2::xml_contents(xml2::read_html(x)))[[1]]
}