From 9d1927d52fa22144da7e2853fc63980206ff790c Mon Sep 17 00:00:00 2001 From: Steve Condylios Date: Tue, 11 Feb 2020 02:33:49 -0500 Subject: [PATCH] Add html_to_r() --- DESCRIPTION | 2 + NAMESPACE | 3 ++ R/extract.R | 88 +++++++++++++++++++++++++++++++++++++ man/html_to_r.Rd | 39 ++++++++++++++++ tests/testit/test-extract.R | 69 +++++++++++++++++++++++++++++ 5 files changed, 201 insertions(+) create mode 100644 R/extract.R create mode 100644 man/html_to_r.Rd create mode 100644 tests/testit/test-extract.R diff --git a/DESCRIPTION b/DESCRIPTION index 381636e6ed..d5b8aae1de 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -86,6 +86,7 @@ Authors@R: c( person("Sebastian", "Meyer", role = "ctb"), person("Sietse", "Brouwer", role = "ctb"), person(c("Simon", "de"), "Bernard", role = "ctb"), + person("Steve", "Condylios", role = c("ctb"), comment = c(ORCID = "0000-0003-0599-844X")), person("Sylvain", "Rousseau", role = "ctb"), person("Taiyun", "Wei", role = "ctb"), person("Thibaut", "Assus", role = "ctb"), @@ -153,6 +154,7 @@ Collate: 'defaults.R' 'concordance.R' 'engine.R' + 'extract.R' 'highlight.R' 'themes.R' 'header.R' diff --git a/NAMESPACE b/NAMESPACE index 2838edef5a..be59c55419 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,6 +53,7 @@ export(hook_pngquant) export(hook_purl) export(hook_r2swf) export(hook_scianimator) +export(html_to_r) export(image_uri) export(imgur_upload) export(include_app) @@ -138,6 +139,8 @@ import(graphics) import(methods) import(stats) import(utils) +importFrom(stringr,str_match_all) +importFrom(stringr,str_replace_all) importFrom(xfun,attr) importFrom(xfun,file_ext) importFrom(xfun,file_string) diff --git a/R/extract.R b/R/extract.R new file mode 100644 index 0000000000..10594a92ef --- /dev/null +++ b/R/extract.R @@ -0,0 +1,88 @@ +#' Extract R code from a knitted R Markdown HTML file. +#' +#' @name html_to_r +#' @author Steve Condylios +#' @usage html_to_r(rmarkdown_html, padding, inc_out) +#' @param rmarkdown_html The HTML output of a knitted Rmd file +#' @param padding Specifies what goes between the last character of a code block and the +#' first character of the next code block. Defaults to two newlines (which gives +#' the visual appearance of one newline between code blocks). +#' @param inc_out \code{TRUE}/\code{FALSE} as to whether to include output of code chucks. Defaults +#' to \code{TRUE}. +#' +#' @return A character vector of length 1 containing the R code extracted from the +#' R Markdown HTML file. +#' +#' @export +#' +#' @examples +#' cat((rmarkdown_html <- "Intro +#' ```R\n1 * 1\n```\nmore text\n```r\n2 * 2\n``` +#' some more text\n```\n3 * 3\n```\nThe End. +#' \n")) +#' +#' html_to_r(rmarkdown_html) +#' +#' cat(html_to_r(rmarkdown_html)) +#' +#' @importFrom stringr str_match_all str_replace_all + + + +html_to_r <- function(rmarkdown_html, padding, inc_out) { + + if(missing(padding)) { padding = "\n\n" } + if(missing(inc_out)) { inc_out = TRUE } + + extract_body <- function(rmarkdown_html) { + # light-weight replacement for html_nodes() + as.character(xml2::xml_find_all(xml2::read_html(rmarkdown_html), ".//body")) + } + + body <- extract_body(rmarkdown_html) + + remove_body_tags <- function(body) { + # light-weight replacement for html_text() + inner <- substr(body, 7, nchar(body)) + substr(inner, 1, nchar(inner)-6) + } + + inner <- remove_body_tags(body) + + # only include |``` if output is to be included + chunks <- if(inc_out == TRUE) { + str_match_all(inner, "(```R|```r|```)((.|\\s)*?)```") + }else{ + str_match_all(inner, "(```R|```r)((.|\\s)*?)```") + } + + + clean_chunks <- function(chunks) { + + # remove first char + neat_chunks <- substr(chunks[[1]][,3] , 2, nchar(chunks[[1]][,3])) + + # last first char + substr(neat_chunks , 1, nchar(neat_chunks) - 1) + } + + neat_chunks <- clean_chunks(chunks) + + + replace_character_entities <- function(char_entity){ + xml2::xml_text(xml2::read_html(paste0("", char_entity, ""))) + } + + neat_chunks <- unname(sapply(neat_chunks, replace_character_entities)) + + paste0(neat_chunks, collapse=padding) +} + + + + + + + + + diff --git a/man/html_to_r.Rd b/man/html_to_r.Rd new file mode 100644 index 0000000000..27db5302f6 --- /dev/null +++ b/man/html_to_r.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extract.R +\name{html_to_r} +\alias{html_to_r} +\title{Extract R code from a knitted R Markdown HTML file.} +\usage{ +html_to_r(rmarkdown_html, padding, inc_out) +} +\arguments{ +\item{rmarkdown_html}{The HTML output of a knitted Rmd file} + +\item{padding}{Specifies what goes between the last character of a code block and the +first character of the next code block. Defaults to two newlines (which gives +the visual appearance of one newline between code blocks).} + +\item{inc_out}{\code{TRUE}/\code{FALSE} as to whether to include output of code chucks. Defaults +to \code{TRUE}.} +} +\value{ +A character vector of length 1 containing the R code extracted from the + R Markdown HTML file. +} +\description{ +Extract R code from a knitted R Markdown HTML file. +} +\examples{ +cat((rmarkdown_html <- "Intro +```R\n1 * 1\n```\nmore text\n```r\n2 * 2\n``` +some more text\n```\n3 * 3\n```\nThe End. +\n")) + +html_to_r(rmarkdown_html) + +cat(html_to_r(rmarkdown_html)) + +} +\author{ +Steve Condylios +} diff --git a/tests/testit/test-extract.R b/tests/testit/test-extract.R new file mode 100644 index 0000000000..592d304269 --- /dev/null +++ b/tests/testit/test-extract.R @@ -0,0 +1,69 @@ +library(testit) + +assert( + 'html_to_r() extracts R code within ```R decorator', + identical( + { + rmarkdown_html <- "Intro + ```R\n1 * 1\n```\nmore text\n\n" + + html_to_r(rmarkdown_html) + + }, + + "1 * 1") +) + + + +assert( + 'html_to_r() extracts R code within ```r decorator', + identical( + { + rmarkdown_html <- "more text\n```r\n2 * 2\n``` + some more text\n\n" + + html_to_r(rmarkdown_html) + + }, + + "2 * 2") +) + + + +assert( + 'html_to_r() extracts R code within ``` decorator', + identical( + { + rmarkdown_html <- "some more text\n```\n3 * 3\n```\nThe End. + \n" + + html_to_r(rmarkdown_html) + + }, + + "3 * 3") +) + + + +assert( + 'html_to_r() extracts R code from R Markdown HTML file containing a varity of decorators', + identical( + { + rmarkdown_html <- "Intro + ```R\n1 * 1\n```\nmore text\n```r\n2 * 2\n``` + some more text\n```\n3 * 3\n```\nThe End. + \n" + + html_to_r(rmarkdown_html) + + }, + + "1 * 1\n\n2 * 2\n\n3 * 3") +) + + + +