## Installs and loads papisr
devtools::install_github("stasvlasov/papisr")
library("papisr")
Suppose you have a papis library located in testdata/papis
that includes 4 simple bibliographic records (see below for the content of files). This example papis library is provided with the package and accessible with system.file()
Now, for example, if you want to tabulate year
and url
fields and number of tags
for each record that is not tagged as ‘classics’ you can do it with the following simple script:
system.file("testdata", "papis", package = "papisr") |>
papisr::collect_papis_records(!("classics" %in% info$tags)) |>
papisr::tabulate_papis_records(`Year` = info$year
, `URL` = info$url
, `No. of tags` = length(info$tags))
The script returns the following data.frame:
Year | URL | No. of tags |
---|---|---|
2022 | example.com | 2 |
1985 | uvt.nl | 2 |
2222 | 1 |
Here 3 out of 4 records were tabulated because the one records with tag
“classics” was filtered out.
testdata/papis/a/info.yml
tags:
- data
- research
url: example.com
year: 2022
testdata/papis/b/info.yml
tags:
- research
- phd
url: uvt.nl
year: 1985
testdata/papis/c/INFO.YML
tags: data
year: 2222
testdata/papis/d/info.yaml
tags: classics
year: 2000
(save-excursion
(let ((calls '(
"reset-working-directory"
"readme-header"
"tangle-readme"
"tangle-buffer"
"description-and-license"
"roxygenize"
"setup-tinytest"
))
;; turn off babel prompts
org-confirm-babel-evaluate)
(mapcar
(lambda (name)
(save-excursion
(org-babel-goto-named-src-block name)
(or (org-babel-execute-src-block-maybe)
(org-babel-lob-execute-maybe))))
calls)))
## Remove tangled and generated files
## --------------------------------------------------------------------------------
dirs_to_remove <-c(
"man"
, "inst"
, "tests"
, "data"
, "R"
)
files_to_remove <- c(
"DESCRIPTION"
, "NAMESPACE"
, "LICENSE.md"
, "LICENSE"
, "README.md"
, ".gitignore"
, ".Rbuildignore"
)
dirs_to_remove |>
lapply(list.files
, recursive = TRUE
, full.names = TRUE
, include.dirs = TRUE) |>
unlist() |>
rev() |>
c(dirs_to_remove, files_to_remove) |>
sapply(\(file) if(file.exists(file)) file.remove(file))
(org-babel-tangle)
tinytest::setup_tinytest(".")
https://github.com/r-lib/actions/blob/v1/examples/README.md
usethis::use_github_action("check-release")
usethis::use_github_action("test-coverage")
usethis::use_github_action("pkgdown")
usethis::use_github_actions_badge(name = "R-CMD-check")
*.project.org
.DS_Store
# History files
.Rhistory
.Rapp.history
# Session Data files
.RData
# User-specific files
.Ruserdata
# Example code in package build process
*-Ex.R
# Output files from R CMD build
/*.tar.gz
# Output files from R CMD check
/*.Rcheck/
# RStudio files
.Rproj.user/
# produced vignettes
vignettes/*.html
vignettes/*.pdf
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth
# knitr and R markdown default cache directories
*_cache/
/cache/
# Temporary files created by R markdown
*.utf8.md
*.knit.md
# R Environment Variables
.Renviron
^LICENSE\.md$
^\.github$
^_pkgdown\.yml$
^docs$
^pkgdown$
(require 'org-goto)
(save-excursion
(org-goto--local-search-headings "frontpage" nil t)
;; (org-pandoc-export-to-markdown nil 'subtreep)
(org-md-export-to-markdown nil 'subtreep)
)
names | link |
---|---|
tinytest | https://github.com/markvanderloo/tinytest/blob/master/pkg/README.md |
checkmate | https://mllg.github.io/checkmate/ |
packages | current_version | ensure_version | link |
---|---|---|---|
R | nil | 4.0 | |
yaml | 2.3.5 | 2.3.0 | R package for converting objects to and from YAML |
packageVersion(pkg)
## set description
suppressWarnings(file.remove("DESCRIPTION"))
list(Title = "<<package-title>>"
, Description = "<<package-description>>"
, Date = Sys.Date()
, `Authors@R` = 'as.person("Stanislav Vlasov <[email protected]> [aut, cre]")'
, Version = "0.0.0.9000"
, URL = "https://stasvlasov.github.io/papisr/"
, BugReports = "https://github.com/stasvlasov/papisr/issues"
, Suggests = paste(suggest_packages$names, collapse = ", ")
, References = 'Alejandro Gallo, Nicolò Balzarotti, Michael Plews, Alex Fikl, Jackson Woodruff, Matthieu Coudron, Alexander Von Moll, gouderm, Sébastien M. Popoff, Henrik Grimler, JP-Ellis, Katrin Leinweber, Manuel Haussmann, Andrew Ramsey, Andrey Akinshin, CosmosAtlas, dbruggner, hayk, Henning Timm, … prataffel. (2022). papis/papis: VERSION 0.12 (v0.12). Zenodo. https://doi.org/10.5281/zenodo.6573964'
, Depends = paste(
paste0(dependencies$packages, " (>= ", dependencies$ensure_version, ")")
, collapse = ", ")) |>
usethis::use_description()
## Set license
suppressWarnings(file.remove("LICENSE", "LICENSE.md"))
usethis::use_mit_license()
## usethis::use_lgpl_license()
## usethis::use_apache_license()
#' @details
#' <<package-title>>
#'
#' <<package-description>>
#' @md
"_PACKAGE"
## Update name spaces and documentation for functions
roxygen2::roxygenize()
## Unload and uninstall package
## --------------------------------------------------------------------------------
detach(package:papisr, unload = TRUE)
remove.packages("papisr")
## Install package
## --------------------------------------------------------------------------------
devtools::install(".")
##' Collects papis records
##'
##' The collection is done by (1) looking for all subdirectories with info.yml file that defines papis record, (2) filtering those records and (3) returning lists of 'path' (root dir of papis record) and 'info' (content of info.yml) for each record
##'
##' @param dir A character vector of directories to scan recursevely for papis records.
##' @param filter_info Exprocion that allows to filter info.yml files that is evaluated in the environment with two variables bound for each record - 'path' (root dir of papis record) and 'info' (content of info.yml). The expression should return TRUE in order for record to be filtered in. Other returned value will filter the record out. Example: `'data' %in% info$tags` will filter only records that have tag 'data' in their info.yml descriptions
##' @return list of 'path' (root dir of papis record) and 'info' (content of info.yml) for each record
##'
##' @md
##' @export
collect_papis_records <- function(dir, filter_info) {
env <- parent.frame()
sys_call <- sys.call()
papis_info_yml_files <-
list.files(dir
, pattern = "^info\\.y[a]?ml$"
, full.names = TRUE
, recursive = TRUE
, ignore.case = TRUE)
papis_records <-
papis_info_yml_files |>
lapply(\(info_yml_file)
list(path = dirname(info_yml_file)
, info = yaml::read_yaml(info_yml_file)))
## filter info.yml files based on some filter criteria
if(length(sys_call) == 3) {
filter_info <- sys_call[[3]]
papis_records_filter <-
papis_records |>
sapply(\(papis_record) {
filter_info |>
eval(papis_record, env) |>
isTRUE()
})
return(papis_records[papis_records_filter])
} else {
return(papis_records)
}
}
expect_equal(
system.file("testdata", "papis", package = "papisr") |>
collect_papis_records("data" %in% info$tags) |>
lapply(`[[`, "info")
## remove paths as in test environment it is different
, list(list(tags = c("data", "research"), url = "example.com",
year = 2022L), list(tags = "data", year = 2222L)))
##' Tablulate papis records
##'
##' @param papis_records List of papis records as returned by `collect_papis_records()`
##' @param ... Colums specification as named expressions that are evaluated in papis record environment where two variables are bound - `path` and `info` (see `collect_papis_records()` for details. The evaluation environment is enclosed in parent frame (aka `tabulate_papis_records` calling environment)
##'
##' @param .omit_all_na_rows Whether to remove rows with all NAs.
##' @param .bind_dot_n_and_dot_dot Whethet to bind two extra variables to the evaluation environment for columns. Namely `.n` (current row/record's number) and `..` as the entire `papis_records` input.
##' @return Data frame. If some of the column values have length > 1 then the table will be filled with these values.
##'
##' @md
##' @export
tabulate_papis_records <- function(papis_records, ...
, .omit_all_na_rows = TRUE
, .bind_dot_n_and_dot_dot = TRUE) {
fun_call <- sys.call()
col_names <- ...names()
env <- parent.frame()
if(.bind_dot_n_and_dot_dot) {
papis_records <-
mapply(
\(papis_record, n) {
c(papis_record, list(.n = n, .. = papis_records))
}
, papis_records
, seq_along(papis_records)
, SIMPLIFY = FALSE)
}
papis_table <-
lapply(papis_records, \(papis_record) {
lapply(col_names
, \(col_name) {
col_val <-
fun_call[[col_name]] |>
eval(papis_record, enclos = env)
if(length(col_val) == 0) {
return(NA)
} else if(is.list(col_val) || length(col_val) > 1) {
col_val <-
col_val |>
lapply(\(col_val_el) {
if(length(col_val_el) == 0) NA else col_val_el
}) |>
unlist()
return(col_val)
} else {
return(col_val)
}
}) |>
`names<-`(col_names) |>
as.data.frame(stringsAsFactors = FALSE
, check.names = FALSE)
})
papis_table <- do.call(rbind, papis_table)
if(.omit_all_na_rows) {
papis_table <- papis_table[rowSums(is.na(papis_table)) != ncol(papis_table), ]
}
return(papis_table)
}
expect_equal(system.file("testdata", "papis", package = "papisr") |>
collect_papis_records() |>
tabulate_papis_records(year = info$year
, url = info$url
, tag = length(info$tags))
, structure(list(year = c(2022L, 1985L, 2222L, 2000L), url = c("example.com",
"uvt.nl", NA, NA), tag = c(2L, 2L, 1L, 1L)), row.names = c(NA,
-4L), class = "data.frame"))
## test filling the columns
expect_equal(
system.file("testdata", "papis", package = "papisr") |>
collect_papis_records() |>
tabulate_papis_records(year = info$year
, url = info$url
, tag = info$tags)
, structure(list(year = c(2022L, 2022L, 1985L, 1985L, 2222L, 2000L
), url = c("example.com", "example.com", "uvt.nl", "uvt.nl",
NA, NA), tag = c("data", "research", "research", "phd", "data",
"classics")), row.names = c(NA, -6L), class = "data.frame"))
## test eval environment
test_function_length_plus <- function(x) length(x) + 1
expect_equal(
system.file("testdata", "papis", package = "papisr") |>
collect_papis_records() |>
tabulate_papis_records(year = info$year
, url = info$url
, tag = test_function_length_plus(info$tags))
, structure(list(year = c(2022L, 1985L, 2222L, 2000L), url = c("example.com",
"uvt.nl", NA, NA), tag = c(3, 3, 2, 2)), row.names = c(NA, -4L
), class = "data.frame"))
validate_with_json_schema <- function(yml_file = "eee-ppat/info.yml"
, json_shema = "test-schema.json"
## , yq_path = "."
, yq_path = '.data_description') {
if(all(dependencies <- Sys.which(c('yq', 'ajv')) != "")) {
tmp_json_file_name <-
"tmp.json"
## tempfile("papis-info-yml-data", fileext = c(".json"))
file.create(tmp_json_file_name)
tmp_json_file <- file(tmp_json_file_name)
## do things here
yq_cmd <- paste0(
"yq --output-format=json '", yq_path, "' ", shQuote(yml_file)
)
system(yq_cmd, intern = TRUE) |>
writeLines(tmp_json_file)
ajv_cmd <- paste0(
"ajv test",
" -s ", shQuote(json_shema)
, " -d ", shQuote(tmp_json_file_name)
, " --all-errors"
, " --valid"
, " --messages=false"
)
ajv_out <-
system(ajv_cmd
, intern = TRUE
, ignore.stderr = TRUE, invisible = TRUE)
close(tmp_json_file)
## file.remove(tmp_json_file_name)
} else {
stop("The following dependencies are not available on your system: "
, paste(names(dependencies[!dependencies]), collapse = ", "))
}
return(ajv_out)
}