Skip to content

Commit

Permalink
#16 more work on cff_reader - add tests and checks for required eleme…
Browse files Browse the repository at this point in the history
…nts of references
  • Loading branch information
sckott committed Mar 6, 2020
1 parent ea5326a commit 53b255b
Show file tree
Hide file tree
Showing 9 changed files with 314 additions and 18 deletions.
54 changes: 54 additions & 0 deletions R/a_types.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#' Reference types allowed by cff
#' @references http://bit.ly/2PRK1Vt
#' @details The values accepted for the `type` field of a reference in the
#' cff citation format
#' @name cff_reference_types
#' @docType data
cff_reference_types <- c(
  "art", "article", "audiovisual",
  "bill", "blog", "book",
  "catalogue", "conference", "conference-paper",
  "data", "database", "dictionary",
  "edited-work", "encyclopedia",
  "film-broadcast",
  "generic", "government-document", "grant",
  "hearing", "historical-work",
  "legal-case", "legal-rule",
  "magazine-article", "manual", "map", "multimedia", "music",
  "newspaper-article",
  "pamphlet", "patent", "personal-communication", "proceedings",
  "report",
  "serial", "slides", "software", "software-code", "software-container",
  "software-executable", "software-virtual-machine", "sound-recording",
  "standard", "statute",
  "thesis",
  "unpublished",
  "video",
  "website"
)
110 changes: 94 additions & 16 deletions R/cff_reader.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,36 @@
#' @family cff
#' @references CFF format:
#' https://github.com/citation-file-format/citation-file-format/blob/master/README.md
#' @details CFF only supports one citation, so `many` will always be
#' `FALSE`. You can, however, have many references in your CFF file
#' associated with the citation.
#'
#' `references` is an optional component in cff files. If included, we check
#' the following:
#' - each reference must have the 3 required fields: type, authors, title
#' - type must be in the allowed set, see [cff_reference_types]
#' - the elements within authors must each be an entity or person object
#' https://github.com/citation-file-format/citation-file-format/blob/master/README.md#entity-objects
#' https://github.com/citation-file-format/citation-file-format/blob/master/README.md#person-objects
#' - title must be a string
#' @examples
#' (z <- system.file('extdata/citation.cff', package = "handlr"))
#' cff_reader(x = z)
#' res <- cff_reader(x = z)
#' res
#' res$cff_version
#' res$b_version
#' res$message
#' res$id
#' res$doi
#' res$title
#' res$author
#' res$references
#'
#' # no references
#' (z <- system.file('extdata/citation-norefs.cff', package = "handlr"))
#' out <- cff_reader(x = z)
#' out
#' out$references
cff_reader <- function(x) {
assert(x, "character")
txt <- if (is_file(x)) yaml::yaml.load_file(x) else yaml::yaml.load(x)
Expand All @@ -26,40 +53,91 @@ cff_read_one <- function(x) {
type = "Person",
name = pcsp(pcsp(z$`given-names`), pcsp(z$`family-names`)),
givenName = pcsp(z$`given-names`),
familyName = pcsp(z$`family-names`)
familyName = pcsp(z$`family-names`),
orcid = pcsp(z$orcid)
)
})
state <- if (!is.null(doi)) "findable" else "not_found"

type <- "SoftwareSourceCode"
list(
"cff_version" = x$`cff-version`,
"message" = x$message,
# "key" = attr(x, "key"),
"id" = normalize_doi(doi),
# "type" = type,
# "bibtex_type" = type,
# "citeproc_type" = CFF_TO_CP_TRANSLATIONS[[type]] %||% "misc",
# "ris_type" = CFF_TO_RIS_TRANSLATIONS[[type]] %||% "GEN",
# "resource_type_general" = SO_TO_DC_TRANSLATIONS[[type]],
"type" = type,
# "additional_type" = CFF_TO_CR_TRANSLATIONS[[type]] %||% type,
"citeproc_type" = SO_TO_CP_TRANSLATIONS[[type]] %||% "article-journal",
"bibtex_type" = SO_TO_BIB_TRANSLATIONS[[type]] %||% "misc",
"ris_type" = SO_TO_RIS_TRANSLATIONS[[type]] %||% "GEN",
"resource_type_general" = SO_TO_DC_TRANSLATIONS[[type]] %||% "Other",
"identifier" = doi,
"doi" = doi,
"b_url" = x$url %||% NULL,
"title" = x$title %||% NULL,
"author" = author,
"publisher" = x$publisher %||% NULL,
"is_part_of" = NULL,
"date_published" = x$`date-released` %||% NULL,
"b_version" = x$version %||% NULL,
"volume" = x$volume %||% NULL,
"first_page" = NULL,
"last_page" = NULL,
# "description" = list(text = x$abstract %||% NULL && sanitize(x$abstract)),
"description" = list(text = x$abstract %||% NULL),
"license" = list(id = x$copyright %||% NULL),
"state" = state
"license" = list(id = x$license %||% NULL),
"state" = state,
"references" = process_refs(x$references)
# "description" = list(text = x$abstract %||% NULL && sanitize(x$abstract)),
# "first_page" = NULL,
# "last_page" = NULL,
# "publisher" = x$publisher %||% NULL,
# "is_part_of" = NULL,
# "volume" = x$volume %||% NULL,
)
}

# Validate the optional `references` component of a parsed CFF file.
#
# @param w the `references` element as loaded from YAML (a list with one
#   element per reference), or NULL when no references are present
# @return NULL when `w` is NULL; otherwise `w` unchanged once every check
#   has passed
#
# Stops with an error when a reference lacks one of the required fields
# (type, authors, title), when a title is not character, when a type is not
# in `cff_reference_types`, or when an author is neither an entity nor a
# person object.
process_refs <- function(w) {
  if (is.null(w)) return(NULL)

  # check that required fields are given. The test is "every required name
  # is present", not "every present name is required": references may carry
  # additional fields beyond the required ones.
  cff_required_nms <- c("type", "authors", "title")
  cff_required_nms_c <- paste0(cff_required_nms, collapse = ", ")
  for (i in seq_along(w)) {
    has_required <- all(cff_required_nms %in% names(w[[i]]))
    if (!has_required) {
      stop("reference ", i, " malformed; must have required fields: ",
        cff_required_nms_c, call. = FALSE)
    }
  }

  # check that title field is a string; `[[` is used rather than `$` so a
  # differently named field can never be picked up through partial matching
  for (ref in w) {
    if (!is.character(ref[["title"]])) {
      stop("'title' must be a string", call. = FALSE)
    }
  }

  # check that type values are within allowed set
  types <- vapply(w, "[[", "", "type")
  mtch_type <- types %in% cff_reference_types
  if (!all(mtch_type)) {
    stop("these reference types not in allowed set: ",
      paste0(types[!mtch_type], collapse = ", "),
      " (see ?cff_reader)", call. = FALSE)
  }

  # check that each author is an entity or a person object; the authors of
  # all references are flattened one level into a single list first
  auths <- unlist(lapply(w, "[[", "authors"), FALSE)
  for (auth in auths) {
    if (!is_cff_entity(auth) && !is_cff_person(auth)) {
      stop("each element in 'authors' must be of type entity or person\n",
        " see ?cff_reader Details", call. = FALSE)
    }
  }

  return(w)
}

# check that ALL list elements are named
# Returns FALSE when `x` has no names attribute at all: names(x) is NULL in
# that case and all(nzchar(NULL)) would otherwise be vacuously TRUE.
is_named <- function(x) {
  nms <- names(x)
  !is.null(nms) && all(nzchar(nms))
}
# TRUE when `x` is a list whose elements are all named and which has a
# `name` field; FALSE otherwise.
is_cff_entity <- function(x) {
  if (!is.list(x)) {
    return(FALSE)
  }
  if (!is_named(x)) {
    return(FALSE)
  }
  is.element("name", names(x))
}
# TRUE when `x` is a list whose elements are all named and which has both a
# `family-names` and a `given-names` field; FALSE otherwise.
is_cff_person <- function(x) {
  if (!is.list(x) || !is_named(x)) {
    return(FALSE)
  }
  person_keys <- c("family-names", "given-names")
  length(setdiff(person_keys, names(x))) == 0L
}

# CFF_TO_CP_TRANSLATIONS <- list(
# article = "article-journal",
# phdthesis = "thesis"
Expand Down
16 changes: 15 additions & 1 deletion R/client.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
handlr_readers <- c('citeproc', 'ris', 'bibtex', 'codemeta')
handlr_readers <- c('citeproc', 'ris', 'bibtex', 'codemeta', 'cff')
handlr_writers <- c('citeproc', 'ris', 'bibtex', 'schema_org',
'rdfxml', 'codemeta')

Expand Down Expand Up @@ -102,6 +102,15 @@ handlr_writers <- c('citeproc', 'ris', 'bibtex', 'schema_org',
#' x$read("codemeta")
#' x$parsed
#' x$write("codemeta")
#'
#' # cff
#' (z <- system.file('extdata/citation.cff', package = "handlr"))
#' (x <- HandlrClient$new(x = z))
#' x$path
#' x$format_guessed
#' x$read("cff")
#' x$parsed
#' x$write("codemeta")
#'
#' # > 1
#' z <- system.file('extdata/citeproc-many.json', package = "handlr")
Expand Down Expand Up @@ -201,6 +210,7 @@ HandlrClient <- R6::R6Class(
ris = ris_reader(self$path %||% self$string, ...),
bibtex = bibtex_reader(self$path %||% self$string, ...),
codemeta = codemeta_reader(self$path %||% self$string, ...),
cff = cff_reader(self$path %||% self$string, ...),
stop("format must be one of ",
paste(handlr_readers, collapse = ", "))
)
Expand Down Expand Up @@ -284,6 +294,10 @@ HandlrClient <- R6::R6Class(
# && self$ext == "bib"
) {
return("bibtex")
} else {
# decide between ris and cff
fmt <- if (!is.null(cff_reader(x)$cff_version)) "cff" else "ris"
return(fmt)
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions inst/extdata/citation-norefs.cff
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
cff-version: 1.1.0
message: If you use this software, please cite it as below.
authors:
- family-names: Druskat
given-names: Stephan
orcid: https://orcid.org/0000-0003-4925-7248
title: "My Research Software"
version: 2.0.4
doi: 10.5281/zenodo.1234
date-released: 2017-12-18
13 changes: 13 additions & 0 deletions inst/extdata/citation.cff
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,16 @@ title: "My Research Software"
version: 2.0.4
doi: 10.5281/zenodo.1234
date-released: 2017-12-18
references:
- type: book
authors:
- family-names: Doe
given-names: Jane
- name: "Foo Bar Working Group"
website: https://foo-bar.com
title: The science of citation
- type: software
authors:
- family-names: Doe
given-names: John
title: Software Citation Tool
9 changes: 9 additions & 0 deletions man/HandlrClient.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 32 additions & 1 deletion man/cff_reader.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/cff_reference_types.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 53b255b

Please sign in to comment.