Skip to content

Commit

Permalink
Improve tidyverse compatibility
Browse files Browse the repository at this point in the history
- swap from `data.frame` to `tibble` format throughout package
- decommission `class() ==` syntax in favor of `inherits()` #22
- make `merge_columns()` a shell for `dplyr::bind_rows()`
  • Loading branch information
mjwestgate committed Jul 4, 2023
1 parent b048cc0 commit 03ba4dd
Show file tree
Hide file tree
Showing 10 changed files with 55 additions and 53 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ Description: A critical first step in systematic literature reviews
Depends: R (>= 3.5.0)
Imports:
rlang,
stringdist
stringdist,
tibble
Suggests:
knitr,
rmarkdown,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ export(string_soundex)
export(write_bib)
export(write_refs)
export(write_ris)
importFrom(dplyr,bind_rows)
importFrom(rlang,abort)
importFrom(rlang,warn)
importFrom(stringdist,stringdist)
importFrom(tibble,tibble)
2 changes: 1 addition & 1 deletion R/bibliography_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ as.data.frame.bibliography <- function(x, ...){
#' @export
as.bibliography <- function(x, ...){

if(class(x) != "data.frame"){
if(!inherits(x, "data.frame")){
abort("as.bibliography can only be called for objects of class 'data.frame'")
}

Expand Down
17 changes: 17 additions & 0 deletions R/clean_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ clean_df <- function(data){
if(any(colnames(data) == "author")){
data$author <- clean_authors(data$author)
}
data <- remove_factors(data)
return(data)
}

Expand Down Expand Up @@ -50,3 +51,19 @@ clean_colnames <- function(
x <- gsub(" ", "_", x)
return(x)
}

#' Remove factors from an object
#'
#' Internal functions called by `clean_df()`:
#' @description This function converts factors to characters to avoid errors with
#' levels.
#' @param z A data.frame
#' @return Returns the input data.frame with all factors converted to character.
#' @noRd
#' @keywords Internal
remove_factors <- function(z){
z[] <- lapply(z, function(x){
if(is.factor(x)){as.character(x)}else{x}
})
return(z)
}
2 changes: 1 addition & 1 deletion R/deduplication_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ find_duplicates <- function(
if(inherits(data, "data.frame")){
abort("'data' must be a character vector, not a data.frame")
}
if(class(data) != "character"){
if(!inherits(data, "character")){
data <- as.character(data)
}

Expand Down
2 changes: 1 addition & 1 deletion R/format_citation.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ format_citation <- function(
abort("format_citation expects input data to be an object of class data.frame, bibliography, or list")
}

if(class(data)!="data.frame"){
if(!inherits(data, "data.frame")){
data <- as.data.frame(data)
}

Expand Down
61 changes: 13 additions & 48 deletions R/generic_data_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ match_columns <- function(df){

#' Bind two or more data frames with different columns
#'
#' @description Takes two or more data.frames with different column names or
#' different column orders and binds them to a single data.frame.
#' NOTE: Should be possible to replace this with `dplyr::bind_rows()`
#' @description Takes two or more `data.frames` with different column names or
#' different column orders and binds them to a single `data.frame.` This
#' function is maintained for backwards compatibility, but it is synonymous with
#' `dplyr::bind_rows()` and will be depracated in future.
#' @param x Either a data.frame or a list of data.frames.
#' @param y A data.frame, optional if x is a list.
#' @return Returns a single data.frame with all the input data frames merged.
#' @example inst/examples/merge_columns.R
#' @importFrom dplyr bind_rows
#' @importFrom rlang abort
#' @export
merge_columns <- function(
Expand All @@ -48,57 +50,20 @@ merge_columns <- function(
if(missing(x)){
abort("object x is missing with no default")
}

if(!any(c("data.frame", "list") == class(x))){
if(!(inherits(x, "data.frame") | inherits(x, "list"))){
abort("object x must be either a data.frame or a list")
}

if(class(x) == "data.frame"){
if(inherits(x, "data.frame")){
if(missing(y)){
abort("If x is a data.frame, then y must be supplied")
return(x)
# abort("If x is a data.frame, then y must be supplied")
}else{
x <- list(x, y)
}
x <- list(x, y)
}else{ # i.e. for lists
if(!all(unlist(lapply(x, class)) == "data.frame")){
if(!all(unlist(lapply(x, function(a){inherits(a, "data.frame")})))){
abort("x must only contain data.frames")
}
}

x <- lapply(x, remove_factors)

col_names_all <- unique(unlist(lapply(x, colnames)))

result_list <- lapply(x, function(a, cn){
missing_names <- !(cn %in% colnames(a))
if(any(missing_names)){
new_names <- cn[missing_names]
result <- data.frame(
c(a, sapply(new_names, function(b){NA})),
stringsAsFactors = FALSE)
return(result[, cn])
}else{
return(a[, cn])
}
},
cn = col_names_all
)

return(do.call(rbind, result_list))

}

#' Remove factors from an object
#'
#' Internal functions called by merge_columns:
#' @description This function converts factors to characters to avoid errors with
#' levels.
#' @param z A data.frame
#' @return Returns the input data.frame with all factors converted to character.
#' @noRd
#' @keywords Internal
remove_factors <- function(z){
z[] <- lapply(z, function(x){
if(is.factor(x)){as.character(x)}else{x}
})
return(z)
bind_rows(x)
}
3 changes: 2 additions & 1 deletion R/read_refs.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ read_refs <- function(
#' @return Returns a data.frame or list of assembled search results.
#' @importFrom rlang abort
#' @importFrom rlang warn
#' @importFrom tibble tibble
#' @noRd
#' @keywords Internal
read_ref <- function(
Expand Down Expand Up @@ -141,7 +142,7 @@ read_ref <- function(
}else{
if(return_df){df <- as.data.frame.bibliography(df)}
}
if(inherits(df, "data.frame")){df <- clean_df(df)}
if(inherits(df, "data.frame")){df <- tibble(clean_df(df))}
if(verbose){cat("done\n")}
return(df)

Expand Down
14 changes: 14 additions & 0 deletions tests/testthat/test-clean.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,17 @@ test_that("clean_colnames() works", {
expect_false(any(duplicated(cleaned)))
expect_false(any(grepl("^(X|Y|Z)\\.+", cleaned)))
})

test_that("clean_df() cleans authors, colnames and factors", {
test_df <- data.frame(
authors = c("Haddaway, N.R., A. Feirman AND M.J. Grainger", "Some authors"),
"..misc." = c("text", "text2"),
JOURNAL = as.factor(c("A journal", "Another journal"))
)
result <- clean_df(test_df)
expect_false(any(grepl("AND", result$authors)))
test_cols <- colnames(result)
expect_false(any(grepl("[[:punct::]]", test_cols)))
expect_equal(tolower(test_cols), test_cols)
expect_false(any(unlist(lapply(result, is.factor))))
})
2 changes: 2 additions & 0 deletions tests/testthat/test-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ test_that("read_ref() works for simple imports", {
df <- read_refs("testdata/eviatlas.txt",
return_df = TRUE,
verbose = FALSE)
expect_true(inherits(df, c("tbl", "data.frame")))
expect_equal(nrow(df), 1)
expect_true(any(grep("EviAtlas", df[1, ])))
})
Expand All @@ -12,6 +13,7 @@ test_that("read_refs() works for simple imports", {
df <- read_refs(testfiles,
return_df = TRUE,
verbose = FALSE)
expect_true(inherits(df, c("tbl", "data.frame")))
expect_equal(nrow(df), 4)
expect_true(any(grep("EviAtlas", df[1, ])))
expect_true(any(grep("litsearchr", df[2, ])))
Expand Down

0 comments on commit 03ba4dd

Please sign in to comment.