-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New str_extract() function to extract substrings by regex pattern.
- Loading branch information
Showing
3 changed files
with
78 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#' Extract string based on regex pattern | ||
#' | ||
#' Use base::strsplit to | ||
#' @param d vector of character strings | ||
#' @param pattern regex pattern to match | ||
#' | ||
#' @return vector of character strings | ||
#' @export | ||
#' | ||
#' @examples | ||
#' ls <- do.call(c,lapply(sample(4:8,20,TRUE),function(i){ | ||
#' paste(sample(letters,i,TRUE),collapse = "")})) | ||
#' ds <- do.call(c,lapply(1:20,function(i){ | ||
#' paste(sample(ls,1),i,sample(ls,1),"23",sep = "_")})) | ||
#' str_extract(ds,"([0-9]+)") | ||
str_extract <- function(d,pattern){ | ||
if (!is.vector(d)) stop("Please provide a vector") | ||
|
||
## Drawing on the solution in REDCapCAST::strsplitx to split around pattern | ||
nl <- strsplit(gsub("~~", "~", # Removes double ~ | ||
gsub("^~", "", # Removes leading ~ | ||
gsub( | ||
# Splits and inserts ~ at all delimiters | ||
paste0("(", pattern, ")"), "~\\1~", d | ||
))), "~") | ||
|
||
## Reusing the pattern, to sub with "" and match on length 0 to index the | ||
## element containing the pattern. Only first occurance included. | ||
indx <- lapply(nl,function(i){ | ||
match(0,nchar(sub(pattern,"",i))) | ||
}) | ||
|
||
## Using lapply to subsset the given index for each element in list | ||
do.call(c,lapply(seq_along(nl), function(i){ | ||
nl[[i]][indx[[i]]] | ||
} )) | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# library(testthat) | ||
test_that("str_extract returns correct", { | ||
ls <- do.call(c, lapply(sample(4:8, 20, T), function(i) { | ||
paste(sample(letters, i, T), collapse = "") | ||
})) | ||
|
||
ds <- do.call(c, lapply(1:20, function(i) { | ||
paste(sample(ls, 1), i, sample(ls, 1), "23", sep = "_") | ||
})) | ||
|
||
expect_equal(nchar(str_extract(ds, "([0-9]+)")),c(rep(1,9),rep(2,11))) | ||
|
||
expect_error(str_extract(data.frame(ds), "([0-9]+)")) | ||
|
||
}) |