Skip to content

Commit

Permalink
New str_extract() function to extract substrings by regex pattern.
Browse files Browse the repository at this point in the history
  • Loading branch information
agdamsbo committed Sep 5, 2023
1 parent 193844c commit bd647a9
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 0 deletions.
37 changes: 37 additions & 0 deletions R/str_extract.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#' Extract string based on regex pattern
#'
#' Use base::strsplit to
#' @param d vector of character strings
#' @param pattern regex pattern to match
#'
#' @return vector of character strings
#' @export
#'
#' @examples
#' ls <- do.call(c,lapply(sample(4:8,20,TRUE),function(i){
#' paste(sample(letters,i,TRUE),collapse = "")}))
#' ds <- do.call(c,lapply(1:20,function(i){
#' paste(sample(ls,1),i,sample(ls,1),"23",sep = "_")}))
#' str_extract(ds,"([0-9]+)")
str_extract <- function(d,pattern){
if (!is.vector(d)) stop("Please provide a vector")

## Drawing on the solution in REDCapCAST::strsplitx to split around pattern
nl <- strsplit(gsub("~~", "~", # Removes double ~
gsub("^~", "", # Removes leading ~
gsub(
# Splits and inserts ~ at all delimiters
paste0("(", pattern, ")"), "~\\1~", d
))), "~")

## Reusing the pattern, to sub with "" and match on length 0 to index the
## element containing the pattern. Only first occurance included.
indx <- lapply(nl,function(i){
match(0,nchar(sub(pattern,"",i)))
})

## Using lapply to subsset the given index for each element in list
do.call(c,lapply(seq_along(nl), function(i){
nl[[i]][indx[[i]]]
} ))
}
26 changes: 26 additions & 0 deletions man/str_extract.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions tests/testthat/test-str_extract.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# library(testthat)
test_that("str_extract returns correct", {
ls <- do.call(c, lapply(sample(4:8, 20, T), function(i) {
paste(sample(letters, i, T), collapse = "")
}))

ds <- do.call(c, lapply(1:20, function(i) {
paste(sample(ls, 1), i, sample(ls, 1), "23", sep = "_")
}))

expect_equal(nchar(str_extract(ds, "([0-9]+)")),c(rep(1,9),rep(2,11)))

expect_error(str_extract(data.frame(ds), "([0-9]+)"))

})

0 comments on commit bd647a9

Please sign in to comment.