Skip to content

Commit

Permalink
Merge pull request #311 from OHDSI/main
Browse files Browse the repository at this point in the history
merge main
  • Loading branch information
edward-burn authored Sep 9, 2024
2 parents 9914ba9 + 1a3eb7d commit 5a08988
Show file tree
Hide file tree
Showing 88 changed files with 1,209 additions and 927 deletions.
12 changes: 5 additions & 7 deletions R/collapseCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
#' `collapseCohorts()` concatenates cohort records, allowing for some number
#' of days between one finishing and the next starting.
#'
#' @param cohort A cohort table
#' @param cohortId IDs of the cohorts to modify. If NULL, all cohorts will be
#' used; otherwise, only the specified cohorts will be modified, and the
#' rest will remain unchanged.
#' @param gap Number of days to use when merging cohort entries.
#' @param name Name of the cohort table.
#' @inheritParams cohortDoc
#' @inheritParams cohortIdModifyDoc
#' @inheritParams gapDoc
#' @inheritParams nameDoc
#'
#' @export
#'
Expand All @@ -24,7 +22,7 @@ collapseCohorts <- function(cohort,
validateCDM(cdm)
cohort <- validateCohortTable(cohort, dropExtraColumns = TRUE)
ids <- settings(cohort)$cohort_definition_id
cohortId <- validateCohortId(cohortId, ids)
cohortId <- validateCohortId(cohortId, settings(cohort))
if (gap != Inf) {
gap <- validateGap(gap)
}
Expand Down
131 changes: 78 additions & 53 deletions R/conceptCohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@
#' * If a record ends outside of an observation period it will be
#' trimmed so as to end at the preceding observation period end date.
#'
#' @param cdm A cdm reference.
#' @param conceptSet A conceptSet, which can either be a codelist
#' or a conceptSetExpression.
#' @param name Name of the cohort in the cdm object.
#' @inheritParams cdmDoc
#' @inheritParams conceptSetDoc
#' @inheritParams nameDoc
#' @param exit How the cohort end date is defined. Can be either
#' "event_end_date" or "event_start_date".
#' @param useSourceFields If TRUE, the source concept_id fields will also be
Expand Down Expand Up @@ -113,9 +112,9 @@ conceptCohort <- function(cdm,
cdm[[tableCohortCodelist]] <- NULL

if (cdm[[name]] |>
utils::head(1) |>
dplyr::tally() |>
dplyr::pull("n") == 0) {
utils::head(1) |>
dplyr::tally() |>
dplyr::pull("n") == 0) {
cli::cli_inform(c("i" = "No cohort entries found, returning empty cohort table."))
cdm[[name]] <- cdm[[name]] |>
dplyr::select(
Expand Down Expand Up @@ -180,69 +179,65 @@ unerafiedConceptCohort <- function(cdm,
extraCols,
exit,
useSourceFields) {

domains <- sort(cdm[[tableCohortCodelist]] |>
dplyr::select("domain_id") |>
dplyr::distinct() |>
dplyr::pull())
dplyr::select("domain_id") |>
dplyr::distinct() |>
dplyr::pull())

tableRef <- domainsData |>
dplyr::filter(.data$domain_id %in% .env$domains)

if(isFALSE(useSourceFields)){
fields <- "concept"
} else {
fields <- c("concept", "source")
}

cohorts <- list()
workingTblNames <- paste0(
omopgenerics::uniqueTableName(),
"_",
seq_along(tableRef$domain_id)
)
for (k in seq_along(tableRef$domain_id)) {
for (j in seq_along(fields)){
domain <- tableRef$domain_id[k]

table <- tableRef$table[k]
if(fields[j] == "concept"){
concept <- tableRef$concept[k]
} else {
concept <- tableRef$source[k]
}
start <- tableRef$start[k]
if (exit == "event_start_date") {
end <- start
} else {
end <- tableRef$end[k]
}
domain <- tableRef$domain_id[k]
n <- cdm[[tableCohortCodelist]] |>
dplyr::filter(.data$domain_id %in% .env$domain) |>
dplyr::tally() |>
dplyr::pull()

if (table %in% names(cdm)) {
nameK <- paste(workingTblNames[k])
start <- tableRef$start[k]
if (exit == "event_start_date") {
end <- start
} else {
end <- tableRef$end[k]
}
cli::cli_inform(
c("i" = "Subsetting table {.strong {table}} using {n} concept{?s} with domain: {.strong {domain}}.")
)
tempCohort <- cdm[[table]] |>
dplyr::select(
"subject_id" = "person_id",
"concept_id" = dplyr::all_of(.env$concept),
"cohort_start_date" = dplyr::all_of(.env$start),
"cohort_end_date" = dplyr::all_of(.env$end),
dplyr::all_of(extraCols)
) |>
dplyr::inner_join(
cdm[[tableCohortCodelist]] |>
dplyr::filter(.data$domain_id %in% .env$domain) |>
dplyr::select("concept_id", "cohort_definition_id"),
by = "concept_id"
) |>
dplyr::compute(temporary = FALSE, name = paste(workingTblNames[k], "_", j))
cohorts[[domain]] <- tempCohort

## Get standard
concept <- tableRef$concept[k]
tempCohort <- getDomainCohort(
cdm, table, concept, start, end, extraCols, tableCohortCodelist,
domain, nameK
)
## Get source
if (isTRUE(useSourceFields)) {
concept <- tableRef$source[k]
tempCohort <- tempCohort |>
dplyr::union_all(
getDomainCohort(
cdm, table, concept, start, end, extraCols,
tableCohortCodelist, domain, nameK, TRUE
)
) |>
dplyr::compute(name = nameK, temporary = FALSE)
}

if (tempCohort |>
utils::head(1) |>
dplyr::tally() |>
dplyr::pull("n") > 0) {
utils::head(1) |>
dplyr::tally() |>
dplyr::pull("n") > 0) {
cohorts[[k]] <- tempCohort
}
} else {
Expand All @@ -251,12 +246,12 @@ unerafiedConceptCohort <- function(cdm,
)
}
}
}

cohorts <- cohorts %>%
purrr::discard(is.null)

if (length(cohorts) == 0) {
omopgenerics::dropTable(cdm, name = dplyr::starts_with(workingTblNames))
cdm <- omopgenerics::emptyCohortTable(cdm = cdm, name = name)
return(cdm[[name]])
}
Expand Down Expand Up @@ -338,9 +333,9 @@ conceptSetToCohortCodelist <- function(conceptSet) {
dplyr::inner_join(cohortSet, by = "cohort_name") |>
dplyr::mutate("type" = "index event", "value" = as.integer(.data$value)) |>
dplyr::select("cohort_definition_id",
"codelist_name" = "cohort_name",
"concept_id" = "value",
"type"
"codelist_name" = "cohort_name",
"concept_id" = "value",
"type"
)
}

Expand All @@ -355,7 +350,7 @@ uploadCohortCodelistToCdm <- function(cdm, cohortCodelist, tableCohortCodelist)

cdm[[tableCohortCodelist]] <- cdm[[tableCohortCodelist]] |>
dplyr::left_join(cdm[["concept"]] |>
dplyr::select("concept_id", "domain_id"), by = "concept_id") |>
dplyr::select("concept_id", "domain_id"), by = "concept_id") |>
dplyr::mutate(
"concept_id" = as.integer(.data$concept_id),
"domain_id" = tolower(.data$domain_id)
Expand Down Expand Up @@ -403,3 +398,33 @@ addIndex <- function(cdm, name, cols) {
suppressMessages(DBI::dbExecute(con, query))
}
}

getDomainCohort <- function(cdm,
table,
concept,
start,
end,
extraCols,
tableCohortCodelist,
domain,
name,
source = FALSE) {
if (source) {
name = paste0(name, "_source")
}
tempCohort <- cdm[[table]] |>
dplyr::select(
"subject_id" = "person_id",
"concept_id" = dplyr::all_of(.env$concept),
"cohort_start_date" = dplyr::all_of(.env$start),
"cohort_end_date" = dplyr::all_of(.env$end),
dplyr::all_of(extraCols)
) |>
dplyr::inner_join(
cdm[[tableCohortCodelist]] |>
dplyr::filter(.data$domain_id %in% .env$domain) |>
dplyr::select("concept_id", "cohort_definition_id"),
by = "concept_id"
) |>
dplyr::compute(temporary = FALSE, name = name)
}
9 changes: 3 additions & 6 deletions R/demographicsCohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,9 @@
#' enter the cohort. When they stop satisfying any of the criteria their
#' cohort entry ends.
#'
#' @param cdm A cdm reference.
#' @param name Name of the new cohort table
#' @param ageRange A list of vectors specifying minimum and maximum age.
#' @param sex Can be "Both", "Male" or "Female".
#' @param minPriorObservation A minimum number of prior observation days in
#' the database.
#' @inheritParams cdmDoc
#' @inheritParams nameDoc
#' @inheritParams requireDemographicsDoc
#'
#' @return A cohort table
#'
Expand Down
138 changes: 138 additions & 0 deletions R/documentationHelper.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Argument descriptions repeated > 1:

#' Helper for consistent documentation of `cohort`.
#'
#' @param cohort A cohort table in a cdm reference.
#'
#' @name cohortDoc
#' @keywords internal
NULL


#' Helper for consistent documentation of `cohortId`.
#'
#' @param cohortId Vector identifying which cohorts to include
#' (cohort_definition_id or cohort_name). Cohorts not included will be
#' removed from the cohort set.
#'
#' @name cohortIdSubsetDoc
#' @keywords internal
NULL


#' Helper for consistent documentation of `cohortId`.
#'
#' @param cohortId Vector identifying which cohorts to modify
#' (cohort_definition_id or cohort_name). If NULL, all cohorts will be
#' used; otherwise, only the specified cohorts will be modified, and the
#' rest will remain unchanged.
#'
#' @name cohortIdModifyDoc
#' @keywords internal
NULL


#' Helper for consistent documentation of `name`.
#'
#' @param name Name of the new cohort table created in the cdm object.
#'
#' @name nameDoc
#' @keywords internal
NULL


#' Helper for consistent documentation of `conceptSet`.
#'
#' @param conceptSet A conceptSet, which can either be a codelist
#' or a conceptSetExpression.
#'
#' @name conceptSetDoc
#' @keywords internal
NULL


#' Helper for consistent documentation of `cdm`.
#'
#' @param cdm A cdm reference.
#'
#' @name cdmDoc
#' @keywords internal
NULL


#' Helper for consistent documentation of `gap`.
#'
#' @param gap Number of days between two subsequent cohort entries to be merged
#' in a single cohort record.
#'
#' @name gapDoc
#' @keywords internal
NULL


#' Helper for consistent documentation of `dateColumns` and `returnReason`.
#'
#' @param dateColumns Character vector indicating date columns in the cohort
#' table to consider.
#' @param returnReason If TRUE it will return a column indicating which of the
#' `dateColumns` was used.
#'
#' @name columnDateDoc
#' @keywords internal
NULL


#' Helper for consistent documentation of `window`.
#'
#' @param window A list of vectors specifying minimum and maximum days from
#' `indexDate` to consider events over.
#'
#' @name windowDoc
#' @keywords internal
NULL


#' Helper for consistent documentation of arguments in `requireIntersect`
#' functions.
#'
#' @param indexDate Name of the column in the cohort that contains the date to
#' compute the intersection.
#' @param intersections A range indicating number of intersections for
#' criteria to be fulfilled. If a single number is passed, the number of
#' intersections must match this.
#' @param targetStartDate Start date of reference in cohort table.
#' @param targetEndDate End date of reference in cohort table. If NULL,
#' incidence of target event in the window will be considered as intersection,
#' otherwise prevalence of that event will be used as intersection (overlap
#' between cohort and event).
#' @param censorDate Whether to censor overlap events at a specific date or a
#' column date of the cohort.
#' @param targetCohortTable Name of the cohort that we want to check for
#' intersect.
#' @param targetCohortId Vector of cohort definition ids to include.
#' @param tableName Name of the table to check for intersect.
#'
#' @name requireIntersectDoc
#' @keywords internal
NULL


#' Helper for consistent documentation of arguments in `requireDemographics`.
#'
#' @param ageRange A list of vectors specifying minimum and maximum age.
#' @param sex Can be "Both", "Male" or "Female".
#' @param minPriorObservation A minimum number of continuous prior observation
#' days in the database.
#' @param minFutureObservation A minimum number of continuous future observation
#' days in the database.
#' @param indexDate Variable in cohort that contains the date to compute the
#' demographics characteristics on which to restrict on.
#' @param requirementInteractions If TRUE, cohorts will be created for
#' all combinations of ageGroup, sex, and daysPriorObservation. If FALSE, only
#' the first value specified for the other factors will be used. Consequently,
#' order of values matters when requirementInteractions is FALSE.
#'
#' @name requireDemographicsDoc
#' @keywords internal
NULL

Loading

0 comments on commit 5a08988

Please sign in to comment.