diff --git a/NEWS.md b/NEWS.md index 96baca9..3c0e323 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +CohortMethod 5.4.1 +================== + +Changes: + +1. `createPs()` now checks if filtering of the covariate data is necessary (either because subject have been removed from the study population or because `excludeCovariateIds` or `includeCovariateIds` was specified). If no filtering is required, no extra copy of the covariate data data is created, saving IO time. + CohortMethod 5.4.0 ================== diff --git a/R/PsFunctions.R b/R/PsFunctions.R index 04bb79c..246a8fd 100644 --- a/R/PsFunctions.R +++ b/R/PsFunctions.R @@ -126,29 +126,39 @@ createPs <- function(cohortMethodData, sampled <- FALSE ref <- NULL } else { - covariates <- cohortMethodData$covariates %>% - filter(.data$rowId %in% local(population$rowId)) + rowIds <- cohortMethodData$covariates %>% + distinct(.data$rowId) %>% + pull() + if (all(rowIds %in% population$rowId) && + length(includeCovariateIds) == 0 && + length(excludeCovariateIds) == 0) { + # No filtering necessary, send to tidyCovariateData: + covariateData <- FeatureExtraction::tidyCovariateData(cohortMethodData) + } else { + # Need filtering here before sending it to tidyCovariateData: + covariates <- cohortMethodData$covariates %>% + filter(.data$rowId %in% local(population$rowId)) - if (length(includeCovariateIds) != 0) { - covariates <- covariates %>% - filter(.data$covariateId %in% includeCovariateIds) - } - if (length(excludeCovariateIds) != 0) { - covariates <- covariates %>% - filter(!.data$covariateId %in% excludeCovariateIds) + if (length(includeCovariateIds) != 0) { + covariates <- covariates %>% + filter(.data$covariateId %in% includeCovariateIds) + } + if (length(excludeCovariateIds) != 0) { + covariates <- covariates %>% + filter(!.data$covariateId %in% excludeCovariateIds) + } + filteredCovariateData <- Andromeda::andromeda( + covariates = covariates, + covariateRef = cohortMethodData$covariateRef, + analysisRef = cohortMethodData$analysisRef + ) + metaData <- attr(cohortMethodData, "metaData") + metaData$populationSize <- nrow(population) + attr(filteredCovariateData, "metaData") <- metaData + class(filteredCovariateData) <- "CovariateData" + covariateData <- FeatureExtraction::tidyCovariateData(filteredCovariateData) + close(filteredCovariateData) } - filteredCovariateData <- Andromeda::andromeda( - covariates = covariates, - covariateRef = cohortMethodData$covariateRef, - analysisRef = cohortMethodData$analysisRef - ) - metaData <- attr(cohortMethodData, "metaData") - metaData$populationSize <- nrow(population) - attr(filteredCovariateData, "metaData") <- metaData - class(filteredCovariateData) <- "CovariateData" - - covariateData <- FeatureExtraction::tidyCovariateData(filteredCovariateData) - close(filteredCovariateData) on.exit(close(covariateData)) covariates <- covariateData$covariates attr(population, "metaData")$deletedInfrequentCovariateIds <- attr(covariateData, "metaData")$deletedInfrequentCovariateIds diff --git a/tests/testthat/test-parameterSweep.R b/tests/testthat/test-parameterSweep.R index e5760c4..dd25a1b 100644 --- a/tests/testthat/test-parameterSweep.R +++ b/tests/testthat/test-parameterSweep.R @@ -77,6 +77,11 @@ test_that("Create study population functions", { }) test_that("Propensity score functions", { + # No filtering required: + studyPop <- cohortMethodData$cohorts |> + collect() + ps <- createPs(cohortMethodData, studyPop) + studyPop <- createStudyPopulation(cohortMethodData, outcomeId = 194133, removeSubjectsWithPriorOutcome = TRUE, @@ -85,6 +90,7 @@ test_that("Propensity score functions", { # Cross-validation: ps <- createPs(cohortMethodData, studyPop) + ps <- createPs(cohortMethodData, studyPop, prior = createPrior("laplace", 0.1, exclude = 0)) expect_lt(0.65, computePsAuc(ps)[1])