Merge pull request #76 from OHDSI/population_diag

Population diag
OHDSI · Oct 14, 2024 · 540078e · 540078e
2 parents 2d22ee3 + 372c6bf
commit 540078e
Show file tree

Hide file tree

Showing 10 changed files with 130 additions and 64 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -17,6 +17,7 @@ License: Apache License (>= 2)
 Encoding: UTF-8
 LazyData: true
 Suggests: 
+    CDMConnector,
     duckdb,
     DBI,
     gt,
@@ -29,23 +30,25 @@ Suggests:
     PatientProfiles,
     ggplot2,
     ggpubr, 
-    stringr
+    stringr,
+    shiny,
+    DiagrammeR,
+    sortable
 Config/testthat/edition: 3
 RoxygenNote: 7.3.2
 Imports: 
-    CDMConnector,
     CodelistGenerator (>= 3.1.0),
     CohortCharacteristics,
     CohortConstructor,
     cli,
     dplyr,
     here,
+    IncidencePrevalence (>= 0.8.0),
     omopgenerics,
     magrittr,
     purrr,
     rmarkdown,
     rlang,
-    shiny,
     vctrs,
     visOmopResults,
     glue

diff --git a/R/cohortDiagnostics.R b/R/cohortDiagnostics.R
@@ -46,41 +46,6 @@ cohortDiagnostics <- function(cohort,
                                                    density = TRUE)
     }
 
-
- # cli::cli_bullets(c("*" = "{.strong Creating denominator for incidence and prevalence}"))
- # denominatorTable <- omopgenerics::uniqueTableName()
- # cdm <- IncidencePrevalence::generateDenominatorCohortSet(
- #   cdm = cdm,
- #   name = denominatorTable,
- #   ageGroup = list(c(0,17),
- #                   c(18,64),
- #                   c(65,199)),
- #   sex = c("Male", "Female", "Both"),
- #   daysPriorObservation = c(0, 180)
- # )
- #
- # cli::cli_bullets(c("*" = "{.strong Estimating incidence}"))
- # results[["incidence"]] <- IncidencePrevalence::estimateIncidence(
- #   cdm = cdm,
- #   denominatorTable = denominatorTable,
- #   outcomeTable = cohortName,
- #   interval = "years",
- #   repeatedEvents = c(TRUE, FALSE),
- #   outcomeWashout = c(0, Inf),
- #   completeDatabaseIntervals = c(TRUE, FALSE),
- #   minCellCount = 0)
- #
- # cli::cli_bullets(c("*" = "{.strong Estimating prevalence}"))
- # results[["prevalence"]] <- IncidencePrevalence::estimatePeriodPrevalence(
- #   cdm = cdm,
- #   denominatorTable = denominatorTable,
- #   outcomeTable = cohortName,
- #   interval = "years",
- #   completeDatabaseIntervals = c(TRUE, FALSE),
- #   fullContribution = c(TRUE, FALSE),
- #   minCellCount = 0)
-
-
   results <- results |>
     vctrs::list_drop_empty() |>
     omopgenerics::bind() |>

diff --git a/R/populationDiagnostics.R b/R/populationDiagnostics.R
@@ -13,7 +13,56 @@
 populationDiagnostics <- function(cohort,
                                   populationSample = 1000000) {
 
-cli::cli_inform("populationDiagnostics not yet implemented")
-omopgenerics::emptySummarisedResult()
+  cdm <- omopgenerics::cdmReference(cohort)
+  cohortName <- omopgenerics::tableName(cohort)
+
+  cli::cli_bullets(c("*" = "{.strong Creating denominator for incidence and prevalence}"))
+  denominatorTable <- omopgenerics::uniqueTableName()
+
+  # add population sampling
+  cdm$person <- cdm$person |>
+    dplyr::slice_sample(n = populationSample)
+
+  cdm <- IncidencePrevalence::generateDenominatorCohortSet(
+    cdm = cdm,
+    name = denominatorTable,
+    ageGroup = list(c(0, 150),
+                    c(0, 17),
+                    c(18, 64),
+                    c(65, 150)),
+    sex = c("Both", "Male", "Female"),
+    daysPriorObservation = 0,
+    requirementInteractions = FALSE
+  )
+
+  results <- list()
+
+  cli::cli_bullets(c("*" = "{.strong Estimating incidence}"))
+  results[["incidence"]] <- IncidencePrevalence::estimateIncidence(
+    cdm = cdm,
+    denominatorTable = denominatorTable,
+    outcomeTable = cohortName,
+    interval = c("years", "overall"),
+    repeatedEvents = FALSE,
+    outcomeWashout = Inf,
+    completeDatabaseIntervals = FALSE,
+    minCellCount = 0)
+
+  cli::cli_bullets(c("*" = "{.strong Estimating prevalence}"))
+  results[["prevalence"]] <- IncidencePrevalence::estimatePeriodPrevalence(
+    cdm = cdm,
+    denominatorTable = denominatorTable,
+    outcomeTable = cohortName,
+    interval = "years",
+    completeDatabaseIntervals = TRUE,
+    fullContribution = FALSE,
+    minCellCount = 0)
+
+  results <- results |>
+    vctrs::list_drop_empty() |>
+    omopgenerics::bind() |>
+    omopgenerics::newSummarisedResult()
+
+  results
 
 }
diff --git a/R/shinyDiagnostics.R b/R/shinyDiagnostics.R
@@ -16,18 +16,26 @@ shinyDiagnostics <- function(result,
   result |>
     omopViewer::exportStaticApp(
       directory = directory,
-      background = getBackground(result),
+      # background = getBackground(result),
       summary = FALSE,
-      panels = list("summarise_omop_snapshot",
-                    "summarise_observation_period",
-                    "achilles_code_use",
-                    "cohort_code_use",
-                    "orphan_code_use",
-                    "summarise_characteristics",
-                    "summarise_cohort_attrition",
-                    "summarise_cohort_overlap",
-                    "summarise_cohort_timing",
-                    "summarise_large_scale_characteristics")
+      panels = list(
+                   "Database details" = c("Snapshot"= "summarise_omop_snapshot",
+                                          "Observation periods"= "summarise_observation_period"),
+                   "Codelist diagnostics" = c(
+                     "Achilles code use" = "achilles_code_use",
+                     "Cohort code use" = "cohort_code_use",
+                     "Orphan code use" = "orphan_code_use"),
+                   "Cohort diagnostics" = c(
+                     "Cohort characteristics" = "summarise_characteristics",
+                     "Cohort attrition" = "summarise_cohort_attrition",
+                     "Cohort overlap" = "summarise_cohort_overlap",
+                     "Cohort timing" = "summarise_cohort_timing"),
+                   "Matched diagnostics" = c(
+                     "Large scale characteristics" = "summarise_large_scale_characteristics"),
+                   "Population diagnostics" = c(
+                     "Incidence" = "incidence",
+                     "Period prevalence" = "period_prevalence")
+                   )
     )
 }
 

diff --git a/README.Rmd b/README.Rmd
@@ -33,6 +33,7 @@ The phenotypeR package helps us to assess the research-readiness of a set of coh
 - ___Codelist diagnostics___ which help to answer questions like what concepts from our codelist are used in the database? What concepts were present led to individuals' entry in the cohort? Are there any concepts being used in the database that we didn't include in our codelist but maybe we should have?  
 - ___Cohort diagnostics___ which help to answer questions like how many individuals did we include in our cohort and how many were excluded because of our inclusion criteria? If we have multiple cohorts, is there overlap between them and when do people enter one cohort relative to another? What is the incidence of cohort entry and what is the prevalence of the cohort in the database?  
 - ___Matched diagnostics___ which compares our study cohorts to the overall population in the database. By matching people in the cohorts to people with a similar age and sex in the database we can see how our cohorts differ from the general database population.  
+- ___Population diagnostics___ which estimates the frequency of our study cohorts in the database in terms of their incidence rates and prevalence.
 
 ## Installation
 

diff --git a/README.md b/README.md
@@ -35,7 +35,10 @@ set of cohorts we have defined. This assessment includes:
 - ***Matched diagnostics*** which compares our study cohorts to the
   overall population in the database. By matching people in the cohorts
   to people with a similar age and sex in the database we can see how
-  our cohorts differ from the general database population.
+  our cohorts differ from the general database population.  
+- ***Population diagnostics*** which estimates the frequency of our
+  study cohorts in the database in terms of their incidence rates and
+  prevalence.
 
 ## Installation
 
@@ -70,11 +73,15 @@ result <- cdm$gibleed |>
 
 ``` r
 summary(result)
-#> A summarised_result object with 6020 rows, 12 different result_id, 1 different
-#> cdm names, and 10 settings.
+#> A summarised_result object with 13334 rows, 48 different result_id, 1 different
+#> cdm names, and 25 settings.
 #> CDM names: Synthea synthetic health database.
 #> Settings: package_name, package_version, result_type, timing, table_name,
-#> cohort_definition_id, cdm_version, vocabulary_version, type, and analysis.
+#> cohort_definition_id, cdm_version, vocabulary_version,
+#> analysis_outcome_washout, analysis_repeated_events, analysis_interval,
+#> analysis_complete_database_intervals, denominator_age_group, denominator_sex,
+#> denominator_days_prior_observation, denominator_start_date,
+#> denominator_end_date, denominator_time_at_risk, …, type, and analysis.
 ```
 
 Once we have our results we can quickly view them in an interactive

diff --git a/tests/testthat/test-phenotypeDiagnostics.R b/tests/testthat/test-phenotypeDiagnostics.R
@@ -29,19 +29,23 @@ test_that("overall diagnostics function", {
                                  schema ="main", overwrite = TRUE)
 
  expect_no_error(my_result <- phenotypeDiagnostics(cdm$my_cohort))
+ attr(my_result, "settings") <- attr(my_result, "settings") |>
+   dplyr::mutate(min_cell_count = 0)
 
   expect_identical(phenotypeDiagnostics(cdm$my_cohort,
             databaseDiagnostics = FALSE,
             codelistDiagnostics = FALSE,
             cohortDiagnostics = FALSE,
-            matchedDiagnostics = FALSE),
+            matchedDiagnostics = FALSE,
+            populationDiagnostics = FALSE),
   omopgenerics::emptySummarisedResult())
 
   dd_only <- phenotypeDiagnostics(cdm$my_cohort,
             databaseDiagnostics = TRUE,
             codelistDiagnostics = FALSE,
             cohortDiagnostics = FALSE,
-            matchedDiagnostics = FALSE)
+            matchedDiagnostics = FALSE,
+            populationDiagnostics = FALSE)
   expect_true("summarise_omop_snapshot" %in%
                 (settings(dd_only) |> dplyr::pull("result_type")))
   expect_true("summarise_observation_period" %in%
@@ -52,13 +56,15 @@ test_that("overall diagnostics function", {
             databaseDiagnostics = FALSE,
             codelistDiagnostics = TRUE,
             cohortDiagnostics = FALSE,
-            matchedDiagnostics = FALSE)
+            matchedDiagnostics = FALSE,
+            populationDiagnostics = FALSE)
 
   cohort_diag_only <-  phenotypeDiagnostics(cdm$my_cohort,
             databaseDiagnostics = FALSE,
             codelistDiagnostics = FALSE,
             cohortDiagnostics = TRUE,
-            matchedDiagnostics = FALSE)
+            matchedDiagnostics = FALSE,
+            populationDiagnostics = FALSE)
   expect_true(
    all(c("summarise_characteristics", "summarise_cohort_attrition",
       "summarise_cohort_attrition",
@@ -70,7 +76,8 @@ test_that("overall diagnostics function", {
             databaseDiagnostics = FALSE,
             codelistDiagnostics = FALSE,
             cohortDiagnostics = FALSE,
-            matchedDiagnostics = TRUE)
+            matchedDiagnostics = TRUE,
+            populationDiagnostics = FALSE)
   expect_true(
     all(c("summarise_characteristics",
           "summarise_large_scale_characteristics") %in%

diff --git a/tests/testthat/test-populationDiagnostics.R b/tests/testthat/test-populationDiagnostics.R
@@ -1,3 +1,9 @@
-test_that("multiplication works", {
-  expect_equal(2 * 2, 4)
-})
+test_that("population incidence and prevalence", {
+  cdm <- IncidencePrevalence::mockIncidencePrevalenceRef(sampleSize = 1000)
+  cdm <- IncidencePrevalence::generateDenominatorCohortSet(cdm, name = "denom")
+  pop_diag <- populationDiagnostics(cohort = cdm$outcome,
+                                    populationSample = 250)
+
+  CDMConnector::cdm_disconnect(cdm)
+
+  })
diff --git a/tests/testthat/test-shinyDiagnostics.R b/tests/testthat/test-shinyDiagnostics.R
@@ -37,7 +37,8 @@ test_that("basic working example with one cohort", {
                                  schema ="main", overwrite = TRUE)
 
   my_result_cohort_diag <- cdm$my_cohort |> phenotypeDiagnostics()
-  expect_no_error(shinyDiagnostics(my_result_cohort_diag))
+
+  # expect_no_error(shinyDiagnostics(my_result_cohort_diag))
 
 
 })
diff --git a/vignettes/a05_PopulationDiagnostics.Rmd b/vignettes/a05_PopulationDiagnostics.Rmd
@@ -0,0 +1,19 @@
+d---
+title: "a05_PopulationDiagnostics"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{a05_PopulationDiagnostics}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(phenotypeR)
+```