Skip to content

Commit

Permalink
Merge pull request #76 from OHDSI/population_diag
Browse files Browse the repository at this point in the history
Population diag
  • Loading branch information
edward-burn authored Oct 14, 2024
2 parents 2d22ee3 + 372c6bf commit 540078e
Show file tree
Hide file tree
Showing 10 changed files with 130 additions and 64 deletions.
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ License: Apache License (>= 2)
Encoding: UTF-8
LazyData: true
Suggests:
CDMConnector,
duckdb,
DBI,
gt,
Expand All @@ -29,23 +30,25 @@ Suggests:
PatientProfiles,
ggplot2,
ggpubr,
stringr
stringr,
shiny,
DiagrammeR,
sortable
Config/testthat/edition: 3
RoxygenNote: 7.3.2
Imports:
CDMConnector,
CodelistGenerator (>= 3.1.0),
CohortCharacteristics,
CohortConstructor,
cli,
dplyr,
here,
IncidencePrevalence (>= 0.8.0),
omopgenerics,
magrittr,
purrr,
rmarkdown,
rlang,
shiny,
vctrs,
visOmopResults,
glue
Expand Down
35 changes: 0 additions & 35 deletions R/cohortDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,41 +46,6 @@ cohortDiagnostics <- function(cohort,
density = TRUE)
}


# cli::cli_bullets(c("*" = "{.strong Creating denominator for incidence and prevalence}"))
# denominatorTable <- omopgenerics::uniqueTableName()
# cdm <- IncidencePrevalence::generateDenominatorCohortSet(
# cdm = cdm,
# name = denominatorTable,
# ageGroup = list(c(0,17),
# c(18,64),
# c(65,199)),
# sex = c("Male", "Female", "Both"),
# daysPriorObservation = c(0, 180)
# )
#
# cli::cli_bullets(c("*" = "{.strong Estimating incidence}"))
# results[["incidence"]] <- IncidencePrevalence::estimateIncidence(
# cdm = cdm,
# denominatorTable = denominatorTable,
# outcomeTable = cohortName,
# interval = "years",
# repeatedEvents = c(TRUE, FALSE),
# outcomeWashout = c(0, Inf),
# completeDatabaseIntervals = c(TRUE, FALSE),
# minCellCount = 0)
#
# cli::cli_bullets(c("*" = "{.strong Estimating prevalence}"))
# results[["prevalence"]] <- IncidencePrevalence::estimatePeriodPrevalence(
# cdm = cdm,
# denominatorTable = denominatorTable,
# outcomeTable = cohortName,
# interval = "years",
# completeDatabaseIntervals = c(TRUE, FALSE),
# fullContribution = c(TRUE, FALSE),
# minCellCount = 0)


results <- results |>
vctrs::list_drop_empty() |>
omopgenerics::bind() |>
Expand Down
53 changes: 51 additions & 2 deletions R/populationDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,56 @@
populationDiagnostics <- function(cohort,
populationSample = 1000000) {

cli::cli_inform("populationDiagnostics not yet implemented")
omopgenerics::emptySummarisedResult()
cdm <- omopgenerics::cdmReference(cohort)
cohortName <- omopgenerics::tableName(cohort)

cli::cli_bullets(c("*" = "{.strong Creating denominator for incidence and prevalence}"))
denominatorTable <- omopgenerics::uniqueTableName()

# add population sampling
cdm$person <- cdm$person |>
dplyr::slice_sample(n = populationSample)

cdm <- IncidencePrevalence::generateDenominatorCohortSet(
cdm = cdm,
name = denominatorTable,
ageGroup = list(c(0, 150),
c(0, 17),
c(18, 64),
c(65, 150)),
sex = c("Both", "Male", "Female"),
daysPriorObservation = 0,
requirementInteractions = FALSE
)

results <- list()

cli::cli_bullets(c("*" = "{.strong Estimating incidence}"))
results[["incidence"]] <- IncidencePrevalence::estimateIncidence(
cdm = cdm,
denominatorTable = denominatorTable,
outcomeTable = cohortName,
interval = c("years", "overall"),
repeatedEvents = FALSE,
outcomeWashout = Inf,
completeDatabaseIntervals = FALSE,
minCellCount = 0)

cli::cli_bullets(c("*" = "{.strong Estimating prevalence}"))
results[["prevalence"]] <- IncidencePrevalence::estimatePeriodPrevalence(
cdm = cdm,
denominatorTable = denominatorTable,
outcomeTable = cohortName,
interval = "years",
completeDatabaseIntervals = TRUE,
fullContribution = FALSE,
minCellCount = 0)

results <- results |>
vctrs::list_drop_empty() |>
omopgenerics::bind() |>
omopgenerics::newSummarisedResult()

results

}
30 changes: 19 additions & 11 deletions R/shinyDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,26 @@ shinyDiagnostics <- function(result,
result |>
omopViewer::exportStaticApp(
directory = directory,
background = getBackground(result),
# background = getBackground(result),
summary = FALSE,
panels = list("summarise_omop_snapshot",
"summarise_observation_period",
"achilles_code_use",
"cohort_code_use",
"orphan_code_use",
"summarise_characteristics",
"summarise_cohort_attrition",
"summarise_cohort_overlap",
"summarise_cohort_timing",
"summarise_large_scale_characteristics")
panels = list(
"Database details" = c("Snapshot"= "summarise_omop_snapshot",
"Observation periods"= "summarise_observation_period"),
"Codelist diagnostics" = c(
"Achilles code use" = "achilles_code_use",
"Cohort code use" = "cohort_code_use",
"Orphan code use" = "orphan_code_use"),
"Cohort diagnostics" = c(
"Cohort characteristics" = "summarise_characteristics",
"Cohort attrition" = "summarise_cohort_attrition",
"Cohort overlap" = "summarise_cohort_overlap",
"Cohort timing" = "summarise_cohort_timing"),
"Matched diagnostics" = c(
"Large scale characteristics" = "summarise_large_scale_characteristics"),
"Population diagnostics" = c(
"Incidence" = "incidence",
"Period prevalence" = "period_prevalence")
)
)
}

Expand Down
1 change: 1 addition & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ The phenotypeR package helps us to assess the research-readiness of a set of coh
- ___Codelist diagnostics___ which help to answer questions like what concepts from our codelist are used in the database? What concepts were present led to individuals' entry in the cohort? Are there any concepts being used in the database that we didn't include in our codelist but maybe we should have?
- ___Cohort diagnostics___ which help to answer questions like how many individuals did we include in our cohort and how many were excluded because of our inclusion criteria? If we have multiple cohorts, is there overlap between them and when do people enter one cohort relative to another? What is the incidence of cohort entry and what is the prevalence of the cohort in the database?
- ___Matched diagnostics___ which compares our study cohorts to the overall population in the database. By matching people in the cohorts to people with a similar age and sex in the database we can see how our cohorts differ from the general database population.
- ___Population diagnostics___ which estimates the frequency of our study cohorts in the database in terms of their incidence rates and prevalence.

## Installation

Expand Down
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ set of cohorts we have defined. This assessment includes:
- ***Matched diagnostics*** which compares our study cohorts to the
overall population in the database. By matching people in the cohorts
to people with a similar age and sex in the database we can see how
our cohorts differ from the general database population.
our cohorts differ from the general database population.
- ***Population diagnostics*** which estimates the frequency of our
study cohorts in the database in terms of their incidence rates and
prevalence.

## Installation

Expand Down Expand Up @@ -70,11 +73,15 @@ result <- cdm$gibleed |>

``` r
summary(result)
#> A summarised_result object with 6020 rows, 12 different result_id, 1 different
#> cdm names, and 10 settings.
#> A summarised_result object with 13334 rows, 48 different result_id, 1 different
#> cdm names, and 25 settings.
#> CDM names: Synthea synthetic health database.
#> Settings: package_name, package_version, result_type, timing, table_name,
#> cohort_definition_id, cdm_version, vocabulary_version, type, and analysis.
#> cohort_definition_id, cdm_version, vocabulary_version,
#> analysis_outcome_washout, analysis_repeated_events, analysis_interval,
#> analysis_complete_database_intervals, denominator_age_group, denominator_sex,
#> denominator_days_prior_observation, denominator_start_date,
#> denominator_end_date, denominator_time_at_risk, …, type, and analysis.
```

Once we have our results we can quickly view them in an interactive
Expand Down
17 changes: 12 additions & 5 deletions tests/testthat/test-phenotypeDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,23 @@ test_that("overall diagnostics function", {
schema ="main", overwrite = TRUE)

expect_no_error(my_result <- phenotypeDiagnostics(cdm$my_cohort))
attr(my_result, "settings") <- attr(my_result, "settings") |>
dplyr::mutate(min_cell_count = 0)

expect_identical(phenotypeDiagnostics(cdm$my_cohort,
databaseDiagnostics = FALSE,
codelistDiagnostics = FALSE,
cohortDiagnostics = FALSE,
matchedDiagnostics = FALSE),
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE),
omopgenerics::emptySummarisedResult())

dd_only <- phenotypeDiagnostics(cdm$my_cohort,
databaseDiagnostics = TRUE,
codelistDiagnostics = FALSE,
cohortDiagnostics = FALSE,
matchedDiagnostics = FALSE)
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE)
expect_true("summarise_omop_snapshot" %in%
(settings(dd_only) |> dplyr::pull("result_type")))
expect_true("summarise_observation_period" %in%
Expand All @@ -52,13 +56,15 @@ test_that("overall diagnostics function", {
databaseDiagnostics = FALSE,
codelistDiagnostics = TRUE,
cohortDiagnostics = FALSE,
matchedDiagnostics = FALSE)
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE)

cohort_diag_only <- phenotypeDiagnostics(cdm$my_cohort,
databaseDiagnostics = FALSE,
codelistDiagnostics = FALSE,
cohortDiagnostics = TRUE,
matchedDiagnostics = FALSE)
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE)
expect_true(
all(c("summarise_characteristics", "summarise_cohort_attrition",
"summarise_cohort_attrition",
Expand All @@ -70,7 +76,8 @@ test_that("overall diagnostics function", {
databaseDiagnostics = FALSE,
codelistDiagnostics = FALSE,
cohortDiagnostics = FALSE,
matchedDiagnostics = TRUE)
matchedDiagnostics = TRUE,
populationDiagnostics = FALSE)
expect_true(
all(c("summarise_characteristics",
"summarise_large_scale_characteristics") %in%
Expand Down
12 changes: 9 additions & 3 deletions tests/testthat/test-populationDiagnostics.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
test_that("multiplication works", {
expect_equal(2 * 2, 4)
})
test_that("population incidence and prevalence", {
cdm <- IncidencePrevalence::mockIncidencePrevalenceRef(sampleSize = 1000)
cdm <- IncidencePrevalence::generateDenominatorCohortSet(cdm, name = "denom")
pop_diag <- populationDiagnostics(cohort = cdm$outcome,
populationSample = 250)

CDMConnector::cdm_disconnect(cdm)

})
3 changes: 2 additions & 1 deletion tests/testthat/test-shinyDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ test_that("basic working example with one cohort", {
schema ="main", overwrite = TRUE)

my_result_cohort_diag <- cdm$my_cohort |> phenotypeDiagnostics()
expect_no_error(shinyDiagnostics(my_result_cohort_diag))

# expect_no_error(shinyDiagnostics(my_result_cohort_diag))


})
19 changes: 19 additions & 0 deletions vignettes/a05_PopulationDiagnostics.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
d---
title: "a05_PopulationDiagnostics"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{a05_PopulationDiagnostics}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```

```{r setup}
library(phenotypeR)
```

0 comments on commit 540078e

Please sign in to comment.