From 5a1c7353760f97c3b4dcf9ed5498b3920a42f770 Mon Sep 17 00:00:00 2001 From: Kaitlyn Johnson Date: Wed, 25 Sep 2024 10:55:36 -0400 Subject: [PATCH 1/6] plot the data --- vignettes/eval_vignette.Rmd | 55 ++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/vignettes/eval_vignette.Rmd b/vignettes/eval_vignette.Rmd index bab33f4..2c94f68 100644 --- a/vignettes/eval_vignette.Rmd +++ b/vignettes/eval_vignette.Rmd @@ -1,45 +1,74 @@ --- title: "SummRt" -output: html_document -date: "2024-09-24" +description: "An R(t) evaluation across multiple packages, fitting to simulated data, standardizing outputs, and evaluating outputs" +author: "Kaitlyn Johnson" +date: "2024-09-25" +output: + bookdown::html_vignette2: + fig_caption: yes + code_folding: show +pkgdown: + as_is: true +vignette: > + %\VignetteIndexEntry{R(t) evaluation across packages: fit to simulated data with known reports} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} --- +# Overview + +The goal of this tutorial is to use multiple R(t) packages to fit to a simple, simulated outbreak +with a known R(t), serial interval, generation interval, and reporting delay probability mass function +(PMF). We will use the `summrt` package to generate standardized outputs, plot results, and +quantitatively evaluate the accuracy in R(t) estimation. + +Eventually we will expand this to additional vignettes that will fit to and evaluate more complex +simulated and real datasets (this might require not evaluating just accuracy and reliability +in R(t) but also nowcasting and forecasting expected observations and comparing to +the true observations). ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) -# start with individual packages, eventually can just load in SummRt library(EpiNow2) library(EpiEstim) library(rtestim) library(EpiLPS) #library(SummRt) +library(ggplot2) library (tidyverse) ``` +## Load the simulated data + Load the dataset we will be fitting the R(t) estimation packages to in this vignette. -This will eventually use package data that will be documented and +This will eventually use data from the `rtdata` package that will be documented and will describe the specific epidemiological use case this -data scenario is meant to represent. -```{r load-package-data} +data scenario is meant to represent. In this case, we are going to +fit to data on the number of reported +```{r load-rtdata} # We will eventually replace this with package data specific to the dataset. # E.g. this might be our baseline infections, onset, report data url <- "https://raw.githubusercontent.com/cmilando/RtEval/main/all_data.RDS" all_data <- readRDS(url(url)) ``` -Fit each of the packages to the dataset. This will call `SummRt` package -functions which will use S3 methods to wrap around the individual -R packages. All outputs will be in a standardized format that we can use to -evaluate the R(t) estimation (either from directly comparing to the -true simulated R(t) or via comparing to an evaluation dataset of held out -retrospective observations). + +## Use each package to estimate R(t) + +Fit each of the packages to the dataset. ```{r fit-Rt-with-each-package} -# BAD practice but will source a file. This will generate a set of standardized outputs +#EpiNow2 source("02_EPINOW2_infections.R") ``` +This will call `SummRt` package +functions which will use S3 methods to wrap around the individual +R packages. All outputs will be in a standardized format that we can use to +evaluate the R(t) estimation (either from directly comparing to the +true simulated R(t) or via comparing to an evaluation dataset of held out +retrospective observations). ```{r plot-output} #source file, later will call function source("03_plot.R") From b1fc97081364f102d79ee749b434b432c25cf5a9 Mon Sep 17 00:00:00 2001 From: Kaitlyn Johnson Date: Wed, 25 Sep 2024 11:02:17 -0400 Subject: [PATCH 2/6] start writing epinow2 implementation --- vignettes/eval_vignette.Rmd | 76 +++++++++++++++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/vignettes/eval_vignette.Rmd b/vignettes/eval_vignette.Rmd index 2c94f68..a2cc986 100644 --- a/vignettes/eval_vignette.Rmd +++ b/vignettes/eval_vignette.Rmd @@ -44,23 +44,91 @@ to in this vignette. This will eventually use data from the `rtdata` package that will be documented and will describe the specific epidemiological use case this -data scenario is meant to represent. In this case, we are going to -fit to data on the number of reported +data scenario is meant to represent. In this case, we are going to fit to data +on the number of reported cases, with a known discrete generation interval probability +mass function (PMF) and reporting delay PMF that are also provided as data. ```{r load-rtdata} # We will eventually replace this with package data specific to the dataset. # E.g. this might be our baseline infections, onset, report data url <- "https://raw.githubusercontent.com/cmilando/RtEval/main/all_data.RDS" all_data <- readRDS(url(url)) + +ggplot(all_data$rt) + + geom_line(aes(x = Day, y = Rt)) + + geom_hline(aes(yintercept = 1), linetype = "dashed") + + xlab("Day") + ylab("R(t)") + + scale_y_continuous(trans = "log") + + theme_bw() + ggtitle("Simulated R(t)") + +ggplot(all_data$generation) + + geom_bar(aes(x = Day, y = Px), stat = "identity") + + xlab("Day") + ylab("Generation interval PMF") + + theme_bw() + +ggplot(all_data$reporting_delay) + + geom_bar(aes(x = Day, y = Px), stat = "identity") + + xlab("Day") + ylab("Reporting delay PMF") + + theme_bw() + +ggplot(all_data$cases) + + geom_bar(aes(x = day, y = daily_reports), stat = "identity") + + xlab("Day") + ylab("Reported cases") + + theme_bw() ``` ## Use each package to estimate R(t) -Fit each of the packages to the dataset. +Fit each of the packages to the dataset. See the package specific vignettes for more +of a walk through for the decisions made for each package. ```{r fit-Rt-with-each-package} #EpiNow2 -source("02_EPINOW2_infections.R") +incidence_df = data.frame( + date = lubridate::make_date(2020, 3, 19) + 1:nrow(all_data$cases), + confirm = as.vector(all_data$cases$daily_reports) +) + +gi_pmf <- NonParametric(pmf = all_data$serial$Px) +delay_pmf <- NonParametric(pmf = all_data$reporting_delay$Px) +incubation_pmf <- NonParametric(pmf = all_data$incubation$Px) + + +res_epinow <- epinow( + data = incidence_df, + generation_time = generation_time_opts(gi_pmf), + delays = delay_opts(delay_pmf), + backcalc = backcalc_opts(prior = 'reports'), + rt = rt_opts(rw = 1), + stan = stan_opts(chains = 4, cores = 4) +) + +incidence_df$day = all_data$cases$day + +y_extract <- rstan::extract(res_epinow$estimates$fit)$R +dim(y_extract) + +# y_date = res_epinow$estimates$summarised %>% filter(variable == 'R') +# head(y_date) +# View(res_epinow$samples) + +## +# * including this since i havent figured out how to do it above +INCUBATION_SHIFT = round(weighted.mean(x = all_data$incubation$Day, + w = all_data$incubation$Px)) +## + + +plot_data <- data.frame( + package = "EpiNow2", + date = c(all_data$cases$day, max(all_data$cases$day) + 1:7) - INCUBATION_SHIFT, + Rt_median = apply(y_extract, 2, quantile, probs = 0.5), + Rt_lb = apply(y_extract, 2, quantile, probs = 0.025), + Rt_ub = apply(y_extract, 2, quantile, probs = 0.975) +) + + + ``` This will call `SummRt` package From c98bcbb079541990e87d35dde87fe0830eab53c0 Mon Sep 17 00:00:00 2001 From: Kaitlyn Johnson Date: Wed, 25 Sep 2024 11:55:56 -0400 Subject: [PATCH 3/6] add epinow2 fit and create plot data --- vignettes/eval_vignette.Rmd | 51 +++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/vignettes/eval_vignette.Rmd b/vignettes/eval_vignette.Rmd index a2cc986..3122c36 100644 --- a/vignettes/eval_vignette.Rmd +++ b/vignettes/eval_vignette.Rmd @@ -35,6 +35,7 @@ library(EpiLPS) #library(SummRt) library(ggplot2) library (tidyverse) +library(epinowcast) # for add_pmf ``` ## Load the simulated data @@ -90,10 +91,21 @@ incidence_df = data.frame( ) gi_pmf <- NonParametric(pmf = all_data$serial$Px) -delay_pmf <- NonParametric(pmf = all_data$reporting_delay$Px) -incubation_pmf <- NonParametric(pmf = all_data$incubation$Px) - +delay_pmf <- all_data$reporting_delay$Px +incubation_pmf <- all_data$incubation$Px +pmfs <- list( + "incubation_period" = incubation_pmf, + "sym_report_delay_pmf" = sym_report_delay_pmf +) +to_simplex <- function(vector) { + return(vector / sum(vector)) +} +# Assign to non parametric PMF +delay_pmf <- NonParametric(pmf = + epinowcast::add_pmfs(pmfs) |> + to_simplex() +) res_epinow <- epinow( data = incidence_df, generation_time = generation_time_opts(gi_pmf), @@ -102,26 +114,12 @@ res_epinow <- epinow( rt = rt_opts(rw = 1), stan = stan_opts(chains = 4, cores = 4) ) - incidence_df$day = all_data$cases$day - y_extract <- rstan::extract(res_epinow$estimates$fit)$R dim(y_extract) - -# y_date = res_epinow$estimates$summarised %>% filter(variable == 'R') -# head(y_date) -# View(res_epinow$samples) - -## -# * including this since i havent figured out how to do it above -INCUBATION_SHIFT = round(weighted.mean(x = all_data$incubation$Day, - w = all_data$incubation$Px)) -## - - -plot_data <- data.frame( +EpiNow2_plot_data <- data.frame( package = "EpiNow2", - date = c(all_data$cases$day, max(all_data$cases$day) + 1:7) - INCUBATION_SHIFT, + date = c(all_data$cases$day, max(all_data$cases$day) + 1:7), Rt_median = apply(y_extract, 2, quantile, probs = 0.5), Rt_lb = apply(y_extract, 2, quantile, probs = 0.025), Rt_ub = apply(y_extract, 2, quantile, probs = 0.975) @@ -131,15 +129,14 @@ plot_data <- data.frame( ``` -This will call `SummRt` package -functions which will use S3 methods to wrap around the individual -R packages. All outputs will be in a standardized format that we can use to -evaluate the R(t) estimation (either from directly comparing to the -true simulated R(t) or via comparing to an evaluation dataset of held out -retrospective observations). -```{r plot-output} + +# Call the `SummRt` package to standardize the outputs +```{r get-standardized-outputs} +``` + +```{r plot-outputs} #source file, later will call function -source("03_plot.R") + plot_epinow2 <- this_plot(pd = plot_data, title = "title") ``` From 354b816b08d51ebd0e127f91c8097ebc9895f529 Mon Sep 17 00:00:00 2001 From: Kaitlyn Johnson Date: Wed, 25 Sep 2024 13:21:27 -0400 Subject: [PATCH 4/6] fix epinow2 --- vignettes/eval_vignette.Rmd | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/vignettes/eval_vignette.Rmd b/vignettes/eval_vignette.Rmd index 3122c36..a6b63fc 100644 --- a/vignettes/eval_vignette.Rmd +++ b/vignettes/eval_vignette.Rmd @@ -91,25 +91,14 @@ incidence_df = data.frame( ) gi_pmf <- NonParametric(pmf = all_data$serial$Px) -delay_pmf <- all_data$reporting_delay$Px -incubation_pmf <- all_data$incubation$Px +sym_report_delay_pmf <- NonParametric(pmf = all_data$reporting_delay$Px) +incubation_pmf <- NonParametric(pmf = all_data$incubation$Px) + -pmfs <- list( - "incubation_period" = incubation_pmf, - "sym_report_delay_pmf" = sym_report_delay_pmf -) -to_simplex <- function(vector) { - return(vector / sum(vector)) -} -# Assign to non parametric PMF -delay_pmf <- NonParametric(pmf = - epinowcast::add_pmfs(pmfs) |> - to_simplex() -) res_epinow <- epinow( data = incidence_df, generation_time = generation_time_opts(gi_pmf), - delays = delay_opts(delay_pmf), + delays = delay_opts(incubation_pmf + sym_report_delay_pmf), backcalc = backcalc_opts(prior = 'reports'), rt = rt_opts(rw = 1), stan = stan_opts(chains = 4, cores = 4) From b3ddf408b841e13c809b0501ede519120b9ded64 Mon Sep 17 00:00:00 2001 From: Kaitlyn Johnson Date: Wed, 25 Sep 2024 13:24:58 -0400 Subject: [PATCH 5/6] try usign EpiEstim --- vignettes/eval_vignette.Rmd | 38 ++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/vignettes/eval_vignette.Rmd b/vignettes/eval_vignette.Rmd index a6b63fc..bda4024 100644 --- a/vignettes/eval_vignette.Rmd +++ b/vignettes/eval_vignette.Rmd @@ -82,7 +82,7 @@ ggplot(all_data$cases) + Fit each of the packages to the dataset. See the package specific vignettes for more of a walk through for the decisions made for each package. -```{r fit-Rt-with-each-package} +```{r EpiNow2} #EpiNow2 incidence_df = data.frame( @@ -113,10 +113,46 @@ EpiNow2_plot_data <- data.frame( Rt_lb = apply(y_extract, 2, quantile, probs = 0.025), Rt_ub = apply(y_extract, 2, quantile, probs = 0.975) ) +``` +```{r EpiEstim} +incidence_df <- data.frame( + dates = all_data$cases$day, + I = as.vector(all_data$cases$daily_reports) +) +colnames(incidence_df) <- c('dates', 'I') +# Serial interval from data PMF +si_distr <- as.matrix(all_data$serial$Px) +if (all_data$serial$Day[1] == 1) si_distr <- c(0, si_distr) +si_distr + +# Estimate R DAILY +getR <- EpiEstim::estimate_R( + incid = incidence_df, + method = "non_parametric_si", + config = make_config(list( + si_distr = si_distr, + t_start = 2:nrow(incidence_df), + t_end = 2:nrow(incidence_df) + )), + backimputation_window = 10 +) +# +INCUBATION_SHIFT = round(weighted.mean(x = all_data$incubation$Day, + w = all_data$incubation$Px)) +REPORTINGDELAY_SHIFT = round(weighted.mean(x = all_data$reporting_delay$Day, + w = all_data$reporting_delay$Px)) +# PLOT DATA +plot_data <- data.frame( ## ANNE + package = "EpiEstim", + date = all_data$cases$day[getR$R$t_end] - INCUBATION_SHIFT - REPORTINGDELAY_SHIFT, + Rt_median = getR$R$`Median(R)`, + Rt_lb = getR$R$`Quantile.0.025(R)`, + Rt_ub = getR$R$`Quantile.0.975(R)` +) ``` # Call the `SummRt` package to standardize the outputs From a51e1cc5021f7c7e902fcca345ba7e67f198d765 Mon Sep 17 00:00:00 2001 From: Kaitlyn Johnson Date: Wed, 25 Sep 2024 13:27:33 -0400 Subject: [PATCH 6/6] change title --- vignettes/eval_vignette.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/eval_vignette.Rmd b/vignettes/eval_vignette.Rmd index bda4024..ffe71b8 100644 --- a/vignettes/eval_vignette.Rmd +++ b/vignettes/eval_vignette.Rmd @@ -1,5 +1,5 @@ --- -title: "SummRt" +title: "Simple R(t) estimation evaluation comparison" description: "An R(t) evaluation across multiple packages, fitting to simulated data, standardizing outputs, and evaluating outputs" author: "Kaitlyn Johnson" date: "2024-09-25"