diff --git a/NAMESPACE b/NAMESPACE index f5ffc57b..13150198 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(extract_seromodel_summary) export(fit_seromodel) export(get_exposure_ages) export(get_exposure_matrix) +export(get_foi_central_estimates) export(get_prev_expanded) export(get_table_rhats) export(plot_foi) diff --git a/R/model_comparison.R b/R/model_comparison.R index 3c6dd0ff..32825a8c 100644 --- a/R/model_comparison.R +++ b/R/model_comparison.R @@ -6,16 +6,16 @@ #' @return rhats table #' @examples #' \dontrun{ -#' data("serodata") -#' data_test <- prepare_serodata(serodata = serodata) -#' model_constant <- run_seromodel(serodata = data_test, -#' foi_model = "constant", +#' data(chagas2012) +#' data_test <- prepare_serodata(serodata = chagas2012) +#' model_constant <- run_seromodel(serodata = data_test, +#' foi_model = "constant", #' n_iters = 1500) #' get_table_rhats(model_object = model_constant) #' } #' @export get_table_rhats <- function(seromodel_object) { - rhats <- bayesplot::rhat(seromodel_object$fit, "foi") + rhats <- bayesplot::rhat(seromodel_object$seromodel_fit, "foi") if (any(is.nan(rhats))) { rhats[which(is.nan(rhats))] <- 0 diff --git a/R/modelling.R b/R/modelling.R index 8558f109..36a25f80 100644 --- a/R/modelling.R +++ b/R/modelling.R @@ -43,9 +43,9 @@ #' @return \code{seromodel_object}. An object containing relevant information about the implementation of the model. For further details refer to \link{fit_seromodel}. #' @examples #' \dontrun{ -#' serodata <- prepare_serodata(serodata) -#' run_seromodel (serodata, -#' foi_model = "constant") +#' serodata <- prepare_serodata(chagas2012) +#' run_seromodel (chagas2012, +#' foi_model = "constant") #' } #' @export run_seromodel <- function(serodata, @@ -68,7 +68,8 @@ run_seromodel <- function(serodata, foi_model, " finished running ------")) if (print_summary){ - print(t(seromodel_object$model_summary)) + model_summary <- extract_seromodel_summary(seromodel_object = seromodel_object) + print(t(model_summary)) } return(seromodel_object) } @@ -114,8 +115,8 @@ run_seromodel <- function(serodata, #' @examples #' \dontrun{ -#' data("serodata") -#' serodata <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' seromodel_fit <- fit_seromodel(serodata = serodata, #' foi_model = "constant") #' } @@ -146,27 +147,18 @@ fit_seromodel <- function(serodata, ) n_warmup <- floor(n_iters / 2) - if (foi_model == "tv_normal_log") { f_init <- function() { list(log_foi = rep(-3, length(exposure_ages))) - } - lower_quantile = 0.1 - upper_quantile = 0.9 - medianv_quantile = 0.5 } - + } else { f_init <- function() { list(foi = rep(0.01, length(exposure_ages))) } - lower_quantile = 0.05 - upper_quantile = 0.95 - medianv_quantile = 0.5 - } - fit <- rstan::sampling( + seromodel_fit <- rstan::sampling( model, data = stan_data, iter = n_iters, @@ -182,64 +174,21 @@ fit_seromodel <- function(serodata, chain_id = 0 # https://github.com/stan-dev/rstan/issues/761#issuecomment-647029649 ) - if (class(fit@sim$samples) != "NULL") { - loo_fit <- loo::loo(fit, save_psis = TRUE, "logLikelihood") - foi <- rstan::extract(fit, "foi", inc_warmup = FALSE)[[1]] - # foi <- rstan::extract(fit, "foi", inc_warmup = TRUE, permuted=FALSE)[[1]] - # generates central estimations - foi_cent_est <- data.frame( - year = exposure_years, - lower = apply(foi, 2, function(x) quantile(x, lower_quantile)), - - upper = apply(foi, 2, function(x) quantile(x, upper_quantile)), - - medianv = apply(foi, 2, function(x) quantile(x, medianv_quantile)) - ) - - - # generates a sample of iterations - if (n_iters >= 2000) { - foi_post_s <- dplyr::sample_n(as.data.frame(foi), size = 1000) - colnames(foi_post_s) <- exposure_years - } else { - foi_post_s <- as.data.frame(foi) - colnames(foi_post_s) <- exposure_years - } - + if (class(seromodel_fit@sim$samples) != "NULL") { seromodel_object <- list( - fit = fit, + seromodel_fit = seromodel_fit, serodata = serodata, stan_data = stan_data, exposure_years = exposure_years, - exposure_ages = exposure_ages, - n_iters = n_iters, - n_thin = n_thin, - n_warmup = n_warmup, - foi_model = foi_model, - delta = delta, - m_treed = m_treed, - loo_fit = loo_fit, - foi_cent_est = foi_cent_est, - foi_post_s = foi_post_s + exposure_ages = exposure_ages ) - seromodel_object$model_summary <- - extract_seromodel_summary(seromodel_object) } else { - loo_fit <- c(-1e10, 0) seromodel_object <- list( - fit = "no model", + seromodel_fit = "no model", serodata = serodata, stan_data = stan_data, exposure_years = exposure_years, - exposure_ages = exposure_ages, - n_iters = n_iters, - n_thin = n_thin, - n_warmup = n_warmup, - model = foi_model, - delta = delta, - m_treed = m_treed, - loo_fit = loo_fit, - model_summary = NA + exposure_ages = exposure_ages ) } @@ -255,8 +204,8 @@ fit_seromodel <- function(serodata, #' @return \code{exposure_ages}. An atomic vector with the numeration of the exposition years in serodata #' @examples #' \dontrun{ -#' data("serodata") -#' serodata <- prepare_serodata(serodata = serodata, alpha = 0.05) +#' data(chagas2012) +#' serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) #' exposure_ages <- get_exposure_ages(serodata) #' } #' @export @@ -271,8 +220,8 @@ get_exposure_ages <- function(serodata) { #' @return \code{exposure_output}. An atomic matrix containing the expositions for each entry of \code{serodata} by year. #' @examples #' \dontrun{ -#' data("serodata") -#' serodata <- prepare_serodata(serodata = serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(serodata = chagas2012) #' exposure_matrix <- get_exposure_matrix(serodata = serodata) #' } #' @export @@ -287,6 +236,46 @@ get_exposure_matrix <- function(serodata) { return(exposure_output) } +#' Function that generates the central estimates for the fitted forced FoI +#' +#' @param seromodel_object Object containing the results of fitting a model by means of \link{run_seromodel}. +#' generated by means of \link{get_exposure_ages}. +#' @return \code{foi_central_estimates}. Central estimates for the fitted forced FoI +#' @examples +#' \dontrun{ +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) +#' seromodel_object <- fit_seromodel(serodata = serodata, +#' foi_model = "constant") +#' foi_central_estimates <- get_foi_central_estimates(seromodel_object) +#' } +#' +#' @export +get_foi_central_estimates <- function(seromodel_object) { + + if (seromodel_object$seromodel_fit@model_name == "tv_normal_log") { + lower_quantile = 0.1 + upper_quantile = 0.9 + medianv_quantile = 0.5 + } + else { + lower_quantile = 0.05 + upper_quantile = 0.95 + medianv_quantile = 0.5 + } + # extracts foi from stan fit + foi <- rstan::extract(seromodel_object$seromodel_fit, "foi", inc_warmup = FALSE)[[1]] + # generates central estimations + foi_central_estimates <- data.frame( + year = seromodel_object$exposure_years, + lower = apply(foi, 2, function(x) quantile(x, lower_quantile)), + + upper = apply(foi, 2, function(x) quantile(x, upper_quantile)), + + medianv = apply(foi, 2, function(x) quantile(x, medianv_quantile)) + ) + return(foi_central_estimates) +} #' Method to extact a summary of the specified serological model object #' @@ -313,34 +302,32 @@ get_exposure_matrix <- function(serodata) { #' } #' @examples #' \dontrun{ -#' data("serodata") -#' serodata <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' seromodel_object <- run_seromodel(serodata = serodata, #' foi_model = "constant") #' extract_seromodel_summary(seromodel_object) #' } #' @export extract_seromodel_summary <- function(seromodel_object) { - foi_model <- seromodel_object$foi_model - serodata <- seromodel_object$serodata #------- Loo estimates - - loo_fit <- seromodel_object$loo_fit + loo_fit <- loo::loo(seromodel_object$seromodel_fit, save_psis = TRUE, "logLikelihood") if (sum(is.na(loo_fit)) < 1) { lll <- as.numeric((round(loo_fit$estimates[1, ], 2))) } else { lll <- c(-1e10, 0) } + #------- model_summary <- data.frame( - foi_model = foi_model, - dataset = serodata$survey[1], - country = serodata$country[1], - year = serodata$tsur[1], - test = serodata$test[1], - antibody = serodata$antibody[1], - n_sample = sum(serodata$total), - n_agec = length(serodata$age_mean_f), - n_iter = seromodel_object$n_iters, + foi_model = seromodel_object$seromodel_fit@model_name, + dataset = unique(seromodel_object$serodata$survey), + country = unique(seromodel_object$serodata$country), + year = unique(seromodel_object$serodata$tsur), + test = unique(seromodel_object$serodata$test), + antibody = unique(seromodel_object$serodata$antibody), + n_sample = sum(seromodel_object$serodata$total), + n_agec = length(seromodel_object$serodata$age_mean_f), + n_iter = seromodel_object$seromodel_fit@sim$iter, elpd = lll[1], se = lll[2], converged = NA @@ -360,16 +347,17 @@ extract_seromodel_summary <- function(seromodel_object) { #' #' This function computes the corresponding binomial confidence intervals for the obtained prevalence based on a fitting #' of the Force-of-Infection \code{foi} for plotting an analysis purposes. -#' @param foi Object containing the information of the force of infection. It is obtained from \code{rstan::extract(seromodel_object$fit, "foi", inc_warmup = FALSE)[[1]]}. +#' @param foi Object containing the information of the force of infection. It is obtained from \code{rstan::extract(seromodel_object$seromodel, "foi", inc_warmup = FALSE)[[1]]}. #' @param serodata A data frame containing the data from a seroprevalence survey. For further details refer to \link{run_seromodel}. #' @param bin_data TBD #' @return \code{prev_final}. The expanded prevalence data. This is used for plotting purposes in the \code{visualization} module. #' @examples #' \dontrun{ -#' serodata <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' seromodel_object <- run_seromodel(serodata = serodata, #' foi_model = "constant") -#' foi <- rstan::extract(seromodel_object$fit, "foi")[[1]] +#' foi <- rstan::extract(seromodel_object$seromodel_fit, "foi")[[1]] #' get_prev_expanded <- function(foi, serodata) #' } #' @export diff --git a/R/serodata.R b/R/serodata.R deleted file mode 100644 index 1404ed4f..00000000 --- a/R/serodata.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage serodata -#' -#' @format An object of class \code{"cross"}; see \code{\link[qtl]{read.cross}}. -#' -#' @keywords datasets -#' -#' @examples -#' serodata -"serodata" \ No newline at end of file diff --git a/R/seroprevalence_data.R b/R/seroprevalence_data.R index f4168cfe..8c466f26 100644 --- a/R/seroprevalence_data.R +++ b/R/seroprevalence_data.R @@ -30,8 +30,8 @@ #' } #' @examples #'\dontrun{ -#' data("serodata") -#' data_test <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' } #' @export prepare_serodata <- function(serodata = serodata, @@ -91,7 +91,8 @@ prepare_serodata <- function(serodata = serodata, #' @return data set with the binomial confidence intervals #' @examples #'\dontrun{ -#' prepare_bin_data (serodata) +#' data(chagas2012) +#' prepare_bin_data(chagas2012) #' } #' @export prepare_bin_data <- function(serodata) { diff --git a/R/visualisation.R b/R/visualisation.R index 26f7fd81..2188a76f 100644 --- a/R/visualisation.R +++ b/R/visualisation.R @@ -17,9 +17,10 @@ #' @return A ggplot object containing the seropositivity-vs-age graph of the raw data of a given seroprevalence survey with its corresponging binomial confidence interval. #' @examples #' \dontrun{ -#' data_test <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' seromodel_object <- run_seromodel( -#' serodata = data_test, +#' serodata = serodata, #' foi_model = "constant", #' n_iters = 1000 #') @@ -52,9 +53,9 @@ plot_seroprev <- function(serodata, #' @return A ggplot object containing the seropositivity-vs-age graph including the data, the fitted model and their corresponding confindence intervals. #' @examples #' \dontrun{ -#' data("serodata") -#' data_test <- prepare_serodata(serodata) -#' seromodel_object <- run_seromodel(serodata = data_test, +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) +#' seromodel_object <- run_seromodel(serodata = serodata, #' foi_model = "constant", #' n_iters = 1000) #' plot_seroprev_fitted(seromodel_object, size_text = 15) @@ -63,10 +64,10 @@ plot_seroprev <- function(serodata, plot_seroprev_fitted <- function(seromodel_object, size_text = 6) { - if (is.character(seromodel_object$fit) == FALSE) { - if (class(seromodel_object$fit@sim$samples) != "NULL" ) { + if (is.character(seromodel_object$seromodel_fit) == FALSE) { + if (class(seromodel_object$seromodel_fit@sim$samples) != "NULL" ) { - foi <- rstan::extract(seromodel_object$fit, "foi", inc_warmup = FALSE)[[1]] + foi <- rstan::extract(seromodel_object$seromodel_fit, "foi", inc_warmup = FALSE)[[1]] prev_expanded <- get_prev_expanded(foi, serodata = seromodel_object$serodata, bin_data = TRUE) prev_plot <- ggplot2::ggplot(prev_expanded) + @@ -133,12 +134,13 @@ plot_seroprev_fitted <- function(seromodel_object, #' @return A ggplot2 object containing the Force-of-infection-vs-time including the corresponding confidence interval. #' @examples #' \dontrun{ -#' data_test <- prepare_serodata(serodata) -#' seromodel_object <- run_seromodel( -#' serodata = data_test, +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) +#' seromodel_object <- run_seromodel( +#' serodata = serodata, #' foi_model = "constant", #' n_iters = 1000 -#' ) +#' ) #' plot_foi(seromodel_object, size_text = 15) #' } #' @export @@ -146,14 +148,14 @@ plot_foi <- function(seromodel_object, max_lambda = NA, size_text = 25, foi_sim = NULL) { - if (is.character(seromodel_object$fit) == FALSE) { - if (class(seromodel_object$fit@sim$samples) != "NULL") { - foi <- rstan::extract(seromodel_object$fit, + if (is.character(seromodel_object$seromodel_fit) == FALSE) { + if (class(seromodel_object$seromodel_fit@sim$samples) != "NULL") { + foi <- rstan::extract(seromodel_object$seromodel_fit, "foi", inc_warmup = FALSE)[[1]] #-------- This bit is to get the actual length of the foi data - foi_data <- seromodel_object$foi_cent_est + foi_data <- get_foi_central_estimates(seromodel_object = seromodel_object) #-------- foi_data$medianv[1] <- NA @@ -228,21 +230,21 @@ plot_foi <- function(seromodel_object, #' @return The rhats-convergence plot of the selected model. #' @examples #' \dontrun{ -#' data("serodata") -#' data_test <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' seromodel_object <- run_seromodel( -#' serodata = data_test, +#' serodata = serodata, #' foi_model = "constant", #' n_iters = 1000 -#') -#' plot_rhats(seromodel_object, +#' ) +#' plot_rhats(seromodel_object, #' size_text = 15) #' } #' @export plot_rhats <- function(seromodel_object, size_text = 25) { - if (is.character(seromodel_object$fit) == FALSE) { - if (class(seromodel_object$fit@sim$samples) != "NULL") { + if (is.character(seromodel_object$seromodel_fit) == FALSE) { + if (class(seromodel_object$seromodel_fit@sim$samples) != "NULL") { rhats <- get_table_rhats(seromodel_object) rhats_plot <- @@ -294,12 +296,13 @@ plot_rhats <- function(seromodel_object, #' @return A ggplot object with a vertical arrange containing the seropositivity, force of infection, and convergence plots. #' @examples #' \dontrun{ -#' data_test <- prepare_serodata(serodata) -#' seromodel_object <- run_seromodel( -#' serodata = data_test, -#' foi_model = "constant", -#' n_iters = 1000 -#') +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) +#' seromodel_object <- run_seromodel( +#' serodata = serodata, +#' foi_model = "constant", +#' n_iters = 1000 +#' ) #' plot_seromodel(seromodel_object, size_text = 15) #' } #' @export @@ -307,8 +310,8 @@ plot_seromodel <- function(seromodel_object, max_lambda = NA, size_text = 25, foi_sim = NULL) { - if (is.character(seromodel_object$fit) == FALSE) { - if (class(seromodel_object$fit@sim$samples) != "NULL") { + if (is.character(seromodel_object$seromodel_fit) == FALSE) { + if (class(seromodel_object$seromodel_fit@sim$samples) != "NULL") { prev_plot <- plot_seroprev_fitted(seromodel_object = seromodel_object, size_text = size_text) @@ -321,9 +324,9 @@ plot_seromodel <- function(seromodel_object, rhats_plot <- plot_rhats(seromodel_object = seromodel_object, size_text = size_text) - + model_summary <- extract_seromodel_summary(seromodel_object = seromodel_object) summary_table <- t( - dplyr::select(seromodel_object$model_summary, + dplyr::select(model_summary, c('foi_model', 'dataset', 'elpd', 'se', 'converged'))) summary_plot <- plot_info_table(summary_table, size_text = size_text) @@ -378,12 +381,12 @@ plot_seromodel <- function(seromodel_object, #' @return p the plot for the given table #' @examples #' \dontrun{ -#' data_test <- prepare_serodata(serodata) +#' serodata <- prepare_serodata(chagas2012) #' seromodel_object <- run_seromodel( -#' serodata = data_test, -#' foi_model = "constant", -#' n_iters = 1000 -#') +#' serodata = serodata, +#' foi_model = "constant", +#' n_iters = 1000 +#' ) #' info = t(seromodel_object$model_summary) #' plot_info_table (info, size_text = 15) #' } diff --git a/README.Rmd b/README.Rmd index e15802fb..65fc44b1 100644 --- a/README.Rmd +++ b/README.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( ) ``` -## *serofoi*: force-of-infection from population based serosurveys with age-disagregated data +## *serofoi*: force-of-infection from population based serosurveys with age-disagregated data @@ -47,22 +47,20 @@ remotes::install_github("epiverse-trace/serofoi") ```{r cleaning, include = FALSE, echo = TRUE} library(serofoi) -rownames(serodata) <- NULL - ``` ***serofoi*** provides a minimal serosurvey dataset, `serodata`, that can be used to test out the package. ```{r ex, include = TRUE} -# Load example serodata data included with the package -data("serodata") -head(serodata, 5) +# Load example dataset chagas2012 included with the package +data(chagas2012) +head(chagas2012, 5) ``` The function `prepare_serodata` will prepare the entry data for the use of the modelling module; this function computes the sample size, the years of birth and the binomial confidence interval for each age group in the provided dataset. A visualisation of the prepared seroprevalence data can be obtained using the function plot_seroprev: ```{r data_test, include = TRUE, out.fig.height="30%", out.width="50%", fig.align="center", message=FALSE} -serodata_test <- prepare_serodata(serodata) +serodata_test <- prepare_serodata(chagas2012) plot_seroprev(serodata_test, size_text = 15) ``` diff --git a/README.md b/README.md index 5eef2793..0b6a0862 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -## *serofoi*: force-of-infection from population based serosurveys with age-disagregated data +## *serofoi*: force-of-infection from population based serosurveys with age-disagregated data @@ -48,9 +48,9 @@ remotes::install_github("epiverse-trace/serofoi") can be used to test out the package. ``` r -# Load example serodata data included with the package -data("serodata") -head(serodata, 5) +# Load example dataset chagas2012 included with the package +data(chagas2012) +head(chagas2012, 5) #> survey total counts age_min age_max tsur country test antibody #> 1 COL-035-93 34 0 1 1 2012 COL ELISA IgG anti-T.cruzi #> 2 COL-035-93 25 0 2 2 2012 COL ELISA IgG anti-T.cruzi @@ -66,7 +66,7 @@ in the provided dataset. A visualisation of the prepared seroprevalence data can be obtained using the function plot_seroprev: ``` r -serodata_test <- prepare_serodata(serodata) +serodata_test <- prepare_serodata(chagas2012) plot_seroprev(serodata_test, size_text = 15) ``` @@ -97,10 +97,10 @@ More details on how to use ***serofoi*** can be found in the [online documentation](https://epiverse-trace.github.io/serofoi/) as package vignettes, under [**Get Started**](https://epiverse-trace.github.io/serofoi/articles/serofoi.html), -[**FoI +[**An Introduction to FoI Models**](https://epiverse-trace.github.io/serofoi/articles/foi_models.html) -and [**Use -Cases**](https://epiverse-trace.github.io/serofoi/articles/use_cases.html) +and [**Real-life Use Cases for +serofoi**](https://epiverse-trace.github.io/serofoi/articles/use_cases.html) ## Help diff --git a/data/serodata.RData b/data/serodata.RData deleted file mode 100644 index f9570b14..00000000 Binary files a/data/serodata.RData and /dev/null differ diff --git a/man/extract_seromodel_summary.Rd b/man/extract_seromodel_summary.Rd index cd037799..106b1ca7 100644 --- a/man/extract_seromodel_summary.Rd +++ b/man/extract_seromodel_summary.Rd @@ -35,8 +35,8 @@ corresponding standar deviation. } \examples{ \dontrun{ -data("serodata") -serodata <- prepare_serodata(serodata) +data(chagas2012) +serodata <- prepare_serodata(chagas2012) seromodel_object <- run_seromodel(serodata = serodata, foi_model = "constant") extract_seromodel_summary(seromodel_object) diff --git a/man/figures/logo.svg b/man/figures/logo.svg index e6d58887..3ee0a32c 100644 --- a/man/figures/logo.svg +++ b/man/figures/logo.svg @@ -1,577 +1,40877 @@ - - - - - - - - - -