diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 1e3a7e52..b0e8175c 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -4,7 +4,7 @@ on: push: branches: [main, master, dev] pull_request: - branches: [main, master] + branches: [main, master, dev] name: R-CMD-check diff --git a/DESCRIPTION b/DESCRIPTION index 3ceccaa9..68b013bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,8 @@ LazyData: true RoxygenNote: 7.2.3 Depends: R (>= 3.5.0) -Imports: +Imports: + checkmate, methods, Rcpp (>= 0.12.0), RcppParallel (>= 5.0.1), @@ -57,7 +58,6 @@ Imports: purrr, qtl, reshape2, - tidyverse, usethis, vdiffr (>= 1.0.0) LinkingTo: diff --git a/NAMESPACE b/NAMESPACE index f5ffc57b..afb7a5e9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,8 +2,9 @@ export(extract_seromodel_summary) export(fit_seromodel) -export(get_exposure_ages) +export(get_cohort_ages) export(get_exposure_matrix) +export(get_foi_central_estimates) export(get_prev_expanded) export(get_table_rhats) export(plot_foi) diff --git a/R/model_comparison.R b/R/model_comparison.R index 3c6dd0ff..65eceda8 100644 --- a/R/model_comparison.R +++ b/R/model_comparison.R @@ -1,26 +1,27 @@ #' Method for extracting a dataframe containing the R-hat estimates for a given serological model #' #' This method relies in the function \link[bayesplot]{rhat} to extract the R-hat estimates of the serological model object -#' \code{seromodel_object} and returns a table a dataframe with the estimates for each year of birth. -#' @param seromodel_object seromodel_object +#' \code{seromodel_object} and returns a table a dataframe with the estimates for each year of birth. +#' @inheritParams get_foi_central_estimates #' @return rhats table #' @examples -#' \dontrun{ -#' data("serodata") -#' data_test <- prepare_serodata(serodata = serodata) -#' model_constant <- run_seromodel(serodata = data_test, -#' foi_model = "constant", +#' data(chagas2012) +#' serodata <- prepare_serodata(serodata = chagas2012) +#' model_constant <- run_seromodel(serodata = serodata, +#' foi_model = "constant", #' n_iters = 1500) -#' get_table_rhats(model_object = model_constant) -#' } +#' cohort_ages <- get_cohort_ages(serodata) +#' get_table_rhats(seromodel_object = model_constant, +#' cohort_ages = cohort_ages) #' @export -get_table_rhats <- function(seromodel_object) { - rhats <- bayesplot::rhat(seromodel_object$fit, "foi") +get_table_rhats <- function(seromodel_object, + cohort_ages) { + rhats <- bayesplot::rhat(seromodel_object, "foi") if (any(is.nan(rhats))) { rhats[which(is.nan(rhats))] <- 0 } - model_rhats <- data.frame(year = seromodel_object$exposure_years, rhat = rhats) + model_rhats <- data.frame(year = cohort_ages$birth_year, rhat = rhats) model_rhats$rhat[model_rhats$rhat == 0] <- NA return(model_rhats) diff --git a/R/modelling.R b/R/modelling.R index 8558f109..c063bdc1 100644 --- a/R/modelling.R +++ b/R/modelling.R @@ -42,20 +42,21 @@ #' @param print_summary TBD #' @return \code{seromodel_object}. An object containing relevant information about the implementation of the model. For further details refer to \link{fit_seromodel}. #' @examples -#' \dontrun{ -#' serodata <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' run_seromodel (serodata, -#' foi_model = "constant") -#' } +#' foi_model = "constant") #' @export run_seromodel <- function(serodata, - foi_model = "constant", + foi_model = c("constant", "tv_normal_log", + "tv_normal"), n_iters = 1000, n_thin = 2, delta = 0.90, m_treed = 10, decades = 0, print_summary = TRUE) { + foi_model <- match.arg(foi_model) survey <- unique(serodata$survey) if (length(survey) > 1) warning("You have more than 1 surveys or survey codes") seromodel_object <- fit_seromodel(serodata = serodata, @@ -68,7 +69,9 @@ run_seromodel <- function(serodata, foi_model, " finished running ------")) if (print_summary){ - print(t(seromodel_object$model_summary)) + model_summary <- extract_seromodel_summary(seromodel_object = seromodel_object, + serodata = serodata) + print(t(model_summary)) } return(seromodel_object) } @@ -78,7 +81,7 @@ run_seromodel <- function(serodata, #' This function fits the specified model \code{foi_model} to the serological survey data \code{serodata} #' by means of the \link[rstan]{sampling} method. The function determines whether the corresponding stan model #' object needs to be compiled by rstan. -#' @param serodata A data frame containing the data from a seroprevalence survey. For further details refer to \link{run_seromodel}. +#' @inheritParams run_seromodel #' @param foi_model Name of the selected model. Current version provides three options: #' \describe{ #' \item{\code{"constant"}}{Runs a constant model} @@ -92,46 +95,26 @@ run_seromodel <- function(serodata, #' For further details refer to the \code{control} parameter in \link[rstan]{sampling} or \href{https://mc-stan.org/rstanarm/reference/adapt_delta.html}{here}. #' @param m_treed Maximum tree depth for the binary tree used in the NUTS stan sampler. For further details refer to the \code{control} parameter in \link[rstan]{sampling}. #' @param decades Number of decades covered by the survey data. -#' @return \code{seromodel_object}. An object containing relevant information about the implementation of the model. It contains the following: -#' \tabular{ll}{ -#' \code{fit} \tab \code{stanfit} object returned by the function \link[rstan]{sampling} \cr \tab \cr -#' \code{serodata} \tab A data frame containing the data from a seroprevalence survey. For further details refer to \link{run_seromodel}.\cr \tab \cr -#' \code{stan_data} \tab List containing \code{Nobs}, \code{Npos}, \code{Ntotal}, \code{Age}, \code{Ymax}, \code{AgeExpoMatrix} and \code{NDecades}. -#' This object is used as an input for the \link[rstan]{sampling} function \cr \tab \cr -#' \code{exposure_years} \tab Integer atomic vector containing the actual exposure years (1946, ..., 2007 e.g.) \cr \tab \cr -#' \code{exposure_ages} \tab Integer atomic vector containing the numeration of the exposure ages. \cr \tab \cr -#' \code{n_iters} \tab Number of interations for eah chain including the warmup. \cr \tab \cr -#' \code{n_thin} \tab Positive integer specifying the period for saving samples. \cr \tab \cr -#' \code{n_warmup} \tab Number of warm up iterations. Set by default as n_iters/2. \cr \tab \cr -#' \code{foi_model} \tab The name of the model\cr \tab \cr -#' \code{delta} \tab Real number between 0 and 1 that represents the target average acceptance probability. \cr \tab \cr -#' \code{m_treed} \tab Maximum tree depth for the binary tree used in the NUTS stan sampler. \cr \tab \cr -#' \code{loo_fit} \tab Efficient approximate leave-one-out cross-validation. Refer to \link[loo]{loo} for further details. \cr \tab \cr -#' \code{foi_cent_est} \tab A data fram e containing \code{year} (corresponding to \code{exposure_years}), \code{lower}, \code{upper}, and \code{medianv} \cr \tab \cr -#' \code{foi_post_s} \tab Sample n rows from a table. Refer to \link[dplyr]{sample_n} for further details. \cr \tab \cr -#' \code{model_summary} \tab A data fram containing the summary of the model. Refer to \link{extract_seromodel_summary} for further details. \cr \tab \cr -#' } - +#' @return \code{seromodel_object}. \code{stanfit} object returned by the function \link[rstan]{sampling} #' @examples -#' \dontrun{ -#' data("serodata") -#' serodata <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' seromodel_fit <- fit_seromodel(serodata = serodata, #' foi_model = "constant") -#' } #' #' @export fit_seromodel <- function(serodata, - foi_model, + foi_model = c("constant", "tv_normal_log", + "tv_normal"), n_iters = 1000, n_thin = 2, delta = 0.90, m_treed = 10, decades = 0) { # TODO Add a warning because there are exceptions where a minimal amount of iterations is needed + foi_model <- match.arg(foi_model) model <- stanmodels[[foi_model]] - exposure_ages <- get_exposure_ages(serodata) - exposure_years <- (min(serodata$birth_year):serodata$tsur[1])[-1] + cohort_ages <- get_cohort_ages(serodata = serodata) exposure_matrix <- get_exposure_matrix(serodata) Nobs <- nrow(serodata) @@ -140,33 +123,24 @@ fit_seromodel <- function(serodata, Npos = serodata$counts, Ntotal = serodata$total, Age = serodata$age_mean_f, - Ymax = max(exposure_ages), + Ymax = max(cohort_ages$age), AgeExpoMatrix = exposure_matrix, NDecades = decades ) n_warmup <- floor(n_iters / 2) - if (foi_model == "tv_normal_log") { f_init <- function() { - list(log_foi = rep(-3, length(exposure_ages))) - } - lower_quantile = 0.1 - upper_quantile = 0.9 - medianv_quantile = 0.5 + list(log_foi = rep(-3, nrow(cohort_ages))) + } } - else { f_init <- function() { - list(foi = rep(0.01, length(exposure_ages))) + list(foi = rep(0.01, nrow(cohort_ages))) } - lower_quantile = 0.05 - upper_quantile = 0.95 - medianv_quantile = 0.5 - } - fit <- rstan::sampling( + seromodel_fit <- rstan::sampling( model, data = stan_data, iter = n_iters, @@ -182,104 +156,51 @@ fit_seromodel <- function(serodata, chain_id = 0 # https://github.com/stan-dev/rstan/issues/761#issuecomment-647029649 ) - if (class(fit@sim$samples) != "NULL") { - loo_fit <- loo::loo(fit, save_psis = TRUE, "logLikelihood") - foi <- rstan::extract(fit, "foi", inc_warmup = FALSE)[[1]] - # foi <- rstan::extract(fit, "foi", inc_warmup = TRUE, permuted=FALSE)[[1]] - # generates central estimations - foi_cent_est <- data.frame( - year = exposure_years, - lower = apply(foi, 2, function(x) quantile(x, lower_quantile)), - - upper = apply(foi, 2, function(x) quantile(x, upper_quantile)), - - medianv = apply(foi, 2, function(x) quantile(x, medianv_quantile)) - ) - - - # generates a sample of iterations - if (n_iters >= 2000) { - foi_post_s <- dplyr::sample_n(as.data.frame(foi), size = 1000) - colnames(foi_post_s) <- exposure_years - } else { - foi_post_s <- as.data.frame(foi) - colnames(foi_post_s) <- exposure_years - } - - seromodel_object <- list( - fit = fit, - serodata = serodata, - stan_data = stan_data, - exposure_years = exposure_years, - exposure_ages = exposure_ages, - n_iters = n_iters, - n_thin = n_thin, - n_warmup = n_warmup, - foi_model = foi_model, - delta = delta, - m_treed = m_treed, - loo_fit = loo_fit, - foi_cent_est = foi_cent_est, - foi_post_s = foi_post_s - ) - seromodel_object$model_summary <- - extract_seromodel_summary(seromodel_object) + if (seromodel_fit@mode == 0) { + seromodel_object <- seromodel_fit + return(seromodel_object) } else { - loo_fit <- c(-1e10, 0) - seromodel_object <- list( - fit = "no model", - serodata = serodata, - stan_data = stan_data, - exposure_years = exposure_years, - exposure_ages = exposure_ages, - n_iters = n_iters, - n_thin = n_thin, - n_warmup = n_warmup, - model = foi_model, - delta = delta, - m_treed = m_treed, - loo_fit = loo_fit, - model_summary = NA - ) + # This may happen for invalid inputs in rstan::sampling() (e.g. thin > iter) + seromodel_object <- "no model" + return(seromodel_object) } - - return(seromodel_object) } -#' Function that generates an atomic vector containing the corresponding exposition years of a serological survey +#' Function that generates a data.frame containing the age of each cohort corresponding to each birth year exluding the year of the survey. #' -#' This function generates an atomic vector containing the exposition years corresponding to the specified serological survey data \code{serodata}. -#' The exposition years to the disease for each individual corresponds to the time from birth to the moment of the survey. -#' @param serodata A data frame containing the data from a seroprevalence survey. This data frame must contain the year of birth for each individual (birth_year) and the time of the survey (tsur). birth_year can be constructed by means of the \link{prepare_serodata} function. -#' @return \code{exposure_ages}. An atomic vector with the numeration of the exposition years in serodata +#' This function generates a data.frame containing the age of each cohort corresponding to each \code{birth_year} excluding the year of the survey, +#' for which the cohort age is still 0. +#' specified serological survey data \code{serodata} excluding the year of the survey. +#' @inheritParams run_seromodel +#' @return \code{cohort_ages}. A data.frame containing the age of each cohort corresponding to each birth year #' @examples -#' \dontrun{ -#' data("serodata") -#' serodata <- prepare_serodata(serodata = serodata, alpha = 0.05) -#' exposure_ages <- get_exposure_ages(serodata) -#' } +#' data(chagas2012) +#' serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) +#' cohort_ages <- get_cohort_ages(serodata = serodata) #' @export -get_exposure_ages <- function(serodata) { - return(seq_along(min(serodata$birth_year):(serodata$tsur[1] - 1))) +get_cohort_ages <- function(serodata) { + birth_year <- (min(serodata$birth_year):serodata$tsur[1]) + age <- (seq_along(min(serodata$birth_year):(serodata$tsur[1] - 1))) + + cohort_ages <- data.frame(birth_year = birth_year[-length(birth_year)], age = rev(age)) + return(cohort_ages) } # TODO Is necessary to explain better what we mean by the exposure matrix. #' Function that generates the exposure matrix corresponding to a serological survey #' -#' @param serodata A data frame containing the data from a seroprevalence survey. This data frame must contain the year of birth for each individual (birth_year) and the time of the survey (tsur). birth_year can be constructed by means of the \link{prepare_serodata} function. +#' @inheritParams run_seromodel #' @return \code{exposure_output}. An atomic matrix containing the expositions for each entry of \code{serodata} by year. #' @examples -#' \dontrun{ -#' data("serodata") -#' serodata <- prepare_serodata(serodata = serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(serodata = chagas2012) #' exposure_matrix <- get_exposure_matrix(serodata = serodata) -#' } #' @export get_exposure_matrix <- function(serodata) { age_class <- serodata$age_mean_f - exposure_ages <- get_exposure_ages(serodata) - ly <- length(exposure_ages) + cohort_ages <- get_cohort_ages(serodata = serodata) + ly <- nrow(cohort_ages) exposure <- matrix(0, nrow = length(age_class), ncol = ly) for (k in 1:length(age_class)) exposure[k, (ly - age_class[k] + 1):ly] <- 1 @@ -287,6 +208,47 @@ get_exposure_matrix <- function(serodata) { return(exposure_output) } +#' Function that generates the central estimates for the fitted forced FoI +#' +#' @param seromodel_object Stanfit object containing the results of fitting a model by means of \link{run_seromodel}. +#' @param cohort_ages A data.frame containing the age of each cohort corresponding to each birth year. +#' @return \code{foi_central_estimates}. Central estimates for the fitted forced FoI +#' @examples +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) +#' seromodel_object <- fit_seromodel(serodata = serodata, +#' foi_model = "constant") +#' cohort_ages <- get_cohort_ages(serodata = serodata) +#' foi_central_estimates <- get_foi_central_estimates(seromodel_object = seromodel_object, +#' cohort_ages = cohort_ages) +#' @export +get_foi_central_estimates <- function(seromodel_object, + cohort_ages) { + + if (seromodel_object@model_name == "tv_normal_log") { + lower_quantile = 0.1 + upper_quantile = 0.9 + medianv_quantile = 0.5 + } + else { + lower_quantile = 0.05 + upper_quantile = 0.95 + medianv_quantile = 0.5 + } + # extracts foi from stan fit + foi <- rstan::extract(seromodel_object, "foi", inc_warmup = FALSE)[[1]] + + # generates central estimations + foi_central_estimates <- data.frame( + year = cohort_ages$birth_year, + lower = apply(foi, 2, function(x) quantile(x, lower_quantile)), + + upper = apply(foi, 2, function(x) quantile(x, upper_quantile)), + + medianv = apply(foi, 2, function(x) quantile(x, medianv_quantile)) + ) + return(foi_central_estimates) +} #' Method to extact a summary of the specified serological model object #' @@ -294,8 +256,8 @@ get_exposure_matrix <- function(serodata) { #' survey data used to fit the model, such as the year when the survey took place, the type of test taken and the corresponding antibody, #' as well as information about the convergence of the model, like the expected log pointwise predictive density \code{elpd} and its #' corresponding standar deviation. -#' @param seromodel_object \code{seromodel_object}. An object containing relevant information about the implementation of the model. -#' Refer to \link{fit_seromodel} for further details. +#' @inheritParams get_foi_central_estimates +#' @inheritParams run_seromodel #' @return \code{model_summary}. Object with a summary of \code{seromodel_object} containing the following: #' \tabular{ll}{ #' \code{foi_model} \tab Name of the selected model. \cr \tab \cr @@ -312,41 +274,42 @@ get_exposure_matrix <- function(serodata) { #' \code{converged} \tab convergence \cr \tab \cr #' } #' @examples -#' \dontrun{ -#' data("serodata") -#' serodata <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' seromodel_object <- run_seromodel(serodata = serodata, #' foi_model = "constant") -#' extract_seromodel_summary(seromodel_object) -#' } +#' extract_seromodel_summary(seromodel_object, +#' serodata = serodata) #' @export -extract_seromodel_summary <- function(seromodel_object) { - foi_model <- seromodel_object$foi_model - serodata <- seromodel_object$serodata +extract_seromodel_summary <- function(seromodel_object, + serodata) { #------- Loo estimates - - loo_fit <- seromodel_object$loo_fit + # The argument parameter_name refers to the name given to the Log-likelihood in the stan models. + # See loo::extract_log_lik() documentation for further details + loo_fit <- loo::loo(seromodel_object, save_psis = FALSE, pars = c(parameter_name = "logLikelihood")) if (sum(is.na(loo_fit)) < 1) { lll <- as.numeric((round(loo_fit$estimates[1, ], 2))) } else { lll <- c(-1e10, 0) } + #------- model_summary <- data.frame( - foi_model = foi_model, - dataset = serodata$survey[1], - country = serodata$country[1], - year = serodata$tsur[1], - test = serodata$test[1], - antibody = serodata$antibody[1], + foi_model = seromodel_object@model_name, + dataset = unique(serodata$survey), + country = unique(serodata$country), + year = unique(serodata$tsur), + test = unique(serodata$test), + antibody = unique(serodata$antibody), n_sample = sum(serodata$total), n_agec = length(serodata$age_mean_f), - n_iter = seromodel_object$n_iters, + n_iter = seromodel_object@sim$iter, elpd = lll[1], se = lll[2], converged = NA ) - - rhats <- get_table_rhats(seromodel_object) + cohort_ages <- get_cohort_ages(serodata = serodata) + rhats <- get_table_rhats(seromodel_object = seromodel_object, + cohort_ages = cohort_ages) if (any(rhats$rhat > 1.1) == FALSE) { model_summary$converged <- "Yes" } @@ -360,18 +323,17 @@ extract_seromodel_summary <- function(seromodel_object) { #' #' This function computes the corresponding binomial confidence intervals for the obtained prevalence based on a fitting #' of the Force-of-Infection \code{foi} for plotting an analysis purposes. -#' @param foi Object containing the information of the force of infection. It is obtained from \code{rstan::extract(seromodel_object$fit, "foi", inc_warmup = FALSE)[[1]]}. -#' @param serodata A data frame containing the data from a seroprevalence survey. For further details refer to \link{run_seromodel}. +#' @param foi Object containing the information of the force of infection. It is obtained from \code{rstan::extract(seromodel_object$seromodel, "foi", inc_warmup = FALSE)[[1]]}. +#' @inheritParams run_seromodel #' @param bin_data TBD #' @return \code{prev_final}. The expanded prevalence data. This is used for plotting purposes in the \code{visualization} module. #' @examples -#' \dontrun{ -#' serodata <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' seromodel_object <- run_seromodel(serodata = serodata, -#' foi_model = "constant") -#' foi <- rstan::extract(seromodel_object$fit, "foi")[[1]] -#' get_prev_expanded <- function(foi, serodata) -#' } +#' foi_model = "constant") +#' foi <- rstan::extract(seromodel_object, "foi")[[1]] +#' get_prev_expanded(foi, serodata) #' @export get_prev_expanded <- function(foi, serodata, diff --git a/R/serodata.R b/R/serodata.R deleted file mode 100644 index 1404ed4f..00000000 --- a/R/serodata.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage serodata -#' -#' @format An object of class \code{"cross"}; see \code{\link[qtl]{read.cross}}. -#' -#' @keywords datasets -#' -#' @examples -#' serodata -"serodata" \ No newline at end of file diff --git a/R/seroprevalence_data.R b/R/seroprevalence_data.R index f4168cfe..933e8f27 100644 --- a/R/seroprevalence_data.R +++ b/R/seroprevalence_data.R @@ -18,7 +18,6 @@ #' \code{antibody} \tab antibody \cr \tab \cr #' } #' @param alpha probability of a type I error. For further details refer to \link[Hmisc]{binconf}. -#' @param add_age_mean_f TBD #' @return serodata with additional columns necessary for the analysis. These columns are: #' \tabular{ll}{ #' \code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr @@ -29,19 +28,30 @@ #' \code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr #' } #' @examples -#'\dontrun{ -#' data("serodata") -#' data_test <- prepare_serodata(serodata) -#' } +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' @export prepare_serodata <- function(serodata = serodata, - alpha = 0.05, - add_age_mean_f = TRUE) { - if(add_age_mean_f){ + alpha = 0.05) { + checkmate::assert_numeric(alpha, lower = 0, upper = 1) + #Check that serodata has the right columns + stopifnot("serodata must contain the right columns" = + all(c("survey", "total", "counts", "age_min", "age_max", "tsur", + "country","test","antibody" + ) %in% + colnames(serodata) + ) + ) + if(!any(colnames(serodata) == "age_mean_f")){ + serodata <- serodata %>% + dplyr::mutate(age_mean_f = floor((age_min + age_max) / 2), sample_size = sum(total)) + } + + if(!any(colnames(serodata) == "birth_year")){ serodata <- serodata %>% - dplyr::mutate(age_mean_f = floor((age_min + age_max) / 2), sample_size = sum(total)) %>% dplyr::mutate(birth_year = .data$tsur - .data$age_mean_f) } + serodata <- serodata %>% cbind( Hmisc::binconf( @@ -68,33 +78,18 @@ prepare_serodata <- function(serodata = serodata, #' #' This function prepapares a given pre-processed serological dataset (see \code{\link{prepare_serodata}}) to plot the binomial confidence intervals #' of its corresponding seroprevalence grouped by age group. -#' @param serodata A data frame containing the data from a seroprevalence survey. For more information see the function \link{run_seromodel}. -#' This data frame must contain the following columns: -#' \tabular{ll}{ -#' \code{survey} \tab survey Label of the current survey \cr \tab \cr -#' \code{total} \tab Number of samples for each age group\cr \tab \cr -#' \code{counts} \tab Number of positive samples for each age group\cr \tab \cr -#' \code{age_min} \tab age_min \cr \tab \cr -#' \code{age_max} \tab age_max \cr \tab \cr -#' \code{tsur} \tab Year in which the survey took place \cr \tab \cr -#' \code{country} \tab The country where the survey took place \cr \tab \cr -#' \code{test} \tab The type of test taken \cr \tab \cr -#' \code{antibody} \tab antibody \cr \tab \cr -#' \code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr -#' \code{sample_size} \tab The size of the sample \cr \tab \cr -#' \code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr -#' \code{prev_obs} \tab Observed prevalence \cr \tab \cr -#' \code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr -#' \code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr -#' } -#' The last six colums can be added to \code{serodata} by means of the function \code{\link{prepare_serodata}}. +#' @inheritParams run_seromodel #' @return data set with the binomial confidence intervals #' @examples -#'\dontrun{ -#' prepare_bin_data (serodata) -#' } +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) +#' prepare_bin_data(serodata) #' @export prepare_bin_data <- function(serodata) { + if(!any(colnames(serodata) == "age_mean_f")){ + serodata <- serodata %>% + dplyr::mutate(age_mean_f = floor((age_min + age_max) / 2), sample_size = sum(total)) + } serodata$cut_ages <- cut(as.numeric(serodata$age_mean_f), seq(1, 101, by = 5), diff --git a/R/visualisation.R b/R/visualisation.R index 26f7fd81..6c26051f 100644 --- a/R/visualisation.R +++ b/R/visualisation.R @@ -1,30 +1,12 @@ #' Function that generates the sero-positivity plot from a raw serological survey dataset #' -#' @param serodata A data frame containing the data from a seroprevalence survey. -#' This data frame must contain the following columns: -#' \tabular{ll}{ -#' \code{survey} \tab survey Label of the current survey \cr \tab \cr -#' \code{total} \tab Number of samples for each age group\cr \tab \cr -#' \code{counts} \tab Number of positive samples for each age group\cr \tab \cr -#' \code{age_min} \tab age_min \cr \tab \cr -#' \code{age_max} \tab age_max \cr \tab \cr -#' \code{tsur} \tab Year in which the survey took place \cr \tab \cr -#' \code{country} \tab The country where the survey took place \cr \tab \cr -#' \code{test} \tab The type of test taken \cr \tab \cr -#' \code{antibody} \tab antibody \cr \tab \cr -#' } +#' @inheritParams prepare_serodata #' @param size_text Text size use in the theme of the graph returned by the function. #' @return A ggplot object containing the seropositivity-vs-age graph of the raw data of a given seroprevalence survey with its corresponging binomial confidence interval. #' @examples -#' \dontrun{ -#' data_test <- prepare_serodata(serodata) -#' seromodel_object <- run_seromodel( -#' serodata = data_test, -#' foi_model = "constant", -#' n_iters = 1000 -#') -#' plot_seroprev(seromodel_object, size_text = 15) -#' } +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) +#' plot_seroprev(serodata, size_text = 15) #' @export plot_seroprev <- function(serodata, size_text = 6) { @@ -47,27 +29,29 @@ plot_seroprev <- function(serodata, #' This function generates a seropositivity plot of the specified serological model object. This includes the original data grouped by age #' as well as the obtained fitting from the model implementation. Age is located on the x axis and seropositivity on the y axis with its #' corresponding confidence interval. -#' @param seromodel_object Object containing the results of fitting a model by means of \link{run_seromodel}. +#' @inheritParams get_foi_central_estimates +#' @inheritParams run_seromodel #' @param size_text Text size of the graph returned by the function. #' @return A ggplot object containing the seropositivity-vs-age graph including the data, the fitted model and their corresponding confindence intervals. #' @examples -#' \dontrun{ -#' data("serodata") -#' data_test <- prepare_serodata(serodata) -#' seromodel_object <- run_seromodel(serodata = data_test, +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) +#' seromodel_object <- run_seromodel(serodata = serodata, #' foi_model = "constant", #' n_iters = 1000) -#' plot_seroprev_fitted(seromodel_object, size_text = 15) -#' } +#' plot_seroprev_fitted(seromodel_object, +#' serodata = serodata, +#' size_text = 15) #' @export plot_seroprev_fitted <- function(seromodel_object, + serodata, size_text = 6) { - if (is.character(seromodel_object$fit) == FALSE) { - if (class(seromodel_object$fit@sim$samples) != "NULL" ) { + if (is.character(seromodel_object) == FALSE) { + if (class(seromodel_object@sim$samples) != "NULL" ) { - foi <- rstan::extract(seromodel_object$fit, "foi", inc_warmup = FALSE)[[1]] - prev_expanded <- get_prev_expanded(foi, serodata = seromodel_object$serodata, bin_data = TRUE) + foi <- rstan::extract(seromodel_object, "foi", inc_warmup = FALSE)[[1]] + prev_expanded <- get_prev_expanded(foi, serodata = serodata, bin_data = TRUE) prev_plot <- ggplot2::ggplot(prev_expanded) + ggplot2::geom_ribbon( @@ -126,34 +110,38 @@ plot_seroprev_fitted <- function(seromodel_object, #' This function generates a Force-of-Infection plot from the results obtained by fitting a serological model. #' This includes the corresponding binomial confidence interval. #' The x axis corresponds to the decades covered by the survey the y axis to the Force-of-Infection. -#' @param seromodel_object Object containing the results of fitting a model by means of \link{run_seromodel}. +#' @inheritParams get_foi_central_estimates #' @param size_text Text size use in the theme of the graph returned by the function. #' @param max_lambda TBD #' @param foi_sim TBD #' @return A ggplot2 object containing the Force-of-infection-vs-time including the corresponding confidence interval. #' @examples -#' \dontrun{ -#' data_test <- prepare_serodata(serodata) -#' seromodel_object <- run_seromodel( -#' serodata = data_test, +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) +#' seromodel_object <- run_seromodel( +#' serodata = serodata, #' foi_model = "constant", #' n_iters = 1000 -#' ) -#' plot_foi(seromodel_object, size_text = 15) -#' } +#' ) +#' cohort_ages <- get_cohort_ages(serodata) +#' plot_foi(seromodel_object = seromodel_object, +#' cohort_ages = cohort_ages, +#' size_text = 15) #' @export plot_foi <- function(seromodel_object, + cohort_ages, max_lambda = NA, size_text = 25, foi_sim = NULL) { - if (is.character(seromodel_object$fit) == FALSE) { - if (class(seromodel_object$fit@sim$samples) != "NULL") { - foi <- rstan::extract(seromodel_object$fit, + if (is.character(seromodel_object) == FALSE) { + if (class(seromodel_object@sim$samples) != "NULL") { + foi <- rstan::extract(seromodel_object, "foi", inc_warmup = FALSE)[[1]] #-------- This bit is to get the actual length of the foi data - foi_data <- seromodel_object$foi_cent_est + foi_data <- get_foi_central_estimates(seromodel_object = seromodel_object, + cohort_ages = cohort_ages) #-------- foi_data$medianv[1] <- NA @@ -223,27 +211,29 @@ plot_foi <- function(seromodel_object, #' This function generates a plot of the R-hat estimates obtained for a specified fitted serological model \code{seromodel_object}. #' The x axis corresponds to the decades covered by the survey and the y axis to the value of the rhats. #' All rhats must be smaller than 1 to ensure convergence (for further details check \link[bayesplot]{rhat}). -#' @param seromodel_object Object containing the results of fitting a model by means of \link{run_seromodel}. +#' @inheritParams get_foi_central_estimates #' @param size_text Text size use in the theme of the graph returned by the function. #' @return The rhats-convergence plot of the selected model. #' @examples -#' \dontrun{ -#' data("serodata") -#' data_test <- prepare_serodata(serodata) +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) #' seromodel_object <- run_seromodel( -#' serodata = data_test, +#' serodata = serodata, #' foi_model = "constant", #' n_iters = 1000 -#') -#' plot_rhats(seromodel_object, +#' ) +#' cohort_ages <- get_cohort_ages(serodata = serodata) +#' plot_rhats(seromodel_object, +#' cohort_ages = cohort_ages, #' size_text = 15) -#' } #' @export plot_rhats <- function(seromodel_object, + cohort_ages, size_text = 25) { - if (is.character(seromodel_object$fit) == FALSE) { - if (class(seromodel_object$fit@sim$samples) != "NULL") { - rhats <- get_table_rhats(seromodel_object) + if (is.character(seromodel_object) == FALSE) { + if (class(seromodel_object@sim$samples) != "NULL") { + rhats <- get_table_rhats(seromodel_object = seromodel_object, + cohort_ages = cohort_ages) rhats_plot <- ggplot2::ggplot(rhats, ggplot2::aes(year, rhat)) + @@ -287,43 +277,52 @@ plot_rhats <- function(seromodel_object, #' Function that generates a vertical arrange of plots showing a summary of a model, the estimated seroprevalence, #' the Force-of-Infection fit and the R-hat estimates plots. #' -#' @param seromodel_object Object containing the results of fitting a model by means of \link{run_seromodel}. +#' @inheritParams get_foi_central_estimates +#' @inheritParams run_seromodel #' @param size_text Text size use in the theme of the graph returned by the function. #' @param max_lambda TBD #' @param foi_sim TBD #' @return A ggplot object with a vertical arrange containing the seropositivity, force of infection, and convergence plots. #' @examples -#' \dontrun{ -#' data_test <- prepare_serodata(serodata) -#' seromodel_object <- run_seromodel( -#' serodata = data_test, -#' foi_model = "constant", -#' n_iters = 1000 -#') -#' plot_seromodel(seromodel_object, size_text = 15) -#' } +#' data(chagas2012) +#' serodata <- prepare_serodata(chagas2012) +#' seromodel_object <- run_seromodel( +#' serodata = serodata, +#' foi_model = "constant", +#' n_iters = 1000 +#' ) +#' plot_seromodel(seromodel_object, +#' serodata = serodata, +#' size_text = 15) #' @export plot_seromodel <- function(seromodel_object, + serodata, max_lambda = NA, size_text = 25, foi_sim = NULL) { - if (is.character(seromodel_object$fit) == FALSE) { - if (class(seromodel_object$fit@sim$samples) != "NULL") { + if (is.character(seromodel_object) == FALSE) { + if (class(seromodel_object@sim$samples) != "NULL") { + cohort_ages <- get_cohort_ages(serodata = serodata) + prev_plot <- plot_seroprev_fitted(seromodel_object = seromodel_object, - size_text = size_text) + serodata = serodata, + size_text = size_text) foi_plot <- plot_foi( seromodel_object = seromodel_object, + cohort_ages = cohort_ages, max_lambda = max_lambda, size_text = size_text, foi_sim = foi_sim ) rhats_plot <- plot_rhats(seromodel_object = seromodel_object, + cohort_ages = cohort_ages, size_text = size_text) - + model_summary <- extract_seromodel_summary(seromodel_object = seromodel_object, + serodata = serodata) summary_table <- t( - dplyr::select(seromodel_object$model_summary, + dplyr::select(model_summary, c('foi_model', 'dataset', 'elpd', 'se', 'converged'))) summary_plot <- plot_info_table(summary_table, size_text = size_text) @@ -359,7 +358,8 @@ plot_seromodel <- function(seromodel_object, ggplot2::ylab(" ") + ggplot2::xlab(" ") g1 <- g0 - g0 <- g0 + ggplot2::labs(subtitle = seromodel_object$model) + + # TODO: This + g0 <- g0 + ggplot2::labs(subtitle = seromodel_object$model_name) + ggplot2::theme(plot.title = ggplot2::element_text(size = 10)) plot_arrange <- @@ -377,16 +377,16 @@ plot_seromodel <- function(seromodel_object, #' @param size_text Text size of the graph returned by the function #' @return p the plot for the given table #' @examples -#' \dontrun{ -#' data_test <- prepare_serodata(serodata) -#' seromodel_object <- run_seromodel( -#' serodata = data_test, -#' foi_model = "constant", -#' n_iters = 1000 -#') -#' info = t(seromodel_object$model_summary) +#' serodata <- prepare_serodata(chagas2012) +#' seromodel_object <- run_seromodel( +#' serodata = serodata, +#' foi_model = "constant", +#' n_iters = 1000 +#' ) +#' seromodel_summary <- extract_seromodel_summary(seromodel_object = seromodel_object, +#' serodata = serodata) +#' info = t(seromodel_summary) #' plot_info_table (info, size_text = 15) -#' } #' @export plot_info_table <- function(info, size_text) { dato <- data.frame(y = NROW(info):seq_len(1), diff --git a/README.Rmd b/README.Rmd index 964b4b24..7a5dbb34 100644 --- a/README.Rmd +++ b/README.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( ) ``` -## *serofoi*: force-of-infection from population based serosurveys with age-disagregated data +## *serofoi*: force-of-infection from population based serosurveys with age-disagregated data @@ -47,22 +47,20 @@ remotes::install_github("epiverse-trace/serofoi") ```{r cleaning, include = FALSE, echo = TRUE} library(serofoi) -rownames(serodata) <- NULL - ``` ***serofoi*** provides a minimal serosurvey dataset, `serodata`, that can be used to test out the package. ```{r ex, include = TRUE} -# Load example serodata data included with the package -data("serodata") -head(serodata, 5) +# Load example dataset chagas2012 included with the package +data(chagas2012) +head(chagas2012, 5) ``` The function `prepare_serodata` will prepare the entry data for the use of the modelling module; this function computes the sample size, the years of birth and the binomial confidence interval for each age group in the provided dataset. A visualisation of the prepared seroprevalence data can be obtained using the function plot_seroprev: ```{r data_test, include = TRUE, out.fig.height="30%", out.width="50%", fig.align="center", message=FALSE} -serodata_test <- prepare_serodata(serodata) +serodata_test <- prepare_serodata(chagas2012) plot_seroprev(serodata_test, size_text = 15) ``` diff --git a/README.md b/README.md index c1ecfcca..3de55fec 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -## *serofoi*: force-of-infection from population based serosurveys with age-disagregated data +## *serofoi*: force-of-infection from population based serosurveys with age-disagregated data @@ -48,9 +48,9 @@ remotes::install_github("epiverse-trace/serofoi") can be used to test out the package. ``` r -# Load example serodata data included with the package -data("serodata") -head(serodata, 5) +# Load example dataset chagas2012 included with the package +data(chagas2012) +head(chagas2012, 5) #> survey total counts age_min age_max tsur country test antibody #> 1 COL-035-93 34 0 1 1 2012 COL ELISA IgG anti-T.cruzi #> 2 COL-035-93 25 0 2 2 2012 COL ELISA IgG anti-T.cruzi @@ -66,7 +66,7 @@ in the provided dataset. A visualisation of the prepared seroprevalence data can be obtained using the function plot_seroprev: ``` r -serodata_test <- prepare_serodata(serodata) +serodata_test <- prepare_serodata(chagas2012) plot_seroprev(serodata_test, size_text = 15) ``` @@ -97,10 +97,10 @@ More details on how to use ***serofoi*** can be found in the [online documentation](https://epiverse-trace.github.io/serofoi/) as package vignettes, under [**Get Started**](https://epiverse-trace.github.io/serofoi/articles/serofoi.html), -[**FoI +[**An Introduction to FoI Models**](https://epiverse-trace.github.io/serofoi/articles/foi_models.html) -and [**Use -Cases**](https://epiverse-trace.github.io/serofoi/articles/use_cases.html) +and [**Real-life Use Cases for +serofoi**](https://epiverse-trace.github.io/serofoi/articles/use_cases.html) ## Help diff --git a/data/serodata.RData b/data/serodata.RData deleted file mode 100644 index f9570b14..00000000 Binary files a/data/serodata.RData and /dev/null differ diff --git a/man/extract_seromodel_summary.Rd b/man/extract_seromodel_summary.Rd index cd037799..33f807f6 100644 --- a/man/extract_seromodel_summary.Rd +++ b/man/extract_seromodel_summary.Rd @@ -4,11 +4,31 @@ \alias{extract_seromodel_summary} \title{Method to extact a summary of the specified serological model object} \usage{ -extract_seromodel_summary(seromodel_object) +extract_seromodel_summary(seromodel_object, serodata) } \arguments{ -\item{seromodel_object}{\code{seromodel_object}. An object containing relevant information about the implementation of the model. -Refer to \link{fit_seromodel} for further details.} +\item{seromodel_object}{Stanfit object containing the results of fitting a model by means of \link{run_seromodel}.} + +\item{serodata}{A data frame containing the data from a seroprevalence survey. +This data frame must contain the following columns: +\tabular{ll}{ +\code{survey} \tab survey Label of the current survey \cr \tab \cr +\code{total} \tab Number of samples for each age group\cr \tab \cr +\code{counts} \tab Number of positive samples for each age group\cr \tab \cr +\code{age_min} \tab age_min \cr \tab \cr +\code{age_max} \tab age_max \cr \tab \cr +\code{tsur} \tab Year in which the survey took place \cr \tab \cr +\code{country} \tab The country where the survey took place \cr \tab \cr +\code{test} \tab The type of test taken \cr \tab \cr +\code{antibody} \tab antibody \cr \tab \cr +\code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr +\code{sample_size} \tab The size of the sample \cr \tab \cr +\code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr +\code{prev_obs} \tab Observed prevalence \cr \tab \cr +\code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr +\code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr +} +The last six colums can be added to \code{serodata} by means of the function \code{\link{prepare_serodata}}.} } \value{ \code{model_summary}. Object with a summary of \code{seromodel_object} containing the following: @@ -34,11 +54,10 @@ as well as information about the convergence of the model, like the expected log corresponding standar deviation. } \examples{ -\dontrun{ -data("serodata") -serodata <- prepare_serodata(serodata) +data(chagas2012) +serodata <- prepare_serodata(chagas2012) seromodel_object <- run_seromodel(serodata = serodata, foi_model = "constant") -extract_seromodel_summary(seromodel_object) -} +extract_seromodel_summary(seromodel_object, + serodata = serodata) } diff --git a/man/figures/logo.png b/man/figures/logo.png new file mode 100644 index 00000000..309d3f0f Binary files /dev/null and b/man/figures/logo.png differ diff --git a/man/fit_seromodel.Rd b/man/fit_seromodel.Rd index 67908578..9f17a451 100644 --- a/man/fit_seromodel.Rd +++ b/man/fit_seromodel.Rd @@ -6,7 +6,7 @@ \usage{ fit_seromodel( serodata, - foi_model, + foi_model = c("constant", "tv_normal_log", "tv_normal"), n_iters = 1000, n_thin = 2, delta = 0.9, @@ -15,7 +15,26 @@ fit_seromodel( ) } \arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence survey. For further details refer to \link{run_seromodel}.} +\item{serodata}{A data frame containing the data from a seroprevalence survey. +This data frame must contain the following columns: +\tabular{ll}{ +\code{survey} \tab survey Label of the current survey \cr \tab \cr +\code{total} \tab Number of samples for each age group\cr \tab \cr +\code{counts} \tab Number of positive samples for each age group\cr \tab \cr +\code{age_min} \tab age_min \cr \tab \cr +\code{age_max} \tab age_max \cr \tab \cr +\code{tsur} \tab Year in which the survey took place \cr \tab \cr +\code{country} \tab The country where the survey took place \cr \tab \cr +\code{test} \tab The type of test taken \cr \tab \cr +\code{antibody} \tab antibody \cr \tab \cr +\code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr +\code{sample_size} \tab The size of the sample \cr \tab \cr +\code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr +\code{prev_obs} \tab Observed prevalence \cr \tab \cr +\code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr +\code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr +} +The last six colums can be added to \code{serodata} by means of the function \code{\link{prepare_serodata}}.} \item{foi_model}{Name of the selected model. Current version provides three options: \describe{ @@ -37,25 +56,7 @@ For further details refer to the \code{control} parameter in \link[rstan]{sampli \item{decades}{Number of decades covered by the survey data.} } \value{ -\code{seromodel_object}. An object containing relevant information about the implementation of the model. It contains the following: -\tabular{ll}{ -\code{fit} \tab \code{stanfit} object returned by the function \link[rstan]{sampling} \cr \tab \cr -\code{serodata} \tab A data frame containing the data from a seroprevalence survey. For further details refer to \link{run_seromodel}.\cr \tab \cr -\code{stan_data} \tab List containing \code{Nobs}, \code{Npos}, \code{Ntotal}, \code{Age}, \code{Ymax}, \code{AgeExpoMatrix} and \code{NDecades}. -This object is used as an input for the \link[rstan]{sampling} function \cr \tab \cr -\code{exposure_years} \tab Integer atomic vector containing the actual exposure years (1946, ..., 2007 e.g.) \cr \tab \cr -\code{exposure_ages} \tab Integer atomic vector containing the numeration of the exposure ages. \cr \tab \cr -\code{n_iters} \tab Number of interations for eah chain including the warmup. \cr \tab \cr -\code{n_thin} \tab Positive integer specifying the period for saving samples. \cr \tab \cr -\code{n_warmup} \tab Number of warm up iterations. Set by default as n_iters/2. \cr \tab \cr -\code{foi_model} \tab The name of the model\cr \tab \cr -\code{delta} \tab Real number between 0 and 1 that represents the target average acceptance probability. \cr \tab \cr -\code{m_treed} \tab Maximum tree depth for the binary tree used in the NUTS stan sampler. \cr \tab \cr -\code{loo_fit} \tab Efficient approximate leave-one-out cross-validation. Refer to \link[loo]{loo} for further details. \cr \tab \cr -\code{foi_cent_est} \tab A data fram e containing \code{year} (corresponding to \code{exposure_years}), \code{lower}, \code{upper}, and \code{medianv} \cr \tab \cr -\code{foi_post_s} \tab Sample n rows from a table. Refer to \link[dplyr]{sample_n} for further details. \cr \tab \cr -\code{model_summary} \tab A data fram containing the summary of the model. Refer to \link{extract_seromodel_summary} for further details. \cr \tab \cr -} +\code{seromodel_object}. \code{stanfit} object returned by the function \link[rstan]{sampling} } \description{ This function fits the specified model \code{foi_model} to the serological survey data \code{serodata} @@ -63,11 +64,9 @@ by means of the \link[rstan]{sampling} method. The function determines whether t object needs to be compiled by rstan. } \examples{ -\dontrun{ -data("serodata") -serodata <- prepare_serodata(serodata) +data(chagas2012) +serodata <- prepare_serodata(chagas2012) seromodel_fit <- fit_seromodel(serodata = serodata, foi_model = "constant") -} } diff --git a/man/get_cohort_ages.Rd b/man/get_cohort_ages.Rd new file mode 100644 index 00000000..e584d314 --- /dev/null +++ b/man/get_cohort_ages.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/modelling.R +\name{get_cohort_ages} +\alias{get_cohort_ages} +\title{Function that generates a data.frame containing the age of each cohort corresponding to each birth year exluding the year of the survey.} +\usage{ +get_cohort_ages(serodata) +} +\arguments{ +\item{serodata}{A data frame containing the data from a seroprevalence survey. +This data frame must contain the following columns: +\tabular{ll}{ +\code{survey} \tab survey Label of the current survey \cr \tab \cr +\code{total} \tab Number of samples for each age group\cr \tab \cr +\code{counts} \tab Number of positive samples for each age group\cr \tab \cr +\code{age_min} \tab age_min \cr \tab \cr +\code{age_max} \tab age_max \cr \tab \cr +\code{tsur} \tab Year in which the survey took place \cr \tab \cr +\code{country} \tab The country where the survey took place \cr \tab \cr +\code{test} \tab The type of test taken \cr \tab \cr +\code{antibody} \tab antibody \cr \tab \cr +\code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr +\code{sample_size} \tab The size of the sample \cr \tab \cr +\code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr +\code{prev_obs} \tab Observed prevalence \cr \tab \cr +\code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr +\code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr +} +The last six colums can be added to \code{serodata} by means of the function \code{\link{prepare_serodata}}.} +} +\value{ +\code{cohort_ages}. A data.frame containing the age of each cohort corresponding to each birth year +} +\description{ +This function generates a data.frame containing the age of each cohort corresponding to each \code{birth_year} excluding the year of the survey, +for which the cohort age is still 0. +specified serological survey data \code{serodata} excluding the year of the survey. +} +\examples{ +data(chagas2012) +serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) +cohort_ages <- get_cohort_ages(serodata = serodata) +} diff --git a/man/get_exposure_ages.Rd b/man/get_exposure_ages.Rd deleted file mode 100644 index aec51f06..00000000 --- a/man/get_exposure_ages.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{get_exposure_ages} -\alias{get_exposure_ages} -\title{Function that generates an atomic vector containing the corresponding exposition years of a serological survey} -\usage{ -get_exposure_ages(serodata) -} -\arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence survey. This data frame must contain the year of birth for each individual (birth_year) and the time of the survey (tsur). birth_year can be constructed by means of the \link{prepare_serodata} function.} -} -\value{ -\code{exposure_ages}. An atomic vector with the numeration of the exposition years in serodata -} -\description{ -This function generates an atomic vector containing the exposition years corresponding to the specified serological survey data \code{serodata}. -The exposition years to the disease for each individual corresponds to the time from birth to the moment of the survey. -} -\examples{ -\dontrun{ -data("serodata") -serodata <- prepare_serodata(serodata = serodata, alpha = 0.05) -exposure_ages <- get_exposure_ages(serodata) -} -} diff --git a/man/get_exposure_matrix.Rd b/man/get_exposure_matrix.Rd index 8b4f1abf..2fc4c9ba 100644 --- a/man/get_exposure_matrix.Rd +++ b/man/get_exposure_matrix.Rd @@ -7,7 +7,26 @@ get_exposure_matrix(serodata) } \arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence survey. This data frame must contain the year of birth for each individual (birth_year) and the time of the survey (tsur). birth_year can be constructed by means of the \link{prepare_serodata} function.} +\item{serodata}{A data frame containing the data from a seroprevalence survey. +This data frame must contain the following columns: +\tabular{ll}{ +\code{survey} \tab survey Label of the current survey \cr \tab \cr +\code{total} \tab Number of samples for each age group\cr \tab \cr +\code{counts} \tab Number of positive samples for each age group\cr \tab \cr +\code{age_min} \tab age_min \cr \tab \cr +\code{age_max} \tab age_max \cr \tab \cr +\code{tsur} \tab Year in which the survey took place \cr \tab \cr +\code{country} \tab The country where the survey took place \cr \tab \cr +\code{test} \tab The type of test taken \cr \tab \cr +\code{antibody} \tab antibody \cr \tab \cr +\code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr +\code{sample_size} \tab The size of the sample \cr \tab \cr +\code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr +\code{prev_obs} \tab Observed prevalence \cr \tab \cr +\code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr +\code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr +} +The last six colums can be added to \code{serodata} by means of the function \code{\link{prepare_serodata}}.} } \value{ \code{exposure_output}. An atomic matrix containing the expositions for each entry of \code{serodata} by year. @@ -16,9 +35,7 @@ get_exposure_matrix(serodata) Function that generates the exposure matrix corresponding to a serological survey } \examples{ -\dontrun{ -data("serodata") -serodata <- prepare_serodata(serodata = serodata) +data(chagas2012) +serodata <- prepare_serodata(serodata = chagas2012) exposure_matrix <- get_exposure_matrix(serodata = serodata) } -} diff --git a/man/get_foi_central_estimates.Rd b/man/get_foi_central_estimates.Rd new file mode 100644 index 00000000..84630313 --- /dev/null +++ b/man/get_foi_central_estimates.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/modelling.R +\name{get_foi_central_estimates} +\alias{get_foi_central_estimates} +\title{Function that generates the central estimates for the fitted forced FoI} +\usage{ +get_foi_central_estimates(seromodel_object, cohort_ages) +} +\arguments{ +\item{seromodel_object}{Stanfit object containing the results of fitting a model by means of \link{run_seromodel}.} + +\item{cohort_ages}{A data.frame containing the age of each cohort corresponding to each birth year.} +} +\value{ +\code{foi_central_estimates}. Central estimates for the fitted forced FoI +} +\description{ +Function that generates the central estimates for the fitted forced FoI +} +\examples{ +data(chagas2012) +serodata <- prepare_serodata(chagas2012) +seromodel_object <- fit_seromodel(serodata = serodata, + foi_model = "constant") +cohort_ages <- get_cohort_ages(serodata = serodata) +foi_central_estimates <- get_foi_central_estimates(seromodel_object = seromodel_object, + cohort_ages = cohort_ages) +} diff --git a/man/get_prev_expanded.Rd b/man/get_prev_expanded.Rd index a95f4899..4ab509c7 100644 --- a/man/get_prev_expanded.Rd +++ b/man/get_prev_expanded.Rd @@ -8,9 +8,28 @@ Force-of-Infection fitting} get_prev_expanded(foi, serodata, bin_data = FALSE) } \arguments{ -\item{foi}{Object containing the information of the force of infection. It is obtained from \code{rstan::extract(seromodel_object$fit, "foi", inc_warmup = FALSE)[[1]]}.} +\item{foi}{Object containing the information of the force of infection. It is obtained from \code{rstan::extract(seromodel_object$seromodel, "foi", inc_warmup = FALSE)[[1]]}.} -\item{serodata}{A data frame containing the data from a seroprevalence survey. For further details refer to \link{run_seromodel}.} +\item{serodata}{A data frame containing the data from a seroprevalence survey. +This data frame must contain the following columns: +\tabular{ll}{ +\code{survey} \tab survey Label of the current survey \cr \tab \cr +\code{total} \tab Number of samples for each age group\cr \tab \cr +\code{counts} \tab Number of positive samples for each age group\cr \tab \cr +\code{age_min} \tab age_min \cr \tab \cr +\code{age_max} \tab age_max \cr \tab \cr +\code{tsur} \tab Year in which the survey took place \cr \tab \cr +\code{country} \tab The country where the survey took place \cr \tab \cr +\code{test} \tab The type of test taken \cr \tab \cr +\code{antibody} \tab antibody \cr \tab \cr +\code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr +\code{sample_size} \tab The size of the sample \cr \tab \cr +\code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr +\code{prev_obs} \tab Observed prevalence \cr \tab \cr +\code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr +\code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr +} +The last six colums can be added to \code{serodata} by means of the function \code{\link{prepare_serodata}}.} \item{bin_data}{TBD} } @@ -22,11 +41,10 @@ This function computes the corresponding binomial confidence intervals for the o of the Force-of-Infection \code{foi} for plotting an analysis purposes. } \examples{ -\dontrun{ -serodata <- prepare_serodata(serodata) +data(chagas2012) +serodata <- prepare_serodata(chagas2012) seromodel_object <- run_seromodel(serodata = serodata, - foi_model = "constant") -foi <- rstan::extract(seromodel_object$fit, "foi")[[1]] -get_prev_expanded <- function(foi, serodata) -} + foi_model = "constant") +foi <- rstan::extract(seromodel_object, "foi")[[1]] +get_prev_expanded(foi, serodata) } diff --git a/man/get_table_rhats.Rd b/man/get_table_rhats.Rd index 4aac86a0..d3d22249 100644 --- a/man/get_table_rhats.Rd +++ b/man/get_table_rhats.Rd @@ -4,10 +4,12 @@ \alias{get_table_rhats} \title{Method for extracting a dataframe containing the R-hat estimates for a given serological model} \usage{ -get_table_rhats(seromodel_object) +get_table_rhats(seromodel_object, cohort_ages) } \arguments{ -\item{seromodel_object}{seromodel_object} +\item{seromodel_object}{Stanfit object containing the results of fitting a model by means of \link{run_seromodel}.} + +\item{cohort_ages}{A data.frame containing the age of each cohort corresponding to each birth year.} } \value{ rhats table @@ -17,12 +19,12 @@ This method relies in the function \link[bayesplot]{rhat} to extract the R-hat e \code{seromodel_object} and returns a table a dataframe with the estimates for each year of birth. } \examples{ -\dontrun{ -data("serodata") -data_test <- prepare_serodata(serodata = serodata) -model_constant <- run_seromodel(serodata = data_test, - foi_model = "constant", +data(chagas2012) +serodata <- prepare_serodata(serodata = chagas2012) +model_constant <- run_seromodel(serodata = serodata, + foi_model = "constant", n_iters = 1500) -get_table_rhats(model_object = model_constant) -} +cohort_ages <- get_cohort_ages(serodata) +get_table_rhats(seromodel_object = model_constant, + cohort_ages = cohort_ages) } diff --git a/man/plot_foi.Rd b/man/plot_foi.Rd index 6e376c70..8dd29d7d 100644 --- a/man/plot_foi.Rd +++ b/man/plot_foi.Rd @@ -4,10 +4,18 @@ \alias{plot_foi} \title{Function that generates a Force-of-Infection plot corresponding to the specified fitted serological model} \usage{ -plot_foi(seromodel_object, max_lambda = NA, size_text = 25, foi_sim = NULL) +plot_foi( + seromodel_object, + cohort_ages, + max_lambda = NA, + size_text = 25, + foi_sim = NULL +) } \arguments{ -\item{seromodel_object}{Object containing the results of fitting a model by means of \link{run_seromodel}.} +\item{seromodel_object}{Stanfit object containing the results of fitting a model by means of \link{run_seromodel}.} + +\item{cohort_ages}{A data.frame containing the age of each cohort corresponding to each birth year.} \item{max_lambda}{TBD} @@ -24,13 +32,15 @@ This includes the corresponding binomial confidence interval. The x axis corresponds to the decades covered by the survey the y axis to the Force-of-Infection. } \examples{ -\dontrun{ - data_test <- prepare_serodata(serodata) - seromodel_object <- run_seromodel( - serodata = data_test, +data(chagas2012) +serodata <- prepare_serodata(chagas2012) +seromodel_object <- run_seromodel( + serodata = serodata, foi_model = "constant", n_iters = 1000 -) -plot_foi(seromodel_object, size_text = 15) -} + ) +cohort_ages <- get_cohort_ages(serodata) +plot_foi(seromodel_object = seromodel_object, + cohort_ages = cohort_ages, + size_text = 15) } diff --git a/man/plot_info_table.Rd b/man/plot_info_table.Rd index 72ddc003..a642fff0 100644 --- a/man/plot_info_table.Rd +++ b/man/plot_info_table.Rd @@ -18,14 +18,14 @@ p the plot for the given table Function that generates a plot for a given table } \examples{ -\dontrun{ - data_test <- prepare_serodata(serodata) - seromodel_object <- run_seromodel( - serodata = data_test, - foi_model = "constant", - n_iters = 1000 +serodata <- prepare_serodata(chagas2012) +seromodel_object <- run_seromodel( + serodata = serodata, + foi_model = "constant", + n_iters = 1000 ) -info = t(seromodel_object$model_summary) +seromodel_summary <- extract_seromodel_summary(seromodel_object = seromodel_object, + serodata = serodata) +info = t(seromodel_summary) plot_info_table (info, size_text = 15) } -} diff --git a/man/plot_rhats.Rd b/man/plot_rhats.Rd index 02b73f00..eb206112 100644 --- a/man/plot_rhats.Rd +++ b/man/plot_rhats.Rd @@ -4,10 +4,12 @@ \alias{plot_rhats} \title{Function that generates a plot of the R-hat estimates of the specified fitted serological model} \usage{ -plot_rhats(seromodel_object, size_text = 25) +plot_rhats(seromodel_object, cohort_ages, size_text = 25) } \arguments{ -\item{seromodel_object}{Object containing the results of fitting a model by means of \link{run_seromodel}.} +\item{seromodel_object}{Stanfit object containing the results of fitting a model by means of \link{run_seromodel}.} + +\item{cohort_ages}{A data.frame containing the age of each cohort corresponding to each birth year.} \item{size_text}{Text size use in the theme of the graph returned by the function.} } @@ -20,15 +22,15 @@ The x axis corresponds to the decades covered by the survey and the y axis to th All rhats must be smaller than 1 to ensure convergence (for further details check \link[bayesplot]{rhat}). } \examples{ -\dontrun{ -data("serodata") -data_test <- prepare_serodata(serodata) +data(chagas2012) +serodata <- prepare_serodata(chagas2012) seromodel_object <- run_seromodel( - serodata = data_test, + serodata = serodata, foi_model = "constant", n_iters = 1000 -) -plot_rhats(seromodel_object, + ) +cohort_ages <- get_cohort_ages(serodata = serodata) +plot_rhats(seromodel_object, + cohort_ages = cohort_ages, size_text = 15) } -} diff --git a/man/plot_seromodel.Rd b/man/plot_seromodel.Rd index 6a36f736..6aaa2c51 100644 --- a/man/plot_seromodel.Rd +++ b/man/plot_seromodel.Rd @@ -7,13 +7,35 @@ the Force-of-Infection fit and the R-hat estimates plots.} \usage{ plot_seromodel( seromodel_object, + serodata, max_lambda = NA, size_text = 25, foi_sim = NULL ) } \arguments{ -\item{seromodel_object}{Object containing the results of fitting a model by means of \link{run_seromodel}.} +\item{seromodel_object}{Stanfit object containing the results of fitting a model by means of \link{run_seromodel}.} + +\item{serodata}{A data frame containing the data from a seroprevalence survey. +This data frame must contain the following columns: +\tabular{ll}{ +\code{survey} \tab survey Label of the current survey \cr \tab \cr +\code{total} \tab Number of samples for each age group\cr \tab \cr +\code{counts} \tab Number of positive samples for each age group\cr \tab \cr +\code{age_min} \tab age_min \cr \tab \cr +\code{age_max} \tab age_max \cr \tab \cr +\code{tsur} \tab Year in which the survey took place \cr \tab \cr +\code{country} \tab The country where the survey took place \cr \tab \cr +\code{test} \tab The type of test taken \cr \tab \cr +\code{antibody} \tab antibody \cr \tab \cr +\code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr +\code{sample_size} \tab The size of the sample \cr \tab \cr +\code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr +\code{prev_obs} \tab Observed prevalence \cr \tab \cr +\code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr +\code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr +} +The last six colums can be added to \code{serodata} by means of the function \code{\link{prepare_serodata}}.} \item{max_lambda}{TBD} @@ -29,13 +51,14 @@ Function that generates a vertical arrange of plots showing a summary of a model the Force-of-Infection fit and the R-hat estimates plots. } \examples{ -\dontrun{ -data_test <- prepare_serodata(serodata) -seromodel_object <- run_seromodel( - serodata = data_test, - foi_model = "constant", - n_iters = 1000 -) -plot_seromodel(seromodel_object, size_text = 15) -} + data(chagas2012) + serodata <- prepare_serodata(chagas2012) + seromodel_object <- run_seromodel( + serodata = serodata, + foi_model = "constant", + n_iters = 1000 + ) +plot_seromodel(seromodel_object, + serodata = serodata, + size_text = 15) } diff --git a/man/plot_seroprev.Rd b/man/plot_seroprev.Rd index 8948decb..68044182 100644 --- a/man/plot_seroprev.Rd +++ b/man/plot_seroprev.Rd @@ -7,7 +7,7 @@ plot_seroprev(serodata, size_text = 6) } \arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence survey. +\item{serodata}{A data frame containing the data from a serological survey. This data frame must contain the following columns: \tabular{ll}{ \code{survey} \tab survey Label of the current survey \cr \tab \cr @@ -30,13 +30,7 @@ A ggplot object containing the seropositivity-vs-age graph of the raw data of a Function that generates the sero-positivity plot from a raw serological survey dataset } \examples{ -\dontrun{ - data_test <- prepare_serodata(serodata) - seromodel_object <- run_seromodel( - serodata = data_test, - foi_model = "constant", - n_iters = 1000 -) -plot_seroprev(seromodel_object, size_text = 15) -} +data(chagas2012) +serodata <- prepare_serodata(chagas2012) +plot_seroprev(serodata, size_text = 15) } diff --git a/man/plot_seroprev_fitted.Rd b/man/plot_seroprev_fitted.Rd index 18740584..837765a5 100644 --- a/man/plot_seroprev_fitted.Rd +++ b/man/plot_seroprev_fitted.Rd @@ -4,10 +4,31 @@ \alias{plot_seroprev_fitted} \title{Function that generates a seropositivity plot corresponding to the specified fitted serological model} \usage{ -plot_seroprev_fitted(seromodel_object, size_text = 6) +plot_seroprev_fitted(seromodel_object, serodata, size_text = 6) } \arguments{ -\item{seromodel_object}{Object containing the results of fitting a model by means of \link{run_seromodel}.} +\item{seromodel_object}{Stanfit object containing the results of fitting a model by means of \link{run_seromodel}.} + +\item{serodata}{A data frame containing the data from a seroprevalence survey. +This data frame must contain the following columns: +\tabular{ll}{ +\code{survey} \tab survey Label of the current survey \cr \tab \cr +\code{total} \tab Number of samples for each age group\cr \tab \cr +\code{counts} \tab Number of positive samples for each age group\cr \tab \cr +\code{age_min} \tab age_min \cr \tab \cr +\code{age_max} \tab age_max \cr \tab \cr +\code{tsur} \tab Year in which the survey took place \cr \tab \cr +\code{country} \tab The country where the survey took place \cr \tab \cr +\code{test} \tab The type of test taken \cr \tab \cr +\code{antibody} \tab antibody \cr \tab \cr +\code{age_mean_f} \tab Floor value of the average between age_min and age_max \cr \tab \cr +\code{sample_size} \tab The size of the sample \cr \tab \cr +\code{birth_year} \tab The year in which the individuals of each age group were bornt \cr \tab \cr +\code{prev_obs} \tab Observed prevalence \cr \tab \cr +\code{prev_obs_lower} \tab Lower limit of the confidence interval for the observed prevalence \cr \tab \cr +\code{prev_obs_upper} \tab Upper limit of the confidence interval for the observed prevalence \cr \tab \cr +} +The last six colums can be added to \code{serodata} by means of the function \code{\link{prepare_serodata}}.} \item{size_text}{Text size of the graph returned by the function.} } @@ -20,12 +41,12 @@ as well as the obtained fitting from the model implementation. Age is located on corresponding confidence interval. } \examples{ -\dontrun{ -data("serodata") -data_test <- prepare_serodata(serodata) -seromodel_object <- run_seromodel(serodata = data_test, +data(chagas2012) +serodata <- prepare_serodata(chagas2012) +seromodel_object <- run_seromodel(serodata = serodata, foi_model = "constant", n_iters = 1000) -plot_seroprev_fitted(seromodel_object, size_text = 15) -} +plot_seroprev_fitted(seromodel_object, + serodata = serodata, + size_text = 15) } diff --git a/man/prepare_bin_data.Rd b/man/prepare_bin_data.Rd index a2d767a4..e7c8c1bd 100644 --- a/man/prepare_bin_data.Rd +++ b/man/prepare_bin_data.Rd @@ -8,7 +8,7 @@ age group} prepare_bin_data(serodata) } \arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence survey. For more information see the function \link{run_seromodel}. +\item{serodata}{A data frame containing the data from a seroprevalence survey. This data frame must contain the following columns: \tabular{ll}{ \code{survey} \tab survey Label of the current survey \cr \tab \cr @@ -37,7 +37,7 @@ This function prepapares a given pre-processed serological dataset (see \code{\l of its corresponding seroprevalence grouped by age group. } \examples{ -\dontrun{ -prepare_bin_data (serodata) -} +data(chagas2012) +serodata <- prepare_serodata(chagas2012) +prepare_bin_data(serodata) } diff --git a/man/prepare_serodata.Rd b/man/prepare_serodata.Rd index 33b4a1de..8e3c316a 100644 --- a/man/prepare_serodata.Rd +++ b/man/prepare_serodata.Rd @@ -4,7 +4,7 @@ \alias{prepare_serodata} \title{Function that prepares the data from a serological survey for modelling} \usage{ -prepare_serodata(serodata = serodata, alpha = 0.05, add_age_mean_f = TRUE) +prepare_serodata(serodata = serodata, alpha = 0.05) } \arguments{ \item{serodata}{A data frame containing the data from a serological survey. @@ -22,8 +22,6 @@ This data frame must contain the following columns: }} \item{alpha}{probability of a type I error. For further details refer to \link[Hmisc]{binconf}.} - -\item{add_age_mean_f}{TBD} } \value{ serodata with additional columns necessary for the analysis. These columns are: @@ -40,8 +38,6 @@ serodata with additional columns necessary for the analysis. These columns are: This function adds the necessary additional variables to the given dataset \code{serodata} corresponding to a serological survey. } \examples{ -\dontrun{ -data("serodata") -data_test <- prepare_serodata(serodata) -} +data(chagas2012) +serodata <- prepare_serodata(chagas2012) } diff --git a/man/run_seromodel.Rd b/man/run_seromodel.Rd index becb135a..9d70340f 100644 --- a/man/run_seromodel.Rd +++ b/man/run_seromodel.Rd @@ -6,7 +6,7 @@ \usage{ run_seromodel( serodata, - foi_model = "constant", + foi_model = c("constant", "tv_normal_log", "tv_normal"), n_iters = 1000, n_thin = 2, delta = 0.9, @@ -66,9 +66,8 @@ This function runs the specified model for the Force-of-Infection \code{foi_mode \code{serodata} as the input data. See \link{fit_seromodel} for further details. } \examples{ -\dontrun{ -serodata <- prepare_serodata(serodata) +data(chagas2012) +serodata <- prepare_serodata(chagas2012) run_seromodel (serodata, - foi_model = "constant") -} + foi_model = "constant") } diff --git a/man/serodata.Rd b/man/serodata.Rd deleted file mode 100644 index a0644814..00000000 --- a/man/serodata.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/serodata.R -\docType{data} -\name{serodata} -\alias{serodata} -\title{Seroprevalence data on serofoi} -\format{ -An object of class \code{"cross"}; see \code{\link[qtl]{read.cross}}. -} -\usage{ -serodata -} -\description{ -Data from a serological surveys -} -\examples{ -serodata -} -\keyword{datasets} diff --git a/tests/testthat/models_serialization.R b/tests/testthat/models_serialization.R index 6306fac0..0daa2b37 100644 --- a/tests/testthat/models_serialization.R +++ b/tests/testthat/models_serialization.R @@ -1,5 +1,4 @@ library(devtools) -library(dplyr) library(serofoi) library(testthat) @@ -7,7 +6,7 @@ set.seed(1234) # For reproducibility #----- Read and prepare data data("simdata_large_epi") -simdata <- simdata_large_epi %>% prepare_serodata() +simdata <- prepare_serodata(simdata_large_epi) no_transm <- 0.0000000001 big_outbreak <- 1.5 foi_sim <- c(rep(no_transm, 32), rep(big_outbreak, 3), rep(no_transm, 15)) # 1 epidemics @@ -23,11 +22,12 @@ models_list <- lapply(models_to_run, serodata = simdata, n_iters = 1000) -model_constant_json <- jsonlite::serializeJSON(models_list[[1]]) -write_json(model_constant_json, testthat::test_path("extdata", "model_constant.json")) +saveRDS(models_list[[1]], + testthat::test_path("extdata", "model_constant.RDS")) -model_tv_normal_json <- jsonlite::serializeJSON(models_list[[2]]) -write_json(model_tv_normal_json, testthat::test_path("extdata", "model_tv_normal.json")) +saveRDS(models_list[[2]], + testthat::test_path("extdata", "model_tv_normal.RDS")) + +saveRDS(models_list[[3]], + testthat::test_path("extdata", "model_tv_normal_log.RDS")) -model_tv_normal_log_json <- jsonlite::serializeJSON(models_list[[3]]) -write_json(model_tv_normal_json, testthat::test_path("extdata", "model_tv_normal_log.json")) diff --git a/tests/testthat/test_issue_47.R b/tests/testthat/test_issue_47.R index e46b2740..4e5e82c5 100644 --- a/tests/testthat/test_issue_47.R +++ b/tests/testthat/test_issue_47.R @@ -7,15 +7,17 @@ test_that("issue 47", { # Load data ## This dataset is already prepared - data_path <- testthat::test_path("extdata", "haiti_ssa_sample.RDS") - data_issue <- readRDS(data_path) + serodata_path <- testthat::test_path("extdata", "haiti_ssa_sample.RDS") + serodata <- readRDS(serodata_path) # Error reproduction - model_test <- run_seromodel(data_issue, foi_model = "tv_normal", print_summary = FALSE) - foi <- rstan::extract(model_test$fit, "foi", inc_warmup = FALSE)[[1]] - age_max <- max(data_issue$age_mean_f) - prev_expanded <- get_prev_expanded(foi, serodata = data_issue) + model_test <- run_seromodel(serodata = serodata, + foi_model = "tv_normal", + print_summary = FALSE) + foi <- rstan::extract(model_test, "foi", inc_warmup = FALSE)[[1]] + prev_expanded <- get_prev_expanded(foi, serodata = serodata) # Test + age_max <- max(serodata$age_mean_f) expect_length(prev_expanded$age, n = age_max) }) diff --git a/tests/testthat/test_modelling.R b/tests/testthat/test_modelling.R index 76494b19..e5a26974 100644 --- a/tests/testthat/test_modelling.R +++ b/tests/testthat/test_modelling.R @@ -2,17 +2,16 @@ test_that("individual models", { # So far we are skipping tests on these platforms until # we find an efficient way to update rstan testthat snapshots on all of them - skip_on_os(c("windows", "mac")) + # skip_on_os(c("windows", "mac")) source("testing_utils.R") set.seed(1234) # For reproducibility library(devtools) - library(dplyr) library(vdiffr) #----- Read and prepare data - data("serodata") - data_test <- serodata %>% prepare_serodata(alpha = 0.05) + data(chagas2012) + serodata <- prepare_serodata(chagas2012, alpha = 0.05) data_constant_path <- testthat::test_path("extdata", "prev_expanded_constant.RDS") data_tv_normal_path <- testthat::test_path("extdata", "prev_expanded_tv_normal.RDS") @@ -20,16 +19,20 @@ test_that("individual models", { prev_expanded_tv_normal_log <- readRDS(data_constant_path) + #----- Test for get_cohort_ages + cohort_ages <- get_cohort_ages(serodata = serodata) + expect_equal(nrow(cohort_ages), max(unique(serodata$tsur)) - min(serodata$birth_year)) + #----- Test for the constant model model_name <- "constant" - model_object <- run_seromodel(serodata = data_test, + model_object <- run_seromodel(serodata = serodata, foi_model = model_name, n_iters = 1000, print_summary = FALSE) - foi <- rstan::extract(model_object$fit, "foi", inc_warmup = FALSE)[[1]] - prev_expanded <- get_prev_expanded(foi, serodata = model_object$serodata) + foi <- rstan::extract(model_object, "foi", inc_warmup = FALSE)[[1]] + prev_expanded <- get_prev_expanded(foi, serodata = serodata) prev_expanded_constant <- readRDS(data_constant_path) testthat::expect_equal(prev_expanded, prev_expanded_constant, tolerance = TRUE) @@ -37,24 +40,24 @@ test_that("individual models", { #----- Test for the tv_normal model model_name <- "tv_normal" - model_object <- run_seromodel(serodata = data_test, + model_object <- run_seromodel(serodata = serodata, foi_model = model_name, n_iters = 1000) - foi <- rstan::extract(model_object$fit, "foi", inc_warmup = FALSE)[[1]] - prev_expanded <- get_prev_expanded(foi, serodata = model_object$serodata) + foi <- rstan::extract(model_object, "foi", inc_warmup = FALSE)[[1]] + prev_expanded <- get_prev_expanded(foi, serodata = serodata) prev_expanded_tv_normal <- readRDS(data_tv_normal_path) testthat::expect_equal(prev_expanded, prev_expanded_tv_normal, tolerance = TRUE) #----- Test for the tv_normal_log model model_name <- "tv_normal_log" - model_object <- run_seromodel(serodata = data_test, + model_object <- run_seromodel(serodata = serodata, foi_model = model_name, n_iters = 1000) - foi <- rstan::extract(model_object$fit, "foi", inc_warmup = FALSE)[[1]] - prev_expanded <- get_prev_expanded(foi, serodata = model_object$serodata) + foi <- rstan::extract(model_object, "foi", inc_warmup = FALSE)[[1]] + prev_expanded <- get_prev_expanded(foi, serodata = serodata) prev_expanded_tv_normal <- readRDS(data_tv_normal_path) testthat::expect_equal(prev_expanded, prev_expanded_tv_normal_log, tolerance = TRUE) diff --git a/tests/testthat/test_visualisation.R b/tests/testthat/test_visualisation.R deleted file mode 100644 index 52ef09fa..00000000 --- a/tests/testthat/test_visualisation.R +++ /dev/null @@ -1,146 +0,0 @@ -# Test for the function plot_seroprev_fitted - -library(testthat) - -test_that("individual models", { - # So far we are skipping tests on these platforms until - # we find an efficient way to update rstan testthat snapshots on all of them - skip_on_os(c("windows", "mac")) - source("testing_utils.R") - set.seed(1234) # For reproducibility - - library(devtools) - library(dplyr) - library(vdiffr) - library(jsonlite) - - data("simdata_large_epi") - simdata <- simdata_large_epi %>% prepare_serodata() - no_transm <- 0.0000000001 - big_outbreak <- 1.5 - foi_sim <- c(rep(no_transm, 32), rep(big_outbreak, 3), rep(no_transm, 15)) # 1 epidemics - - - #----- Results visualisation - size_text <- 6 - max_lambda <- 1.55 - - model_constant_json <- jsonlite::fromJSON(testthat::test_path("extdata", "model_constant.json")) - model_constant <- jsonlite::unserializeJSON(model_constant_json) - constant_plot <- plot_seromodel(model_constant, - size_text = size_text, - max_lambda = max_lambda, - foi_sim = foi_sim - ) - - model_tv_normal_json <- fromJSON(testthat::test_path("extdata", "model_tv_normal.json")) - model_tv_normal <- jsonlite::unserializeJSON(model_tv_normal_json) - tv_normal_plot <- plot_seromodel(model_tv_normal, - size_text = size_text, - max_lambda = max_lambda, - foi_sim = foi_sim - ) - - model_tv_normal_log_json <- fromJSON(testthat::test_path("extdata", "model_tv_normal_log.json")) - model_tv_normal_log <- jsonlite::unserializeJSON(model_tv_normal_log_json) - tv_normal_log_plot <- plot_seromodel(model_tv_normal_log, - size_text = size_text, - max_lambda = max_lambda, - foi_sim = foi_sim - ) - - plot_arrange <- cowplot::plot_grid(constant_plot, - tv_normal_plot, - tv_normal_log_plot, - ncol = 3, labels = "AUTO" - ) - vdiffr::expect_doppelganger("plot_arrange_simdata_foi", plot_arrange) -}) - - -# #Test for the function plot_rhats -# -# library(testthat) -# -# # Define a test context -# context("Testing plot_rhats function") -# -# # Create a mock seromodel_object -# mock_seromodel_object <- list(fit = "model did not run") -# -# # Define a test case for the else statement -# test_that("plot_rhats function works for else statement", { -# -# # Call the function with the mock seromodel_object -# rhats_plot <- plot_rhats(mock_seromodel_object) -# -# # Expect the output to be a ggplot object -# expect_is(rhats_plot, "ggplot") -# -# # Expect the plot to have a single point -# expect_equal(length(rhats_plot$layers[[1]]$data), 1) -# -# # Expect the plot to have a single label -# expect_equal(length(rhats_plot$layers[[2]]$data), 1) -# -# # Expect the label to be "errors" -# expect_equal(rhats_plot$layers[[2]]$label, "errors") -# }) -# -# -# #Test for the function plot_seromodel -# -# library(testthat) -# -# # Test for exception in else -# test_that("plot_seromodel prints an error message and returns an empty plot object when a model cannot be fitted", { -# # Create a seromodel object with fit as a feature -# seromodel_object <- list(fit = "no_fit", model = "my_model") -# # Run the function and check that it returns an empty plot object -# expect_silent(plot_seromodel(seromodel_object)) -# expect_equal(length(plot_seromodel(seromodel_object)$grobs), 5) -# }) -# -# # We create a helper function that returns an unwrapped seromodel object -# create_dummy_seromodel <- function() { -# # empty object -# seromodel <- list() -# seromodel$fit <- "dummy" -# seromodel$serodata <- data.frame(age = c(0, 10, 20, 30, 40, 50, 60), -# p_obs_bin = c(0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99), -# bin_size = c(5, 10, 20, 30, 40, 50, 60)) -# return(seromodel) -# } -# -# # Unit tests for plot_seroprev_fitted() -# test_that("plot_seroprev_fitted() returns an empty plot for an unfitted seromodel object", { -# # We create an unadjusted seromodel object -# seromodel <- create_dummy_seromodel() -# # We call the function plot_seroprev_fitted() -# plot <- plot_seroprev_fitted(seromodel) -# # We verify that the plot object is an empty ggplot -# expect_true(class(plot) == "ggplot") -# expect_true(length(plot$layers) == 0) -# }) -# -# -# #Test for the function plot_foi -# -# library(testthat) -# -# # Define the test context -# context("Test of the function plot_foi") -# -# # Create a test to check the else block -# test_that("The plot_foi function should output an empty plot when the model is not running", { -# -# # Create an empty object that simulates the output of the model that failed -# empty_model <- list(fit = "failure") -# -# # Run the plot_foi function with the empty model -# plot <- plot_foi(empty_model) -# -# # Check if the output is an empty graph -# expect_identical(ggplot2::ggplot(), plot) -# }) -# diff --git a/vignettes/foi_models.Rmd b/vignettes/foi_models.Rmd index 2c53d6a9..fb9eaba6 100644 --- a/vignettes/foi_models.Rmd +++ b/vignettes/foi_models.Rmd @@ -18,8 +18,6 @@ knitr::opts_chunk$set( ```{r cleaning, include = FALSE, echo = TRUE} library(serofoi) -rownames(serodata) <- NULL - ``` The current version of ***serofoi*** supports three different models for estimating the *Force-of-Infection (FoI)*, including constant and time-varying trajectories. For fitting the model to the sero-prevalence data we use a suit of bayesian models that include prior and upper prior distributions @@ -62,11 +60,16 @@ serodata_constant <- prepare_serodata(simdata_constant) model_1 <- run_seromodel(serodata = serodata_constant, foi_model = "constant", n_iters = 800) -plot_seromodel(model_1, size_text = 6) +plot_seromodel(model_1, + serodata = serodata_constant, + size_text = 6) ``` ```{r model_1_plot, include = TRUE, echo = FALSE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} foi_sim_constant <- rep(0.02, 50) -model_1_plot <- plot_seromodel(model_1, foi_sim = foi_sim_constant, size_text = 6) +model_1_plot <- plot_seromodel(model_1, + serodata = serodata_constant, + foi_sim = foi_sim_constant, + size_text = 6) plot(model_1_plot) ``` Figure 1. Constant serofoi model plot. Simulated (red) vs modelled (blue) *FoI*. @@ -96,13 +99,18 @@ serodata_sw_dec <- prepare_serodata(simdata_sw_dec) model_2 <- run_seromodel(serodata = serodata_sw_dec, foi_model = "tv_normal", n_iters = 1500) -plot_seromodel(model_2, size_text = 6) +plot_seromodel(model_2, + serodata = serodata_sw_dec, + size_text = 6) ``` ```{r model_2_plot, include = TRUE, echo = FALSE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} no_transm <- 0.0000000001 foi_sim_sw_dec <- c(rep(0.2, 25), rep(0.1, 10), rep(no_transm, 17)) -model_2_plot <- plot_seromodel(model_2, foi_sim = foi_sim_sw_dec, size_text = 6) +model_2_plot <- plot_seromodel(model_2, + serodata = serodata_sw_dec, + foi_sim = foi_sim_sw_dec, + size_text = 6) plot(model_2_plot) ``` Figure 2. Slow time-varying serofoi model plot. Simulated (red) vs modelled (blue) *FoI*. @@ -125,7 +133,9 @@ serodata_large_epi <- prepare_serodata(simdata_large_epi) model_3 <- run_seromodel(serodata = serodata_large_epi, foi_model = "tv_normal_log", n_iters = 1500) -model_3_plot <- plot_seromodel(model_3, size_text = 6) +model_3_plot <- plot_seromodel(model_3, + serodata = serodata_large_epi, + size_text = 6) plot(model_3_plot) ``` ```{r model_3_plot, include = TRUE, echo = FALSE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} @@ -133,7 +143,10 @@ no_transm <- 0.0000000001 big_outbreak <- 1.5 foi_sim_large_epi <- c(rep(no_transm, 32), rep(big_outbreak, 3), rep(no_transm, 15)) -model_3_plot <- plot_seromodel(model_3, foi_sim = foi_sim_large_epi, size_text = 6) +model_3_plot <- plot_seromodel(model_3, + serodata = serodata_large_epi, + foi_sim = foi_sim_large_epi, + size_text = 6) plot(model_3_plot) ``` Figure 3. *Time-varying fast epidemic serofoi model* plot. Simulated (red) vs modelled (blue) *FoI*. @@ -163,12 +176,16 @@ Now, we would like to know whether this model actually fits this dataset better model_1 <- run_seromodel(serodata = serodata_large_epi, foi_model = "constant", n_iters = 800) -model_1_plot <- plot_seromodel(model_1, size_text = 6) +model_1_plot <- plot_seromodel(model_1, + serodata = serodata_large_epi, + size_text = 6) model_2 <- run_seromodel(serodata = serodata_large_epi, foi_model = "tv_normal", n_iters = 1500) -model_2_plot <- plot_seromodel(model_2, size_text = 6) +model_2_plot <- plot_seromodel(model_2, + serodata = serodata_large_epi, + size_text = 6) ``` Using the function `cowplot::plot_grid` we can visualise the results of the three models simultaneously: @@ -177,9 +194,19 @@ cowplot::plot_grid(model_1_plot, model_2_plot, model_3_plot, nrow = 1, ncol = 3, labels = "AUTO") ``` ```{r model_comparison_plot_, include = TRUE, echo = FALSE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center", fig.keep="all"} -model_1_plot <- plot_seromodel(model_1, foi_sim = foi_sim_large_epi, size_text = 6) -model_2_plot <- plot_seromodel(model_2, foi_sim = foi_sim_large_epi, size_text = 6) -model_3_plot <- plot_seromodel(model_3, foi_sim = foi_sim_large_epi, size_text = 6) +size_text <- 6 +model_1_plot <- plot_seromodel(model_1, + serodata = serodata_large_epi, + foi_sim = foi_sim_large_epi, + size_text = size_text) +model_2_plot <- plot_seromodel(model_2, + serodata = serodata_large_epi, + foi_sim = foi_sim_large_epi, + size_text = size_text) +model_3_plot <- plot_seromodel(model_3, + serodata = serodata_large_epi, + foi_sim = foi_sim_large_epi, + size_text = size_text) cowplot::plot_grid(model_1_plot, model_2_plot, model_3_plot, nrow = 1, ncol = 3, labels = "AUTO") ``` diff --git a/vignettes/serofoi.Rmd b/vignettes/serofoi.Rmd index 082e86ef..cf149b1d 100644 --- a/vignettes/serofoi.Rmd +++ b/vignettes/serofoi.Rmd @@ -60,12 +60,14 @@ The integrated dataset `serodata_test` provides a minimal example of the input o ```{r model_constant, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center"} library(serofoi) # Loading and preparing data for modelling -data("serodata") -serodata_test <- prepare_serodata(serodata) +data(chagas2012) +serodata_test <- prepare_serodata(chagas2012) # Model implementation model_constant <- run_seromodel(serodata = serodata_test, foi_model = "constant") # Visualisation -plot_seromodel(model_constant, size_text = 6) +plot_seromodel(model_constant, + serodata = serodata_test, + size_text = 6) ``` For details on the implementation of time-varying models and model comparison see [FoI Models](https://trace-lac.github.io/serofoi/articles/foi_models.html). diff --git a/vignettes/use_cases.Rmd b/vignettes/use_cases.Rmd index 361fd1f2..e64dfac3 100644 --- a/vignettes/use_cases.Rmd +++ b/vignettes/use_cases.Rmd @@ -67,9 +67,15 @@ m3_chik <- run_seromodel(serodata = chik2015p, n_thin = 2) # Visualisation of the results -p1_chik <- plot_seromodel(m1_chik, size_text = 6) -p2_chik <- plot_seromodel(m2_chik, size_text = 6) -p3_chik <- plot_seromodel(m3_chik, size_text = 6) +p1_chik <- plot_seromodel(m1_chik, + serodata = chik2015p, + size_text = 6) +p2_chik <- plot_seromodel(m2_chik, + serodata = chik2015p, + size_text = 6) +p3_chik <- plot_seromodel(m3_chik, + serodata = chik2015p, + size_text = 6) cowplot::plot_grid(p1_chik, p2_chik, p3_chik, ncol=3) ``` @@ -98,24 +104,30 @@ veev2012p <- prepare_serodata(veev2012) # Implementation of the models m1_veev <- run_seromodel(serodata = veev2012p, - foi_model = "constant", - n_iters = 500, - n_thin = 2) + foi_model = "constant", + n_iters = 500, + n_thin = 2) m2_veev <- run_seromodel(serodata = veev2012p, - foi_model = "tv_normal", - n_iters = 500, - n_thin = 2) + foi_model = "tv_normal", + n_iters = 500, + n_thin = 2) m3_veev <- run_seromodel(serodata = veev2012p, - foi_model = "tv_normal_log", - n_iters = 500, - n_thin = 2) + foi_model = "tv_normal_log", + n_iters = 500, + n_thin = 2) # Visualisation of the results -p1_veev <- plot_seromodel(m1_veev, size_text = 6) -p2_veev <- plot_seromodel(m2_veev, size_text = 6) -p3_veev <- plot_seromodel(m3_veev, size_text = 6) +p1_veev <- plot_seromodel(m1_veev, + serodata = veev2012p, + size_text = 6) +p2_veev <- plot_seromodel(m2_veev, + serodata = veev2012p, + size_text = 6) +p3_veev <- plot_seromodel(m3_veev, + serodata = veev2012p, + size_text = 6) cowplot::plot_grid(p1_veev, p2_veev, p3_veev, ncol=3) ``` @@ -150,9 +162,13 @@ m2_cha <- run_seromodel(serodata = chagas2012p, n_iters = 800) # Visualisation of the results -p1_cha <- plot_seromodel(m1_cha, size_text = 6) -p2_cha <- plot_seromodel(m2_cha, size_text = 6) -cowplot::plot_grid(p1_cha, p2_cha, ncol=2) +p1_cha <- plot_seromodel(m1_cha, + serodata = chagas2012p, + size_text = 6) +p2_cha <- plot_seromodel(m2_cha, + serodata = chagas2012p, + size_text = 6) +cowplot::plot_grid(p1_cha, p2_cha, ncol = 2) ``` Figure 3. ***serofoi*** endemic models for *FoI* estimates of *Trypanosoma cruzi* in a rural area of Colombia. ## References