diff --git a/.Rbuildignore b/.Rbuildignore index 5c251fb59..2824401fe 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -24,3 +24,4 @@ ^\.devcontainer$ ^CRAN-SUBMISSION$ ^touchstone$ +^\.benchmark$ \ No newline at end of file diff --git a/.gitignore b/.gitignore index 31cb7c5c2..7d8561a75 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ inst/include/*.o src .DS_Store +.vscode \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE index 87c81213e..658547ad0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,13 @@ # Generated by roxygen2: do not edit by hand +S3method("+",dist_spec) +S3method(mean,dist_spec) +S3method(plot,dist_spec) S3method(plot,epinow) S3method(plot,estimate_infections) S3method(plot,estimate_secondary) S3method(plot,estimate_truncation) +S3method(print,dist_spec) S3method(summary,epinow) S3method(summary,estimate_infections) export(R_to_growth) @@ -145,6 +149,7 @@ importFrom(ggplot2,geom_line) importFrom(ggplot2,geom_linerange) importFrom(ggplot2,geom_point) importFrom(ggplot2,geom_ribbon) +importFrom(ggplot2,geom_step) importFrom(ggplot2,geom_vline) importFrom(ggplot2,ggplot) importFrom(ggplot2,ggplot_build) @@ -157,6 +162,7 @@ importFrom(ggplot2,scale_x_date) importFrom(ggplot2,scale_y_continuous) importFrom(ggplot2,theme) importFrom(ggplot2,theme_bw) +importFrom(ggplot2,vars) importFrom(lifecycle,deprecate_soft) importFrom(lifecycle,deprecate_warn) importFrom(lubridate,days) @@ -188,6 +194,7 @@ importFrom(rstan,summary) importFrom(rstan,vb) importFrom(runner,mean_run) importFrom(scales,comma) +importFrom(stats,convolve) importFrom(stats,glm) importFrom(stats,lm) importFrom(stats,median) @@ -207,5 +214,6 @@ importFrom(stats,sd) importFrom(stats,var) importFrom(truncnorm,rtruncnorm) importFrom(utils,capture.output) +importFrom(utils,head) importFrom(utils,tail) useDynLib(EpiNow2, .registration=TRUE) diff --git a/NEWS.md b/NEWS.md index 53c0f3d3f..b428f3b2e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,22 +2,24 @@ This release is in development. For a stable release install 1.3.5 from CRAN. +## Breaking changes + +- The external distribution interface has been updated to use the `dist_spec()` function. This comes with a range of benefits, including optimising model fitting when static delays are used (by convolving when first defined vs in stan), easy printing (using `print()`), and easy plotting (using `plot()`). It also makes it possible to use all supported distributions everywhere (i.e, as a generation time or reporting delay). However, this update will break most users code as the interface has changed. See the documentation for `dist_spec()` for more details. By @sbfnk in #363 and reviewed by @seabbs. + ## Package -* Model description has been expanded to include more detail. -* Moved to a GitHub Action to only lint changed files. -* Linted the package with a wider range of default linters. -* Added a GitHub Action to build the README when it is altered. -* Added handling of edge case where we sample from the negative binomial with - mean close or equal to 0. By @sbfnk in #366. -* Replaced use of nested `ifelse()` and `data.table::fifelse()` in the - code base with `data.table::fcase()`. By @jamesmbaazam in #383 and reviewed by @seabbs. -* Reviewed the example in `calc_backcalc_data()` to call `calc_backcalc_data()` - instead of `create_gp_data()`. By @jamesmbaazam in #388 and reviewed by @seabbs. +* Model description has been expanded to include more detail. By @sbfnk in #373 and reviewed by @seabbs. +* Moved to a GitHub Action to only lint changed files. By @seabbs in #378. +* Linted the package with a wider range of default linters. By @seabbs in #378. +* Added a GitHub Action to build the README when it is altered. By @seabbs. +* Added handling of edge case where we sample from the negative binomial with mean close or equal to 0. By @sbfnk in #366 and reviewed by @seabbs. +* Replaced use of nested `ifelse()` and `data.table::fifelse()` in the code base with `data.table::fcase()`. By @jamesmbaazam in #383 and reviewed by @seabbs. +* Reviewed the example in `calc_backcalc_data()` to call `calc_backcalc_data()` instead of `create_gp_data()`. By @jamesmbaazam in #388 and reviewed by @seabbs. * Improved compilation times by reducing the number of distinct stan models and deprecated `tune_inv_gamma()`. By @sbfnk in #394 and reviewed by @seabbs. * Changed touchstone settings so that benchmarks are only performed if the stan model is changed. By @sbfnk in #400 and reviewed by @seabbs. * [pak](https://pak.r-lib.org/) is now suggested for installing the developmental version of the package. By @jamesmbaazam in #407 and reviewed by @seabbs. This has been successfully tested on MacOS Ventura, Ubuntu 20.04, and Windows 10. Users are advised to use `remotes::install_github("epiforecasts/EpiNow2")` if `pak` fails and if both fail, raise an issue. * `dist_fit()`'s `samples` argument now takes a default value of 1000 instead of NULL. If a supplied `samples` is less than 1000, it is changed to 1000 and a warning is thrown to indicate the change. By @jamesmbazam in #389 and reviewed by @seabbs. +* The internal distribution interface has been streamlined to reduce code duplication. By @sbfnk in #363 and reviewed by @seabbs. # EpiNow2 1.3.5 diff --git a/R/create.R b/R/create.R index 9e2e9b910..d346afb8a 100644 --- a/R/create.R +++ b/R/create.R @@ -414,10 +414,8 @@ create_obs_model <- function(obs = obs_opts(), dates) { #' #' @param shifted_cases A dataframe of delay shifted cases #' -#' @param truncation `r lifecycle::badge("experimental")` A list of options as -#' generated by `trunc_opts()` defining the truncation of observed data. -#' Defaults to `trunc_opts()`. See `estimate_truncation()` for an approach to -#' estimating truncation from data. +#' @param seeding_time Integer; seeding time, usually obtained using +#' `get_seeding_time()` #' #' @inheritParams create_gp_data #' @inheritParams create_obs_model @@ -430,34 +428,20 @@ create_obs_model <- function(obs = obs_opts(), dates) { #' @author Sam Abbott #' @author Sebastian Funk #' @export -create_stan_data <- function(reported_cases, generation_time, - rt, gp, obs, delays, horizon, - backcalc, shifted_cases, - truncation) { - ## make sure we have at least gt_max seeding time - delays$seeding_time <- max(delays$seeding_time, generation_time$max) +create_stan_data <- function(reported_cases, seeding_time, + rt, gp, obs, horizon, + backcalc, shifted_cases) { - ## for backwards compatibility call generation_time_opts internally - if (is.list(generation_time) && - all(c("mean", "mean_sd", "sd", "sd_sd") %in% names(generation_time))) { - generation_time <- do.call(generation_time_opts, generation_time) - } - - cases <- reported_cases[(delays$seeding_time + 1):(.N - horizon)]$confirm + cases <- reported_cases[(seeding_time + 1):(.N - horizon)]$confirm data <- list( cases = cases, shifted_cases = shifted_cases, t = length(reported_cases$date), horizon = horizon, - burn_in = 0 + burn_in = 0, + seeding_time = seeding_time ) - # add gt data - data <- c(data, generation_time) - # add delay data - data <- c(data, delays) - # add truncation data - data <- c(data, truncation) # add Rt data data <- c( data, @@ -476,10 +460,6 @@ create_stan_data <- function(reported_cases, generation_time, is.na(data$prior_infections) || is.null(data$prior_infections), 0, data$prior_infections ) - if (is.null(data$gt_weight)) { - ## default: weigh by number of data points - data$gt_weight <- data$t - data$seeding_time - data$horizon - } if (data$seeding_time > 1) { safe_lm <- purrr::safely(stats::lm) data$prior_growth <- safe_lm(log(confirm) ~ t, data = first_week)[[1]] @@ -532,37 +512,20 @@ create_stan_data <- function(reported_cases, generation_time, create_initial_conditions <- function(data) { init_fun <- function() { out <- list() - if (data$n_uncertain_mean_delays > 0) { - out$delay_mean <- array(purrr::map2_dbl( - data$delay_mean_mean[data$uncertain_mean_delays], - data$delay_mean_sd[data$uncertain_mean_delays] * 0.1, - ~ rnorm(1, mean = .x, sd = .y) + if (data$delay_n_p > 0) { + out$delay_mean <- array(truncnorm::rtruncnorm( + n = data$delay_n_p, a = 0, + mean = data$delay_mean_mean, sd = data$delay_mean_sd * 0.1 )) - } - if (data$n_uncertain_sd_delays > 0) { - out$delay_sd <- array(purrr::map2_dbl( - data$delay_sd_mean[data$uncertain_sd_delays], - data$delay_sd_sd[data$uncertain_sd_delays] * 0.1, - ~ rnorm(1, mean = .x, sd = .y) + out$delay_sd <- array(truncnorm::rtruncnorm( + n = data$delay_n_p, a = 0, + mean = data$delay_sd_mean, sd = data$delay_sd_sd * 0.1 )) + } else { + out$delay_mean <- array(numeric(0)) + out$delay_sd <- array(numeric(0)) } - if (data$truncation > 0) { - if (data$trunc_mean_sd > 0) { - out$truncation_mean <- array(rnorm(1, - mean = data$trunc_mean_mean, - sd = data$trunc_mean_sd * 0.1 - )) - } - if (data$trunc_sd_sd > 0) { - out$truncation_sd <- array( - truncnorm::rtruncnorm(1, - a = 0, - mean = data$trunc_sd_mean, - sd = data$trunc_sd_sd * 0.1 - ) - ) - } - } + if (data$fixed == 0) { out$eta <- array(rnorm(data$M, mean = 0, sd = 0.1)) out$rho <- array(rlnorm(1, @@ -579,6 +542,10 @@ create_initial_conditions <- function(data) { out$alpha <- array( truncnorm::rtruncnorm(1, a = 0, mean = 0, sd = data$alpha_sd) ) + } else { + out$eta <- array(numeric(0)) + out$rho <- array(numeric(0)) + out$alpha <- array(numeric(0)) } if (data$model_type == 1) { out$rep_phi <- array( @@ -597,23 +564,14 @@ create_initial_conditions <- function(data) { n = 1, mean = convert_to_logmean(data$r_mean, data$r_sd), sd = convert_to_logsd(data$r_mean, data$r_sd) * 0.1 )) - if (data$gt_mean_sd > 0) { - out$gt_mean <- array(truncnorm::rtruncnorm(1, - a = 0, mean = data$gt_mean_mean, - sd = data$gt_mean_sd * 0.1 - )) - } - if (data$gt_sd_sd > 0) { - out$gt_sd <- array(truncnorm::rtruncnorm(1, - a = 0, mean = data$gt_sd_mean, - sd = data$gt_sd_sd * 0.1 - )) - } + } - if (data$bp_n > 0) { - out$bp_sd <- array(truncnorm::rtruncnorm(1, a = 0, mean = 0, sd = 0.1)) - out$bp_effects <- array(rnorm(data$bp_n, 0, 0.1)) - } + if (data$bp_n > 0) { + out$bp_sd <- array(truncnorm::rtruncnorm(1, a = 0, mean = 0, sd = 0.1)) + out$bp_effects <- array(rnorm(data$bp_n, 0, 0.1)) + } else { + out$bp_sd <- array(numeric(0)) + out$bp_effects <- array(numeric(0)) } if (data$obs_scale == 1) { out$frac_obs <- array(truncnorm::rtruncnorm(1, @@ -621,6 +579,8 @@ create_initial_conditions <- function(data) { mean = data$obs_scale_mean, sd = data$obs_scale_sd * 0.1 )) + } else { + out$frac_obs <- array(numeric(0)) } if (data$week_effect > 0) { out$day_of_week_simplex <- array( @@ -675,3 +635,53 @@ create_stan_args <- function(stan = stan_opts(), args$return_fit <- NULL return(args) } + +##' Create delay variables for stan +##' +##' @param ... Named delay distributions specified using `dist_spec()`. +##' The names are assigned to IDs +##' @param ot Integer, number of observations (needed if weighing any priors) +##' with the number of observations +##' @return A list of variables as expected by the stan model +##' @importFrom purrr transpose map +##' @author Sebastian Funk +create_stan_delays <- function(..., ot) { + dot_args <- list(...) + ## combine delays + combined_delays <- unclass(c(...)) + ## number of different non-empty types + type_n <- unlist(purrr::transpose(dot_args)$n) + ## assign ID values to each type + ids <- rep(0L, length(type_n)) + ids[type_n > 0] <- seq_len(sum(type_n > 0)) + names(ids) <- paste(names(type_n), "id", sep = "_") + + ## start consructing stan object + ret <- unclass(combined_delays) + ## construct additional variables + ret <- c(ret, list( + types = sum(type_n > 0), + types_p = array(1L - combined_delays$fixed) + )) + ## delay identifiers + ret$types_id <- integer(0) + ret$types_id[ret$types_p == 1] <- seq_len(ret$n_p) + ret$types_id[ret$types_p == 0] <- seq_len(ret$n_np) + ret$types_id <- array(ret$types_id) + ## map delays to identifiers + ret$types_groups <- array(c(0, cumsum(unname(type_n[type_n > 0]))) + 1) + ## map pmfs + ret$np_pmf_groups <- array(c(0, cumsum(combined_delays$np_pmf_length)) + 1) + ## assign prior weights + if (any(ret$weight == 0)) { + ret$weight[ret$weight == 0] <- ot + } + ## remove auxiliary variables + ret$fixed <- NULL + ret$np_pmf_length <- NULL + + names(ret) <- paste("delay", names(ret), sep = "_") + ret <- c(ret, ids) + + return(ret) +} diff --git a/R/dist.R b/R/dist.R index 04b425f0d..0be201582 100644 --- a/R/dist.R +++ b/R/dist.R @@ -15,7 +15,15 @@ #' distribution be cumulative. #' #' @param model Character string, defining the model to be used. Supported -#' options are exponential ("exp"), gamma ("gamma"), and log normal ("lognorm") +#' options are exponential ("exp"), gamma ("gamma"), and log normal +#' ("lognormal") +#' +#' @param discrete Logical, defaults to `FALSE`. Should the probability +#' distribution be discretised. In this case each entry of the probability +#' mass function corresponds to the 1-length interval ending at the entry, +#' i.e. the probability mass function is a vector where the first entry +#' corresponds to the integral over the (0,1] interval of the continuous +#' distribution, the second entry corresponds to the (1,2] interval etc. #' #' @param params A list of parameters values (by name) required for each model. #' For the exponential model this is a rate parameter and for the gamma model @@ -27,6 +35,7 @@ #' @return A vector of samples or a probability distribution. #' @export #' @author Sam Abbott +#' @author Sebastian Funk #' @examples #' #' ## Exponential model @@ -44,37 +53,37 @@ #' #' ## Gamma model #' # sample -#' dist_skel(10, model = "gamma", params = list(alpha = 1, beta = 2)) +#' dist_skel(10, model = "gamma", params = list(shape = 1, scale = 2)) #' #' # cumulative prob density #' dist_skel(0:10, #' model = "gamma", dist = TRUE, -#' params = list(alpha = 1, beta = 2) +#' params = list(shape = 1, scale = 2) #' ) #' #' # probability density #' dist_skel(0:10, #' model = "gamma", dist = TRUE, -#' cum = FALSE, params = list(alpha = 2, beta = 2) +#' cum = FALSE, params = list(shape = 2, scale = 2) #' ) #' #' ## Log normal model #' # sample -#' dist_skel(10, model = "lognorm", params = list(mean = log(5), sd = log(2))) +#' dist_skel(10, model = "lognormal", params = list(mean = log(5), sd = log(2))) #' #' # cumulative prob density #' dist_skel(0:10, -#' model = "lognorm", dist = TRUE, +#' model = "lognormal", dist = TRUE, #' params = list(mean = log(5), sd = log(2)) #' ) #' #' # probability density #' dist_skel(0:10, -#' model = "lognorm", dist = TRUE, cum = FALSE, +#' model = "lognormal", dist = TRUE, cum = FALSE, #' params = list(mean = log(5), sd = log(2)) #' ) dist_skel <- function(n, dist = FALSE, cum = TRUE, model, - params, max_value = 120) { + discrete = FALSE, params, max_value = 120) { if (model %in% "exp") { # define support functions for exponential dist rdist <- function(n) { @@ -90,18 +99,18 @@ dist_skel <- function(n, dist = FALSE, cum = TRUE, model, } } else if (model %in% "gamma") { rdist <- function(n) { - rgamma(n, params$alpha, params$beta) + rgamma(n, params$shape, params$scale) } pdist <- function(n) { - pgamma(n, params$alpha, params$beta) / - pgamma(max_value, params$alpha, params$beta) + pgamma(n, params$shape, params$scale) / + pgamma(max_value, params$shape, params$scale) } ddist <- function(n) { - (pgamma(n + 1, params$alpha, params$beta) - - pgamma(n, params$alpha, params$beta)) / - pgamma(max_value, params$alpha, params$beta) + (pgamma(n + 1, params$shape, params$scale) - + pgamma(n, params$shape, params$scale)) / + pgamma(max_value, params$shape, params$scale) } - } else if (model %in% "lognorm") { + } else if (model %in% "lognormal") { rdist <- function(n) { rlnorm(n, params$mean, params$sd) } @@ -116,6 +125,20 @@ dist_skel <- function(n, dist = FALSE, cum = TRUE, model, } } + if (discrete) { + cmf <- c(0, pdist(seq_len(max_value + 1))) + pmf <- diff(cmf) + rdist <- function(n) { + sample(x = seq_len(max_value) - 1, size = n, prob = pmf) + } + pdist <- function(n) { + cmf[n + 1] + } + ddist <- function(n) { + pmf[n + 1] + } + } + # define internal sampling function inner_skel <- function(n, dist = FALSE, cum = TRUE, max_value = NULL) { if (!dist) { @@ -307,24 +330,36 @@ gamma_dist_def <- function(shape, shape_sd, sd, sd_sd, max_value, samples) { if (missing(shape) && missing(scale) && !missing(mean) && !missing(sd)) { - mean <- truncnorm::rtruncnorm(samples, a = 0, mean = mean, sd = mean_sd) - sd <- truncnorm::rtruncnorm(samples, a = 0, mean = sd, sd = sd_sd) - beta <- sd^2 / mean - alpha <- mean / beta - beta <- 1 / beta + if (!missing(mean_sd)) { + mean <- truncnorm::rtruncnorm(samples, a = 0, mean = mean, sd = mean_sd) + } + if (!missing(sd_sd)) { + sd <- truncnorm::rtruncnorm(samples, a = 0, mean = sd, sd = sd_sd) + } + scale <- sd^2 / mean + shape <- mean / scale + scale <- 1 / scale } else { - alpha <- truncnorm::rtruncnorm(samples, a = 0, mean = shape, sd = shape_sd) - beta <- 1 / truncnorm::rtruncnorm( - samples, a = 0, mean = scale, sd = scale_sd - ) + if (!missing(shape_sd)) { + shape <- truncnorm::rtruncnorm( + samples, + a = 0, mean = shape, sd = shape_sd + ) + } + if (!missing(scale_sd)) { + scale <- 1 / truncnorm::rtruncnorm( + samples, + a = 0, mean = scale, sd = scale_sd + ) + } } dist <- data.table::data.table( model = rep("gamma", samples), params = purrr::transpose( list( - alpha = alpha, - beta = beta + shape = shape, + scale = scale ) ), max_value = rep(max_value, samples) @@ -387,10 +422,20 @@ lognorm_dist_def <- function(mean, mean_sd, mean_shape } - sampled_means <- truncnorm::rtruncnorm( - samples, a = 0, mean = mean, sd = mean_sd - ) - sampled_sds <- truncnorm::rtruncnorm(samples, a = 0, mean = sd, sd = sd_sd) + if (missing(mean_sd)) { + sampled_means <- mean + } else { + sampled_means <- truncnorm::rtruncnorm( + samples, + a = 0, mean = mean, sd = mean_sd + ) + } + + if (missing(sd_sd)) { + sampled_sds <- sd + } else { + sampled_sds <- truncnorm::rtruncnorm(samples, a = 0, mean = sd, sd = sd_sd) + } means <- sampled_means sds <- sampled_sds @@ -400,7 +445,7 @@ lognorm_dist_def <- function(mean, mean_sd, } dist <- data.table::data.table( - model = rep("lognorm", samples), + model = rep("lognormal", samples), params = purrr::transpose( list( mean = means, @@ -444,7 +489,7 @@ lognorm_dist_def <- function(mean, mean_sd, #' @param max_value Numeric, defaults to the maximum value in the observed #' data. Maximum delay to allow (added to output but does impact fitting). #' -#' @return A list summarising the bootstrapped distribution +#' @return A `dist_spec` object summarising the bootstrapped distribution #' @author Sam Abbott #' @importFrom purrr transpose #' @importFrom future.apply future_lapply @@ -536,7 +581,7 @@ bootstrapped_dist_fit <- function(values, dist = "lognormal", } else { out$max <- max(values) } - return(out) + return(do.call(dist_spec, out)) } #' Estimate a Delay Distribution @@ -549,7 +594,7 @@ bootstrapped_dist_fit <- function(values, dist = "lognormal", #' #' @param ... Arguments to pass to internal methods. #' -#' @return A list summarising the bootstrapped distribution +#' @return A `dist_spec` summarising the bootstrapped distribution #' @author Sam Abbott #' @export #' @seealso bootstrapped_dist_fit @@ -559,10 +604,11 @@ bootstrapped_dist_fit <- function(values, dist = "lognormal", #' estimate_delay(delays, samples = 1000, bootstraps = 10) #' } estimate_delay <- function(delays, ...) { - bootstrapped_dist_fit( + fit <- bootstrapped_dist_fit( values = delays, dist = "lognormal", ... ) + return(fit) } #' Approximate Sampling a Distribution using Counts @@ -716,20 +762,23 @@ sample_approx_dist <- function(cases = NULL, # filter out all zero cases until first recorded case mapped_cases <- data.table::setorder(mapped_cases, date) - mapped_cases <- mapped_cases[, - cum_cases := cumsum(cases)][cum_cases != 0][, cum_cases := NULL - ] + mapped_cases <- mapped_cases[ + , + cum_cases := cumsum(cases) + ][cum_cases != 0][, cum_cases := NULL] } else if (type %in% "median") { shift <- as.integer( median(as.integer(dist_fn(1000, dist = FALSE)), na.rm = TRUE) ) if (direction %in% "backwards") { - mapped_cases <- data.table::copy(cases)[, + mapped_cases <- data.table::copy(cases)[ + , date := date - lubridate::days(shift) ] } else if (direction %in% "forwards") { - mapped_cases <- data.table::copy(cases)[, + mapped_cases <- data.table::copy(cases)[ + , date := date + lubridate::days(shift) ] } @@ -785,7 +834,9 @@ tune_inv_gamma <- function(lower = 2, upper = 21) { #' @description `r lifecycle::badge("stable")` #' Defines the parameters of a supported distribution for use in onward #' modelling. Multiple distribution families are supported - see the -#' documentation for `family` for details. This function provides distribution +#' documentation for `family` for details. Alternatively, a nonparametric +#' distribution can be specified using the \code{pmf} argument. +#' This function provides distribution #' functionality in [delay_opts()], [generation_time_opts()], and #' [trunc_opts()]. #' @@ -804,7 +855,7 @@ tune_inv_gamma <- function(lower = 2, upper = 21) { #' @param sd_sd Numeric, defaults to 0. Sets the standard deviation of the #' uncertainty around the sd of the distribution assuming a normal prior. #' -#' @param dist Character, defaults to "lognormal". The (discretised +#' @param distribution Character, defaults to "lognormal". The (discretised #' distribution to be used. If sd == 0 then the distribution is fixed and a #' delta function is used. If sd > 0 then the distribution is discretised and #' truncated. @@ -820,7 +871,7 @@ tune_inv_gamma <- function(lower = 2, upper = 21) { #' model fitting these are then transformed to the shape and scale of the gamma #' distribution. #' -#' When `dist` is the default lognormal distribution the other function +#' When `distribution` is the default lognormal distribution the other function #' arguments have the following definition: #' - `mean` is the mean of the natural logarithm of the delay (on the #' log scale). @@ -829,67 +880,497 @@ tune_inv_gamma <- function(lower = 2, upper = 21) { #' @param max Numeric, maximum value of the distribution. The distribution will #' be truncated at this value. #' +#' @param pmf Numeric, a vector of values that represent the (nonparametric) +#' probability mass function of the delay (starting with 0); defaults to an +#' empty vector corresponding to a parametric specification of the distribution +#' (using \code{mean}, \code{sd} and corresponding uncertainties) +#' #' @param fixed Logical, defaults to `FALSE`. Should delays be treated -#' as coming from fixed (vs uncertain) distributions. Making this simplification +#' as coming from fixed (vs uncertain) distributions. Overrides any values +#' assigned to \code{mean_sd} and \code{sd_sd} by setting them to zero. #' reduces compute requirement but may produce spuriously precise estimates. -#' +#' @param prior_weight Integer, weight given to the generation time prior. +#' By default (prior_weight = 00) the priors will be weighted by the number of +#' observation data points, usually preventing the posteriors from shifting +#' much from the given distribution. Another sensible option would be 1, +#' i.e. treating the generation time distribution as a single parameter. #' @return A list of distribution options. #' #' @author Sebastian Funk #' @author Sam Abbott #' @export +#' @examples +#' # A fixed lognormal distribution with mean 5 and sd 1. +#' dist_spec(mean = 5, sd = 1, max = 20, distribution = "lognormal") +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' dist_spec( +#' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, +#' distribution = "gamma" +#' ) dist_spec <- function(mean, sd = 0, mean_sd = 0, sd_sd = 0, - dist = c("lognormal", "gamma"), max = NULL, - fixed = FALSE) { - dist <- match.arg(dist) + distribution = c("lognormal", "gamma"), max, + pmf = numeric(0), fixed = FALSE, prior_weight = 0L) { + ## check if parametric or nonparametric + if (length(pmf) > 0 && + !all( + missing(mean), missing(sd), missing(mean_sd), + missing(sd_sd), missing(distribution), missing(max) + )) { + stop("Distributional parameters or a pmf can be specified, but not both.") + } - if (missing(mean)) { + if (fixed) { + mean_sd <- 0 + sd_sd <- 0 + } + fixed <- mean_sd == 0 && mean_sd == 0 + + ## check parametric parameters make sense + if (!missing(mean)) { + if (sd == 0 && sd_sd == 0) { ## integer fixed + if (mean %% 1 != 0) { + stop( + "When a distribution is set to a constant ", + "(sd == 0 and sd_sd == 0) then the mean parameter ", + "must be an integer." + ) + } + max <- mean + if (mean_sd > 0) { + stop( + "When a distribution has sd == 0 and ", + "sd_sd == 0 then mean_sd must be 0, too." + ) + } + } else { + if (missing(max)) { + stop("Maximum of parametric distributions must be specified.") + } + } + } else { + if (!all( + missing(sd), missing(mean_sd), + missing(sd_sd), missing(distribution), missing(max) + )) { + stop( + "If any distributional parameters are given then so must the mean." + ) + } + } + + distribution <- match.arg(distribution) + if (fixed) { ret <- list( mean_mean = numeric(0), mean_sd = numeric(0), sd_mean = numeric(0), sd_sd = numeric(0), - fixed = integer(0), - dist = integer(0) + dist = integer(0), + max = integer(0) ) - if (is.null(max)) { - ret$max <- integer(0) - } else { - ret$max <- max + if (length(pmf) == 0) { + if (missing(mean)) { ## empty + ret <- c(ret, list( + n = 0, + n_p = 0, + n_np = 0, + np_pmf_max = 0, + np_pmf = numeric(0), + np_pmf_length = integer(0), + weight = numeric(0), + fixed = integer(0) + )) + } else { ## parametric fixed + if (sd == 0) { ## delta + pmf <- c(rep(0, mean), 1) + } else { + if (distribution == "lognormal") { + params <- lognorm_dist_def( + mean = mean, mean_sd = mean_sd, + sd = sd, sd_sd = sd_sd, max_value = max, samples = 1 + ) + } else if (distribution == "gamma") { + params <- gamma_dist_def( + mean = mean, mean_sd = mean_sd, + sd = sd, sd_sd = sd_sd, max_value = max, samples = 1 + ) + } + pmf <- dist_skel( + n = seq_len(max) - 1, dist = TRUE, cum = FALSE, + model = distribution, params = params$params[[1]], max_value = max, + discrete = TRUE + ) + } + } + } else { ## nonparametric fixed + if (!missing(max) && (max + 1) < length(pmf)) { + pmf <- pmf[1:(max + 1)] + } + pmf <- pmf / sum(pmf) + } + if (length(pmf) > 0) { + ret <- c(ret, list( + n = 1, + n_p = 0, + n_np = 1, + np_pmf_max = length(pmf), + np_pmf = pmf, + np_pmf_length = length(pmf), + weight = numeric(0), + fixed = 1L + )) } } else { ret <- list( mean_mean = mean, mean_sd = mean_sd, sd_mean = sd, - sd_sd = sd_sd + sd_sd = sd_sd, + dist = + which(eval(formals()[["distribution"]]) == distribution) - 1, + max = max, + n = 1, + n_p = 1, + n_np = 0, + np_pmf_max = 0, + np_pmf = numeric(0), + np_pmf_length = integer(0), + weight = prior_weight, + fixed = 0L ) - if (fixed) { - ret$mean_sd <- 0 - ret$sd_sd <- 0 + } + ret <- purrr::map(ret, array) + sum_args <- grep("(^n$|^n_|_max$)", names(ret)) + ret[sum_args] <- purrr::map(ret[sum_args], sum) + attr(ret, "class") <- c("list", "dist_spec") + return(ret) +} + +#' Creates a delay distribution as the sum of two other delay distributions +#' +#' This is done via convolution with `stats::convolve()`. Nonparametric delays +#' that can be combined are processed together, and their cumulative +#' distribution function is truncated at a specified tolerance level, ensuring +#' numeric stability. +#' +#' @param e1 The first delay distribution (from a call to [dist_spec()]) to +#' combine. +#' +#' @param e2 The second delay distribution (from a call to [dist_spec()]) to +#' combine. +#' +#' @param tolerance A numeric value that sets the cumulative probability +#' to retain when truncating the cumulative distribution function of the +#' combined nonparametric delays. The default value is 0.001 with this retaining +#' 0.999 of the cumulative probability. Note that using a larger tolerance may +#' result in a smaller number of points in the combined nonparametric delay but +#' may also impact the accuracy of the combined delay (i.e., change the mean +#' and standard deviation). +#' +#' @return A delay distribution representing the sum of the two delays +#' (with class [dist_spec()]) +#' +#' @author Sebastian Funk +#' @author Sam Abbott +#' @importFrom stats convolve +dist_spec_plus <- function(e1, e2, tolerance = 0.001) { + ## process delay distributions + delays <- c(e1, e2) + ## combine any nonparametric delays that can be combined + if (sum(delays$fixed) > 1) { + new_pmf <- 1L + group_starts <- c(1L, cumsum(delays$np_pmf_length) + 1L) + for (i in seq_len(length(group_starts) - 1L)) { + new_pmf <- stats::convolve( + new_pmf, + rev(delays$np_pmf[seq(group_starts[i], group_starts[i + 1L] - 1L)]), + type = "open" + ) + } + if (tolerance > 0 && length(new_pmf) > 1) { + cdf <- cumsum(new_pmf) + new_pmf <- new_pmf[c(TRUE, (1 - cdf[-length(cdf)]) >= tolerance)] + new_pmf <- new_pmf / sum(new_pmf) } - ret$fixed <- as.integer(ret$mean_sd == 0 && ret$mean_sd == 0) + delays$np_pmf <- new_pmf + delays$fixed <- c(1, rep(0, delays$n_p)) + delays$n_np <- 1 + delays$n <- delays$n_p + 1 + delays$np_pmf_max <- length(delays$np_pmf) + delays$np_pmf_length <- length(delays$np_pmf) + } + return(delays) +} - ## check if it's a fixed value - if (ret$sd_mean == 0 && ret$sd_sd == 0) { - if (ret$mean_mean %% 1 != 0) { - stop( - "When a distribution is set to a constant ", - "(sd == 0 and sd_sd == 0) then the mean parameter ", - "must be an integer." - ) - } - ret$max <- ret$mean_mean - if (ret$mean_sd > 0) { - stop( - "When a distribution has sd == 0 and ", - "sd_sd == 0 then mean_sd must be 0, too." - ) +#' Creates a delay distribution as the sum of two other delay distributions +#' +#' This is done via convolution with `stats::convolve()`. Nonparametric delays +#' that can be combined are processed together, and their cumulative +#' distribution function is truncated at a specified tolerance level, ensuring +#' numeric stability. +#' +#' @return A delay distribution representing the sum of the two delays +#' (with class [dist_spec()]) +#' @inheritParams dist_spec_plus +#' @author Sebastian Funk +#' @method + dist_spec +#' @export +#' @examples +#' # A fixed lognormal distribution with mean 5 and sd 1. +#' lognormal <- dist_spec( +#' mean = 1.6, sd = 1, max = 20, distribution = "lognormal" +#' ) +#' lognormal + lognormal +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' gamma <- dist_spec( +#' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, +#' distribution = "gamma" +#' ) +#' lognormal + gamma +#' +#' # Using tolerance parameter +#' EpiNow2:::dist_spec_plus(lognormal, lognormal, tolerance = 0.5) +`+.dist_spec` <- function(e1, e2) { + dist_spec_plus(e1, e2, tolerance = 0.001) +} + +#' Combines multiple delay distributions for further processing +#' +#' This combines the parameters so that they can be fed as multiple delay +#' distributions to [epinow()] or [estimate_infections()]. +#' +#' @param ... The delay distributions (from calls to [dist_spec()]) to combine +#' @return Combined delay distributions (with class [dist_spec()]`) +#' @author Sebastian Funk +#' @method c dist_spec +#' @importFrom purrr transpose map +`c.dist_spec` <- function(...) { + ## process delay distributions + delays <- list(...) + if (!(all(vapply(delays, is, FALSE, "dist_spec")))) { + stop( + "Delay distribution can only be concatenated with other delay ", + "distributions." + ) + } + ## transpose delays + delays <- purrr::transpose(delays) + ## convert back to arrays + delays <- purrr::map(delays, function(x) array(unlist(x))) + sum_args <- grep("(^n$|^n_|_max$)", names(delays)) + delays[sum_args] <- purrr::map(delays[sum_args], sum) + attr(delays, "class") <- c("list", "dist_spec") + return(delays) +} + +##' Returns the mean of one or more delay distribution +##' +##' This works out the mean of all the (parametric / nonparametric) delay +##' distributions combined in the passed [dist_spec()]. +##' +##' @param x The [dist_spec()] to use +##' @param ... Not used +##' @return A vector of means. +##' @author Sebastian Funk +##' @method mean dist_spec +##' @importFrom utils head +##' @export +#' @examples +#' # A fixed lognormal distribution with mean 5 and sd 1. +#' lognormal <- dist_spec( +#' mean = 5, sd = 1, max = 20, distribution = "lognormal" +#' ) +#' mean(lognormal) +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' gamma <- dist_spec( +#' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, +#' distribution = "gamma" +#' ) +#' mean(gamma) +#' +#' # The mean of the sum of two distributions +#' mean(lognormal + gamma) +mean.dist_spec <- function(x, ...) { + ret <- rep(0, x$n) + ## nonparametric + if (x$n_np > 0) { + init_id <- c(1, head(cumsum(x$np_pmf_length) + 1, n = -1)) + ret[x$fixed == 1L] <- vapply(seq_along(init_id), function(id) { + pmf <- x$np_pmf[seq(init_id[id], cumsum(x$np_pmf_length)[id])] + return(sum((seq_len(x$np_pmf_length[id]) - 1) * pmf)) + }, 0) + } + ## parametric + if (x$n_p > 0) { + ret[x$fixed == 0L] <- vapply(seq_along(which(x$fixed == 0L)), function(id) { + if (x$dist[id] == 0) { ## lognormal + return(exp(x$mean_mean[id] + x$sd_mean[id] / 2)) + } else if (x$dist[id] == 1) { ## gamma + return(x$mean_mean[id]) } + }, 0) + } + return(ret) +} + +#' Prints the parameters of one or more delay distributions +#' +#' This displays the parameters of the uncertain and probability mass +#' functions of fixed delay distributions combined in the passed [dist_spec()]. +#' @param x The [dist_spec()] to use +#' @param ... Not used +#' @return invisible +#' @author Sebastian Funk +#' @method print dist_spec +#' @export +#' @examples +#' #' # A fixed lognormal distribution with mean 5 and sd 1. +#' lognormal <- dist_spec( +#' mean = 1.5, sd = 0.5, max = 20, distribution = "lognormal" +#' ) +#' print(lognormal) +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' gamma <- dist_spec( +#' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, +#' distribution = "gamma" +#' ) +#' print(gamma) +print.dist_spec <- function(x, ...) { + cat("\n") + if (x$n == 0) { + cat("Empty `dist_spec` distribution.\n") + return(invisible(NULL)) + } else if (x$n > 1) { + cat("Combination of delay distributions:\n") + } + fixed_id <- 1 + fixed_pos <- 1 + variable_id <- 1 + for (i in 1:x$n) { + cat(" ") + if (!is.null(x$names) && nchar(x$names[i]) > 0) { + cat(x$names[i], ": ", sep = "") + } + if (x$fixed[i] == 0) { + dist <- c("lognormal", "gamma")[x$dist[variable_id] + 1] + cat( + "Uncertain ", dist, " distribution with (untruncated) ", + ifelse(dist == "lognormal", "log", ""), + "mean ", signif(x$mean_mean[variable_id], digits = 2), + " (SD ", signif(x$mean_sd[variable_id], digits = 2), ") and ", + ifelse(dist == "lognormal", "log", ""), + "SD ", signif(x$sd_mean[variable_id], 2), + " (SD ", signif(x$sd_sd[variable_id], 2), ")\n", + sep = "" + ) + variable_id <- variable_id + 1 } else { - ret$max <- max + cat( + "Fixed distribution with PMF [", + paste(signif( + x$np_pmf[seq(fixed_pos, fixed_pos + x$np_pmf_length[fixed_id] - 1)], + digits = 2 + ), collapse = " "), + "]\n", + sep = "" + ) + fixed_id <- fixed_id + 1 + fixed_pos <- fixed_pos + x$np_pmf_length[i] } - ret$dist <- which(eval(formals()[["dist"]]) == dist) - 1 } - return(lapply(ret, array)) + cat("\n") +} + +#' Plot PMF and CDF for a dist_spec object +#' +#' This function takes a [dist_spec] object and plots its probability mass +#' function (PMF) and cumulative distribution function (CDF) using [ggplot2]. +#' Note that currently uncertainty in distributions is not plot. +#' +#' @param x A [dist_spec] object +#' @param ... Additional arguments to pass to \code{ggplot} +#' @importFrom ggplot2 aes geom_col geom_step facet_wrap vars theme_bw +#' @export +#' @author Sam Abbott +#' @examples +#' #' # A fixed lognormal distribution with mean 5 and sd 1. +#' lognormal <- dist_spec( +#' mean = 1.6, sd = 0.5, max = 20, distribution = "lognormal" +#' ) +#' plot(lognormal) +#' +#' # An uncertain gamma distribution with mean 3 and sd 2 +#' gamma <- dist_spec( +#' mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, +#' distribution = "gamma" +#' ) +#' plot(gamma) +#' +#' # Multiple distributions +#' plot(lognormal + gamma + lognormal) +#' +#' # A combination of the two fixed distributions +#' plot(lognormal + lognormal) +plot.dist_spec <- function(x, ...) { + distribution <- cdf <- NULL + # Get the PMF and CDF data + pmf_data <- data.frame( + value = numeric(), pmf = numeric(), + distribution = factor() + ) + cdf_data <- data.frame( + value = numeric(), cdf = numeric(), + distribution = factor() + ) + variable_id <- 1 + fixed_id <- 1 + group_starts <- c(1L, cumsum(x$np_pmf_length) + 1L) + for (i in 1:x$n) { + if (x$fixed[i] == 0) { + # Uncertain distribution + dist_name <- c("Lognormal", "Gamma")[x$dist[variable_id] + 1] + mean <- x$mean_mean[variable_id] + sd <- x$sd_mean[variable_id] + c_dist <- dist_spec( + mean = mean, sd = sd, max = x$max[variable_id], + distribution = tolower(dist_name) + ) + pmf <- c_dist$np_pmf + variable_id <- variable_id + 1 + dist_name <- paste0(dist_name, " (ID: ", i, ")") + } else { + # Fixed distribution + pmf <- x$np_pmf[seq(group_starts[i], group_starts[i + 1L] - 1L)] + dist_name <- paste0("Fixed", " (ID: ", i, ")") + fixed_id <- fixed_id + 1 + } + pmf_data <- rbind( + pmf_data, + data.frame( + value = seq_along(pmf), pmf = pmf, distribution = dist_name + ) + ) + cumsum_pmf <- cumsum(pmf) + cdf_data <- rbind( + cdf_data, + data.frame( + value = seq_along(pmf), cdf = cumsum_pmf / sum(pmf), + distribution = dist_name + ) + ) + } + + # Plot PMF and CDF as facets in the same plot + plot <- ggplot() + + aes(x = value, y = pmf) + + geom_col(data = pmf_data) + + geom_step(data = cdf_data, aes(y = cdf)) + + facet_wrap(vars(distribution)) + + labs(x = "Day", y = "Probability density") + + theme_bw() + return(plot) } diff --git a/R/epinow.R b/R/epinow.R index d9124330a..78744975f 100644 --- a/R/epinow.R +++ b/R/epinow.R @@ -47,7 +47,7 @@ #' incubation_period <- get_incubation_period( #' disease = "SARS-CoV-2", source = "lauer" #' ) -#' reporting_delay <- list( +#' reporting_delay <- dist_spec( #' mean = convert_to_logmean(2, 1), #' mean_sd = 0.1, #' sd = convert_to_logsd(2, 1), @@ -60,9 +60,10 @@ #' #' # estimate Rt and nowcast/forecast cases by date of infection #' out <- epinow( -#' reported_cases = reported_cases, generation_time = generation_time, +#' reported_cases = reported_cases, +#' generation_time = generation_time_opts(generation_time), #' rt = rt_opts(prior = list(mean = 2, sd = 0.1)), -#' delays = delay_opts(incubation_period, reporting_delay) +#' delays = delay_opts(incubation_period + reporting_delay) #' ) #' # summary of the latest estimates #' summary(out) diff --git a/R/estimate_infections.R b/R/estimate_infections.R index eaf86b959..b9188f55c 100644 --- a/R/estimate_infections.R +++ b/R/estimate_infections.R @@ -28,6 +28,10 @@ #' options. See the documentation of `delay_opts()` and the examples below for #' details. #' +#' @param truncation A call to `trunc_opts()` defining the truncation of +#' observed data. Defaults to `trunc_opts()`. See `estimate_truncation()` for +#' an approach to estimating truncation from data. +#' #' @param horizon Numeric, defaults to 7. Number of days into the future to #' forecast. #' @@ -64,22 +68,26 @@ #' reported_cases <- example_confirmed[1:60] #' #' # set up example generation time -#' generation_time <- generation_time_opts( +#' generation_time <- get_generation_time( #' disease = "SARS-CoV-2", source = "ganyani", fixed = TRUE #' ) #' # set delays between infection and case report #' incubation_period <- get_incubation_period( +#' disease = "SARS-CoV-2", source = "lauer", fixed = TRUE +#' ) +#' # delays between infection and case report, with uncertainty +#' incubation_period_uncertain <- get_incubation_period( #' disease = "SARS-CoV-2", source = "lauer" #' ) -#' reporting_delay <- list( +#' reporting_delay <- dist_spec( #' mean = convert_to_logmean(2, 1), mean_sd = 0, #' sd = convert_to_logsd(2, 1), sd_sd = 0, max = 10 #' ) #' #' # default settings but assuming that delays are fixed rather than uncertain #' def <- estimate_infections(reported_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = rt_opts(prior = list(mean = 2, sd = 0.1)), #' stan = stan_opts(control = list(adapt_delta = 0.95)) #' ) @@ -92,8 +100,8 @@ #' #computation. #' # These settings are an area of active research. See ?gp_opts for details. #' agp <- estimate_infections(reported_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = rt_opts(prior = list(mean = 2, sd = 0.1)), #' gp = gp_opts(ls_min = 10, basis_prop = 0.1), #' stan = stan_opts(control = list(adapt_delta = 0.95)) @@ -103,8 +111,8 @@ #' #' # Adjusting for future susceptible depletion #' dep <- estimate_infections(reported_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = rt_opts( #' prior = list(mean = 2, sd = 0.1), #' pop = 1000000, future = "latest" @@ -116,15 +124,15 @@ #' #' # Adjusting for truncation of the most recent data #' # See estimate_truncation for an approach to estimating this from data -#' trunc_dist <- trunc_opts(dist = list( +#' trunc_dist <- dist_spec( #' mean = convert_to_logmean(0.5, 0.5), mean_sd = 0.1, #' sd = convert_to_logsd(0.5, 0.5), sd_sd = 0.1, #' max = 3 -#' )) +#' ) #' trunc <- estimate_infections(reported_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), -#' truncation = trunc_dist, +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), +#' truncation = trunc_opts(trunc_dist), #' rt = rt_opts(prior = list(mean = 2, sd = 0.1)), #' gp = gp_opts(ls_min = 10, basis_prop = 0.1), #' stan = stan_opts(control = list(adapt_delta = 0.95)) @@ -140,8 +148,8 @@ #' # can be optionally switched off using backcalc_opts(prior = "none"), #' # see ?backcalc_opts for other options #' backcalc <- estimate_infections(reported_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = NULL, backcalc = backcalc_opts(), #' obs = obs_opts(scale = list(mean = 0.4, sd = 0.05)), #' horizon = 0 @@ -150,8 +158,8 @@ #' #' # Rt projected into the future using the Gaussian process #' project_rt <- estimate_infections(reported_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = rt_opts( #' prior = list(mean = 2, sd = 0.1), #' future = "project" @@ -162,8 +170,8 @@ #' # default settings on a later snapshot of data #' snapshot_cases <- example_confirmed[80:130] #' snapshot <- estimate_infections(snapshot_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = rt_opts(prior = list(mean = 1, sd = 0.1)) #' ) #' plot(snapshot) @@ -171,8 +179,8 @@ #' # stationary Rt assumption (likely to provide biased real-time estimates) #' # with uncertain reporting delays #' stat <- estimate_infections(reported_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period_uncertain + reporting_delay), #' rt = rt_opts(prior = list(mean = 2, sd = 0.1), gp_on = "R0") #' ) #' plot(stat) @@ -180,15 +188,16 @@ #' # no gaussian process (i.e fixed Rt assuming no breakpoints) #' # with uncertain reporting delays #' fixed <- estimate_infections(reported_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period_uncertain + reporting_delay), #' gp = NULL #' ) #' plot(fixed) #' #' # no delays #' no_delay <- estimate_infections( -#' reported_cases, generation_time = generation_time +#' reported_cases, +#' generation_time = generation_time_opts(generation_time) #' ) #' plot(no_delay) #' @@ -199,8 +208,8 @@ #' breakpoint := ifelse(date == as.Date("2020-03-16"), 1, 0) #' ] #' bkp <- estimate_infections(bp_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period_uncertain + reporting_delay), #' rt = rt_opts(prior = list(mean = 2, sd = 0.1)), #' gp = NULL #' ) @@ -211,8 +220,8 @@ #' # weekly random walk #' # with uncertain reporting delays #' rw <- estimate_infections(reported_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period_uncertain + reporting_delay), #' rt = rt_opts(prior = list(mean = 2, sd = 0.1), rw = 7), #' gp = NULL #' ) @@ -253,9 +262,6 @@ estimate_infections <- function(reported_cases, name = "EpiNow2.epinow.estimate_infections" ) } - if (is.null(delays$delays)) { - stop("A call to delay_opts must be passed to delays") - } # Make sure there are no missing dates and order cases reported_cases <- create_clean_reported_cases( reported_cases, horizon, @@ -266,12 +272,13 @@ estimate_infections <- function(reported_cases, # Record earliest date with data start_date <- min(reported_cases$date, na.rm = TRUE) + seeding_time <- get_seeding_time(delays, generation_time) + # Create mean shifted reported cases as prior reported_cases <- data.table::rbindlist(list( data.table::data.table( - date = - seq(min(reported_cases$date) - delays$seeding_time - - backcalc$prior_window, + date = seq( + min(reported_cases$date) - seeding_time - backcalc$prior_window, min(reported_cases$date) - 1, by = "days" ), @@ -282,7 +289,7 @@ estimate_infections <- function(reported_cases, shifted_cases <- create_shifted_cases( reported_cases, - delays$seeding_time, + seeding_time, backcalc$prior_window, horizon ) @@ -291,9 +298,7 @@ estimate_infections <- function(reported_cases, # Define stan model parameters data <- create_stan_data( reported_cases = reported_cases, - generation_time = generation_time, - delays = delays, - truncation = truncation, + seeding_time = seeding_time, rt = rt, gp = gp, obs = obs, @@ -302,6 +307,13 @@ estimate_infections <- function(reported_cases, horizon = horizon ) + data <- c(data, create_stan_delays( + gt = generation_time, + delay = delays, + trunc = truncation, + ot = data$t - data$seeding_time - data$horizon + )) + # Set up default settings args <- create_stan_args( stan = stan, @@ -343,7 +355,7 @@ estimate_infections <- function(reported_cases, ) ## Add prior infections - if (delays$delays > 0) { + if (delays$n > 0) { out$prior_infections <- shifted_cases[ , .( @@ -707,6 +719,7 @@ format_fit <- function(posterior_samples, horizon, shift, burn_in, start_date, ) ] + # remove burn in period if specified if (burn_in > 0) { futile.logger::flog.info( diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R index 451bd8400..0982b8d94 100644 --- a/R/estimate_secondary.R +++ b/R/estimate_secondary.R @@ -127,9 +127,10 @@ estimate_secondary <- function(reports, secondary = secondary_opts(), delays = delay_opts( - list( + dist_spec( mean = 2.5, mean_sd = 0.5, - sd = 0.47, sd_sd = 0.25, max = 30 + sd = 0.47, sd_sd = 0.25, max = 30, + prior_weight = 1 ) ), truncation = trunc_opts(), @@ -151,15 +152,17 @@ estimate_secondary <- function(reports, t = nrow(reports), obs = reports$secondary, primary = reports$primary, - burn_in = burn_in + burn_in = burn_in, + seeding_time = 0 ) # secondary model options data <- c(data, secondary) # delay data - data <- c(data, delays) - data$seeding_time <- 0 - # truncation data - data <- c(data, truncation) + data <- c(data, create_stan_delays( + delay = delays, + trunc = truncation + )) + # observation model data data <- c(data, create_obs_model(obs, dates = reports$date)) diff --git a/R/extract.R b/R/extract.R index 20eacb97a..764797379 100644 --- a/R/extract.R +++ b/R/extract.R @@ -152,47 +152,22 @@ extract_parameter_samples <- function(stan_fit, data, reported_dates, c("time", "date") := NULL ] } - if (data$n_uncertain_mean_delays > 0) { + if (data$delay_n_p > 0) { out$delay_mean <- extract_parameter( - "delay_mean", samples, 1:data$n_uncertain_mean_delays + "delay_mean", samples, seq_len(data$delay_n_p) ) out$delay_mean <- out$delay_mean[, strat := as.character(time)][, time := NULL][, - date := NULL + date := NULL ] - } - if (data$n_uncertain_sd_delays > 0) { out$delay_sd <- extract_parameter( - "delay_sd", samples, seq_len(data$n_uncertain_sd_delays) + "delay_sd", samples, seq_len(data$delay_n_p) ) out$delay_sd <- out$delay_sd[, strat := as.character(time)][, time := NULL][, date := NULL ] } - if (data$truncation > 0) { - if (data$trunc_mean_sd > 0) { - out$truncation_mean <- extract_parameter("trunc_mean", samples, 1) - out$truncation_mean <- - out$truncation_mean[, - strat := as.character(time)][, time := NULL][, date := NULL - ] - } - if (data$trunc_sd_sd > 0) { - out$truncation_sd <- extract_parameter("trunc_sd", samples, 1) - out$truncation_sd <- out$truncation_sd[, - strat := as.character(time)][, time := NULL][, date := NULL - ] - } - } - if (data$estimate_r && data$gt_mean_sd > 0) { - out$gt_mean <- extract_static_parameter("gt_mean", samples) - out$gt_mean <- out$gt_mean[, value := value.V1][, value.V1 := NULL] - } - if (data$estimate_r && data$gt_sd_sd > 0) { - out$gt_sd <- extract_static_parameter("gt_sd", samples) - out$gt_sd <- out$gt_sd[, value := value.V1][, value.V1 := NULL] - } if (data$model_type == 1) { out$reporting_overdispersion <- extract_static_parameter("rep_phi", samples) out$reporting_overdispersion <- out$reporting_overdispersion[, diff --git a/R/get.R b/R/get.R index 9d18b4f9d..bf233076c 100644 --- a/R/get.R +++ b/R/get.R @@ -92,8 +92,8 @@ get_raw_result <- function(file, region, date, #' # run multiregion estimates #' regional_out <- regional_epinow( #' reported_cases = cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = rt_opts(rw = 7), gp = NULL, #' output = c("regions", "latest"), #' target_folder = dir, @@ -221,7 +221,7 @@ get_dist <- function(data, disease, source, max_value = 15, fixed = FALSE) { dist$mean_sd <- 0 dist$sd_sd <- 0 } - return(dist) + return(do.call(dist_spec, dist)) } #' Get a Literature Distribution for the Generation Time #' @@ -301,3 +301,27 @@ get_regions_with_most_reports <- function(reported_cases, most_reports <- most_reports[1:no_regions][!is.na(region)]$region return(most_reports) } + +##' Estimate seeding time from delays and generation time +##' +##' The seeding time is set to the mean of the specified delays, constrained +##' to be at least the maximum generation time +##' @param delays Delays as specified using `dist_spec` +##' @param generation_time Generation time as specified using `dist_spec` +##' @return An integer seeding time +##' @author Sebastian Funk +get_seeding_time <- function(delays, generation_time) { + # Estimate the mean delay ----------------------------------------------- + seeding_time <- sum(mean(delays)) + if (seeding_time < 1) { + seeding_time <- 1 + } else { + seeding_time <- as.integer(seeding_time) + } + ## make sure we have at least gt_max seeding time + seeding_time <- max( + seeding_time, + sum(generation_time$max) + sum(generation_time$np_pmf_max) + ) + return(seeding_time) +} diff --git a/R/opts.R b/R/opts.R index 60ccc892f..1feebb456 100644 --- a/R/opts.R +++ b/R/opts.R @@ -6,90 +6,40 @@ #' be passed to [get_generation_time], or as parameters of a distribution to be #' passed to [dist_spec]. #' -#' @param ... Any parameters to be passed to [dist_spec()], if the generation -#' time is given as parameters of a distribution rather than a \code{disease} -#' and \code{source}. In this case if the \code{mean} parameter is not set a -#' mean of 1 will be assumed, if the \code{max} parameter not set then the -#' \code{max} will be set to 15 to ensure backwards compatibility, and if no -#' \code{dist} parameter is given then a gamma distribution will be used for -#' backwards compatibility. As for [delay_opts()] a list of parameters can -#' also be supplied that describe a distribution (but unlike [delay_opts()] -#' multiple distributions may not currently be used (for example as output by -#' get_generation_time()). -#' -#' @param max Integer, defaults to 15. Maximum generation time. This will -#' not be used if a maximum is set in the distribution parameters. -#' -#' @param fixed Logical, defaults to `FALSE`. Should the generation time be -#' treated as coming from fixed (vs uncertain) distributions. -#' -#' @param prior_weight numeric, weight given to the generation time prior. -#' By default the generation time prior will be weighted by the number of -#' observation data points, usually preventing the posteriors from shifting -#' much from the given distribution. Another sensible option would be 1, -#' i.e. treating the generation time distribution as a single parameter. +#' @param dist A delay distribution or series of delay distributions generated +#' using [dist_spec()] or [get_generation_time()]. If no distribution is given +#' a fixed generation time of 1 will be assumed. #' #' @return A list summarising the input delay distributions. #' @author Sebastian Funk #' @author Sam Abbott -#' @inheritParams get_generation_time #' @seealso convert_to_logmean convert_to_logsd bootstrapped_dist_fit dist_spec #' @export #' @examples -#' # default settings with a fixed generation time +#' # default settings with a fixed generation time of 1 #' generation_time_opts() #' #' # A fixed gamma distributed generation time -#' generation_time_opts(mean = 3, sd = 2) +#' generation_time_opts(dist_spec(mean = 3, sd = 2, max = 15)) #' #' # An uncertain gamma distributed generation time -#' generation_time_opts(mean = 3, sd = 2, mean_sd = 1, sd_sd = 0.5) -#' -generation_time_opts <- function(..., disease, source, max = 15L, - fixed = FALSE, prior_weight = NULL) { - dot_options <- list(...) ## options for dist_spec - ## check consistent options are given - type_options <- (length(dot_options) > 0) + ## distributional parameters - (!missing(disease) && !missing(source)) ## from included distributions - if (type_options > 1) { - stop( - "Generation time should be given either as distributional options", - " or as disease/source, but not both." # nolint - ) - } - - ## check to see if options have been passed in as a list - if (length(dot_options) == 1 && is.list(dot_options[[1]])) { - dot_options <- dot_options[[1]] - } - - if (!missing(disease) && !missing(source)) { - dist <- get_generation_time( - disease = disease, source = source, max_value = max - ) - dist$fixed <- fixed - gt <- do.call(dist_spec, dist) - } else { - ## make gamma default for backwards compatibility - if (!("dist" %in% names(dot_options))) { - dot_options$dist <- "gamma" - } - ## set max - if (!("max" %in% names(dot_options))) { - dot_options$max <- max - } - ## set default of mean=1 for backwards compatibility - if (!("mean" %in% names(dot_options))) { - dot_options$mean <- 1 - } - dot_options$fixed <- fixed - gt <- do.call(dist_spec, dot_options) +#' generation_time_opts( +#' dist_spec(mean = 3, sd = 2, mean_sd = 1, sd_sd = 0.5, max = 15) +#' ) +#' +#' # A generation time sourced from the literature +#' dist <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani") +#' generation_time_opts(dist) +generation_time_opts <- function(dist = dist_spec(mean = 1)) { + if (!is(dist, "dist_spec")) { + stop("The generation time distribution must be given either using a call ", + "to `dist_spec` or `get_generation_time`. ", + "This behaviour has changed from previous versions of `EpiNow2` and ", + "any code using it may need to be updated. For examples and more ", + "information, see the relevant documentation pages using ", + "`?generation_time_opts`") } - gt$weight <- prior_weight - - names(gt) <- paste0("gt_", names(gt)) - - return(gt) + return(dist) } #' Delay Distribution Options @@ -97,18 +47,8 @@ generation_time_opts <- function(..., disease, source, max = 15L, #' @description `r lifecycle::badge("stable")` #' Returns delay distributions formatted for usage by downstream #' functions. -#' @param ... A list of lists specifying distributions of reporting delays. -#' Each list is passed to [dist_spec()] and so should contain parameters linked -#' to the functions arguments. If a list is not given then it is assumed that -#' no delay is present. If multiple lists are given then they are assumed to -#' be independent. -#' -#' @param fixed Logical, defaults to `FALSE`. Should all reporting delays be -#' treated as coming from fixed (vs uncertain) distributions. Making this -#' simplification drastically reduces compute requirements. Setting this here -#' overrides any of the constituent delay distributions being set to be fixed -#' or not. -#' +#' @param dist A delay distribution or series of delay distributions generated +#' using [dist_spec()]. Default is an empty call to [dist_spec()], i.e. no delay #' @return A list summarising the input delay distributions. #' @author Sam Abbott #' @author Sebastian Funk @@ -119,65 +59,27 @@ generation_time_opts <- function(..., disease, source, max = 15L, #' delay_opts() #' #' # A single delay that has uncertainty -#' delay <- list(mean = 1, mean_sd = 0.2, sd = 0.5, sd_sd = 0.1, max = 15) +#' delay <- dist_spec(mean = 1, mean_sd = 0.2, sd = 0.5, sd_sd = 0.1, max = 15) #' delay_opts(delay) #' -#' # A single delay where we override the uncertainty assumption -#' delay_opts(delay, fixed = TRUE) -#' -#' # A delay where uncertainty is implict -#' delay_opts(list(mean = 1, mean_sd = 0, sd = 0.5, sd_sd = 0, max = 15)) +#' # A single delay without uncertainty +#' delay <- dist_spec(mean = 1, sd = 0.5, max = 15) +#' delay_opts(delay) #' -#' # Multiple delays -#' delay_opts(delay, delay) -delay_opts <- function(..., fixed = FALSE) { - delays <- list(...) - data <- lapply(delays, do.call, what = dist_spec) - - if (fixed) { ## set all to fixed - data <- lapply(data, function(x) { - x$fixed <- 1L - x$mean_sd <- 0 - x$sd_sd <- 0 - return(x) - }) - } - - if (length(data) > 0) { - data <- purrr::transpose(data) - ## convert back to arrays - data <- lapply(data, function(x) array(unlist(x))) - } else { - data <- dist_spec() - } - - names(data) <- paste0("delay_", names(data)) - # Estimate the mean delay ----------------------------------------------- - data$seeding_time <- sum(purrr::pmap_dbl( - list(data$delay_mean_mean, data$delay_sd_mean, data$delay_dist), - function(.x, .y, .z) { - ifelse(.z == 0, exp(.x + .y^2 / 2), .x) - } - )) - if (data$seeding_time < 1) { - data$seeding_time <- 1 - } else { - data$seeding_time <- as.integer(data$seeding_time) +#' # Multiple delays (in this case twice the same) +#' delay_opts(delay + delay) +delay_opts <- function(dist = dist_spec()) { + if (!is(dist, "dist_spec")) { + stop( + "Delay distributions must be of given either using a call to ", + "`dist_spec` or one of the `get_...` functions such as ", + "`get_incubation_period`. This behaviour has changed from previous ", + "versions of `EpiNow2` and any code using it may need to be updated. ", + "For examples and more information, see the relevant documentation ", + "pages using `?delay_opts`." + ) } - - data$delays <- length(delays) - - data$uncertain_mean_delays <- array(which(data$delay_mean_sd > 0)) - data$uncertain_sd_delays <- array(which(data$delay_sd_sd > 0)) - data$fixed_delays <- array( - which(data$delay_mean_sd == 0 & data$delay_sd_sd == 0) - ) - - data$n_uncertain_mean_delays <- length(data$uncertain_mean_delays) - data$n_uncertain_sd_delays <- length(data$uncertain_sd_delays) - data$n_fixed_delays <- length(data$fixed_delays) - - return(data) + return(dist) } #' Truncation Distribution Options @@ -187,9 +89,9 @@ delay_opts <- function(..., fixed = FALSE) { #' downstream functions. See `estimate_truncation()` for an approach to #' estimate these distributions. #' -#' @param dist Parameters of a distribution as supported by -#' [dist_spec()]. -#' +#' @param dist A delay distribution or series of delay distributions reflecting +#' the truncation generated using [dist_spec()] or [estimate_truncation()]. +#' Default is an empty call to [dist_spec()], i.e. no truncation #' @return A list summarising the input truncation distribution. #' #' @author Sam Abbott @@ -201,12 +103,19 @@ delay_opts <- function(..., fixed = FALSE) { #' trunc_opts() #' #' # truncation dist -#' trunc_opts(dist = list(mean = 3, sd = 2)) -trunc_opts <- function(dist = list()) { - data <- do.call(dist_spec, dist) - names(data) <- paste0("trunc_", names(data)) - data$truncation <- as.integer(length(data$trunc_max) > 0) - return(data) +#' trunc_opts(dist = dist_spec(mean = 3, sd = 2, max = 10)) +trunc_opts <- function(dist = dist_spec()) { + if (!is(dist, "dist_spec")) { + stop( + "Truncation distributions must be of given either using a call to ", + "`dist_spec` or one of the `get_...` functions. ", + "This behaviour has changed from previous versions of `EpiNow2` and ", + "any code using it may need to be updated. For examples and more ", + "information, see the relevant documentation pages using ", + "`?trunc_opts`" + ) + } + return(dist) } #' Time-Varying Reproduction Number Options diff --git a/R/plot.R b/R/plot.R index 874ca0946..4bbc03902 100644 --- a/R/plot.R +++ b/R/plot.R @@ -94,8 +94,8 @@ plot_CrIs <- function(plot, CrIs, alpha, linewidth) { #' #' # run model #' out <- estimate_infections(cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay) +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay) #' ) #' # plot infections #' plot_estimates( diff --git a/R/regional_epinow.R b/R/regional_epinow.R index 882fb8d17..dbb5a56cc 100644 --- a/R/regional_epinow.R +++ b/R/regional_epinow.R @@ -72,7 +72,7 @@ #' incubation_period <- get_incubation_period( #' disease = "SARS-CoV-2", source = "lauer" #' ) -#' reporting_delay <- list( +#' reporting_delay <- dist_spec( #' mean = convert_to_logmean(2, 1), #' mean_sd = 0.1, #' sd = convert_to_logsd(2, 1), @@ -90,8 +90,8 @@ #' # samples and warmup have been reduced for this example #' def <- regional_epinow( #' reported_cases = cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = rt_opts(prior = list(mean = 2, sd = 0.2)), #' stan = stan_opts( #' samples = 100, warmup = 200, @@ -107,8 +107,8 @@ #' rt <- opts_list(rt_opts(), cases, realland = rt_opts(rw = 7)) #' region_rt <- regional_epinow( #' reported_cases = cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = rt, gp = gp, #' stan = stan_opts( #' samples = 100, warmup = 200, diff --git a/R/report.R b/R/report.R index 33279c0a5..be8c21e34 100644 --- a/R/report.R +++ b/R/report.R @@ -39,7 +39,7 @@ #' incubation_period <- get_incubation_period( #' disease = "SARS-CoV-2", source = "lauer" #' ) -#' reporting_delay <- list( +#' reporting_delay <- dist_spec( #' mean = convert_to_logmean(2, 1), mean_sd = 0.1, #' sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 10 #' ) @@ -51,7 +51,7 @@ #' #' reported_cases <- report_cases( #' case_estimates = cases, -#' delays = delay_opts(incubation_period, reporting_delay), +#' delays = delay_opts(incubation_period + reporting_delay), #' type = "sample" #' ) #' print(reported_cases) @@ -66,13 +66,13 @@ report_cases <- function(case_estimates, # define delay distributions delay_defs <- purrr::map( - seq_along(delays$delay_mean_mean), + seq_along(delays$mean_mean), ~ EpiNow2::lognorm_dist_def( - mean = delays$delay_mean_mean[.], - mean_sd = delays$delay_mean_sd[.], - sd = delays$delay_sd_mean[.], - sd_sd = delays$delay_sd_sd[.], - max_value = delays$delay_max[.], + mean = delays$mean_mean[.], + mean_sd = delays$mean_sd[.], + sd = delays$mean_mean[.], + sd_sd = delays$mean_sd[.], + max_value = delays$max[.], samples = samples ) ) @@ -283,8 +283,8 @@ report_summary <- function(summarised_estimates, #' # run model #' out <- estimate_infections(cases, #' stan = stan_opts(samples = 500), -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = NULL #' ) #' diff --git a/R/simulate_infections.R b/R/simulate_infections.R index b463bf78d..bc7bd44bf 100644 --- a/R/simulate_infections.R +++ b/R/simulate_infections.R @@ -54,15 +54,15 @@ #' incubation_period <- get_incubation_period( #' disease = "SARS-CoV-2", source = "lauer" #' ) -#' reporting_delay <- list( +#' reporting_delay <- dist_spec( #' mean = convert_to_logmean(2, 1), mean_sd = 0.1, #' sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 15 #' ) #' #' # fit model to data to recover Rt estimates #' est <- estimate_infections(reported_cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' rt = rt_opts(prior = list(mean = 2, sd = 0.1), rw = 7), #' stan = stan_opts(control = list(adapt_delta = 0.9)), #' obs = obs_opts(scale = list(mean = 0.1, sd = 0.01)), @@ -112,7 +112,8 @@ simulate_infections <- function(estimates, draws <- extract(estimates$fit, pars = c( "noise", "eta", "lp__", "infections", - "reports", "imputed_reports", "r" + "reports", "imputed_reports", "r", + "gt_mean", "gt_var" ), include = FALSE ) diff --git a/R/summarise.R b/R/summarise.R index 22264dd33..3490b4afa 100644 --- a/R/summarise.R +++ b/R/summarise.R @@ -185,8 +185,8 @@ summarise_results <- function(regions, #' # run basic nowcasting pipeline #' out <- regional_epinow( #' reported_cases = cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' output = "region", #' rt = NULL #' ) @@ -545,8 +545,8 @@ summarise_key_measures <- function(regional_results = NULL, #' # run basic nowcasting pipeline #' regional_out <- regional_epinow( #' reported_cases = cases, -#' generation_time = generation_time, -#' delays = delay_opts(incubation_period, reporting_delay), +#' generation_time = generation_time_opts(generation_time), +#' delays = delay_opts(incubation_period + reporting_delay), #' stan = stan_opts(samples = 100, warmup = 100), #' output = c("region", "timing") #' ) diff --git a/README.Rmd b/README.Rmd index ffaaa1513..e4b68cf41 100644 --- a/README.Rmd +++ b/README.Rmd @@ -107,7 +107,7 @@ reporting_delay <- estimate_delay( If data was not available we could instead make an informed estimate of the likely delay (*this is a synthetic example and not applicable to real world use cases and we have not included uncertainty to decrease runtimes*), ```{r} -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), sd = convert_to_logsd(2, 1), max = 10, dist = "lognormal" ) @@ -141,7 +141,7 @@ Estimate cases by date of infection, the time-varying reproduction number, the r estimates <- epinow( reported_cases = reported_cases, generation_time = generation_time_opts(generation_time), - delays = delay_opts(incubation_period, reporting_delay), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.2)), stan = stan_opts(cores = 4, control = list(adapt_delta = 0.99)), verbose = interactive() @@ -194,8 +194,8 @@ Calling `regional_epinow()` runs the `epinow()` on each region in turn (or in pa ```{r, message = FALSE, warning = FALSE} estimates <- regional_epinow( reported_cases = reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.2), rw = 7), gp = NULL, stan = stan_opts(cores = 4, warmup = 250, samples = 1000) diff --git a/README.md b/README.md index ee7e03c98..4c73f377f 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ real world use cases and we have not included uncertainty to decrease runtimes*), ``` r -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), sd = convert_to_logsd(2, 1), max = 10, dist = "lognormal" ) @@ -238,7 +238,7 @@ For other formulations see the documentation for estimates <- epinow( reported_cases = reported_cases, generation_time = generation_time_opts(generation_time), - delays = delay_opts(incubation_period, reporting_delay), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.2)), stan = stan_opts(cores = 4, control = list(adapt_delta = 0.99)), verbose = interactive() @@ -258,13 +258,13 @@ parameters at the latest date partially supported by data. knitr::kable(summary(estimates)) ``` -| measure | estimate | -|:--------------------------------------|:-----------------------| -| New confirmed cases by infection date | 2260 (1145 – 4079) | -| Expected change in daily cases | Likely decreasing | -| Effective reproduction no. | 0.88 (0.63 – 1.2) | -| Rate of growth | -0.033 (-0.11 – 0.041) | -| Doubling/halving time (days) | -21 (17 – -6.3) | +| measure | estimate | +|:--------------------------------------|:----------------------| +| New confirmed cases by infection date | 2313 (1159 – 4345) | +| Expected change in daily cases | Likely decreasing | +| Effective reproduction no. | 0.89 (0.62 – 1.2) | +| Rate of growth | -0.026 (-0.1 – 0.038) | +| Doubling/halving time (days) | -26 (18 – -6.7) | Summarised parameter estimates can also easily be returned, either filtered for a single parameter or for all parameters. @@ -272,19 +272,19 @@ filtered for a single parameter or for all parameters. ``` r head(summary(estimates, type = "parameters", params = "R")) #> date variable strat type median mean sd lower_90 -#> 1: 2020-02-22 R NA estimate 2.106722 2.110304 0.13394453 1.893217 -#> 2: 2020-02-23 R NA estimate 2.078456 2.079754 0.11199646 1.895084 -#> 3: 2020-02-24 R NA estimate 2.047571 2.047837 0.09415931 1.894614 -#> 4: 2020-02-25 R NA estimate 2.013123 2.014628 0.08001628 1.881882 -#> 5: 2020-02-26 R NA estimate 1.979236 1.980256 0.06909073 1.866860 -#> 6: 2020-02-27 R NA estimate 1.945204 1.944881 0.06089018 1.845480 +#> 1: 2020-02-22 R NA estimate 2.140044 2.142893 0.13818099 1.937615 +#> 2: 2020-02-23 R NA estimate 2.105628 2.106892 0.11415164 1.936612 +#> 3: 2020-02-24 R NA estimate 2.068985 2.069442 0.09420757 1.921287 +#> 4: 2020-02-25 R NA estimate 2.031434 2.030725 0.07830576 1.907767 +#> 5: 2020-02-26 R NA estimate 1.991226 1.990969 0.06634858 1.884688 +#> 6: 2020-02-27 R NA estimate 1.950962 1.950427 0.05807390 1.856440 #> lower_50 lower_20 upper_20 upper_50 upper_90 -#> 1: 2.017975 2.075736 2.143921 2.199814 2.331646 -#> 2: 2.003099 2.051668 2.108079 2.154005 2.260400 -#> 3: 1.983107 2.022437 2.070778 2.109637 2.199787 -#> 4: 1.960463 1.993953 2.033858 2.068386 2.143568 -#> 5: 1.933872 1.962735 1.997947 2.025679 2.091690 -#> 6: 1.903834 1.930841 1.960120 1.983832 2.045497 +#> 1: 2.046299 2.104219 2.174057 2.232616 2.370781 +#> 2: 2.025782 2.075403 2.132810 2.182697 2.294095 +#> 3: 2.003747 2.044222 2.090967 2.131610 2.225019 +#> 4: 1.977390 2.010528 2.048636 2.082264 2.163819 +#> 5: 1.944677 1.974170 2.008207 2.035011 2.103163 +#> 6: 1.910567 1.935790 1.965265 1.988672 2.046538 ``` Reported cases are returned in a separate data frame in order to @@ -293,19 +293,19 @@ streamline the reporting of forecasts and for model evaluation. ``` r head(summary(estimates, output = "estimated_reported_cases")) #> date type median mean sd lower_90 lower_50 lower_20 -#> 1: 2020-02-22 gp_rt 50 51.1930 13.63311 30 42 47 -#> 2: 2020-02-23 gp_rt 68 70.0595 17.77081 44 58 64 -#> 3: 2020-02-24 gp_rt 76 77.0875 18.71673 49 64 71 -#> 4: 2020-02-25 gp_rt 77 78.7475 19.59236 50 65 73 -#> 5: 2020-02-26 gp_rt 85 86.1595 20.75091 55 72 80 -#> 6: 2020-02-27 gp_rt 118 120.7215 29.62068 77 100 111 +#> 1: 2020-02-22 gp_rt 65.5 67.2870 18.83096 40 54 61 +#> 2: 2020-02-23 gp_rt 78.0 78.8395 21.73755 47 63 72 +#> 3: 2020-02-24 gp_rt 77.0 78.8920 21.59142 47 64 72 +#> 4: 2020-02-25 gp_rt 73.0 75.0705 20.82804 45 61 68 +#> 5: 2020-02-26 gp_rt 78.0 79.8325 22.03166 47 65 73 +#> 6: 2020-02-27 gp_rt 110.0 112.9160 28.92359 71 92 103 #> upper_20 upper_50 upper_90 -#> 1: 54 60 76.00 -#> 2: 73 80 100.05 -#> 3: 80 88 109.05 -#> 4: 82 91 114.00 -#> 5: 90 99 123.00 -#> 6: 125 138 173.00 +#> 1: 70 79.00 101 +#> 2: 83 92.00 117 +#> 3: 82 92.00 116 +#> 4: 78 87.00 115 +#> 5: 83 91.25 120 +#> 6: 118 130.00 165 ``` A range of plots are returned (with the single summary plot shown @@ -316,7 +316,7 @@ method. plot(estimates) ``` -![](man/figures/unnamed-chunk-14-1.png) +![](man/figures/unnamed-chunk-15-1.png) ### [regional_epinow()](https://epiforecasts.io/EpiNow2/reference/regional_epinow.html) @@ -348,25 +348,25 @@ us piecewise constant estimates by week. ``` r estimates <- regional_epinow( reported_cases = reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.2), rw = 7), gp = NULL, stan = stan_opts(cores = 4, warmup = 250, samples = 1000) ) -#> INFO [2023-04-28 11:43:24] Producing following optional outputs: regions, summary, samples, plots, latest -#> INFO [2023-04-28 11:43:24] Reporting estimates using data up to: 2020-04-21 -#> INFO [2023-04-28 11:43:24] No target directory specified so returning output -#> INFO [2023-04-28 11:43:24] Producing estimates for: testland, realland -#> INFO [2023-04-28 11:43:24] Regions excluded: none -#> INFO [2023-04-28 11:43:52] Completed estimates for: testland -#> INFO [2023-04-28 11:44:20] Completed estimates for: realland -#> INFO [2023-04-28 11:44:20] Completed regional estimates -#> INFO [2023-04-28 11:44:20] Regions with estimates: 2 -#> INFO [2023-04-28 11:44:20] Regions with runtime errors: 0 -#> INFO [2023-04-28 11:44:20] Producing summary -#> INFO [2023-04-28 11:44:20] No summary directory specified so returning summary output -#> INFO [2023-04-28 11:44:21] No target directory specified so returning timings +#> INFO [2023-06-09 13:52:11] Producing following optional outputs: regions, summary, samples, plots, latest +#> INFO [2023-06-09 13:52:11] Reporting estimates using data up to: 2020-04-21 +#> INFO [2023-06-09 13:52:11] No target directory specified so returning output +#> INFO [2023-06-09 13:52:11] Producing estimates for: testland, realland +#> INFO [2023-06-09 13:52:11] Regions excluded: none +#> INFO [2023-06-09 13:52:40] Completed estimates for: testland +#> INFO [2023-06-09 13:53:07] Completed estimates for: realland +#> INFO [2023-06-09 13:53:07] Completed regional estimates +#> INFO [2023-06-09 13:53:07] Regions with estimates: 2 +#> INFO [2023-06-09 13:53:07] Regions with runtime errors: 0 +#> INFO [2023-06-09 13:53:07] Producing summary +#> INFO [2023-06-09 13:53:07] No summary directory specified so returning summary output +#> INFO [2023-06-09 13:53:08] No target directory specified so returning timings ``` Results from each region are stored in a `regional` list with across @@ -389,10 +389,10 @@ output. knitr::kable(estimates$summary$summarised_results$table) ``` -| Region | New confirmed cases by infection date | Expected change in daily cases | Effective reproduction no. | Rate of growth | Doubling/halving time (days) | -|:---------|:--------------------------------------|:-------------------------------|:---------------------------|:----------------------|:-----------------------------| -| realland | 2116 (1159 – 3665) | Likely decreasing | 0.86 (0.61 – 1.2) | -0.04 (-0.11 – 0.056) | -17 (12 – -6) | -| testland | 2069 (1141 – 3628) | Likely decreasing | 0.86 (0.6 – 1.1) | -0.039 (-0.12 – 0.04) | -18 (17 – -5.9) | +| Region | New confirmed cases by infection date | Expected change in daily cases | Effective reproduction no. | Rate of growth | Doubling/halving time (days) | +|:---------|:--------------------------------------|:-------------------------------|:---------------------------|:------------------------|:-----------------------------| +| realland | 2176 (1192 – 4065) | Likely decreasing | 0.87 (0.65 – 1.1) | -0.032 (-0.096 – 0.03) | -22 (23 – -7.2) | +| testland | 2217 (1150 – 4155) | Likely decreasing | 0.87 (0.64 – 1.2) | -0.031 (-0.099 – 0.036) | -23 (19 – -7) | A range of plots are again returned (with the single summary plot shown below). @@ -401,7 +401,7 @@ below). estimates$summary$summary_plot ``` -![](man/figures/unnamed-chunk-18-1.png) +![](man/figures/unnamed-chunk-19-1.png) ### Reporting templates diff --git a/inst/dev/recover-synthetic/rt.R b/inst/dev/recover-synthetic/rt.R index e51f9a8c3..9ab84a919 100644 --- a/inst/dev/recover-synthetic/rt.R +++ b/inst/dev/recover-synthetic/rt.R @@ -10,7 +10,7 @@ options(mc.cores = 4) generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani") # set delays between infection and case report incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer") -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), mean_sd = 0.1, sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 15 ) @@ -20,8 +20,8 @@ obs <- obs_opts(scale = list(mean = 0.1, sd = 0.025), return_likelihood = TRUE) # fit model to data to recover realistic parameter estimates and define settings # shared simulation settings init <- estimate_infections(example_confirmed[1:100], - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.1), rw = 14), gp = NULL, horizon = 0, obs = obs @@ -63,8 +63,8 @@ for (method in c("nuts")) { # GP gp[[method]] <- estimate_infections(sim_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.25)), stan = stanopts, obs = obs, @@ -78,8 +78,8 @@ for (method in c("nuts")) { # Backcalculation backcalc[[method]] <- estimate_infections(sim_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = NULL, stan = stanopts, obs = obs, @@ -93,8 +93,8 @@ for (method in c("nuts")) { # RW (weekly) weekly_rw[[method]] <- estimate_infections(sim_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts( prior = list(mean = 2, sd = 0.25), rw = 7 @@ -112,8 +112,8 @@ for (method in c("nuts")) { # RW (every month) + stationary Guassian process gp_rw[[method]] <- estimate_infections(sim_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts( prior = list(mean = 2, sd = 0.25), rw = 14, gp_on = "R0" ), @@ -131,8 +131,8 @@ for (method in c("nuts")) { if (fit_daily) { daily_rw[[method]] <- estimate_infections(sim_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts( prior = list(mean = 2, sd = 0.25), rw = 1 diff --git a/inst/stan/data/delays.stan b/inst/stan/data/delays.stan index a11b51b6c..fb78f0454 100644 --- a/inst/stan/data/delays.stan +++ b/inst/stan/data/delays.stan @@ -1,14 +1,20 @@ - int delays; // no. of delay distributions - int n_uncertain_mean_delays; // no. of delay distributions with uncertain mean - int n_uncertain_sd_delays; // no. of delay distributions with uncertain sd - int n_fixed_delays; // no. of delay distributions with uncertain sd - // indices of delay distributions with uncertainty - int uncertain_mean_delays[n_uncertain_mean_delays]; - int uncertain_sd_delays[n_uncertain_sd_delays]; - int fixed_delays[n_fixed_delays]; - real delay_mean_sd[delays]; // prior sd of mean incubation period - real delay_mean_mean[delays];// prior mean of mean incubation period - real delay_sd_mean[delays]; // prior sd of sd of incubation period - real delay_sd_sd[delays]; // prior sd of sd of incubation period - int delay_max[delays]; // maximum incubation period - int delay_dist[delays]; // 0 = lognormal; 1 = gamma + int delay_n; // number of delay distributions + int delay_n_p; // number of parametric delay distributions + int delay_n_np; // number of nonparametric delay distributions + real delay_mean_mean[delay_n_p]; // prior mean of mean delay distribution + real delay_mean_sd[delay_n_p]; // prior sd of mean delay distribution + real delay_sd_mean[delay_n_p]; // prior sd of sd of delay distribution + real delay_sd_sd[delay_n_p]; // prior sd of sd of delay distribution + int delay_max[delay_n_p]; // maximum delay distribution + int delay_dist[delay_n_p]; // 0 = lognormal; 1 = gamma + int delay_np_pmf_max; // number of nonparametric pmf elements + vector[delay_np_pmf_max] delay_np_pmf; // ragged array of fixed PMFs + int delay_np_pmf_groups[delay_n_np + 1]; // links to ragged array + int delay_weight[delay_n_p]; + + int delay_types; // number of delay types + int delay_types_p[delay_n]; // whether delay types are parametric + int delay_types_id[delay_n]; // whether delay types are parametric + int delay_types_groups[delay_types + 1]; // index of each delay (parametric or non) + + int delay_id; // id of generation time diff --git a/inst/stan/data/observation_model.stan b/inst/stan/data/observation_model.stan index f048292c7..f655552a0 100644 --- a/inst/stan/data/observation_model.stan +++ b/inst/stan/data/observation_model.stan @@ -1,19 +1,12 @@ int day_of_week[t - seeding_time]; // day of the week indicator (1 - 7) int model_type; // type of model: 0 = poisson otherwise negative binomial - real phi_mean; // Mean and sd of the normal prior for the - real phi_sd; // reporting process + real phi_mean; // Mean and sd of the normal prior for the + real phi_sd; // reporting process int week_effect; // length of week effect - int truncation; // 1/0 indicating if truncation should be adjusted for - real trunc_mean_mean[truncation]; // truncation mean of mean - real trunc_mean_sd[truncation]; // truncation sd of mean - real trunc_sd_mean[truncation]; // truncation mean of sd - real trunc_sd_sd[truncation]; // truncation sd of sd - int trunc_max[truncation]; // maximum truncation supported - int trunc_fixed[truncation]; // whether the truncation distribution is fixed - int trunc_dist[truncation]; // 0 = lognormal; 1 = gamma int obs_scale; // logical controlling scaling of observations real obs_scale_mean; // mean scaling factor for observations real obs_scale_sd; // standard deviation of observation scaling real obs_weight; // weight given to observation in log density int likelihood; // Should the likelihood be included in the model int return_likelihood; // Should the likehood be returned by the model + int trunc_id; // id of truncation diff --git a/inst/stan/data/rt.stan b/inst/stan/data/rt.stan index 9914b6872..0b00c2af6 100644 --- a/inst/stan/data/rt.stan +++ b/inst/stan/data/rt.stan @@ -8,3 +8,4 @@ int future_fixed; // is underlying future Rt assumed to be fixed int fixed_from; // Reference date for when Rt estimation should be fixed int pop; // Initial susceptible population + int gt_id; // id of generation time diff --git a/inst/stan/data/simulation_delays.stan b/inst/stan/data/simulation_delays.stan index 8bc833886..d2af64f18 100644 --- a/inst/stan/data/simulation_delays.stan +++ b/inst/stan/data/simulation_delays.stan @@ -1,5 +1,18 @@ - int delays; // no. of delay distributions - real delay_mean[n, delays]; // mean of delays - real delay_sd[n, delays]; // sd of delays - int delay_max[delays]; // maximum delay - int delay_dist[delays]; // 0 = lognormal, 1 = gamma + int delay_n; // number of delay distribution distributions + int delay_n_p; // number of parametric delay distributions + int delay_n_np; // number of nonparametric delay distributions + real delay_mean[n, delay_n_p]; // prior mean of mean delay distribution + real delay_sd[n, delay_n_p]; // prior sd of sd of delay distribution + int delay_max[delay_n_p]; // maximum delay distribution + int delay_dist[delay_n_p]; // 0 = lognormal; 1 = gamma + int delay_np_pmf_max; // number of nonparametric pmf elements + vector[delay_np_pmf_max] delay_np_pmf; // ragged array of fixed PMFs + int delay_np_pmf_groups[delay_n_np + 1]; // links to ragged array + int delay_weight[delay_n_p]; + + int delay_types; // number of delay types + int delay_types_p[delay_n]; // whether delay types are parametric + int delay_types_id[delay_n]; // whether delay types are parametric + int delay_types_groups[delay_types + 1]; // index of each delay (parametric or non) + + int delay_id; // id of generation time diff --git a/inst/stan/data/simulation_observation_model.stan b/inst/stan/data/simulation_observation_model.stan index 10ab8b2d4..a42f6529f 100644 --- a/inst/stan/data/simulation_observation_model.stan +++ b/inst/stan/data/simulation_observation_model.stan @@ -5,3 +5,4 @@ real frac_obs[n, obs_scale]; int model_type; real rep_phi[n, model_type]; // overdispersion of the reporting process + int trunc_id; // id of truncation diff --git a/inst/stan/data/simulation_rt.stan b/inst/stan/data/simulation_rt.stan index 9fcfeb06b..93b1206a1 100644 --- a/inst/stan/data/simulation_rt.stan +++ b/inst/stan/data/simulation_rt.stan @@ -1,8 +1,7 @@ real initial_infections[seeding_time ? n : 0, 1]; // initial logged infections real initial_growth[seeding_time > 1 ? n : 0, 1]; //initial growth - real gt_mean[n, 1]; // mean of generation time - real gt_sd[n, 1]; // sd of generation time - int gt_max[1]; // maximum generation time - int gt_dist[1]; // 0 = lognormal; 1 = gamma + matrix[n, t - seeding_time] R; // reproduction number int pop; // susceptible population + + int gt_id; // id of generation time diff --git a/inst/stan/estimate_infections.stan b/inst/stan/estimate_infections.stan index 16b9fa91e..5b24b6b05 100644 --- a/inst/stan/estimate_infections.stan +++ b/inst/stan/estimate_infections.stan @@ -1,6 +1,7 @@ functions { #include functions/convolve.stan #include functions/pmfs.stan +#include functions/delays.stan #include functions/gaussian_process.stan #include functions/rt.stan #include functions/infections.stan @@ -13,7 +14,6 @@ data { #include data/observations.stan #include data/delays.stan #include data/gaussian_process.stan -#include data/generation_time.stan #include data/rt.stan #include data/backcalc.stan #include data/observation_model.stan @@ -30,29 +30,10 @@ transformed data{ real r_logmean = log(r_mean^2 / sqrt(r_sd^2 + r_mean^2)); real r_logsd = sqrt(log(1 + (r_sd^2 / r_mean^2))); - int delay_max_fixed = (n_fixed_delays == 0 ? 0 : - sum(delay_max[fixed_delays]) - num_elements(fixed_delays) + 1); - int delay_max_total = (delays == 0 ? 0 : - sum(delay_max) - num_elements(delay_max) + 1); - vector[gt_fixed[1] ? gt_max[1] : 0] gt_fixed_pmf; - vector[truncation && trunc_fixed[1] ? trunc_max[1] : 0] trunc_fixed_pmf; - vector[delay_max_fixed] delays_fixed_pmf; - - if (gt_fixed[1]) { - gt_fixed_pmf = discretised_pmf(gt_mean_mean[1], gt_sd_mean[1], gt_max[1], gt_dist[1], 1); - } - if (truncation && trunc_fixed[1]) { - trunc_fixed_pmf = discretised_pmf( - trunc_mean_mean[1], trunc_sd_mean[1], trunc_max[1], trunc_dist[1], 0 - ); - } - if (n_fixed_delays) { - delays_fixed_pmf = combine_pmfs( - to_vector([ 1 ]), delay_mean_mean[fixed_delays], - delay_sd_mean[fixed_delays], delay_max[fixed_delays], - delay_dist[fixed_delays], delay_max_fixed, 0, 0 - ); - } + int delay_type_max[delay_types] = get_delay_type_max( + delay_types, delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf_groups + ); } parameters{ @@ -64,17 +45,13 @@ parameters{ vector[estimate_r] log_R; // baseline reproduction number estimate (log) real initial_infections[estimate_r] ; // seed infections real initial_growth[estimate_r && seeding_time > 1 ? 1 : 0]; // seed growth rate - real gt_mean[estimate_r && gt_mean_sd[1] > 0]; // mean of generation time (if uncertain) - real gt_sd[estimate_r && gt_sd_sd[1] > 0]; // sd of generation time (if uncertain) real bp_sd[bp_n > 0 ? 1 : 0]; // standard deviation of breakpoint effect real bp_effects[bp_n]; // Rt breakpoint effects // observation model - real delay_mean[n_uncertain_mean_delays]; // mean of delays - real delay_sd[n_uncertain_sd_delays]; // sd of delays + real delay_mean[delay_n_p]; // mean of delays + real delay_sd[delay_n_p]; // sd of delays simplex[week_effect] day_of_week_simplex;// day of week reporting effect real frac_obs[obs_scale]; // fraction of cases that are ultimately observed - real trunc_mean[truncation && !trunc_fixed[1]]; // mean of truncation - real trunc_sd[truncation && !trunc_fixed[1]]; // sd of truncation real rep_phi[model_type]; // overdispersion of the reporting process } @@ -84,16 +61,18 @@ transformed parameters { vector[t] infections; // latent infections vector[ot_h] reports; // estimated reported cases vector[ot] obs_reports; // observed estimated reported cases + vector[delay_type_max[gt_id]] gt_rev_pmf; // GP in noise - spectral densities if (!fixed) { noise = update_gp(PHI, M, L, alpha[1], rho[1], eta, gp_type); } // Estimate latent infections if (estimate_r) { - // via Rt - vector[gt_max[1]] gt_rev_pmf; - gt_rev_pmf = combine_pmfs( - gt_fixed_pmf, gt_mean, gt_sd, gt_max, gt_dist, gt_max[1], 1, 1 + gt_rev_pmf = get_delay_rev_pmf( + gt_id, delay_type_max[gt_id], delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf, + delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + 1, 1, 0 ); R = update_Rt( ot_h, log_R[estimate_r], noise, breakpoints, bp_effects, stationary @@ -109,15 +88,19 @@ transformed parameters { ); } // convolve from latent infections to mean of observations - { - vector[delay_max_total] delay_rev_pmf; - delay_rev_pmf = combine_pmfs( - delays_fixed_pmf, delay_mean, delay_sd, delay_max, delay_dist, delay_max_total, 0, 1 + if (delay_id) { + vector[delay_type_max[delay_id]] delay_rev_pmf = get_delay_rev_pmf( + delay_id, delay_type_max[delay_id], delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf, + delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + 0, 1, 0 ); reports = convolve_to_report(infections, delay_rev_pmf, seeding_time); + } else { + reports = infections[(seeding_time + 1):t]; } - // weekly reporting effect - if (week_effect > 1) { + // weekly reporting effect + if (week_effect > 1) { reports = day_of_week_effect(reports, day_of_week, day_of_week_simplex); } // scaling of reported cases by fraction observed @@ -125,12 +108,14 @@ transformed parameters { reports = scale_obs(reports, frac_obs[1]); } // truncate near time cases to observed reports - if (truncation) { - vector[trunc_max[1]] trunc_rev_cmf; - trunc_rev_cmf = reverse_mf(cumulative_sum(combine_pmfs( - trunc_fixed_pmf, trunc_mean, trunc_sd, trunc_max, trunc_dist, trunc_max[1], 0, 0 - ))); - obs_reports = truncate(reports[1:ot], trunc_rev_cmf, 0); + if (trunc_id) { + vector[delay_type_max[trunc_id]] trunc_rev_cmf = get_delay_rev_pmf( + trunc_id, delay_type_max[trunc_id], delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf, + delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + 0, 1, 1 + ); + obs_reports = truncate(reports[1:ot], trunc_rev_cmf, 0); } else { obs_reports = reports[1:ot]; } @@ -145,17 +130,9 @@ model { } // penalised priors for delay distributions delays_lp( - delay_mean, delay_mean_mean[uncertain_mean_delays], - delay_mean_sd[uncertain_mean_delays], - delay_sd, delay_sd_mean[uncertain_sd_delays], - delay_sd_sd[uncertain_sd_delays], delay_dist[uncertain_mean_delays], t - ); - // priors for truncation - delays_lp( - trunc_mean, trunc_sd, - trunc_mean_mean, trunc_mean_sd, - trunc_sd_mean, trunc_sd_sd, - trunc_dist, 1 + delay_mean, delay_mean_mean, + delay_mean_sd, delay_sd, delay_sd_mean, delay_sd_sd, + delay_dist, delay_weight ); if (estimate_r) { // priors on Rt @@ -163,10 +140,6 @@ model { log_R, initial_infections, initial_growth, bp_effects, bp_sd, bp_n, seeding_time, r_logmean, r_logsd, prior_infections, prior_growth ); - // penalised_prior on generation interval - delays_lp( - gt_mean, gt_mean_mean, gt_mean_sd, gt_sd, gt_sd_mean, gt_sd_sd, gt_dist, gt_weight - ); } // prior observation scaling if (obs_scale) { @@ -184,31 +157,34 @@ generated quantities { int imputed_reports[ot_h]; vector[estimate_r > 0 ? 0: ot_h] gen_R; real r[ot_h]; + real gt_mean; + real gt_var; vector[return_likelihood ? ot : 0] log_lik; if (estimate_r){ // estimate growth from estimated Rt - real set_gt_mean = (gt_mean_sd[1] > 0 ? gt_mean[1] : gt_mean_mean[1]); - real set_gt_sd = (gt_sd_sd [1]> 0 ? gt_sd[1] : gt_sd_mean[1]); - r = R_to_growth(R, set_gt_mean, set_gt_sd); + gt_mean = rev_pmf_mean(gt_rev_pmf, 1); + gt_var = rev_pmf_var(gt_rev_pmf, 1, gt_mean); + r = R_to_growth(R, gt_mean, gt_var); } else { // sample generation time - real gt_mean_sample[1]; - real gt_sd_sample[1]; - vector[gt_max[1]] gt_rev_pmf; - - gt_mean_sample[1] = (gt_mean_sd[1] > 0 ? normal_rng(gt_mean_mean[1], gt_mean_sd[1]) : gt_mean_mean[1]); - gt_sd_sample[1] = (gt_sd_sd[1] > 0 ? normal_rng(gt_sd_mean[1], gt_sd_sd[1]) : gt_sd_mean[1]); - gt_rev_pmf = combine_pmfs( - gt_fixed_pmf, gt_mean_sample, gt_sd_sample, gt_max, gt_dist, gt_max[1], - 1, 1 + real delay_mean_sample[delay_n_p] = + normal_rng(delay_mean_mean, delay_mean_sd); + real delay_sd_sample[delay_n_p] = + normal_rng(delay_sd_mean, delay_sd_sd); + vector[delay_type_max[gt_id]] sampled_gt_rev_pmf = get_delay_rev_pmf( + gt_id, delay_type_max[gt_id], delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf, + delay_np_pmf_groups, delay_mean_sample, delay_sd_sample, + delay_dist, 1, 1, 0 ); - + gt_mean = rev_pmf_mean(sampled_gt_rev_pmf, 1); + gt_var = rev_pmf_var(sampled_gt_rev_pmf, 1, gt_mean); // calculate Rt using infections and generation time gen_R = calculate_Rt( - infections, seeding_time, gt_rev_pmf, rt_half_window + infections, seeding_time, sampled_gt_rev_pmf, rt_half_window ); // estimate growth from calculated Rt - r = R_to_growth(gen_R, gt_mean_sample[1], gt_sd_sample[1]); + r = R_to_growth(gen_R, gt_mean, gt_var); } // simulate reported cases imputed_reports = report_rng(reports, rep_phi, model_type); diff --git a/inst/stan/estimate_secondary.stan b/inst/stan/estimate_secondary.stan index 192cee0ad..0b4770a3e 100644 --- a/inst/stan/estimate_secondary.stan +++ b/inst/stan/estimate_secondary.stan @@ -1,6 +1,7 @@ functions { #include functions/convolve.stan #include functions/pmfs.stan +#include functions/delays.stan #include functions/observation_model.stan #include functions/secondary.stan } @@ -15,36 +16,19 @@ data { #include data/observation_model.stan } -transformed data { - int delay_max_fixed = (n_fixed_delays == 0 ? 0 : - sum(delay_max[fixed_delays]) - num_elements(fixed_delays) + 1); - int delay_max_total = (delays == 0 ? 0 : - sum(delay_max) - num_elements(delay_max) + 1); - vector[truncation && trunc_fixed[1] ? trunc_max[1] : 0] trunc_fixed_pmf; - vector[delay_max_fixed] delays_fixed_pmf; - - if (truncation && trunc_fixed[1]) { - trunc_fixed_pmf = discretised_pmf( - trunc_mean_mean[1], trunc_sd_mean[1], trunc_max[1], trunc_dist[1], 0 - ); - } - if (n_fixed_delays) { - delays_fixed_pmf = combine_pmfs( - to_vector([ 1 ]), delay_mean_mean[fixed_delays], - delay_sd_mean[fixed_delays], delay_max[fixed_delays], - delay_dist[fixed_delays], delay_max_fixed, 0, 0 - ); - } +transformed data{ + int delay_type_max[delay_types] = get_delay_type_max( + delay_types, delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf_groups + ); } parameters{ // observation model - real delay_mean[n_uncertain_mean_delays]; - real delay_sd[n_uncertain_sd_delays]; // sd of delays + real delay_mean[delay_n_p]; + real delay_sd[delay_n_p]; // sd of delays simplex[week_effect] day_of_week_simplex; // day of week reporting effect real frac_obs[obs_scale]; // fraction of cases that are ultimately observed - real trunc_mean[truncation && !trunc_fixed[1]]; // mean of truncation - real trunc_sd[truncation && !trunc_fixed[1]]; // sd of truncation real rep_phi[model_type]; // overdispersion of the reporting process } @@ -53,10 +37,17 @@ transformed parameters { // calculate secondary reports from primary { - vector[delay_max_total] delay_rev_pmf; - delay_rev_pmf = combine_pmfs( - delays_fixed_pmf, delay_mean, delay_sd, delay_max, delay_dist, delay_max_total, 0, 1 - ); + vector[delay_type_max[delay_id]] delay_rev_pmf; + if (delay_id) { + delay_rev_pmf = get_delay_rev_pmf( + delay_id, delay_type_max[delay_id], delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf, + delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + 0, 1, 0 + ); + } else { + delay_rev_pmf = to_vector({ 1 }); + } secondary = calculate_secondary( primary, obs, frac_obs, delay_rev_pmf, cumulative, historic, primary_hist_additive, current, primary_current_additive, t @@ -68,27 +59,24 @@ transformed parameters { secondary = day_of_week_effect(secondary, day_of_week, day_of_week_simplex); } // truncate near time cases to observed reports - if (truncation) { - vector[trunc_max[1]] trunc_rev_cmf; - trunc_rev_cmf = reverse_mf(cumulative_sum(combine_pmfs( - trunc_fixed_pmf, trunc_mean, trunc_sd, trunc_max, trunc_dist, trunc_max[1], 0, 0 - ))); - secondary = truncate(secondary, trunc_rev_cmf, 0); + if (trunc_id) { + vector[delay_type_max[trunc_id]] trunc_rev_cmf = get_delay_rev_pmf( + trunc_id, delay_type_max[trunc_id], delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf, + delay_np_pmf_groups, delay_mean, delay_sd, delay_dist, + 0, 1, 1 + ); + secondary = truncate(secondary, trunc_rev_cmf, 0); } } model { // penalised priors for delay distributions delays_lp( - delay_mean, delay_mean_mean[uncertain_mean_delays], - delay_mean_sd[uncertain_mean_delays], - delay_sd, delay_sd_mean[uncertain_sd_delays], - delay_sd_sd[uncertain_sd_delays], delay_dist[uncertain_mean_delays], t + delay_mean, delay_mean_mean, delay_mean_sd, delay_sd, delay_sd_mean, + delay_sd_sd, delay_dist, delay_weight ); - // priors for truncation - delays_lp(trunc_mean, trunc_sd, trunc_mean_mean, trunc_mean_sd, - trunc_sd_mean, trunc_sd_sd, trunc_dist, 1); // prior primary report scaling if (obs_scale) { frac_obs[1] ~ normal(obs_scale_mean, obs_scale_sd) T[0, 1]; diff --git a/inst/stan/estimate_truncation.stan b/inst/stan/estimate_truncation.stan index 47c6ee333..226b7fd81 100644 --- a/inst/stan/estimate_truncation.stan +++ b/inst/stan/estimate_truncation.stan @@ -21,7 +21,7 @@ transformed parameters{ matrix[trunc_max, obs_sets - 1] trunc_obs; real sqrt_phi = 1 / sqrt(phi); vector[trunc_max] rev_cmf = reverse_mf(cumulative_sum( - discretised_pmf(logmean, logsd, trunc_max, trunc_dist, 0) + discretised_pmf(logmean, logsd, trunc_max, trunc_dist) )); { vector[t] last_obs; diff --git a/inst/stan/functions/convolve.stan b/inst/stan/functions/convolve.stan index 7da2cbeb0..9f9a719be 100644 --- a/inst/stan/functions/convolve.stan +++ b/inst/stan/functions/convolve.stan @@ -1,29 +1,18 @@ -// convolve two vectors -// length of x -// produces a convolution across the lenght specified -vector convolve(vector x, vector y, int len) { - int xlen = num_elements(x); - int ylen = num_elements(y); - vector[len] convolution = rep_vector(0, len); - for (i in 1:xlen) { - for (j in 1:(min(len - i + 1, ylen))) { - convolution[i + j - 1] += x[i] * y[j]; - } - } - return(convolution); -} - // convolve two vectors as a backwards dot product -// y vector shoud be reversed +// y vector should be reversed // limited to the length of x and backwards looking for x indexes -// designed for use convolve a case vector and a delay pmf -vector convolve_dot_product(vector x, vector y, int len) { +vector convolve_with_rev_pmf(vector x, vector y, int len) { + int xlen = num_elements(x); int ylen = num_elements(y); vector[len] z; + if (xlen + ylen <= len) { + reject("convolve_with_rev_pmf: len is longer then x and y combined"); + } for (s in 1:len) { - z[s] = dot_product( - x[max(1, (s - ylen + 1)):s], tail(y, min(ylen, s)) - ); + z[s] = dot_product( + x[max(1, (s - ylen + 1)):min(s, xlen)], + y[max(1, ylen - s + 1):min(ylen, ylen + xlen - s)] + ); } return(z); } @@ -38,7 +27,7 @@ vector convolve_to_report(vector infections, vector[t] unobs_reports = infections; int delays = num_elements(delay_rev_pmf); if (delays) { - unobs_reports = convolve_dot_product(unobs_reports, delay_rev_pmf, t); + unobs_reports = convolve_with_rev_pmf(unobs_reports, delay_rev_pmf, t); reports = unobs_reports[(seeding_time + 1):t]; } else { reports = infections[(seeding_time + 1):t]; diff --git a/inst/stan/functions/delays.stan b/inst/stan/functions/delays.stan new file mode 100644 index 000000000..69a88ba17 --- /dev/null +++ b/inst/stan/functions/delays.stan @@ -0,0 +1,104 @@ +int[] get_delay_type_max( + int delay_types, int[] delay_types_p, int[] delay_types_id, + int[] delay_types_groups, int[] delay_max, int[] delay_np_pmf_groups +) { + int ret[delay_types]; + for (i in 1:delay_types) { + ret[i] = 1; + for (j in delay_types_groups[i]:(delay_types_groups[i + 1] - 1)) { + if (delay_types_p[j]) { // parametric + ret[i] += delay_max[delay_types_id[j]] - 1; + } else { // nonparametric + ret[i] += delay_np_pmf_groups[delay_types_id[j] + 1] - + delay_np_pmf_groups[delay_types_id[j]] - 1; + } + } + } + return ret; +} + +vector get_delay_rev_pmf( + int delay_id, int len, int[] delay_types_p, int[] delay_types_id, + int[] delay_types_groups, int[] delay_max, + vector delay_np_pmf, int[] delay_np_pmf_groups, + real[] delay_mean, real[] delay_sigma, int[] delay_dist, + int left_truncate, int reverse_pmf, int cumulative +) { + // loop over delays + vector[len] pmf = rep_vector(0, len); + int current_len = 1; + int new_len; + for (i in delay_types_groups[delay_id]:(delay_types_groups[delay_id + 1] - 1)) { + if (delay_types_p[i]) { // parametric + vector[delay_max[delay_types_id[i]]] new_variable_pmf = + discretised_pmf( + delay_mean[delay_types_id[i]], + delay_sigma[delay_types_id[i]], + delay_max[delay_types_id[i]], + delay_dist[delay_types_id[i]] + ); + new_len = current_len + delay_max[delay_types_id[i]] - 1; + if (current_len == 1) { // first delay + pmf[1:new_len] = new_variable_pmf; + } else { // subsequent delay to be convolved + pmf[1:new_len] = convolve_with_rev_pmf( + pmf[1:current_len], reverse_mf(new_variable_pmf), new_len + ); + } + } else { // nonparametric + int start = delay_np_pmf_groups[delay_types_id[i]]; + int end = delay_np_pmf_groups[delay_types_id[i] + 1] - 1; + new_len = current_len + end - start; + if (current_len == 1) { // first delay + pmf[1:new_len] = delay_np_pmf[start:end]; + } else { // subsequent delay to be convolved + pmf[1:new_len] = convolve_with_rev_pmf( + pmf[1:current_len], reverse_mf(delay_np_pmf[start:end]), new_len + ); + } + } + current_len = new_len; + } + if (left_truncate) { + pmf = append_row( + rep_vector(0, left_truncate), + pmf[(left_truncate + 1):len] / sum(pmf[(left_truncate + 1):len]) + ); + } + if (cumulative) { + pmf = cumulative_sum(pmf); + } + if (reverse_pmf) { + pmf = reverse_mf(pmf); + } + return pmf; +} + + +void delays_lp(real[] delay_mean, real[] delay_mean_mean, real[] delay_mean_sd, + real[] delay_sd, real[] delay_sd_mean, real[] delay_sd_sd, + int[] delay_dist, int[] weight) { + int mean_delays = num_elements(delay_mean); + int sd_delays = num_elements(delay_sd); + if (mean_delays) { + for (s in 1:mean_delays) { + if (delay_mean_sd[s] > 0) { + // uncertain mean + target += normal_lpdf(delay_mean[s] | delay_mean_mean[s], delay_mean_sd[s]) * weight[s]; + // if a distribution with postive support only truncate the prior + if (delay_dist[s]) { + target += -normal_lccdf(0 | delay_mean_mean[s], delay_mean_sd[s]) * weight[s]; + } + } + } + } + if (sd_delays) { + for (s in 1:sd_delays) { + if (delay_sd_sd[s] > 0) { + // uncertain sd + target += normal_lpdf(delay_sd[s] | delay_sd_mean[s], delay_sd_sd[s]) * weight[s]; + target += -normal_lccdf(0 | delay_sd_mean[s], delay_sd_sd[s]) * weight[s]; + } + } + } +} diff --git a/inst/stan/functions/generated_quantities.stan b/inst/stan/functions/generated_quantities.stan index df78996bb..e2bc1d064 100644 --- a/inst/stan/functions/generated_quantities.stan +++ b/inst/stan/functions/generated_quantities.stan @@ -29,11 +29,11 @@ vector calculate_Rt(vector infections, int seeding_time, return(sR); } // Convert an estimate of Rt to growth -real[] R_to_growth(vector R, real gt_mean, real gt_sd) { +real[] R_to_growth(vector R, real gt_mean, real gt_var) { int t = num_elements(R); real r[t]; - if (gt_sd > 0) { - real k = pow(gt_sd / gt_mean, 2); + if (gt_var > 0) { + real k = gt_var / pow(gt_mean, 2); for (s in 1:t) { r[s] = (pow(R[s], k) - 1) / (k * gt_mean); } diff --git a/inst/stan/functions/infections.stan b/inst/stan/functions/infections.stan index 347522fb0..015d07168 100644 --- a/inst/stan/functions/infections.stan +++ b/inst/stan/functions/infections.stan @@ -6,11 +6,11 @@ real update_infectiousness(vector infections, vector gt_rev_pmf, // work out where to start the convolution of past infections with the // generation time distribution: (current_time - maximal generation time) if // that is >= 1, otherwise 1 - int inf_start = max(1, (index + seeding_time - gt_max)); - // work out where to end the convolution: (current_time - 1) - int inf_end = (index + seeding_time - 1); + int inf_start = max(1, (index + seeding_time - gt_max + 1)); + // work out where to end the convolution: current_time + int inf_end = (index + seeding_time); // number of indices of the generation time to sum over (inf_end - inf_start + 1) - int pmf_accessed = min(gt_max, index + seeding_time - 1); + int pmf_accessed = min(gt_max, index + seeding_time); // calculate the elements of the convolution real new_inf = dot_product( infections[inf_start:inf_end], tail(gt_rev_pmf, pmf_accessed) diff --git a/inst/stan/functions/pmfs.stan b/inst/stan/functions/pmfs.stan index e5511b597..9b077b28e 100644 --- a/inst/stan/functions/pmfs.stan +++ b/inst/stan/functions/pmfs.stan @@ -4,28 +4,28 @@ // Adapted from https://github.com/epiforecasts/epinowcast // @author Sam Abbott // @author Adrian Lison -vector discretised_pmf(real mu, real sigma, int n, int dist, - int left_truncate) { +vector discretised_pmf(real mu, real sigma, int n, int dist) { vector[n] pmf; if (sigma > 0) { - vector[n + 1] upper_cdf; + vector[n] upper_cdf; if (dist == 0) { - for (i in 1:(n + 1)) { - upper_cdf[i] = lognormal_cdf(i - 1 + left_truncate, mu, sigma); + for (i in 1:n) { + upper_cdf[i] = lognormal_cdf(i, mu, sigma); } } else if (dist == 1) { real alpha = mu^2 / sigma^2; real beta = mu / sigma^2; - for (i in 1:(n + 1)) { - upper_cdf[i] = gamma_cdf(i - 1 + left_truncate, alpha, beta); + for (i in 1:n) { + upper_cdf[i] = gamma_cdf(i, alpha, beta); } } else { reject("Unknown distribution function provided."); } // discretise - pmf = upper_cdf[2:(n + 1)] - upper_cdf[1:n]; + pmf[1] = upper_cdf[1]; + pmf[2:n] = upper_cdf[2:n] - upper_cdf[1:(n-1)]; // normalize - pmf = pmf / (upper_cdf[n + 1] - upper_cdf[1]); + pmf = pmf / upper_cdf[n]; } else { // delta function pmf = rep_vector(0, n); @@ -44,51 +44,25 @@ vector reverse_mf(vector pmf) { return rev_pmf; } -// combined fixed/variable pmfs -vector combine_pmfs(vector fixed_pmf, real[] pmf_mu, real[] pmf_sigma, int[] pmf_n, int[] dist, int len, int left_truncate, int reverse_pmf) { - int n_fixed = num_elements(fixed_pmf); - int n_variable = num_elements(pmf_mu); - vector[len] pmf = rep_vector(0, len); - if (n_fixed > 0) { - pmf[1:n_fixed] = fixed_pmf; - } else if (n_variable > 0) { - pmf[1] = 1; +vector rev_seq(int base, int len) { + vector[len] seq; + for (i in 1:len) { + seq[i] = len + base - i; } - for (s in 1:n_variable) { - vector[pmf_n[s]] variable_pmf; - variable_pmf = discretised_pmf(pmf_mu[s], pmf_sigma[s], pmf_n[s], dist[s], left_truncate); - pmf = convolve(pmf, variable_pmf, len); - } - if (reverse_pmf) { - pmf = reverse_mf(pmf); - } - return(pmf); + return(seq); } -void delays_lp(real[] delay_mean, real[] delay_mean_mean, real[] delay_mean_sd, - real[] delay_sd, real[] delay_sd_mean, real[] delay_sd_sd, - int[] delay_dist, int weight) { - int mean_delays = num_elements(delay_mean); - int sd_delays = num_elements(delay_sd); - if (mean_delays) { - for (s in 1:mean_delays) { - if (delay_mean_sd[s] > 0) { - // uncertain mean - target += normal_lpdf(delay_mean[s] | delay_mean_mean[s], delay_mean_sd[s]) * weight; - // if a distribution with postive support only truncate the prior - if (delay_dist[s]) { - target += -normal_lccdf(0 | delay_mean_mean[s], delay_mean_sd[s]) * weight; - } - } - } - } - if (sd_delays) { - for (s in 1:sd_delays) { - if (delay_sd_sd[s] > 0) { - // uncertain sd - target += normal_lpdf(delay_sd[s] | delay_sd_mean[s], delay_sd_sd[s]) * weight; - target += -normal_lccdf(0 | delay_sd_mean[s], delay_sd_sd[s]) * weight; - } - } +real rev_pmf_mean(vector rev_pmf, int base) { + int len = num_elements(rev_pmf); + vector[len] rev_pmf_seq = rev_seq(base, len); + return(dot_product(rev_pmf_seq, rev_pmf)); +} + +real rev_pmf_var(vector rev_pmf, int base, real mean) { + int len = num_elements(rev_pmf); + vector[len] rev_pmf_seq = rev_seq(base, len); + for (i in 1:len) { + rev_pmf_seq[i] = pow(rev_pmf_seq[i], 2); } + return(dot_product(rev_pmf_seq, rev_pmf) - pow(mean, 2)); } diff --git a/inst/stan/simulate_infections.stan b/inst/stan/simulate_infections.stan index 55b2faa94..a6befed21 100644 --- a/inst/stan/simulate_infections.stan +++ b/inst/stan/simulate_infections.stan @@ -1,6 +1,7 @@ functions { #include functions/convolve.stan #include functions/pmfs.stan +#include functions/delays.stan #include functions/gaussian_process.stan #include functions/rt.stan #include functions/infections.stan @@ -23,7 +24,10 @@ data { } transformed data { - int delay_max_total = sum(delay_max) - num_elements(delay_max) + 1; + int delay_type_max[delay_types] = get_delay_type_max( + delay_types, delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf_groups + ); } generated quantities { @@ -34,25 +38,34 @@ generated quantities { real r[n, t - seeding_time]; for (i in 1:n) { // generate infections from Rt trace - vector[gt_max[1]] gt_rev_pmf; - vector[delay_max_total] delay_rev_pmf; - - gt_rev_pmf = reverse_mf(discretised_pmf( - gt_mean[i, 1], gt_sd[i, 1], gt_max[1], gt_dist[1], 1 - )); - delay_rev_pmf = combine_pmfs( - to_vector([ 1 ]), delay_mean[i], delay_sd[i], delay_max, delay_dist, - delay_max_total, 0, 1 + vector[delay_type_max[gt_id]] gt_rev_pmf; + gt_rev_pmf = get_delay_rev_pmf( + gt_id, delay_type_max[gt_id], delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf, + delay_np_pmf_groups, delay_mean[i], delay_sd[i], delay_dist, + 1, 1, 0 ); infections[i] = to_row_vector(generate_infections( to_vector(R[i]), seeding_time, gt_rev_pmf, initial_infections[i], initial_growth[i], pop, future_time )); - // convolve from latent infections to mean of observations - reports[i] = to_row_vector(convolve_to_report( - to_vector(infections[i]), delay_rev_pmf, seeding_time) - ); + + if (delay_id) { + vector[delay_type_max[delay_id]] delay_rev_pmf = get_delay_rev_pmf( + delay_id, delay_type_max[delay_id], delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf, + delay_np_pmf_groups, delay_mean[i], delay_sd[i], delay_dist, + 0, 1, 0 + ); + // convolve from latent infections to mean of observations + reports[i] = to_row_vector(convolve_to_report( + to_vector(infections[i]), delay_rev_pmf, seeding_time) + ); + } else { + reports[i] = to_row_vector(infections[(seeding_time + 1):t]); + } + // weekly reporting effect if (week_effect > 1) { reports[i] = to_row_vector( @@ -63,10 +76,14 @@ generated quantities { if (obs_scale) { reports[i] = to_row_vector(scale_obs(to_vector(reports[i]), frac_obs[i, 1])); } - // simulate reported cases - imputed_reports[i] = report_rng( + // simulate reported cases + imputed_reports[i] = report_rng( to_vector(reports[i]), rep_phi[i], model_type ); - r[i] = R_to_growth(to_vector(R[i]), gt_mean[i, 1], gt_sd[i, 1]); + { + real gt_mean = rev_pmf_mean(gt_rev_pmf, 1); + real gt_var = rev_pmf_var(gt_rev_pmf, 1, gt_mean); + r[i] = R_to_growth(to_vector(R[i]), gt_mean, gt_var); + } } } diff --git a/inst/stan/simulate_secondary.stan b/inst/stan/simulate_secondary.stan index 1f0a46a6d..c27de4d3f 100644 --- a/inst/stan/simulate_secondary.stan +++ b/inst/stan/simulate_secondary.stan @@ -1,6 +1,7 @@ functions { #include functions/convolve.stan #include functions/pmfs.stan +#include functions/delays.stan #include functions/observation_model.stan #include functions/secondary.stan } @@ -22,17 +23,21 @@ data { } transformed data { - int delay_max_total = sum(delay_max) - num_elements(delay_max) + 1; + int delay_type_max[delay_types] = get_delay_type_max( + delay_types, delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf_groups + ); } generated quantities { int sim_secondary[n, all_dates ? t : h]; for (i in 1:n) { vector[t] secondary; - vector[delay_max_total] delay_rev_pmf; - delay_rev_pmf = combine_pmfs( - to_vector([ 1 ]), delay_mean[i], delay_sd[i], delay_max, delay_dist, - delay_max_total, 0, 1 + vector[delay_type_max[delay_id]] delay_rev_pmf = get_delay_rev_pmf( + delay_id, delay_type_max[delay_id], delay_types_p, delay_types_id, + delay_types_groups, delay_max, delay_np_pmf, + delay_np_pmf_groups, delay_mean[i], delay_sd[i], delay_dist, + 0, 1, 0 ); // calculate secondary reports from primary diff --git a/man/bootstrapped_dist_fit.Rd b/man/bootstrapped_dist_fit.Rd index 6938f2cba..840b73e35 100644 --- a/man/bootstrapped_dist_fit.Rd +++ b/man/bootstrapped_dist_fit.Rd @@ -38,7 +38,7 @@ data. Maximum delay to allow (added to output but does impact fitting).} printed.} } \value{ -A list summarising the bootstrapped distribution +A \code{dist_spec} object summarising the bootstrapped distribution } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} diff --git a/man/c.dist_spec.Rd b/man/c.dist_spec.Rd new file mode 100644 index 000000000..c26c47dfa --- /dev/null +++ b/man/c.dist_spec.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist.R +\name{c.dist_spec} +\alias{c.dist_spec} +\title{Combines multiple delay distributions for further processing} +\usage{ +\method{c}{dist_spec}(...) +} +\arguments{ +\item{...}{The delay distributions (from calls to \code{\link[=dist_spec]{dist_spec()}}) to combine} +} +\value{ +Combined delay distributions (with class \code{\link[=dist_spec]{dist_spec()}}`) +} +\description{ +This combines the parameters so that they can be fed as multiple delay +distributions to \code{\link[=epinow]{epinow()}} or \code{\link[=estimate_infections]{estimate_infections()}}. +} +\author{ +Sebastian Funk +} diff --git a/man/create_stan_data.Rd b/man/create_stan_data.Rd index d92916691..361636e29 100644 --- a/man/create_stan_data.Rd +++ b/man/create_stan_data.Rd @@ -6,24 +6,21 @@ \usage{ create_stan_data( reported_cases, - generation_time, + seeding_time, rt, gp, obs, - delays, horizon, backcalc, - shifted_cases, - truncation + shifted_cases ) } \arguments{ \item{reported_cases}{A data frame of confirmed cases (confirm) by date (date). confirm must be integer and date must be in date format.} -\item{generation_time}{A call to \code{generation_time_opts()} defining the -generation time distribution used. For backwards compatibility a list of -summary parameters can also be passed.} +\item{seeding_time}{Integer; seeding time, usually obtained using +\code{get_seeding_time()}} \item{rt}{A list of options as generated by \code{rt_opts()} defining Rt estimation. Defaults to \code{rt_opts()}. Set to \code{NULL} to switch to using back @@ -36,21 +33,12 @@ Gaussian process.} \item{obs}{A list of options as generated by \code{obs_opts()} defining the observation model. Defaults to \code{obs_opts()}.} -\item{delays}{A call to \code{delay_opts()} defining delay distributions and -options. See the documentation of \code{delay_opts()} and the examples below for -details.} - \item{horizon}{Numeric, forecast horizon.} \item{backcalc}{A list of options as generated by \code{backcalc_opts()} to define the back calculation. Defaults to \code{backcalc_opts()}.} \item{shifted_cases}{A dataframe of delay shifted cases} - -\item{truncation}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A list of options as -generated by \code{trunc_opts()} defining the truncation of observed data. -Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for an approach to -estimating truncation from data.} } \value{ A list of stan data diff --git a/man/create_stan_delays.Rd b/man/create_stan_delays.Rd new file mode 100644 index 000000000..aa16a0bb1 --- /dev/null +++ b/man/create_stan_delays.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create.R +\name{create_stan_delays} +\alias{create_stan_delays} +\title{Create delay variables for stan} +\usage{ +create_stan_delays(..., ot) +} +\arguments{ +\item{...}{Named delay distributions specified using \code{dist_spec()}. +The names are assigned to IDs} + +\item{ot}{Integer, number of observations (needed if weighing any priors) +with the number of observations} +} +\value{ +A list of variables as expected by the stan model +} +\description{ +Create delay variables for stan +} +\author{ +Sebastian Funk +} diff --git a/man/delay_opts.Rd b/man/delay_opts.Rd index 02ab8dbfa..114ab92f1 100644 --- a/man/delay_opts.Rd +++ b/man/delay_opts.Rd @@ -4,20 +4,11 @@ \alias{delay_opts} \title{Delay Distribution Options} \usage{ -delay_opts(..., fixed = FALSE) +delay_opts(dist = dist_spec()) } \arguments{ -\item{...}{A list of lists specifying distributions of reporting delays. -Each list is passed to \code{\link[=dist_spec]{dist_spec()}} and so should contain parameters linked -to the functions arguments. If a list is not given then it is assumed that -no delay is present. If multiple lists are given then they are assumed to -be independent.} - -\item{fixed}{Logical, defaults to \code{FALSE}. Should all reporting delays be -treated as coming from fixed (vs uncertain) distributions. Making this -simplification drastically reduces compute requirements. Setting this here -overrides any of the constituent delay distributions being set to be fixed -or not.} +\item{dist}{A delay distribution or series of delay distributions generated +using \code{\link[=dist_spec]{dist_spec()}}. Default is an empty call to \code{\link[=dist_spec]{dist_spec()}}, i.e. no delay} } \value{ A list summarising the input delay distributions. @@ -32,17 +23,15 @@ functions. delay_opts() # A single delay that has uncertainty -delay <- list(mean = 1, mean_sd = 0.2, sd = 0.5, sd_sd = 0.1, max = 15) +delay <- dist_spec(mean = 1, mean_sd = 0.2, sd = 0.5, sd_sd = 0.1, max = 15) delay_opts(delay) -# A single delay where we override the uncertainty assumption -delay_opts(delay, fixed = TRUE) - -# A delay where uncertainty is implict -delay_opts(list(mean = 1, mean_sd = 0, sd = 0.5, sd_sd = 0, max = 15)) +# A single delay without uncertainty +delay <- dist_spec(mean = 1, sd = 0.5, max = 15) +delay_opts(delay) -# Multiple delays -delay_opts(delay, delay) +# Multiple delays (in this case twice the same) +delay_opts(delay + delay) } \seealso{ convert_to_logmean convert_to_logsd bootstrapped_dist_fit dist_spec diff --git a/man/dist_skel.Rd b/man/dist_skel.Rd index 958705908..a3e7d84f8 100644 --- a/man/dist_skel.Rd +++ b/man/dist_skel.Rd @@ -4,7 +4,15 @@ \alias{dist_skel} \title{Distribution Skeleton} \usage{ -dist_skel(n, dist = FALSE, cum = TRUE, model, params, max_value = 120) +dist_skel( + n, + dist = FALSE, + cum = TRUE, + model, + discrete = FALSE, + params, + max_value = 120 +) } \arguments{ \item{n}{Numeric vector, number of samples to take (or days for the @@ -17,7 +25,15 @@ returned rather than a number of samples.} distribution be cumulative.} \item{model}{Character string, defining the model to be used. Supported -options are exponential ("exp"), gamma ("gamma"), and log normal ("lognorm")} +options are exponential ("exp"), gamma ("gamma"), and log normal +("lognormal")} + +\item{discrete}{Logical, defaults to \code{FALSE}. Should the probability +distribution be discretised. In this case each entry of the probability +mass function corresponds to the 1-length interval ending at the entry, +i.e. the probability mass function is a vector where the first entry +corresponds to the integral over the (0,1] interval of the continuous +distribution, the second entry corresponds to the (1,2] interval etc.} \item{params}{A list of parameters values (by name) required for each model. For the exponential model this is a rate parameter and for the gamma model @@ -52,36 +68,38 @@ dist_skel(1:10, ## Gamma model # sample -dist_skel(10, model = "gamma", params = list(alpha = 1, beta = 2)) +dist_skel(10, model = "gamma", params = list(shape = 1, scale = 2)) # cumulative prob density dist_skel(0:10, model = "gamma", dist = TRUE, - params = list(alpha = 1, beta = 2) + params = list(shape = 1, scale = 2) ) # probability density dist_skel(0:10, model = "gamma", dist = TRUE, - cum = FALSE, params = list(alpha = 2, beta = 2) + cum = FALSE, params = list(shape = 2, scale = 2) ) ## Log normal model # sample -dist_skel(10, model = "lognorm", params = list(mean = log(5), sd = log(2))) +dist_skel(10, model = "lognormal", params = list(mean = log(5), sd = log(2))) # cumulative prob density dist_skel(0:10, - model = "lognorm", dist = TRUE, + model = "lognormal", dist = TRUE, params = list(mean = log(5), sd = log(2)) ) # probability density dist_skel(0:10, - model = "lognorm", dist = TRUE, cum = FALSE, + model = "lognormal", dist = TRUE, cum = FALSE, params = list(mean = log(5), sd = log(2)) ) } \author{ Sam Abbott + +Sebastian Funk } diff --git a/man/dist_spec.Rd b/man/dist_spec.Rd index 43e67e1f4..707f54d97 100644 --- a/man/dist_spec.Rd +++ b/man/dist_spec.Rd @@ -9,9 +9,11 @@ dist_spec( sd = 0, mean_sd = 0, sd_sd = 0, - dist = c("lognormal", "gamma"), - max = NULL, - fixed = FALSE + distribution = c("lognormal", "gamma"), + max, + pmf = numeric(0), + fixed = FALSE, + prior_weight = 0L ) } \arguments{ @@ -30,7 +32,7 @@ prior.} \item{sd_sd}{Numeric, defaults to 0. Sets the standard deviation of the uncertainty around the sd of the distribution assuming a normal prior.} -\item{dist}{Character, defaults to "lognormal". The (discretised +\item{distribution}{Character, defaults to "lognormal". The (discretised distribution to be used. If sd == 0 then the distribution is fixed and a delta function is used. If sd > 0 then the distribution is discretised and truncated. @@ -46,7 +48,7 @@ model fitting these are then transformed to the shape and scale of the gamma distribution. } -When \code{dist} is the default lognormal distribution the other function +When \code{distribution} is the default lognormal distribution the other function arguments have the following definition: \itemize{ \item \code{mean} is the mean of the natural logarithm of the delay (on the @@ -57,9 +59,21 @@ log scale). \item{max}{Numeric, maximum value of the distribution. The distribution will be truncated at this value.} +\item{pmf}{Numeric, a vector of values that represent the (nonparametric) +probability mass function of the delay (starting with 0); defaults to an +empty vector corresponding to a parametric specification of the distribution +(using \code{mean}, \code{sd} and corresponding uncertainties)} + \item{fixed}{Logical, defaults to \code{FALSE}. Should delays be treated -as coming from fixed (vs uncertain) distributions. Making this simplification +as coming from fixed (vs uncertain) distributions. Overrides any values +assigned to \code{mean_sd} and \code{sd_sd} by setting them to zero. reduces compute requirement but may produce spuriously precise estimates.} + +\item{prior_weight}{Integer, weight given to the generation time prior. +By default (prior_weight = 00) the priors will be weighted by the number of +observation data points, usually preventing the posteriors from shifting +much from the given distribution. Another sensible option would be 1, +i.e. treating the generation time distribution as a single parameter.} } \value{ A list of distribution options. @@ -68,10 +82,22 @@ A list of distribution options. \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Defines the parameters of a supported distribution for use in onward modelling. Multiple distribution families are supported - see the -documentation for \code{family} for details. This function provides distribution +documentation for \code{family} for details. Alternatively, a nonparametric +distribution can be specified using the \code{pmf} argument. +This function provides distribution functionality in \code{\link[=delay_opts]{delay_opts()}}, \code{\link[=generation_time_opts]{generation_time_opts()}}, and \code{\link[=trunc_opts]{trunc_opts()}}. } +\examples{ +# A fixed lognormal distribution with mean 5 and sd 1. +dist_spec(mean = 5, sd = 1, max = 20, distribution = "lognormal") + +# An uncertain gamma distribution with mean 3 and sd 2 +dist_spec( + mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, + distribution = "gamma" +) +} \author{ Sebastian Funk diff --git a/man/dist_spec_plus.Rd b/man/dist_spec_plus.Rd new file mode 100644 index 000000000..ddfe389d5 --- /dev/null +++ b/man/dist_spec_plus.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist.R +\name{dist_spec_plus} +\alias{dist_spec_plus} +\title{Creates a delay distribution as the sum of two other delay distributions} +\usage{ +dist_spec_plus(e1, e2, tolerance = 0.001) +} +\arguments{ +\item{e1}{The first delay distribution (from a call to \code{\link[=dist_spec]{dist_spec()}}) to +combine.} + +\item{e2}{The second delay distribution (from a call to \code{\link[=dist_spec]{dist_spec()}}) to +combine.} + +\item{tolerance}{A numeric value that sets the cumulative probability +to retain when truncating the cumulative distribution function of the +combined nonparametric delays. The default value is 0.001 with this retaining +0.999 of the cumulative probability. Note that using a larger tolerance may +result in a smaller number of points in the combined nonparametric delay but +may also impact the accuracy of the combined delay (i.e., change the mean +and standard deviation).} +} +\value{ +A delay distribution representing the sum of the two delays +(with class \code{\link[=dist_spec]{dist_spec()}}) +} +\description{ +This is done via convolution with \code{stats::convolve()}. Nonparametric delays +that can be combined are processed together, and their cumulative +distribution function is truncated at a specified tolerance level, ensuring +numeric stability. +} +\author{ +Sebastian Funk + +Sam Abbott +} diff --git a/man/epinow.Rd b/man/epinow.Rd index 6bf02eb55..357d2b523 100644 --- a/man/epinow.Rd +++ b/man/epinow.Rd @@ -40,10 +40,9 @@ summary parameters can also be passed.} options. See the documentation of \code{delay_opts()} and the examples below for details.} -\item{truncation}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A list of options as -generated by \code{trunc_opts()} defining the truncation of observed data. -Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for an approach to -estimating truncation from data.} +\item{truncation}{A call to \code{trunc_opts()} defining the truncation of +observed data. Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for +an approach to estimating truncation from data.} \item{rt}{A list of options as generated by \code{rt_opts()} defining Rt estimation. Defaults to \code{rt_opts()}. Set to \code{NULL} to switch to using back @@ -136,7 +135,7 @@ generation_time <- get_generation_time( incubation_period <- get_incubation_period( disease = "SARS-CoV-2", source = "lauer" ) -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), mean_sd = 0.1, sd = convert_to_logsd(2, 1), @@ -149,9 +148,10 @@ reported_cases <- example_confirmed[1:40] # estimate Rt and nowcast/forecast cases by date of infection out <- epinow( - reported_cases = reported_cases, generation_time = generation_time, + reported_cases = reported_cases, + generation_time = generation_time_opts(generation_time), rt = rt_opts(prior = list(mean = 2, sd = 0.1)), - delays = delay_opts(incubation_period, reporting_delay) + delays = delay_opts(incubation_period + reporting_delay) ) # summary of the latest estimates summary(out) diff --git a/man/estimate_delay.Rd b/man/estimate_delay.Rd index 659dc74b3..e6edeefc4 100644 --- a/man/estimate_delay.Rd +++ b/man/estimate_delay.Rd @@ -12,7 +12,7 @@ estimate_delay(delays, ...) \item{...}{Arguments to pass to internal methods.} } \value{ -A list summarising the bootstrapped distribution +A \code{dist_spec} summarising the bootstrapped distribution } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} diff --git a/man/estimate_infections.Rd b/man/estimate_infections.Rd index ddd168df2..d456f84be 100644 --- a/man/estimate_infections.Rd +++ b/man/estimate_infections.Rd @@ -35,10 +35,9 @@ summary parameters can also be passed.} options. See the documentation of \code{delay_opts()} and the examples below for details.} -\item{truncation}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A list of options as -generated by \code{trunc_opts()} defining the truncation of observed data. -Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for an approach to -estimating truncation from data.} +\item{truncation}{A call to \code{trunc_opts()} defining the truncation of +observed data. Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for +an approach to estimating truncation from data.} \item{rt}{A list of options as generated by \code{rt_opts()} defining Rt estimation. Defaults to \code{rt_opts()}. Set to \code{NULL} to switch to using back @@ -112,22 +111,26 @@ options(mc.cores = ifelse(interactive(), 4, 1)) reported_cases <- example_confirmed[1:60] # set up example generation time -generation_time <- generation_time_opts( +generation_time <- get_generation_time( disease = "SARS-CoV-2", source = "ganyani", fixed = TRUE ) # set delays between infection and case report incubation_period <- get_incubation_period( + disease = "SARS-CoV-2", source = "lauer", fixed = TRUE +) +# delays between infection and case report, with uncertainty +incubation_period_uncertain <- get_incubation_period( disease = "SARS-CoV-2", source = "lauer" ) -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), mean_sd = 0, sd = convert_to_logsd(2, 1), sd_sd = 0, max = 10 ) # default settings but assuming that delays are fixed rather than uncertain def <- estimate_infections(reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.1)), stan = stan_opts(control = list(adapt_delta = 0.95)) ) @@ -140,8 +143,8 @@ plot(def) #computation. # These settings are an area of active research. See ?gp_opts for details. agp <- estimate_infections(reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.1)), gp = gp_opts(ls_min = 10, basis_prop = 0.1), stan = stan_opts(control = list(adapt_delta = 0.95)) @@ -151,8 +154,8 @@ plot(agp) # Adjusting for future susceptible depletion dep <- estimate_infections(reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts( prior = list(mean = 2, sd = 0.1), pop = 1000000, future = "latest" @@ -164,15 +167,15 @@ plot(dep) # Adjusting for truncation of the most recent data # See estimate_truncation for an approach to estimating this from data -trunc_dist <- trunc_opts(dist = list( +trunc_dist <- dist_spec( mean = convert_to_logmean(0.5, 0.5), mean_sd = 0.1, sd = convert_to_logsd(0.5, 0.5), sd_sd = 0.1, max = 3 -)) +) trunc <- estimate_infections(reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), - truncation = trunc_dist, + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), + truncation = trunc_opts(trunc_dist), rt = rt_opts(prior = list(mean = 2, sd = 0.1)), gp = gp_opts(ls_min = 10, basis_prop = 0.1), stan = stan_opts(control = list(adapt_delta = 0.95)) @@ -188,8 +191,8 @@ plot(trunc) # can be optionally switched off using backcalc_opts(prior = "none"), # see ?backcalc_opts for other options backcalc <- estimate_infections(reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = NULL, backcalc = backcalc_opts(), obs = obs_opts(scale = list(mean = 0.4, sd = 0.05)), horizon = 0 @@ -198,8 +201,8 @@ plot(backcalc) # Rt projected into the future using the Gaussian process project_rt <- estimate_infections(reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts( prior = list(mean = 2, sd = 0.1), future = "project" @@ -210,8 +213,8 @@ plot(project_rt) # default settings on a later snapshot of data snapshot_cases <- example_confirmed[80:130] snapshot <- estimate_infections(snapshot_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay, fixed = TRUE), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 1, sd = 0.1)) ) plot(snapshot) @@ -219,8 +222,8 @@ plot(snapshot) # stationary Rt assumption (likely to provide biased real-time estimates) # with uncertain reporting delays stat <- estimate_infections(reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period_uncertain + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.1), gp_on = "R0") ) plot(stat) @@ -228,15 +231,16 @@ plot(stat) # no gaussian process (i.e fixed Rt assuming no breakpoints) # with uncertain reporting delays fixed <- estimate_infections(reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period_uncertain + reporting_delay), gp = NULL ) plot(fixed) # no delays no_delay <- estimate_infections( - reported_cases, generation_time = generation_time + reported_cases, + generation_time = generation_time_opts(generation_time) ) plot(no_delay) @@ -247,8 +251,8 @@ bp_cases <- bp_cases[, breakpoint := ifelse(date == as.Date("2020-03-16"), 1, 0) ] bkp <- estimate_infections(bp_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period_uncertain + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.1)), gp = NULL ) @@ -259,8 +263,8 @@ plot(bkp) # weekly random walk # with uncertain reporting delays rw <- estimate_infections(reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period_uncertain + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.1), rw = 7), gp = NULL ) diff --git a/man/estimate_secondary.Rd b/man/estimate_secondary.Rd index 720262ed4..54cccc338 100644 --- a/man/estimate_secondary.Rd +++ b/man/estimate_secondary.Rd @@ -7,7 +7,8 @@ estimate_secondary( reports, secondary = secondary_opts(), - delays = delay_opts(list(mean = 2.5, mean_sd = 0.5, sd = 0.47, sd_sd = 0.25, max = 30)), + delays = delay_opts(dist_spec(mean = 2.5, mean_sd = 0.5, sd = 0.47, sd_sd = 0.25, max = + 30, prior_weight = 1)), truncation = trunc_opts(), obs = obs_opts(), burn_in = 14, @@ -33,10 +34,9 @@ for details. By default a diffuse prior is assumed with a mean of 14 days and standard deviation of 7 days (with a standard deviation of 0.5 and 0.25 respectively on the log scale).} -\item{truncation}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A list of options as -generated by \code{trunc_opts()} defining the truncation of observed data. -Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for an approach to -estimating truncation from data.} +\item{truncation}{A call to \code{trunc_opts()} defining the truncation of +observed data. Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for +an approach to estimating truncation from data.} \item{obs}{A list of options as generated by \code{obs_opts()} defining the observation model. Defaults to \code{obs_opts()}.} diff --git a/man/figures/unnamed-chunk-14-1.png b/man/figures/unnamed-chunk-14-1.png index 0b4c129bd..d6022922f 100644 Binary files a/man/figures/unnamed-chunk-14-1.png and b/man/figures/unnamed-chunk-14-1.png differ diff --git a/man/figures/unnamed-chunk-15-1.png b/man/figures/unnamed-chunk-15-1.png new file mode 100644 index 000000000..bd17ce2a9 Binary files /dev/null and b/man/figures/unnamed-chunk-15-1.png differ diff --git a/man/figures/unnamed-chunk-18-1.png b/man/figures/unnamed-chunk-18-1.png index ba2ac0540..47534567e 100644 Binary files a/man/figures/unnamed-chunk-18-1.png and b/man/figures/unnamed-chunk-18-1.png differ diff --git a/man/figures/unnamed-chunk-19-1.png b/man/figures/unnamed-chunk-19-1.png new file mode 100644 index 000000000..28b59741d Binary files /dev/null and b/man/figures/unnamed-chunk-19-1.png differ diff --git a/man/generation_time_opts.Rd b/man/generation_time_opts.Rd index aa1ab99d8..64a90997b 100644 --- a/man/generation_time_opts.Rd +++ b/man/generation_time_opts.Rd @@ -4,42 +4,12 @@ \alias{generation_time_opts} \title{Generation Time Distribution Options} \usage{ -generation_time_opts( - ..., - disease, - source, - max = 15L, - fixed = FALSE, - prior_weight = NULL -) +generation_time_opts(dist = dist_spec(mean = 1)) } \arguments{ -\item{...}{Any parameters to be passed to \code{\link[=dist_spec]{dist_spec()}}, if the generation -time is given as parameters of a distribution rather than a \code{disease} -and \code{source}. In this case if the \code{mean} parameter is not set a -mean of 1 will be assumed, if the \code{max} parameter not set then the -\code{max} will be set to 15 to ensure backwards compatibility, and if no -\code{dist} parameter is given then a gamma distribution will be used for -backwards compatibility. As for \code{\link[=delay_opts]{delay_opts()}} a list of parameters can -also be supplied that describe a distribution (but unlike \code{\link[=delay_opts]{delay_opts()}} -multiple distributions may not currently be used (for example as output by -get_generation_time()).} - -\item{disease}{A character string indicating the disease of interest.} - -\item{source}{A character string indicating the source of interest.} - -\item{max}{Integer, defaults to 15. Maximum generation time. This will -not be used if a maximum is set in the distribution parameters.} - -\item{fixed}{Logical, defaults to \code{FALSE}. Should the generation time be -treated as coming from fixed (vs uncertain) distributions.} - -\item{prior_weight}{numeric, weight given to the generation time prior. -By default the generation time prior will be weighted by the number of -observation data points, usually preventing the posteriors from shifting -much from the given distribution. Another sensible option would be 1, -i.e. treating the generation time distribution as a single parameter.} +\item{dist}{A delay distribution or series of delay distributions generated +using \code{\link[=dist_spec]{dist_spec()}} or \code{\link[=get_generation_time]{get_generation_time()}}. If no distribution is given +a fixed generation time of 1 will be assumed.} } \value{ A list summarising the input delay distributions. @@ -52,15 +22,20 @@ be passed to \link{get_generation_time}, or as parameters of a distribution to b passed to \link{dist_spec}. } \examples{ -# default settings with a fixed generation time +# default settings with a fixed generation time of 1 generation_time_opts() # A fixed gamma distributed generation time -generation_time_opts(mean = 3, sd = 2) +generation_time_opts(dist_spec(mean = 3, sd = 2, max = 15)) # An uncertain gamma distributed generation time -generation_time_opts(mean = 3, sd = 2, mean_sd = 1, sd_sd = 0.5) +generation_time_opts( + dist_spec(mean = 3, sd = 2, mean_sd = 1, sd_sd = 0.5, max = 15) +) +# A generation time sourced from the literature +dist <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani") +generation_time_opts(dist) } \seealso{ convert_to_logmean convert_to_logsd bootstrapped_dist_fit dist_spec diff --git a/man/get_regional_results.Rd b/man/get_regional_results.Rd index e022d9e76..5a16040c4 100644 --- a/man/get_regional_results.Rd +++ b/man/get_regional_results.Rd @@ -59,8 +59,8 @@ dir <- file.path(tempdir(check = TRUE), "results") # run multiregion estimates regional_out <- regional_epinow( reported_cases = cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(rw = 7), gp = NULL, output = c("regions", "latest"), target_folder = dir, diff --git a/man/get_seeding_time.Rd b/man/get_seeding_time.Rd new file mode 100644 index 000000000..77c7f51b6 --- /dev/null +++ b/man/get_seeding_time.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get.R +\name{get_seeding_time} +\alias{get_seeding_time} +\title{Estimate seeding time from delays and generation time} +\usage{ +get_seeding_time(delays, generation_time) +} +\arguments{ +\item{delays}{Delays as specified using \code{dist_spec}} + +\item{generation_time}{Generation time as specified using \code{dist_spec}} +} +\value{ +An integer seeding time +} +\description{ +The seeding time is set to the mean of the specified delays, constrained +to be at least the maximum generation time +} +\author{ +Sebastian Funk +} diff --git a/man/mean.dist_spec.Rd b/man/mean.dist_spec.Rd new file mode 100644 index 000000000..a0513b8eb --- /dev/null +++ b/man/mean.dist_spec.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist.R +\name{mean.dist_spec} +\alias{mean.dist_spec} +\title{Returns the mean of one or more delay distribution} +\usage{ +\method{mean}{dist_spec}(x, ...) +} +\arguments{ +\item{x}{The \code{\link[=dist_spec]{dist_spec()}} to use} + +\item{...}{Not used} +} +\value{ +A vector of means. +} +\description{ +This works out the mean of all the (parametric / nonparametric) delay +distributions combined in the passed \code{\link[=dist_spec]{dist_spec()}}. +} +\examples{ +# A fixed lognormal distribution with mean 5 and sd 1. +lognormal <- dist_spec( + mean = 5, sd = 1, max = 20, distribution = "lognormal" +) +mean(lognormal) + +# An uncertain gamma distribution with mean 3 and sd 2 +gamma <- dist_spec( + mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, + distribution = "gamma" +) +mean(gamma) + +# The mean of the sum of two distributions +mean(lognormal + gamma) +} +\author{ +Sebastian Funk +} diff --git a/man/plot.dist_spec.Rd b/man/plot.dist_spec.Rd new file mode 100644 index 000000000..82e0e4613 --- /dev/null +++ b/man/plot.dist_spec.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist.R +\name{plot.dist_spec} +\alias{plot.dist_spec} +\title{Plot PMF and CDF for a dist_spec object} +\usage{ +\method{plot}{dist_spec}(x, ...) +} +\arguments{ +\item{x}{A \link{dist_spec} object} + +\item{...}{Additional arguments to pass to \code{ggplot}} +} +\description{ +This function takes a \link{dist_spec} object and plots its probability mass +function (PMF) and cumulative distribution function (CDF) using \link{ggplot2}. +Note that currently uncertainty in distributions is not plot. +} +\examples{ +#' # A fixed lognormal distribution with mean 5 and sd 1. +lognormal <- dist_spec( + mean = 1.6, sd = 0.5, max = 20, distribution = "lognormal" +) +plot(lognormal) + +# An uncertain gamma distribution with mean 3 and sd 2 +gamma <- dist_spec( + mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, + distribution = "gamma" +) +plot(gamma) + +# Multiple distributions +plot(lognormal + gamma + lognormal) + +# A combination of the two fixed distributions +plot(lognormal + lognormal) +} +\author{ +Sam Abbott +} diff --git a/man/plot_estimates.Rd b/man/plot_estimates.Rd index 233ce1eb4..77b2e7a00 100644 --- a/man/plot_estimates.Rd +++ b/man/plot_estimates.Rd @@ -64,8 +64,8 @@ reporting_delay <- estimate_delay(rlnorm(100, log(6), 1), max_value = 10) # run model out <- estimate_infections(cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay) + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay) ) # plot infections plot_estimates( diff --git a/man/plus-.dist_spec.Rd b/man/plus-.dist_spec.Rd new file mode 100644 index 000000000..9e737ff78 --- /dev/null +++ b/man/plus-.dist_spec.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist.R +\name{+.dist_spec} +\alias{+.dist_spec} +\title{Creates a delay distribution as the sum of two other delay distributions} +\usage{ +\method{+}{dist_spec}(e1, e2) +} +\arguments{ +\item{e1}{The first delay distribution (from a call to \code{\link[=dist_spec]{dist_spec()}}) to +combine.} + +\item{e2}{The second delay distribution (from a call to \code{\link[=dist_spec]{dist_spec()}}) to +combine.} +} +\value{ +A delay distribution representing the sum of the two delays +(with class \code{\link[=dist_spec]{dist_spec()}}) +} +\description{ +This is done via convolution with \code{stats::convolve()}. Nonparametric delays +that can be combined are processed together, and their cumulative +distribution function is truncated at a specified tolerance level, ensuring +numeric stability. +} +\examples{ +# A fixed lognormal distribution with mean 5 and sd 1. +lognormal <- dist_spec( + mean = 1.6, sd = 1, max = 20, distribution = "lognormal" +) +lognormal + lognormal + +# An uncertain gamma distribution with mean 3 and sd 2 +gamma <- dist_spec( + mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, + distribution = "gamma" +) +lognormal + gamma + +# Using tolerance parameter +EpiNow2:::dist_spec_plus(lognormal, lognormal, tolerance = 0.5) +} +\author{ +Sebastian Funk +} diff --git a/man/print.dist_spec.Rd b/man/print.dist_spec.Rd new file mode 100644 index 000000000..f1e4f01bd --- /dev/null +++ b/man/print.dist_spec.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dist.R +\name{print.dist_spec} +\alias{print.dist_spec} +\title{Prints the parameters of one or more delay distributions} +\usage{ +\method{print}{dist_spec}(x, ...) +} +\arguments{ +\item{x}{The \code{\link[=dist_spec]{dist_spec()}} to use} + +\item{...}{Not used} +} +\value{ +invisible +} +\description{ +This displays the parameters of the uncertain and probability mass +functions of fixed delay distributions combined in the passed \code{\link[=dist_spec]{dist_spec()}}. +} +\examples{ +#' # A fixed lognormal distribution with mean 5 and sd 1. +lognormal <- dist_spec( + mean = 1.5, sd = 0.5, max = 20, distribution = "lognormal" +) +print(lognormal) + +# An uncertain gamma distribution with mean 3 and sd 2 +gamma <- dist_spec( + mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, + distribution = "gamma" +) +print(gamma) +} +\author{ +Sebastian Funk +} diff --git a/man/regional_epinow.Rd b/man/regional_epinow.Rd index a01a2712b..05b1fe593 100644 --- a/man/regional_epinow.Rd +++ b/man/regional_epinow.Rd @@ -39,10 +39,9 @@ summary parameters can also be passed.} options. See the documentation of \code{delay_opts()} and the examples below for details.} -\item{truncation}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A list of options as -generated by \code{trunc_opts()} defining the truncation of observed data. -Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for an approach to -estimating truncation from data.} +\item{truncation}{A call to \code{trunc_opts()} defining the truncation of +observed data. Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for +an approach to estimating truncation from data.} \item{rt}{A list of options as generated by \code{rt_opts()} defining Rt estimation. Defaults to \code{rt_opts()}. Set to \code{NULL} to switch to using back @@ -142,7 +141,7 @@ generation_time <- get_generation_time( incubation_period <- get_incubation_period( disease = "SARS-CoV-2", source = "lauer" ) -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), mean_sd = 0.1, sd = convert_to_logsd(2, 1), @@ -160,8 +159,8 @@ cases <- data.table::rbindlist(list( # samples and warmup have been reduced for this example def <- regional_epinow( reported_cases = cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.2)), stan = stan_opts( samples = 100, warmup = 200, @@ -177,8 +176,8 @@ gp <- update_list(gp, list(realland = NULL)) rt <- opts_list(rt_opts(), cases, realland = rt_opts(rw = 7)) region_rt <- regional_epinow( reported_cases = cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt, gp = gp, stan = stan_opts( samples = 100, warmup = 200, diff --git a/man/regional_runtimes.Rd b/man/regional_runtimes.Rd index 70da030c4..f7f181f4d 100644 --- a/man/regional_runtimes.Rd +++ b/man/regional_runtimes.Rd @@ -52,8 +52,8 @@ cases <- data.table::rbindlist(list( # run basic nowcasting pipeline regional_out <- regional_epinow( reported_cases = cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), stan = stan_opts(samples = 100, warmup = 100), output = c("region", "timing") ) diff --git a/man/regional_summary.Rd b/man/regional_summary.Rd index 75616e034..834201ae6 100644 --- a/man/regional_summary.Rd +++ b/man/regional_summary.Rd @@ -82,8 +82,8 @@ cases <- data.table::rbindlist(list( # run basic nowcasting pipeline out <- regional_epinow( reported_cases = cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), output = "region", rt = NULL ) diff --git a/man/report_cases.Rd b/man/report_cases.Rd index e0ba69224..6575ace52 100644 --- a/man/report_cases.Rd +++ b/man/report_cases.Rd @@ -60,7 +60,7 @@ generation_time <- get_generation_time( incubation_period <- get_incubation_period( disease = "SARS-CoV-2", source = "lauer" ) -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), mean_sd = 0.1, sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 10 ) @@ -72,7 +72,7 @@ cases <- cases[, confirm := NULL][, sample := 1] reported_cases <- report_cases( case_estimates = cases, - delays = delay_opts(incubation_period, reporting_delay), + delays = delay_opts(incubation_period + reporting_delay), type = "sample" ) print(reported_cases) diff --git a/man/report_plots.Rd b/man/report_plots.Rd index c8cc1babf..ca2eba88d 100644 --- a/man/report_plots.Rd +++ b/man/report_plots.Rd @@ -49,8 +49,8 @@ reporting_delay <- bootstrapped_dist_fit( # run model out <- estimate_infections(cases, stan = stan_opts(samples = 500), - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = NULL ) diff --git a/man/run_region.Rd b/man/run_region.Rd index db2eaf318..eb76fc82f 100644 --- a/man/run_region.Rd +++ b/man/run_region.Rd @@ -38,10 +38,9 @@ summary parameters can also be passed.} options. See the documentation of \code{delay_opts()} and the examples below for details.} -\item{truncation}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} A list of options as -generated by \code{trunc_opts()} defining the truncation of observed data. -Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for an approach to -estimating truncation from data.} +\item{truncation}{A call to \code{trunc_opts()} defining the truncation of +observed data. Defaults to \code{trunc_opts()}. See \code{estimate_truncation()} for +an approach to estimating truncation from data.} \item{rt}{A list of options as generated by \code{rt_opts()} defining Rt estimation. Defaults to \code{rt_opts()}. Set to \code{NULL} to switch to using back diff --git a/man/simulate_infections.Rd b/man/simulate_infections.Rd index 5ba779c31..6605551ed 100644 --- a/man/simulate_infections.Rd +++ b/man/simulate_infections.Rd @@ -65,15 +65,15 @@ generation_time <- get_generation_time( incubation_period <- get_incubation_period( disease = "SARS-CoV-2", source = "lauer" ) -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), mean_sd = 0.1, sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 15 ) # fit model to data to recover Rt estimates est <- estimate_infections(reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), rt = rt_opts(prior = list(mean = 2, sd = 0.1), rw = 7), stan = stan_opts(control = list(adapt_delta = 0.9)), obs = obs_opts(scale = list(mean = 0.1, sd = 0.01)), diff --git a/man/trunc_opts.Rd b/man/trunc_opts.Rd index 6d9214352..8dba374b5 100644 --- a/man/trunc_opts.Rd +++ b/man/trunc_opts.Rd @@ -4,11 +4,12 @@ \alias{trunc_opts} \title{Truncation Distribution Options} \usage{ -trunc_opts(dist = list()) +trunc_opts(dist = dist_spec()) } \arguments{ -\item{dist}{Parameters of a distribution as supported by -\code{\link[=dist_spec]{dist_spec()}}.} +\item{dist}{A delay distribution or series of delay distributions reflecting +the truncation generated using \code{\link[=dist_spec]{dist_spec()}} or \code{\link[=estimate_truncation]{estimate_truncation()}}. +Default is an empty call to \code{\link[=dist_spec]{dist_spec()}}, i.e. no truncation} } \value{ A list summarising the input truncation distribution. @@ -24,7 +25,7 @@ estimate these distributions. trunc_opts() # truncation dist -trunc_opts(dist = list(mean = 3, sd = 2)) +trunc_opts(dist = dist_spec(mean = 3, sd = 2, max = 10)) } \seealso{ convert_to_logmean convert_to_logsd bootstrapped_dist_fit dist_spec diff --git a/tests/testthat/_snaps/calc_CrI.new.md b/tests/testthat/_snaps/calc_CrI.new.md deleted file mode 100644 index e9c6d413b..000000000 --- a/tests/testthat/_snaps/calc_CrI.new.md +++ /dev/null @@ -1,14 +0,0 @@ -# calc_CrI works as expected with default arguments - - value CrI - - 1: 1.45 lower_90 - 2: 9.55 upper_90 - -# calc_CrI works as expected when grouping - - type value CrI - - 1: car 1.45 lower_90 - 2: car 9.55 upper_90 - diff --git a/tests/testthat/_snaps/calc_CrIs.new.md b/tests/testthat/_snaps/calc_CrIs.new.md deleted file mode 100644 index beff841fa..000000000 --- a/tests/testthat/_snaps/calc_CrIs.new.md +++ /dev/null @@ -1,21 +0,0 @@ -# calc_CrI works as expected with default arguments - - Key: <.> - . lower_90 lower_50 lower_20 upper_20 upper_50 upper_90 - - 1: . 1.45 3.25 4.6 6.4 7.75 9.55 - -# calc_CrI works as expected when grouping - - Key: - type lower_90 lower_50 lower_20 upper_20 upper_50 upper_90 - - 1: car 1.45 3.25 4.6 6.4 7.75 9.55 - -# calc_CrI works as expected when given a custom CrI list - - Key: <.> - . lower_95 lower_40 lower_10 upper_10 upper_40 upper_95 - - 1: . 1.225 3.7 5.05 5.95 7.3 9.775 - diff --git a/tests/testthat/_snaps/calc_summary_measures.new.md b/tests/testthat/_snaps/calc_summary_measures.new.md deleted file mode 100644 index e96e9546e..000000000 --- a/tests/testthat/_snaps/calc_summary_measures.new.md +++ /dev/null @@ -1,27 +0,0 @@ -# calc_summary_measures works as expected with default arguments - - type median mean sd lower_90 lower_50 lower_20 upper_20 upper_50 - - 1: car 5.5 5.5 3.02765 1.45 3.25 4.6 6.4 7.75 - upper_90 - - 1: 9.55 - -# calc_CrI works as expected when grouping - - type median mean sd lower_90 lower_50 lower_20 upper_20 upper_50 - - 1: car 5.5 5.5 3.02765 1.45 3.25 4.6 6.4 7.75 - upper_90 - - 1: 9.55 - -# calc_CrI works as expected when given a custom CrI list - - type median mean sd lower_95 lower_40 lower_10 upper_10 upper_40 - - 1: car 5.5 5.5 3.02765 1.225 3.7 5.05 5.95 7.3 - upper_95 - - 1: 9.775 - diff --git a/tests/testthat/_snaps/calc_summary_stats.new.md b/tests/testthat/_snaps/calc_summary_stats.new.md deleted file mode 100644 index 672a30a7e..000000000 --- a/tests/testthat/_snaps/calc_summary_stats.new.md +++ /dev/null @@ -1,12 +0,0 @@ -# calc_summary_stats works as expected with default arguments - - median mean sd - - 1: 5.5 5.5 3.02765 - -# calc_summary_stats works as expected when grouping - - type median mean sd - - 1: car 5.5 5.5 3.02765 - diff --git a/tests/testthat/_snaps/report_cases.md b/tests/testthat/_snaps/report_cases.md deleted file mode 100644 index 6c57610b7..000000000 --- a/tests/testthat/_snaps/report_cases.md +++ /dev/null @@ -1,43 +0,0 @@ -# report_cases can simulate infections forward - - Code - reported_cases - Output - $samples - sample date value - - 1: 1 2020-02-23 2 - 2: 1 2020-02-24 6 - 3: 1 2020-02-25 13 - 4: 1 2020-02-26 21 - 5: 1 2020-02-27 33 - 6: 1 2020-02-28 43 - 7: 1 2020-02-29 66 - 8: 1 2020-03-01 122 - 9: 1 2020-03-02 129 - - $summarised - date median mean sd lower_90 lower_50 lower_20 upper_20 upper_50 - - 1: 2020-02-23 2 2 NA 2 2 2 2 2 - 2: 2020-02-24 6 6 NA 6 6 6 6 6 - 3: 2020-02-25 13 13 NA 13 13 13 13 13 - 4: 2020-02-26 21 21 NA 21 21 21 21 21 - 5: 2020-02-27 33 33 NA 33 33 33 33 33 - 6: 2020-02-28 43 43 NA 43 43 43 43 43 - 7: 2020-02-29 66 66 NA 66 66 66 66 66 - 8: 2020-03-01 122 122 NA 122 122 122 122 122 - 9: 2020-03-02 129 129 NA 129 129 129 129 129 - upper_90 - - 1: 2 - 2: 6 - 3: 13 - 4: 21 - 5: 33 - 6: 43 - 7: 66 - 8: 122 - 9: 129 - - diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 510190028..4c5261438 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,7 +1,7 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) { files <- c( "convolve.stan", "pmfs.stan", "observation_model.stan", "secondary.stan", - "rt.stan", "infections.stan" + "rt.stan", "infections.stan", "delays.stan" ) if (!(tolower(Sys.info()[["sysname"]]) %in% "windows")) { suppressMessages( diff --git a/tests/testthat/test-delays.R b/tests/testthat/test-delays.R index d9f4dfdbe..524c58d7d 100644 --- a/tests/testthat/test-delays.R +++ b/tests/testthat/test-delays.R @@ -1,118 +1,106 @@ -test_stan_data <- function(generation_time = generation_time_opts(), - delays = delay_opts(), - truncation = trunc_opts(), - params = NULL) { - data <- create_stan_data( - reported_cases = example_confirmed, +test_stan_delays <- function(generation_time = generation_time_opts(), + delays = delay_opts(), + truncation = trunc_opts(), + params = c()) { + data <- create_stan_delays( generation_time = generation_time, delays = delays, truncation = truncation, - rt = rt_opts(), - gp = gp_opts(), - obs = obs_opts(), - backcalc = backcalc_opts(), - shifted_cases = NULL, - horizon = 7 + ot = 10 ) return(unlist(unname(data[params]))) } +delay_params <- + c("delay_mean_mean", "delay_mean_sd", "delay_sd_mean", "delay_sd_sd", "delay_max", + "delay_np_pmf") + test_that("generation times can be specified in different ways", { - gt_params <- - c("gt_mean_mean", "gt_mean_sd", "gt_sd_mean", "gt_sd_sd", "gt_max") expect_equal( - test_stan_data(params = gt_params), - c(1, 0, 0, 0, 1) + test_stan_delays(params = delay_params), + c(0, 1) ) expect_equal( - test_stan_data( - generation_time = generation_time_opts(mean = 3), - params = gt_params + test_stan_delays( + generation_time = generation_time_opts(dist_spec(mean = 3)), + params = delay_params ), - c(3, 0, 0, 0, 3) + c(0, 0, 0, 1) ) expect_equal( - test_stan_data( - generation_time = generation_time_opts(mean = 3, sd = 1, max = 5), - params = gt_params - ), - c(3, 0, 1, 0, 5) + round(test_stan_delays( + generation_time = generation_time_opts(dist_spec(mean = 3, sd = 1, max = 5)), + params = delay_params + ), digits = 2), + c(0.02, 0.11, 0.22, 0.30, 0.35) ) expect_equal( - round(test_stan_data( + round(test_stan_delays( generation_time = generation_time_opts( get_generation_time( disease = "SARS-CoV-2", source = "ganyani", max = 10, fixed = TRUE ) ), - params = gt_params + params = delay_params ), digits = 2), - c(3.64, 0, 3.08, 0, 10) + c(0.18, 0.20, 0.17, 0.13, 0.10, 0.07, 0.05, 0.04, 0.03, 0.02) ) expect_equal( - round(test_stan_data( + round(test_stan_delays( generation_time = generation_time_opts( - disease = "SARS-CoV-2", source = "ganyani", max = 10 + get_generation_time( + disease = "SARS-CoV-2", source = "ganyani", max = 10 + ) ), - params = gt_params + params = delay_params ), digits = 2), - c(3.64, 0.71, 3.08, 0.77, 10) + c(3.64, 0.71, 3.08, 0.77, 10.00) ) }) test_that("delay parameters can be specified in different ways", { - delay_params <- - c( - "delay_mean_mean", "delay_mean_sd", "delay_sd_mean", "delay_sd_sd", - "delay_max" - ) expect_equal( - test_stan_data( - delays = delay_opts(list(mean = 3)), + tail(test_stan_delays( + delays = delay_opts(dist_spec(mean = 3)), params = delay_params - ), - c(3, 0, 0, 0, 3) + ), n = -2), + c(0, 0, 0, 1) ) expect_equal( - test_stan_data( - delays = delay_opts(list(mean = 3, sd = 1, max = 5)), + tail(round(test_stan_delays( + delays = delay_opts(dist_spec(mean = 3, sd = 1, max = 5)), params = delay_params - ), - c(3, 0, 1, 0, 5) + ), digits = 2), n = -2), + c(0.02, 0.11, 0.22, 0.30, 0.35) ) }) test_that("truncation parameters can be specified in different ways", { - trunc_params <- - c( - "trunc_mean_mean", "trunc_mean_sd", "trunc_sd_mean", "trunc_sd_sd", - "trunc_max" - ) expect_equal( - test_stan_data( - truncation = trunc_opts(dist = list(mean = 3, sd = 1, max = 5)), - params = trunc_params - ), - c(3, 0, 1, 0, 5) + tail(round(test_stan_delays( + truncation = trunc_opts(dist = dist_spec(mean = 3, sd = 1, max = 5)), + params = delay_params + ), digits = 2), n = -2), + c(0.02, 0.11, 0.22, 0.30, 0.35) ) }) test_that("contradictory generation times are caught", { - expect_error(generation_time_opts(mean = 3.5), "must be an integer") + expect_error(generation_time_opts(dist_spec(mean = 3.5)), "must be an integer") expect_error( - generation_time_opts(mean = 3, mean_sd = 1), + generation_time_opts(dist_spec(mean = 3, mean_sd = 1)), "must be 0" ) }) test_that("contradictory delays are caught", { expect_error( - test_stan_data(delays = delay_opts(list(mean = 3.5))), + test_stan_delays(delays = delay_opts(dist_spec(mean = 3.5))), "must be an integer" ) expect_error( - test_stan_data(delays = delay_opts(list(mean = 3, mean_sd = 1))), + test_stan_delays(delays = delay_opts(dist_spec(mean = 3, mean_sd = 1))), "must be 0" ) }) diff --git a/tests/testthat/test-dist.R b/tests/testthat/test-dist.R new file mode 100644 index 000000000..e0fb2b673 --- /dev/null +++ b/tests/testthat/test-dist.R @@ -0,0 +1,26 @@ +skip_on_cran() +skip_on_os("windows") + +test_that("distributions are the same in R and stan", { + args <- list(mean = 3, mean_sd = 0, sd = 2, sd_sd = 0, max_value = 15) + lognormal_params <- + do.call(lognorm_dist_def, (c(args, list(samples = 1))))$params[[1]] + gamma_params <- + do.call(gamma_dist_def, (c(args, list(samples = 1))))$params[[1]] + pmf_r_lognormal <- dist_skel( + n = seq_len(args$max_value) - 1, dist = TRUE, cum = FALSE, + model = "lognormal", params = lognormal_params, max_value = args$max, + discrete = TRUE + ) + pmf_r_gamma <- dist_skel( + n = seq_len(args$max_value) - 1, dist = TRUE, cum = FALSE, + model = "gamma", params = gamma_params, max_value = args$max, + discrete = TRUE + ) + + pmf_stan_lognormal <- discretised_pmf(args$mean, args$sd, args$max_value, 0) + pmf_stan_gamma <- discretised_pmf(args$mean, args$sd, args$max_value, 1) + + expect_equal(pmf_r_lognormal, pmf_stan_lognormal) + expect_equal(pmf_r_gamma, pmf_stan_gamma) +}) diff --git a/tests/testthat/test-dist_spec.R b/tests/testthat/test-dist_spec.R new file mode 100644 index 000000000..f2887155e --- /dev/null +++ b/tests/testthat/test-dist_spec.R @@ -0,0 +1,232 @@ + +test_that("dist_spec returns correct output for fixed lognormal distribution", { + result <- dist_spec(mean = 5, sd = 1, max = 20, distribution = "lognormal") + expect_equal(dim(result$mean_mean), 0) + expect_equal(dim(result$sd_mean), 0) + expect_equal(dim(result$dist), 0) + expect_equal(dim(result$max), 0) + expect_equal(result$fixed, array(1)) + expect_equal( + as.vector(round(result$np_pmf, 2)), + c(0.00, 0.00, 0.00, 0.00, 0.01, 0.01, 0.02, 0.03, + 0.03, 0.04, 0.05, 0.06, 0.07, 0.07, 0.08, 0.09, + 0.10, 0.10, 0.11, 0.12) + ) +}) + +test_that("dist_spec returns correct output for uncertain gamma distribution", { + result <- dist_spec( + mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, + distribution = "gamma" + ) + expect_equal(result$mean_mean, array(3L)) + expect_equal(result$sd_mean, array(2)) + expect_equal(result$mean_sd, array(0.5)) + expect_equal(result$sd_sd, array(0.5)) + expect_equal(result$dist, array(1)) + expect_equal(result$max, array(20)) + expect_equal(result$fixed, array(0L)) +}) + +test_that("dist_spec returns correct output for fixed distribution", { + result <- dist_spec( + mean = 5, mean_sd = 3, sd = 1, max = 20, distribution = "lognormal", + fixed = TRUE + ) + expect_equal(dim(result$mean_mean), 0) + expect_equal(dim(result$sd_mean), 0) + expect_equal(result$fixed, array(1L)) + expect_equal( + as.vector(round(result$np_pmf, 2)), + c(0.00, 0.00, 0.00, 0.00, 0.01, 0.01, 0.02, 0.03, + 0.03, 0.04, 0.05, 0.06, 0.07, 0.07, 0.08, 0.09, + 0.10, 0.10, 0.11, 0.12) + ) +}) + +test_that("dist_spec returns error when both pmf and distributional parameters are specified", { + expect_error(dist_spec(mean = 5, sd = 1, max = 20, distribution = "lognormal", pmf = c(0.1, 0.2, 0.3, 0.4)), + "Distributional parameters or a pmf can be specified, but not both.") +}) + +test_that("dist_spec returns error when mean is missing but other distributional parameters are given", { + expect_error(dist_spec(sd = 1, max = 20, distribution = "lognormal"), + "If any distributional parameters are given then so must the mean.") +}) + +test_that("dist_spec returns error when maximum of parametric distributions is not specified", { + expect_error(dist_spec(mean = 5, sd = 1, distribution = "lognormal"), + "Maximum of parametric distributions must be specified.") +}) + +test_that("+.dist_spec returns correct output for sum of two distributions", { + lognormal <- dist_spec(mean = 5, sd = 1, max = 20, distribution = "lognormal") + gamma <- dist_spec(mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, distribution = "gamma") + result <- lognormal + gamma + expect_equal(result$mean_mean, array(3)) + expect_equal(result$sd_mean, array(2)) + expect_equal(result$mean_sd, array(0.5)) + expect_equal(result$sd_sd, array(0.5)) + expect_equal(result$n, 2) + expect_equal(result$n_p, 1) + expect_equal(result$n_np, 1) + expect_equal(result$np_pmf_max, 20) + expect_equal(result$np_pmf_length, array(20)) +}) + +test_that("+.dist_spec returns correct output for sum of two fixed distributions", { + lognormal <- dist_spec( + mean = 5, sd = 1, max = 20, distribution = "lognormal", fixed = TRUE + ) + gamma <- dist_spec( + mean = 3, sd = 2, max = 20, distribution = "gamma", fixed = TRUE + ) + result <- lognormal + gamma + expect_equal(dim(result$mean_mean), 0) + expect_equal(dim(result$sd_mean), 0) + expect_equal(result$n, 1) + expect_equal(result$n_p, 0) + expect_equal(result$n_np, 1) + expect_equal(result$np_pmf_max, 30) + expect_equal(result$np_pmf_length, 30) +}) + +test_that("+.dist_spec returns correct output for sum of two nonparametric distributions", { + lognormal <- dist_spec(pmf = c(0.1, 0.2, 0.3, 0.4)) + gamma <- dist_spec(pmf = c(0.1, 0.2, 0.3, 0.4)) + result <- lognormal + gamma + expect_equal(dim(result$mean_mean), 0) + expect_equal(dim(result$sd_mean), 0) + expect_equal(result$n, 1) + expect_equal(result$n_p, 0) + expect_equal(result$n_np, 1) + expect_equal(result$np_pmf_max, 7) + expect_equal(result$np_pmf_length, 7) + expect_equal( + as.vector(round(result$np_pmf, 2)), + c(0.01, 0.04, 0.10, 0.20, 0.25, 0.24, 0.16) + ) +}) + +test_that("Testing `+.dist_spec` function with tolerance parameter", { + # Create distributions + lognormal <- dist_spec( + mean = 1.6, sd = 1, max = 20, distribution = "lognormal" + ) + gamma <- dist_spec( + mean = 3, sd = 2, max = 20, distribution = "gamma" + ) + + # Compute combined distribution with default tolerance + combined_default <- `+.dist_spec`(lognormal, gamma) + + # Compute combined distribution with larger tolerance + combined_larger_tolerance <- EpiNow2:::dist_spec_plus( + lognormal, gamma, tolerance = 0.01 + ) + + # The length of the combined PMF should be greater with default tolerance + expect_true( + length(combined_default$np_pmf) > length(combined_larger_tolerance$np_pmf) + ) + # Both should sum to 1 + expect_equal(sum(combined_default$np_pmf), 1) + expect_equal(sum(combined_larger_tolerance$np_pmf), 1) + # The first 5 entries should be within 0.01 of each other + expect_equal( + combined_default$np_pmf[1:5], combined_larger_tolerance$np_pmf[1:5], + tolerance = 0.01 + ) + expect_equal( + mean(combined_default), mean(combined_larger_tolerance), tolerance = 0.1 + ) +}) + + +test_that("mean.dist_spec returns correct output for fixed lognormal distribution", { + lognormal <- dist_spec( + mean = convert_to_logmean(3, 1), sd = convert_to_logsd(3, 1), + max = 20, distribution = "lognormal" + ) + result <- mean.dist_spec(lognormal) + expect_equal(result, 2.49, tolerance = 0.01) # here we can see the bias from + # using this kind of discretisation approach +}) + +test_that("mean.dist_spec returns correct output for uncertain gamma distribution", { + gamma <- dist_spec(mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, distribution = "gamma") + result <- mean.dist_spec(gamma) + expect_equal(result, 3) +}) + +test_that("mean.dist_spec returns correct output for sum of two distributions", { + lognormal <- dist_spec(mean = 1, sd = 1, max = 20, distribution = "lognormal") + gamma <- dist_spec(mean = 3, sd = 2, max = 20, distribution = "gamma") + result <- mean.dist_spec(lognormal + gamma) + expect_equal(result, c(5.84), tolerance = 0.001) +}) + +test_that("print.dist_spec correctly prints the parameters of the fixed lognormal", { + lognormal <- dist_spec(mean = 1.5, sd = 0.5, max = 20, distribution = "lognormal") + + expect_output(print(lognormal), "\\n Fixed distribution with PMF \\[0\\.0014 0\\.052 0\\.16 0\\.2 0\\.18 0\\.13 0\\.094 0\\.063 0\\.042 0\\.027 0\\.018 0\\.012 0\\.0079 0\\.0052 0\\.0035 0\\.0024 0\\.0016 0\\.0011 0\\.00078 0\\.00055\\]\\n") +}) + +test_that("print.dist_spec correctly prints the parameters of the uncertain gamma", { + gamma <- dist_spec( + mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, + distribution = "gamma" + ) + + expect_output(print(gamma), "\\n Uncertain gamma distribution with \\(untruncated\\) mean 3 \\(SD 0\\.5\\) and SD 2 \\(SD 0\\.5\\)\\n") +}) + +test_that("print.dist_spec correctly prints the parameters of the uncertain lognormal", { + lognormal_uncertain <- dist_spec(mean = 1.5, sd = 0.5, mean_sd = 0.1, sd_sd = 0.1, max = 20, distribution = "lognormal") + + expect_output(print(lognormal_uncertain), "\\n Uncertain lognormal distribution with \\(untruncated\\) logmean 1\\.5 \\(SD 0\\.1\\) and logSD 0\\.5 \\(SD 0\\.1\\)\\n") +}) + +test_that("print.dist_spec correctly prints the parameters of an empty distribution", { + empty <- dist_spec() + + expect_output(print(empty), "Empty `dist_spec` distribution.") +}) + +test_that("print.dist_spec correctly prints the parameters of a combination of distributions", { + lognormal <- dist_spec(mean = 1.5, sd = 0.5, max = 20, distribution = "lognormal") + gamma <- dist_spec(mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, distribution = "gamma") + combined <- lognormal + gamma + + expect_output(print(combined), "Combination of delay distributions:\\n Fixed distribution with PMF \\[0\\.0014 0\\.052 0\\.16 0\\.2 0\\.18 0\\.13 0\\.094 0\\.063 0\\.042 0\\.027 0\\.018 0\\.012 0\\.0079 0\\.0052 0\\.0035 0\\.0024 0\\.0016 0\\.0011 0\\.00078 0\\.00055\\]\\n Uncertain gamma distribution with \\(untruncated\\) mean 3 \\(SD 0\\.5\\) and SD 2 \\(SD 0\\.5\\)\\n") +}) + +test_that("plot.dist_spec returns a ggplot object", { + lognormal <- dist_spec(mean = 1.6, sd = 0.5, max = 20, distribution = "lognormal") + plot <- plot(lognormal) + expect_s3_class(plot, "ggplot") +}) + +test_that("plot.dist_spec correctly plots a single distribution", { + lognormal <- dist_spec(mean = 1.6, sd = 0.5, max = 20, distribution = "lognormal") + plot <- plot(lognormal) + expect_equal(length(plot$layers), 2) + expect_equal(length(plot$facet$params$facets), 1) +}) + +test_that("plot.dist_spec correctly plots multiple distributions", { + lognormal <- dist_spec(mean = 1.6, sd = 0.5, max = 20, distribution = "lognormal") + gamma <- dist_spec(mean = 3, sd = 2, mean_sd = 0.5, sd_sd = 0.5, max = 20, distribution = "gamma") + combined <- lognormal + gamma + plot <- plot(combined) + expect_equal(length(plot$layers), 2) + expect_equal(length(plot$facet$params$facets), 1) +}) + +test_that("plot.dist_spec correctly plots a combination of fixed distributions", { + lognormal <- dist_spec(mean = 1.6, sd = 0.5, max = 20, distribution = "lognormal") + combined <- lognormal + lognormal + plot <- plot(combined) + expect_equal(length(plot$layers), 2) + expect_equal(length(plot$facet$params$facets), 1) +}) diff --git a/tests/testthat/test-epinow.R b/tests/testthat/test-epinow.R index ac720cb8f..ed9d2867c 100644 --- a/tests/testthat/test-epinow.R +++ b/tests/testthat/test-epinow.R @@ -3,7 +3,7 @@ skip_on_cran() generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani", max_value = 15) incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer", max_value = 15) -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), mean_sd = 0.1, sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 10 @@ -21,8 +21,8 @@ expected_out <- c("estimates", "estimated_reported_cases", "summary", "plots", " test_that("epinow produces expected output when run with default settings", { out <- suppressWarnings(epinow( reported_cases = reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), stan = stan_opts( samples = 25, warmup = 25, cores = 1, chains = 2, @@ -43,8 +43,8 @@ test_that("epinow produces expected output when run with default settings", { test_that("epinow runs without error when saving to disk", { expect_null(suppressWarnings(epinow( reported_cases = reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), stan = stan_opts( samples = 25, warmup = 25, cores = 1, chains = 2, control = list(adapt_delta = 0.8) @@ -57,8 +57,8 @@ test_that("epinow runs without error when saving to disk", { test_that("epinow can produce partial output as specified", { out <- suppressWarnings(epinow( reported_cases = reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), stan = stan_opts( samples = 25, warmup = 25, cores = 1, chains = 2, @@ -80,8 +80,8 @@ test_that("epinow can produce partial output as specified", { test_that("epinow fails as expected when given a short timeout", { expect_error(suppressWarnings(x = epinow( reported_cases = reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), stan = stan_opts( samples = 100, warmup = 100, cores = 1, chains = 2, @@ -96,9 +96,9 @@ test_that("epinow fails as expected when given a short timeout", { test_that("epinow fails if given NUTs arguments when using variational inference", { expect_error(suppressWarnings(epinow( reported_cases = reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), - stan = delay_opts( + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), + stan = stan_opts( samples = 100, warmup = 100, cores = 1, chains = 2, method = "vb" @@ -111,8 +111,8 @@ test_that("epinow fails if given NUTs arguments when using variational inference test_that("epinow fails if given variational inference arguments when using NUTs", { expect_error(suppressWarnings(epinow( reported_cases = reported_cases, - generation_time = generation_time, - delays = delay_opts(incubation_period, reporting_delay), + generation_time = generation_time_opts(generation_time), + delays = delay_opts(incubation_period + reporting_delay), stan = stan_opts(method = "sampling", tol_rel_obj = 1), logs = NULL, verbose = FALSE ))) diff --git a/tests/testthat/test-estimate_infections.R b/tests/testthat/test-estimate_infections.R index 46903c77c..b705621b6 100644 --- a/tests/testthat/test-estimate_infections.R +++ b/tests/testthat/test-estimate_infections.R @@ -5,7 +5,7 @@ futile.logger::flog.threshold("FATAL") reported_cases <- EpiNow2::example_confirmed[1:30] generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani", max_value = 10) incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer", max_value = 10) -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), mean_sd = 0.1, sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 10 ) @@ -21,7 +21,7 @@ default_estimate_infections <- function(..., add_stan = list(), delay = TRUE) { stan_args <- c(stan_args, add_stan) suppressWarnings(estimate_infections(..., - generation_time = generation_time, + generation_time = generation_time_opts(generation_time), delays = ifelse(delay, list(delay_opts(reporting_delay)), list(delay_opts()))[[1]], stan = stan_args, verbose = FALSE )) diff --git a/tests/testthat/test-estimate_secondary.R b/tests/testthat/test-estimate_secondary.R index e155a913b..f896737f8 100644 --- a/tests/testthat/test-estimate_secondary.R +++ b/tests/testthat/test-estimate_secondary.R @@ -23,6 +23,7 @@ cases[ # with a secondary case inc <- estimate_secondary(cases[1:60], obs = obs_opts(scale = list(mean = 0.2, sd = 0.2), week_effect = FALSE), + control = list(adapt_delta = 0.98), verbose = FALSE ) @@ -54,7 +55,7 @@ prev <- estimate_secondary(cases[1:100], week_effect = FALSE, scale = list(mean = 0.4, sd = 0.1) ), - verbose = FALSE + verbose = TRUE ) # extract posterior parameters of interest diff --git a/tests/testthat/test-estimate_truncation.R b/tests/testthat/test-estimate_truncation.R index 9c44eeada..0b6f5c2b8 100644 --- a/tests/testthat/test-estimate_truncation.R +++ b/tests/testthat/test-estimate_truncation.R @@ -10,7 +10,7 @@ options(mc.cores = ifelse(interactive(), 4, 1)) reported_cases <- example_confirmed[1:60] # define example truncation distribution (note not integer adjusted) -trunc_dist <- list( +trunc_dist <- dist_spec( mean = convert_to_logmean(3, 2), mean_sd = 0.1, sd = convert_to_logsd(3, 2), @@ -24,8 +24,8 @@ construct_truncation <- function(index, cases, dist) { cmf <- cumsum( dlnorm( 1:(dist$max + 1), - rnorm(1, dist$mean, dist$mean_sd), - rnorm(1, dist$sd, dist$sd_sd) + rnorm(1, dist$mean_mean, dist$mean_sd), + rnorm(1, dist$sd_mean, dist$sd_sd) ) ) cmf <- cmf / cmf[dist$max + 1] diff --git a/tests/testthat/test-get_dist.R b/tests/testthat/test-get_dist.R index 17607cd14..0785ebd2f 100644 --- a/tests/testthat/test-get_dist.R +++ b/tests/testthat/test-get_dist.R @@ -1,17 +1,18 @@ test_that("get_dist returns distributional definition data in the format expected by EpiNow2", { data <- data.table::data.table(mean = 1, mean_sd = 1, sd = 1, sd_sd = 1, source = "test", disease = "test", dist = "gamma") - + dist <- get_dist(data, disease = "test", source = "test") expect_equal( - get_dist(data, disease = "test", source = "test"), - list(mean = 1, mean_sd = 1, sd = 1, sd_sd = 1, max = 15, dist = "gamma") + dist[c("mean_mean", "mean_sd", "sd_mean", "sd_sd", "max")], + list(mean_mean = array(1), mean_sd = array(1), + sd_mean = array(1), sd_sd = array(1), max = array(15L)) ) }) test_that("get_dist returns distributional definition data in the format expected by EpiNow2 with no uncertainty", { data <- data.table::data.table(mean = 1, mean_sd = 1, sd = 1, sd_sd = 1, source = "test", disease = "test", dist = "lognormal") - + dist <- get_dist(data, disease = "test", source = "test", fixed = TRUE) expect_equal( - get_dist(data, disease = "test", source = "test", fixed = TRUE), - list(mean = 1, mean_sd = 0, sd = 1, sd_sd = 0, max = 15, dist = "lognormal") + round(dist$np_pmf[1:7], digits = 2), + array(c(0.17, 0.23, 0.17, 0.12, 0.08, 0.06, 0.04)) ) }) diff --git a/tests/testthat/test-models/estimate_infections.R b/tests/testthat/test-models/estimate_infections.R index 80562323c..0ba61d1c6 100644 --- a/tests/testthat/test-models/estimate_infections.R +++ b/tests/testthat/test-models/estimate_infections.R @@ -10,7 +10,7 @@ incubation_period <- get_incubation_period( # static model fit <- estimate_infections( reported_cases, - generation_time, + generation_time_opts(generation_time), delays = delay_opts(incubation_period), stan = stan_opts(chains = 2, warmup = 200, samples = 1000), gp = NULL diff --git a/tests/testthat/test-regional_epinow.R b/tests/testthat/test-regional_epinow.R index 05d7bf6de..59b593580 100644 --- a/tests/testthat/test-regional_epinow.R +++ b/tests/testthat/test-regional_epinow.R @@ -2,7 +2,7 @@ skip_on_cran() generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani", max_value = 5) -reporting_delay <- list( +reporting_delay <- dist_spec( mean = log(3), mean_sd = 0.1, sd = log(2), sd_sd = 0.1, max = 5 ) @@ -23,7 +23,8 @@ df_non_zero <- function(df) { test_that("regional_epinow produces expected output when run with default settings", { out <- suppressWarnings( regional_epinow( - reported_cases = cases, generation_time = generation_time, + reported_cases = cases, + generation_time = generation_time_opts(generation_time), delays = delay_opts(reporting_delay), rt = rt_opts(rw = 10), gp = NULL, stan = stan_opts( @@ -52,7 +53,8 @@ test_that("regional_epinow produces expected output when run with default settin test_that("regional_epinow runs without error when given a very short timeout", { expect_error( regional_epinow( - reported_cases = cases, generation_time = generation_time, + reported_cases = cases, + generation_time = generation_time_opts(generation_time), delays = delay_opts(reporting_delay), stan = stan_opts( samples = 1000, warmup = 100, @@ -65,7 +67,8 @@ test_that("regional_epinow runs without error when given a very short timeout", ) expect_error( regional_epinow( - reported_cases = cases, generation_time = generation_time, + reported_cases = cases, + generation_time = generation_time_opts(generation_time), delays = delay_opts(reporting_delay), stan = stan_opts( samples = 1000, warmup = 100, @@ -85,7 +88,8 @@ test_that("regional_epinow produces expected output when run with region specifi rt <- opts_list(rt_opts(), cases, realland = rt_opts(rw = 7)) out <- suppressWarnings( regional_epinow( - reported_cases = cases, generation_time = generation_time, + reported_cases = cases, + generation_time = generation_time_opts(generation_time), delays = delay_opts(reporting_delay), rt = rt, gp = gp, stan = stan_opts( diff --git a/tests/testthat/test-regional_runtimes.R b/tests/testthat/test-regional_runtimes.R index a1e662063..13ce6f5ae 100644 --- a/tests/testthat/test-regional_runtimes.R +++ b/tests/testthat/test-regional_runtimes.R @@ -1,5 +1,5 @@ generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani", max_value = 5) -reporting_delay <- list( +reporting_delay <- dist_spec( mean = log(3), mean_sd = 0.1, sd = log(2), sd_sd = 0.1, max = 5 ) @@ -18,7 +18,7 @@ df_non_zero <- function(df) { out <- suppressWarnings(regional_epinow( reported_cases = cases, - generation_time = generation_time, + generation_time = generation_time_opts(generation_time), delays = delay_opts(reporting_delay), stan = stan_opts( samples = 25, warmup = 25, diff --git a/tests/testthat/test-report_cases.R b/tests/testthat/test-report_cases.R index 5c0318d16..54fa17df5 100644 --- a/tests/testthat/test-report_cases.R +++ b/tests/testthat/test-report_cases.R @@ -9,7 +9,7 @@ test_that("report_cases can simulate infections forward", { incubation_period <- get_incubation_period( disease = "SARS-CoV-2", source = "lauer" ) - reporting_delay <- list( + reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), mean_sd = 0.1, sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 5 ) @@ -21,7 +21,7 @@ test_that("report_cases can simulate infections forward", { set.seed(123) reported_cases <- report_cases( case_estimates = cases, - delays = delay_opts(incubation_period, reporting_delay), + delays = delay_opts(incubation_period + reporting_delay), type = "sample" ) expect_equal(class(reported_cases), "list") diff --git a/tests/testthat/test-simulate_infections.R b/tests/testthat/test-simulate_infections.R index 8d1104cfd..f900557b9 100644 --- a/tests/testthat/test-simulate_infections.R +++ b/tests/testthat/test-simulate_infections.R @@ -4,14 +4,14 @@ futile.logger::flog.threshold("FATAL") reported_cases <- EpiNow2::example_confirmed[1:50] generation_time <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani", max_value = 10) incubation_period <- get_incubation_period(disease = "SARS-CoV-2", source = "lauer", max_value = 10) -reporting_delay <- list( +reporting_delay <- dist_spec( mean = convert_to_logmean(2, 1), mean_sd = 0.1, sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 10 ) library(data.table) out <- suppressWarnings(estimate_infections(reported_cases, - generation_time = generation_time, + generation_time = generation_time_opts(generation_time), delays = delay_opts(reporting_delay), gp = NULL, rt = rt_opts(rw = 14), stan = stan_opts( @@ -28,14 +28,14 @@ test_that("simulate_infections works to simulate a passed in estimate_infections }) test_that("simulate_infections works to simulate a passed in estimate_infections object with an adjusted Rt", { - R <- c(rep(NA_real_, 40), rep(0.5, 9)) + R <- c(rep(NA_real_, 40), rep(0.5, 17)) sims <- simulate_infections(out, R) expect_equal(names(sims), c("samples", "summarised", "observations")) expect_equal(tail(sims$summarised[variable == "R"]$median, 9), rep(0.5, 9)) }) test_that("simulate_infections works to simulate a passed in estimate_infections object with a short adjusted Rt", { - R <- c(rep(NA_real_, 40), rep(0.5, 10)) + R <- c(rep(NA_real_, 40), rep(0.5, 17)) sims <- simulate_infections(out, R) expect_equal(names(sims), c("samples", "summarised", "observations")) expect_equal(tail(sims$summarised[variable == "R"]$median, 9), rep(0.5, 9)) @@ -63,7 +63,7 @@ test_that("simulate infections fails as expected", { }) test_that("simulate_infections works to simulate a passed in estimate_infections object with an adjusted Rt in data frame", { - R <- c(rep(1.4, 32), rep(0.5, 17)) + R <- c(rep(1.4, 40), rep(0.5, 17)) R_dt <- data.frame(date = summary(out, type = "parameters", param = "R")$date, value = R) sims_dt <- simulate_infections(out, R_dt) expect_equal(names(sims_dt), c("samples", "summarised", "observations")) diff --git a/tests/testthat/test-stan-convole.R b/tests/testthat/test-stan-convole.R index 1cd9fd42e..ce8bce254 100644 --- a/tests/testthat/test-stan-convole.R +++ b/tests/testthat/test-stan-convole.R @@ -3,12 +3,14 @@ skip_on_os("windows") test_that("convolve can combine two pmfs as expected", { expect_equal( - convolve(c(0.1, 0.2, 0.7), c(0.1, 0.2, 0.7), 6), - c(0.01, 0.04, 0.18, 0.28, 0.49, 0.00), + convolve_with_rev_pmf(c(0.1, 0.2, 0.7), rev(c(0.1, 0.2, 0.7)), 5), + c(0.01, 0.04, 0.18, 0.28, 0.49), tolerance = 0.01 ) expect_equal( - sum(convolve(c(0.05, 0.55, 0.4), c(0.1, 0.2, 0.7), 6)), 1 + sum(convolve_with_rev_pmf( + c(0.05, 0.55, 0.4), rev(c(0.1, 0.2, 0.7)), 5 + )), 1 ) }) @@ -22,10 +24,10 @@ test_that("convolve performs the same as a numerical convolution", { # Add sampled Poisson distributions up to get combined distribution z <- x + y # Analytical convolution of PMFs - conv_pmf <- convolve(xpmf, ypmf, 42) + conv_pmf <- convolve_with_rev_pmf(xpmf, rev(ypmf), 41) conv_cdf <- cumsum(conv_pmf) # Empirical convolution of PMFs - cdf <- ecdf(z)(0:41) + cdf <- ecdf(z)(0:40) # Test analytical and numerical convolutions are similar with a small error # allowed expect_lte(sum(abs(conv_cdf - cdf)), 0.1) @@ -33,12 +35,12 @@ test_that("convolve performs the same as a numerical convolution", { test_that("convolve_dot_product can combine vectors as we expect", { expect_equal( - convolve_dot_product(c(0.1, 0.2, 0.7), rev(c(0.1, 0.2, 0.7)), 3), + convolve_with_rev_pmf(c(0.1, 0.2, 0.7), rev(c(0.1, 0.2, 0.7)), 3), c(0.01, 0.04, 0.18), tolerance = 0.01 ) expect_equal( - convolve_dot_product( + convolve_with_rev_pmf( seq_len(10), rev(c(0.1, 0.4, 0.3, 0.2)), 10 ), c(0.1, 0.6, 1.4, 2.4, 3.4, 4.4, 5.4, 6.4, 7.4, 8.4) @@ -47,7 +49,7 @@ test_that("convolve_dot_product can combine vectors as we expect", { x[2:10] <- x[1:9] / 2 x[1] <- 0 expect_equal( - convolve_dot_product( + convolve_with_rev_pmf( seq_len(10), rev(c(0, 0.5, 0, 0)), 10 ), x diff --git a/tests/testthat/test-stan-infections.R b/tests/testthat/test-stan-infections.R index a1165e350..15736f996 100644 --- a/tests/testthat/test-stan-infections.R +++ b/tests/testthat/test-stan-infections.R @@ -14,13 +14,19 @@ test_that("update_infectiousness works as expected with default settings", { expect_error(update_infectiousness(rep(1, 20), rep(0.1, 5), 5, 10, 10)) }) -gt_rev_pmf <- rev(discretised_pmf(3, 2, 15, 1, 1)) +pmf <- discretised_pmf(3, 2, 15, 1) +gt_rev_pmf <- get_delay_rev_pmf( + 1L, 15L, array(0L), array(1L), + array(c(1L, 2L)), array(15L), pmf, + array(c(1L, 16L)), numeric(0), numeric(0), 0L, + 1L, 1L, 0L +) # test generate infections test_that("generate_infections works as expected", { expect_equal( round(generate_infections(c(1, rep(1, 9)), 10, gt_rev_pmf, log(1000), 0, 0, 0), 0), - c(rep(1000, 10), 996, rep(997, 9)) + c(rep(1000, 10), 996, rep(997, 3), rep(998, 6)) ) expect_equal( round(generate_infections(c(1, rep(1.1, 9)), 10, gt_rev_pmf, log(20), 0.03, 0, 0), 0), @@ -32,11 +38,11 @@ test_that("generate_infections works as expected", { ) expect_equal( round(generate_infections(c(1, rep(1, 9)), 4, gt_rev_pmf, log(500), -0.02, 0, 0), 0), - c(500, 490, 480, 471, 402, 413, 416, 417, rep(418, 6)) + c(500, 490, 480, 471, 402, 413, 417, 417, rep(418, 6)) ) expect_equal( round(generate_infections(c(1, rep(1.1, 9)), 4, gt_rev_pmf, log(500), 0, 0, 0), 0), - c(rep(500, 4), 416, 472, 490, 507, 524, 543, 561, 581, 601, 622) + c(rep(500, 4), 417, 473, 490, 507, 525, 543, 562, 581, 601, 622) ) expect_equal( round(generate_infections(c(1, rep(1, 9)), 1, gt_rev_pmf, log(40), numeric(0), 0, 0), 0), @@ -48,6 +54,6 @@ test_that("generate_infections works as expected", { ) expect_equal( round(generate_infections(c(1, rep(1, 9)), 10, gt_rev_pmf, log(1000), 0, 100000, 4), 0), - c(rep(1000, 10), 996, rep(997, 5), 980, 964, 944, 921) + c(rep(1000, 10), 996, rep(997, 3), rep(998, 2), 980, 964, 944, 921) ) }) diff --git a/tests/testthat/test-stan-secondary.R b/tests/testthat/test-stan-secondary.R index 8e0796628..a408a54c2 100644 --- a/tests/testthat/test-stan-secondary.R +++ b/tests/testthat/test-stan-secondary.R @@ -4,7 +4,7 @@ skip_on_os("windows") # test primary reports and observations reports <- rep(10, 20) obs <- rep(4, 20) -delay_pmf <- reverse_mf(discretised_pmf(log(3), 0.1, 5, 0, 0)) +delay_pmf <- reverse_mf(discretised_pmf(log(3), 0.1, 5, 0)) check_equal <- function(args, target, dof = 0, dev = FALSE) { out <- do.call(calculate_secondary, args) diff --git a/touchstone/setup.R b/touchstone/setup.R index 80730e28e..d06725aae 100644 --- a/touchstone/setup.R +++ b/touchstone/setup.R @@ -19,7 +19,6 @@ reporting_delay <- dist_spec( delays <- delay_opts(incubation_period + reporting_delay) - # set up example generation time u_generation_time <- get_generation_time( disease = "SARS-CoV-2", source = "ganyani", fixed = FALSE