epiforecasts · sbfnk · Oct 11, 2023 · Sep 19, 2023 · Sep 20, 2023 · Sep 21, 2023
diff --git a/NAMESPACE b/NAMESPACE
@@ -185,6 +185,7 @@ importFrom(purrr,transpose)
 importFrom(purrr,walk)
 importFrom(rlang,abort)
 importFrom(rlang,cnd_muffle)
+importFrom(rlang,warn)
 importFrom(rstan,expose_stan_functions)
 importFrom(rstan,extract)
 importFrom(rstan,sampling)

diff --git a/NEWS.md b/NEWS.md
@@ -8,6 +8,7 @@
 ## Package
 
 * Reduced the number of long-running examples. By @sbfnk in #459 and reviewed by @seabbs.
+* Changed all instances of arguments that refer to the maximum of a distribution to reflect the maximum. Previously this did, in some instance, refer to the length of the PMF. By @sbfnk in #468.
 
 # EpiNow2 1.4.0
 

diff --git a/R/create.R b/R/create.R
@@ -669,13 +669,18 @@ create_stan_delays <- function(..., weight = 1) {
   ret$types_id <- array(ret$types_id)
   ## map delays to identifiers
   ret$types_groups <- array(c(0, cumsum(unname(type_n[type_n > 0]))) + 1)
-  ## map pmfs
-  ret$np_pmf_groups <- array(c(0, cumsum(combined_delays$np_pmf_length)) + 1)
+  ## get non zero length delay pmf lengths
+  ret$np_pmf_groups <- array(
+    c(0, cumsum(
+      combined_delays$np_pmf_length[combined_delays$np_pmf_length > 0])
+    ) + 1
+  )
+  ## calculate total np pmf length
+  ret$np_pmf_length <- sum(combined_delays$np_pmf_length)
   ## assign prior weights
   ret$weight <- array(rep(weight, ret$n_p))
   ## remove auxiliary variables
   ret$fixed <- NULL
-  ret$np_pmf_length <- NULL
 
   names(ret) <- paste("delay", names(ret), sep = "_")
   ret <- c(ret, ids)

diff --git a/R/dist.R b/R/dist.R
@@ -95,41 +95,41 @@ dist_skel <- function(n, dist = FALSE, cum = TRUE, model,
     ddist <- function(n) {
       (pexp(n + 1, params$rate) -
         pexp(n, params$rate)) /
-        pexp(max_value, params$rate)
+        pexp(max_value + 1, params$rate)
     }
   } else if (model %in% "gamma") {
     rdist <- function(n) {
       rgamma(n, params$shape, params$scale)
     }
     pdist <- function(n) {
       pgamma(n, params$shape, params$scale) /
-        pgamma(max_value, params$shape, params$scale)
+        pgamma(max_value + 1, params$shape, params$scale)
     }
     ddist <- function(n) {
       (pgamma(n + 1, params$shape, params$scale) -
         pgamma(n, params$shape, params$scale)) /
-        pgamma(max_value, params$shape, params$scale)
+        pgamma(max_value + 1, params$shape, params$scale)
     }
   } else if (model %in% "lognormal") {
     rdist <- function(n) {
       rlnorm(n, params$mean, params$sd)
     }
     pdist <- function(n) {
       plnorm(n, params$mean, params$sd) /
-        plnorm(max_value, params$mean, params$sd)
+        plnorm(max_value + 1, params$mean, params$sd)
     }
     ddist <- function(n) {
       (plnorm(n + 1, params$mean, params$sd) -
         plnorm(n, params$mean, params$sd)) /
-        plnorm(max_value, params$mean, params$sd)
+        plnorm(max_value + 1, params$mean, params$sd)
     }
   }
 
   if (discrete) {
     cmf <- c(0, pdist(seq_len(max_value + 1)))
     pmf <- diff(cmf)
     rdist <- function(n) {
-      sample(x = seq_len(max_value) - 1, size = n, prob = pmf)
+      sample(x = seq_len(max_value + 1) - 1, size = n, prob = pmf)
     }
     pdist <- function(n) {
       cmf[n + 1]
@@ -144,14 +144,13 @@ dist_skel <- function(n, dist = FALSE, cum = TRUE, model,
     if (!dist) {
       rdist(n)
     } else {
-      if (length(n) > max_value) {
-        n <- 1:max_value
-      }
       if (cum) {
-        pdist(n)
+        ret <- pdist(n)
       } else {
-        ddist(n)
+        ret <- ddist(n)
       }
+      ret[ret > 1] <- NA_real_
+      return(ret)
     }
   }
 
@@ -893,6 +892,7 @@ tune_inv_gamma <- function(lower = 2, upper = 21) {
 #'
 #' @author Sebastian Funk
 #' @author Sam Abbott
+#' @importFrom rlang warn
 #' @export
 #' @examples
 #' # A fixed lognormal distribution with mean 5 and sd 1.
@@ -906,6 +906,18 @@ tune_inv_gamma <- function(lower = 2, upper = 21) {
 dist_spec <- function(mean, sd = 0, mean_sd = 0, sd_sd = 0,
                       distribution = c("lognormal", "gamma"), max,
                       pmf = numeric(0), fixed = FALSE) {
+  ## deprecate previous behaviour
+  warn(
+    message = paste(
+      "The meaning of the 'max' argument has changed compared to",
+      "previous versions. It now indicates the maximum of a distribution",
+      "rather than the length of the probability mass function (including 0)",
+      "that it represented previously. To replicate previous behaviour reduce",
+      "max by 1."
+    ),
+    .frequency = "regularly",
+    .frequency_id = "dist_spec_max"
+  )
   ## check if parametric or nonparametric
   if (length(pmf) > 0 &&
     !all(
@@ -970,9 +982,7 @@ dist_spec <- function(mean, sd = 0, mean_sd = 0, sd_sd = 0,
           n = 0,
           n_p = 0,
           n_np = 0,
-          np_pmf_max = 0,
           np_pmf = numeric(0),
-          np_pmf_length = integer(0),
           fixed = integer(0)
         ))
       } else { ## parametric fixed
@@ -991,7 +1001,7 @@ dist_spec <- function(mean, sd = 0, mean_sd = 0, sd_sd = 0,
             )
           }
           pmf <- dist_skel(
-            n = seq_len(max) - 1, dist = TRUE, cum = FALSE,
+            n = seq_len(max + 1) - 1, dist = TRUE, cum = FALSE,
             model = distribution, params = params$params[[1]], max_value = max,
             discrete = TRUE
           )
@@ -1008,9 +1018,7 @@ dist_spec <- function(mean, sd = 0, mean_sd = 0, sd_sd = 0,
         n = 1,
         n_p = 0,
         n_np = 1,
-        np_pmf_max = length(pmf),
         np_pmf = pmf,
-        np_pmf_length = length(pmf),
         fixed = 1L
       ))
     }
@@ -1026,14 +1034,13 @@ dist_spec <- function(mean, sd = 0, mean_sd = 0, sd_sd = 0,
       n = 1,
       n_p = 1,
       n_np = 0,
-      np_pmf_max = 0,
       np_pmf = numeric(0),
-      np_pmf_length = integer(0),
       fixed = 0L
     )
   }
   ret <- purrr::map(ret, array)
-  sum_args <- grep("(^n$|^n_|_max$)", names(ret))
+  sum_args <- grep("(^n$|^n_$)", names(ret))
+  ret$np_pmf_length <- length(ret$np_pmf)
   ret[sum_args] <- purrr::map(ret[sum_args], sum)
   attr(ret, "class") <- c("list", "dist_spec")
   return(ret)
@@ -1089,9 +1096,8 @@ dist_spec_plus <- function(e1, e2, tolerance = 0.001) {
     delays$fixed <- c(1, rep(0, delays$n_p))
     delays$n_np <- 1
     delays$n <- delays$n_p + 1
-    delays$np_pmf_max <- length(delays$np_pmf)
-    delays$np_pmf_length <- length(delays$np_pmf)
   }
+  delays$np_pmf_length <- length(delays$np_pmf)
   return(delays)
 }
 
@@ -1151,7 +1157,7 @@ dist_spec_plus <- function(e1, e2, tolerance = 0.001) {
   delays <- purrr::transpose(delays)
   ## convert back to arrays
   delays <- purrr::map(delays, function(x) array(unlist(x)))
-  sum_args <- grep("(^n$|^n_|_max$)", names(delays))
+  sum_args <- grep("^n($|_)", names(delays))
   delays[sum_args] <- purrr::map(delays[sum_args], sum)
   attr(delays, "class") <- c("list", "dist_spec")
   return(delays)

diff --git a/R/estimate_secondary.R b/R/estimate_secondary.R
@@ -497,7 +497,7 @@ simulate_secondary <- function(data, type = "incidence", family = "poisson",
       list(i = index, m = meanlog, s = sdlog),
       function(i, m, s) {
         discretised_lognormal_pmf_conv(
-          scaled[max(1, i - delay_max):i],
+          scaled[max(1, i - delay_max - 1):i],
           meanlog = m, sdlog = s
         )
       }

diff --git a/R/get.R b/R/get.R
@@ -169,7 +169,7 @@ get_regional_results <- function(regional_output,
 #'
 #' @param source A character string indicating the source of interest.
 #'
-#' @param max_value Numeric, the maximum value to allow. Defaults to 15 days.
+#' @param max_value Numeric, the maximum value to allow. Defaults to 14 days.
 #'
 #' @param fixed Logical, defaults to `FALSE`. Should distributions be supplied
 #' as fixed values (vs with uncertainty)?
@@ -182,7 +182,7 @@ get_regional_results <- function(regional_output,
 #' get_dist(
 #'  EpiNow2::generation_times, disease = "SARS-CoV-2", source = "ganyani"
 #' )
-get_dist <- function(data, disease, source, max_value = 15, fixed = FALSE) {
+get_dist <- function(data, disease, source, max_value = 14, fixed = FALSE) {
   target_disease <- disease
   target_source <- source
   data <- data[disease == target_disease][source == target_source]
@@ -204,7 +204,7 @@ get_dist <- function(data, disease, source, max_value = 15, fixed = FALSE) {
 #' @author Sam Abbott
 #' @examples
 #' get_generation_time(disease = "SARS-CoV-2", source = "ganyani")
-get_generation_time <- function(disease, source, max_value = 15,
+get_generation_time <- function(disease, source, max_value = 14,
                                 fixed = FALSE) {
   dist <- get_dist(EpiNow2::generation_times,
     disease = disease, source = source,
@@ -224,7 +224,7 @@ get_generation_time <- function(disease, source, max_value = 15,
 #' @export
 #' @examples
 #' get_incubation_period(disease = "SARS-CoV-2", source = "lauer")
-get_incubation_period <- function(disease, source, max_value = 15,
+get_incubation_period <- function(disease, source, max_value = 14,
                                   fixed = FALSE) {
   dist <- get_dist(EpiNow2::incubation_periods,
     disease = disease, source = source,
@@ -291,8 +291,8 @@ get_seeding_time <- function(delays, generation_time) {
   ## make sure we have at least (length of total gt pmf - 1) seeding time
   seeding_time <- max(
     seeding_time,
-    sum(generation_time$max) + sum(generation_time$np_pmf_max) -
-      length(generation_time$max) - length(generation_time$np_pmf_max)
+    sum(generation_time$max - 1) + sum(generation_time$np_pmf_length) -
+      length(generation_time$max) - length(generation_time$np_pmf_length)
   )
   return(seeding_time)
 }
diff --git a/R/opts.R b/R/opts.R
@@ -28,18 +28,18 @@
 #' generation_time_opts()
 #'
 #' # A fixed gamma distributed generation time
-#' generation_time_opts(dist_spec(mean = 3, sd = 2, max = 15))
+#' generation_time_opts(dist_spec(mean = 3, sd = 2, max = 14))
 #'
 #' # An uncertain gamma distributed generation time
 #' generation_time_opts(
-#'  dist_spec(mean = 3, sd = 2, mean_sd = 1, sd_sd = 0.5, max = 15)
+#'  dist_spec(mean = 3, sd = 2, mean_sd = 1, sd_sd = 0.5, max = 14)
 #' )
 #'
 #' # A generation time sourced from the literature
 #' dist <- get_generation_time(disease = "SARS-CoV-2", source = "ganyani")
 #' generation_time_opts(dist)
 generation_time_opts <- function(dist = dist_spec(mean = 1), ...,
-                                 disease, source, max = 15L, fixed = FALSE,
+                                 disease, source, max = 14, fixed = FALSE,
                                  prior_weight) {
   deprecated_options_given <- FALSE
   dot_options <- list(...)
@@ -50,7 +50,7 @@ generation_time_opts <- function(dist = dist_spec(mean = 1), ...,
   if (type_options > 1) {
     stop(
       "Generation time can be given either as distributional options ",
-      "or as disease/source, but not both."
+      "or as a combination of disease and source, but not both."
     )
   }
   if (length(dot_options) > 0) {
@@ -120,11 +120,11 @@ generation_time_opts <- function(dist = dist_spec(mean = 1), ...,
 #' delay_opts()
 #'
 #' # A single delay that has uncertainty
-#' delay <- dist_spec(mean = 1, mean_sd = 0.2, sd = 0.5, sd_sd = 0.1, max = 15)
+#' delay <- dist_spec(mean = 1, mean_sd = 0.2, sd = 0.5, sd_sd = 0.1, max = 14)
 #' delay_opts(delay)
 #'
 #' # A single delay without uncertainty
-#' delay <- dist_spec(mean = 1, sd = 0.5, max = 15)
+#' delay <- dist_spec(mean = 1, sd = 0.5, max = 14)
 #' delay_opts(delay)
 #'
 #' # Multiple delays (in this case twice the same)
@@ -158,7 +158,7 @@ delay_opts <- function(dist = dist_spec(), ..., fixed = FALSE) {
     )
   } else if (length(dot_options) > 0) {
     ## can be removed once dot options are hard deprecated
-    stop("Unknown named arguments passed to `delay_opts`" )
+    stop("Unknown named arguments passed to `delay_opts`")
   }
   return(dist)
 }

diff --git a/R/simulate_infections.R b/R/simulate_infections.R
@@ -56,7 +56,7 @@
 #' )
 #' reporting_delay <- dist_spec(
 #'   mean = convert_to_logmean(2, 1), mean_sd = 0.1,
-#'   sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 15
+#'   sd = convert_to_logsd(2, 1), sd_sd = 0.1, max = 14
 #' )
 #'
 #' # fit model to data to recover Rt estimates

diff --git a/R/utilities.R b/R/utilities.R
@@ -306,17 +306,19 @@ convert_to_logsd <- function(mean, sd) {
 }
 
 discretised_lognormal_pmf <- function(meanlog, sdlog, max_d, reverse = FALSE) {
-  pmf <- plnorm(1:max_d, meanlog, sdlog) -
-    plnorm(0:(max_d - 1), meanlog, sdlog)
-  pmf <- as.vector(pmf) / as.vector(plnorm(max_d, meanlog, sdlog))
+  pmf <- plnorm(1:(max_d + 1), meanlog, sdlog) -
+    plnorm(0:max_d, meanlog, sdlog)
+  pmf <- as.vector(pmf) / as.vector(plnorm(max_d + 1, meanlog, sdlog))
   if (reverse) {
     pmf <- rev(pmf)
   }
   return(pmf)
 }
 
 discretised_lognormal_pmf_conv <- function(x, meanlog, sdlog) {
-  pmf <- discretised_lognormal_pmf(meanlog, sdlog, length(x), reverse = TRUE)
+  pmf <- discretised_lognormal_pmf(
+    meanlog, sdlog, length(x) - 1, reverse = TRUE
+  )
   conv <- sum(x * pmf, na.rm = TRUE)
   return(conv)
 }
@@ -325,9 +327,10 @@ discretised_gamma_pmf <- function(mean, sd, max_d, zero_pad = 0,
                                   reverse = FALSE) {
   alpha <- exp(2 * (log(mean) - log(sd)))
   beta <- exp(log(mean) - 2 * log(sd))
-  pmf <- pgamma(1:max_d, shape = alpha, scale = beta) -
-    pgamma(0:(max_d - 1), shape = alpha, scale = beta)
-  pmf <- as.vector(pmf) / as.vector(pgamma(max_d, shape = alpha, scale = beta))
+  pmf <- pgamma(1:(max_d + 1), shape = alpha, scale = beta) -
+    pgamma(0:max_d, shape = alpha, scale = beta)
+  pmf <- as.vector(pmf) /
+    as.vector(pgamma(max_d + 1, shape = alpha, scale = beta))
   if (zero_pad > 0) {
     pmf <- c(rep(0, zero_pad), pmf)
   }

diff --git a/README.Rmd b/README.Rmd
@@ -100,7 +100,7 @@ For example if data on the delay between onset and infection was available we co
 ```{r, eval = FALSE}
 reporting_delay <- estimate_delay(
   rlnorm(1000, log(2), 1),
-  max_value = 15, bootstraps = 1
+  max_value = 14, bootstraps = 1
 )
 ```