From a61fb6153932ba6d5fe398da231c316856950492 Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 13 Sep 2024 11:53:33 +0100 Subject: [PATCH 01/13] add first draft of stan model --- inst/pcens_model.stan | 104 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 inst/pcens_model.stan diff --git a/inst/pcens_model.stan b/inst/pcens_model.stan new file mode 100644 index 0000000..63a0da4 --- /dev/null +++ b/inst/pcens_model.stan @@ -0,0 +1,104 @@ +functions { + #include primary_censored_dist.stan + #include expgrowth.stan + + real partial_sum(array[] int y_slice, + int start, int end, + array[] int y_upper, array[] int n, + array[] int pwindow, array[] int D, + int dist_id, vector params, + int primary_dist_id, array[] real primary_params) { + real partial_target = 0; + for (i in start:end) { + partial_target += n[i] * primary_censored_dist_lpmf( + y_slice[i] | dist_id, to_array_1d(params), + pwindow[i], y_upper[i], D[i], + primary_dist_id, primary_params + ); + } + return partial_target; + } +} + +data { + int N; // number of observations + array[N] int y; // observed delays + array[N] int y_upper; // observed delays upper bound + array[N] int n; // number of occurrences for each delay + array[N] int pwindow; // primary censoring window + array[N] int D; // maximum delay + int dist_id; // distribution identifier + int primary_dist_id; // primary distribution identifier + int n_params; // number of distribution parameters + int n_primary_params; // number of primary distribution parameters + int compute_log_lik; // whether to compute log likelihood + int use_reduce_sum; // whether to use reduce_sum + vector[n_params] param_lower_bounds; // lower bounds for parameters + vector[n_params] param_upper_bounds; // upper bounds for parameters + vector[n_primary_params] primary_param_lower_bounds; // lower bounds for primary parameters + vector[n_primary_params] primary_param_upper_bounds; // upper bounds for primary parameters + array[n_params] real prior_location; // location parameters for priors + array[n_params] real prior_scale; // scale parameters for priors + array[n_primary_params] real primary_prior_location; // location parameters for primary priors + array[n_primary_params] real primary_prior_scale; // scale parameters for primary priors +} + +transformed data { + array[n_primary_params] real primary_params; + for (i in 1:n_primary_params) { + primary_params[i] = 0; // Initialize to 0, update in parameters block if needed + } +} + +parameters { + vector[n_params] params; // distribution parameters + vector[n_primary_params] primary_params_raw; // primary distribution parameters +} + +transformed parameters { + if (n_primary_params > 0) { + for (i in 1:n_primary_params) { + primary_params[i] = primary_params_raw[i]; + } + } +} + +model { + // Priors + for (i in 1:n_params) { + params[i] ~ normal(prior_location[i], prior_scale[i]); + } + if (n_primary_params > 0) { + for (i in 1:n_primary_params) { + primary_params_raw[i] ~ normal(primary_prior_location[i], primary_prior_scale[i]); + } + } + + // Likelihood + if (use_reduce_sum) { + target += reduce_sum(partial_sum, y, 1, + y_upper, n, pwindow, D, dist_id, params, + primary_dist_id, primary_params); + } else { + for (i in 1:N) { + target += n[i] * primary_censored_dist_lpmf( + y[i] | dist_id, to_array_1d(params), + pwindow[i], y_upper[i], D[i], + primary_dist_id, primary_params + ); + } + } +} + +generated quantities { + vector[compute_log_lik ? N : 0] log_lik; + if (compute_log_lik) { + for (i in 1:N) { + log_lik[i] = primary_censored_dist_lpmf( + y[i] | dist_id, to_array_1d(params), + pwindow[i], y_upper[i], D[i], + primary_dist_id, primary_params + ); + } + } +} From ebb2e5ed1ee01c66a09d92c5ee4ebc1ca2d29339 Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 13 Sep 2024 12:07:08 +0100 Subject: [PATCH 02/13] add a function to compile the model --- NAMESPACE | 1 + R/pcd-stan-tools.R | 48 ++++++++++++++++++++++++++++++++++++++++ man/fitdistdoublecens.Rd | 4 ++++ man/pcd_cmdstan_model.Rd | 45 +++++++++++++++++++++++++++++++++++++ 4 files changed, 98 insertions(+) create mode 100644 man/pcd_cmdstan_model.Rd diff --git a/NAMESPACE b/NAMESPACE index 6c221e8..4cff75d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(dexpgrowth) export(dpcens) export(dprimarycensoreddist) export(fitdistdoublecens) +export(pcd_cmdstan_model) export(pcd_load_stan_functions) export(pcd_stan_files) export(pcd_stan_functions) diff --git a/R/pcd-stan-tools.R b/R/pcd-stan-tools.R index 996f10c..a682f80 100644 --- a/R/pcd-stan-tools.R +++ b/R/pcd-stan-tools.R @@ -227,3 +227,51 @@ pcd_load_stan_functions <- function( return(result) } +#' Create a CmdStanModel with primarycensoreddist Stan functions +#' +#' This function creates a CmdStanModel object using the Stan model and +#' functions from primarycensoreddist and optionally includes additional +#' user-specified Stan files. +#' +#' @param include_paths Character vector of paths to include for Stan +#' compilation. Defaults to the result of `pcd_stan_path()`. +#' +#' @param ... Additional arguments passed to cmdstanr::cmdstan_model(). +#' +#' @return A CmdStanModel object. +#' +#' @details +#' The underlying Stan model (`pcens_model.stan`) supports various features: +#' - Multiple probability distributions for modeling delays +#' - Primary and secondary censoring +#' - Truncation +#' - Optional use of reduce_sum for improved performance (via +#' within chain parallelism). +#' - Flexible prior specifications +#' - Optional computation of log-likelihood for model comparison +#' +#' @export +#' @family modelhelpers +#' +#' @examples +#' \dontrun{ +#' model <- pcd_cmdstan_model() +#' fit <- model$sample(data = stan_data) +#' } +pcd_cmdstan_model <- function( + include_paths = primarycensoreddist::pcd_stan_path(), ...) { + if (!requireNamespace("cmdstanr", quietly = TRUE)) { + stop("Package 'cmdstanr' is required but not installed for this function.") + } + + pcd_stan_model <- system.file( + "pcens_model.stan", + package = "primarycensoreddist" + ) + + cmdstanr::cmdstan_model( + pcd_stan_model, + include_paths = include_paths, + ... + ) +} diff --git a/man/fitdistdoublecens.Rd b/man/fitdistdoublecens.Rd index b503e4d..84cce41 100644 --- a/man/fitdistdoublecens.Rd +++ b/man/fitdistdoublecens.Rd @@ -84,4 +84,8 @@ fit_norm <- fitdistdoublecens( summary(fit_norm) } +\seealso{ +Modelling wrappers for external fitting packages +\code{\link{pcd_cmdstan_model}()} +} \concept{modelhelpers} diff --git a/man/pcd_cmdstan_model.Rd b/man/pcd_cmdstan_model.Rd new file mode 100644 index 0000000..41ad79b --- /dev/null +++ b/man/pcd_cmdstan_model.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pcd-stan-tools.R +\name{pcd_cmdstan_model} +\alias{pcd_cmdstan_model} +\title{Create a CmdStanModel with primarycensoreddist Stan functions} +\usage{ +pcd_cmdstan_model(include_paths = primarycensoreddist::pcd_stan_path(), ...) +} +\arguments{ +\item{include_paths}{Character vector of paths to include for Stan +compilation. Defaults to the result of \code{pcd_stan_path()}.} + +\item{...}{Additional arguments passed to cmdstanr::cmdstan_model().} +} +\value{ +A CmdStanModel object. +} +\description{ +This function creates a CmdStanModel object using the Stan model and +functions from primarycensoreddist and optionally includes additional +user-specified Stan files. +} +\details{ +The underlying Stan model (\code{pcens_model.stan}) supports various features: +\itemize{ +\item Multiple probability distributions for modeling delays +\item Primary and secondary censoring +\item Truncation +\item Optional use of reduce_sum for improved performance (via +within chain parallelism). +\item Flexible prior specifications +\item Optional computation of log-likelihood for model comparison +} +} +\examples{ +\dontrun{ +model <- pcd_cmdstan_model() +fit <- model$sample(data = stan_data) +} +} +\seealso{ +Modelling wrappers for external fitting packages +\code{\link{fitdistdoublecens}()} +} +\concept{modelhelpers} From 85a3dffed759ec93b38a8808d37070cd8e618230 Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 13 Sep 2024 23:36:15 +0100 Subject: [PATCH 03/13] model compiling --- .Rbuildignore | 1 + .gitignore | 1 + inst/pcens_model.stan | 29 +++++++++-------------------- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 74dc628..37d7371 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -24,3 +24,4 @@ ^CITATION\.cff$ ^vignettes/using-stan-tools\.Rmd$ ^vignettes/fitting-dists-with-stan\.Rmd$ +^inst/pcens_model diff --git a/.gitignore b/.gitignore index 6c63fa9..fa2ccbc 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ cache .here *.pdf *.html +inst/pcens_model diff --git a/inst/pcens_model.stan b/inst/pcens_model.stan index 63a0da4..ca5a9d1 100644 --- a/inst/pcens_model.stan +++ b/inst/pcens_model.stan @@ -43,34 +43,23 @@ data { array[n_primary_params] real primary_prior_scale; // scale parameters for primary priors } -transformed data { - array[n_primary_params] real primary_params; - for (i in 1:n_primary_params) { - primary_params[i] = 0; // Initialize to 0, update in parameters block if needed - } -} - parameters { vector[n_params] params; // distribution parameters - vector[n_primary_params] primary_params_raw; // primary distribution parameters + vector[n_primary_params] primary_params; // primary distribution parameters } -transformed parameters { - if (n_primary_params > 0) { - for (i in 1:n_primary_params) { - primary_params[i] = primary_params_raw[i]; - } - } -} model { // Priors for (i in 1:n_params) { params[i] ~ normal(prior_location[i], prior_scale[i]); } - if (n_primary_params > 0) { + if (n_primary_params) { for (i in 1:n_primary_params) { - primary_params_raw[i] ~ normal(primary_prior_location[i], primary_prior_scale[i]); + primary_params[i] ~ normal( + primary_prior_location[i], + primary_prior_scale[i] + ); } } @@ -78,13 +67,13 @@ model { if (use_reduce_sum) { target += reduce_sum(partial_sum, y, 1, y_upper, n, pwindow, D, dist_id, params, - primary_dist_id, primary_params); + primary_dist_id, to_array_1d(primary_params)); } else { for (i in 1:N) { target += n[i] * primary_censored_dist_lpmf( y[i] | dist_id, to_array_1d(params), pwindow[i], y_upper[i], D[i], - primary_dist_id, primary_params + primary_dist_id, to_array_1d(primary_params) ); } } @@ -97,7 +86,7 @@ generated quantities { log_lik[i] = primary_censored_dist_lpmf( y[i] | dist_id, to_array_1d(params), pwindow[i], y_upper[i], D[i], - primary_dist_id, primary_params + primary_dist_id, to_array_1d(primary_params) ); } } From bc351eaec681a92596746c448d73a44795863d4e Mon Sep 17 00:00:00 2001 From: Sam Date: Sat, 14 Sep 2024 00:56:07 +0100 Subject: [PATCH 04/13] add data helper --- NAMESPACE | 1 + R/pcd-stan-tools.R | 48 ----------- R/pcd_cmdstan_model.R | 169 +++++++++++++++++++++++++++++++++++++ inst/pcens_model.stan | 24 +++--- man/fitdistdoublecens.Rd | 1 + man/pcd_as_cmdstan_data.Rd | 99 ++++++++++++++++++++++ man/pcd_cmdstan_model.Rd | 5 +- 7 files changed, 285 insertions(+), 62 deletions(-) create mode 100644 R/pcd_cmdstan_model.R create mode 100644 man/pcd_as_cmdstan_data.Rd diff --git a/NAMESPACE b/NAMESPACE index 4cff75d..c4a64e9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(dexpgrowth) export(dpcens) export(dprimarycensoreddist) export(fitdistdoublecens) +export(pcd_as_cmdstan_data) export(pcd_cmdstan_model) export(pcd_load_stan_functions) export(pcd_stan_files) diff --git a/R/pcd-stan-tools.R b/R/pcd-stan-tools.R index a682f80..996f10c 100644 --- a/R/pcd-stan-tools.R +++ b/R/pcd-stan-tools.R @@ -227,51 +227,3 @@ pcd_load_stan_functions <- function( return(result) } -#' Create a CmdStanModel with primarycensoreddist Stan functions -#' -#' This function creates a CmdStanModel object using the Stan model and -#' functions from primarycensoreddist and optionally includes additional -#' user-specified Stan files. -#' -#' @param include_paths Character vector of paths to include for Stan -#' compilation. Defaults to the result of `pcd_stan_path()`. -#' -#' @param ... Additional arguments passed to cmdstanr::cmdstan_model(). -#' -#' @return A CmdStanModel object. -#' -#' @details -#' The underlying Stan model (`pcens_model.stan`) supports various features: -#' - Multiple probability distributions for modeling delays -#' - Primary and secondary censoring -#' - Truncation -#' - Optional use of reduce_sum for improved performance (via -#' within chain parallelism). -#' - Flexible prior specifications -#' - Optional computation of log-likelihood for model comparison -#' -#' @export -#' @family modelhelpers -#' -#' @examples -#' \dontrun{ -#' model <- pcd_cmdstan_model() -#' fit <- model$sample(data = stan_data) -#' } -pcd_cmdstan_model <- function( - include_paths = primarycensoreddist::pcd_stan_path(), ...) { - if (!requireNamespace("cmdstanr", quietly = TRUE)) { - stop("Package 'cmdstanr' is required but not installed for this function.") - } - - pcd_stan_model <- system.file( - "pcens_model.stan", - package = "primarycensoreddist" - ) - - cmdstanr::cmdstan_model( - pcd_stan_model, - include_paths = include_paths, - ... - ) -} diff --git a/R/pcd_cmdstan_model.R b/R/pcd_cmdstan_model.R new file mode 100644 index 0000000..ca96392 --- /dev/null +++ b/R/pcd_cmdstan_model.R @@ -0,0 +1,169 @@ +#' Create a CmdStanModel with primarycensoreddist Stan functions +#' +#' This function creates a CmdStanModel object using the Stan model and +#' functions from primarycensoreddist and optionally includes additional +#' user-specified Stan files. +#' +#' @param include_paths Character vector of paths to include for Stan +#' compilation. Defaults to the result of `pcd_stan_path()`. +#' +#' @param ... Additional arguments passed to cmdstanr::cmdstan_model(). +#' +#' @return A CmdStanModel object. +#' +#' @details +#' The underlying Stan model (`pcens_model.stan`) supports various features: +#' - Multiple probability distributions for modeling delays +#' - Primary and secondary censoring +#' - Truncation +#' - Optional use of reduce_sum for improved performance (via +#' within chain parallelism). +#' - Flexible prior specifications +#' - Optional computation of log-likelihood for model comparison +#' +#' @export +#' @family modelhelpers +#' +#' @examples +#' \dontrun{ +#' model <- pcd_cmdstan_model() +#' fit <- model$sample(data = stan_data) +#' } +pcd_cmdstan_model <- function( + include_paths = primarycensoreddist::pcd_stan_path(), ...) { + if (!requireNamespace("cmdstanr", quietly = TRUE)) { + stop("Package 'cmdstanr' is required but not installed for this function.") + } + + pcd_stan_model <- system.file( + "pcens_model.stan", + package = "primarycensoreddist" + ) + + cmdstanr::cmdstan_model( + pcd_stan_model, + include_paths = include_paths, + ... + ) +} + +#' Prepare data for primarycensoreddist Stan model +#' +#' This function takes in delay data and prepares it for use with the +#' primarycensoreddist Stan model. +#' +#' @param data A data frame containing the delay data. +#' +#' @param delay Column name for observed delays (default: "delay") +#' +#' @param delay_upper Column name for upper bound of delays +#' (default: "delay_upper") +#' +#' @param n Column name for count of observations (default: "n") +#' +#' @param pwindow Column name for primary window (default: "pwindow") +#' +#' @param relative_obs_time Column name for relative observation time +#' (default: "relative_obs_time") +#' +#' @param dist_id Integer identifying the delay distribution: +#' 1 = Lognormal, 2 = Gamma, 3 = Weibull, 4 = Exponential, +#' 5 = Generalized Gamma, 6 = Negative Binomial, 7 = Poisson, +#' 8 = Bernoulli, 9 = Beta, 10 = Binomial, 11 = Categorical, 12 = Cauchy, +#' 13 = Chi-square, 14 = Dirichlet, 15 = Gumbel, 16 = Inverse Gamma, +#' 17 = Logistic +#' +#' @param primary_dist_id Integer identifying the primary distribution: +#' 1 = Uniform, 2 = Exponential growth +#' +#' @param param_bounds A list with elements `lower` and `upper`, each a numeric +#' vector specifying bounds for the delay distribution parameters. +#' +#' @param primary_param_bounds A list with elements `lower` and `upper`, each a +#' numeric vector specifying bounds for the primary distribution parameters. +#' +#' @param priors A list with elements `location` and `scale`, each a numeric +#' vector specifying priors for the delay distribution parameters. +#' +#' @param primary_priors A list with elements `location` and `scale`, each a +#' numeric vector specifying priors for the primary distribution parameters. +#' +#' @param compute_log_lik Logical; compute log likelihood? (default: FALSE) +#' +#' @param use_reduce_sum Logical; use reduce_sum for performance? +#' (default: FALSE) +#' +#' @return A list containing the data formatted for use with +#' [pcd_cmdstan_model()] +#' +#' @export +#' @family modelhelpers +#' +#' @examples +#' \dontrun{ +#' data <- data.frame( +#' delay = c(1, 2, 3), +#' delay_upper = c(2, 3, 4), +#' n = c(10, 20, 15), +#' pwindow = c(1, 1, 2), +#' relative_obs_time = c(10, 10, 10) +#' ) +#' stan_data <- pcd_as_cmdstan_data( +#' data, +#' dist_id = 1, +#' primary_dist_id = 1, +#' param_bounds = list(lower = c(0, 0), upper = c(10, 10)), +#' primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), +#' priors = list(location = c(1, 1), scale = c(1, 1)), +#' primary_priors = list(location = numeric(0), scale = numeric(0)) +#' ) +#' } +pcd_as_cmdstan_data <- function( + data, delay = "delay", delay_upper = "delay_upper", + n = "n", pwindow = "pwindow", + relative_obs_time = "relative_obs_time", + dist_id, primary_dist_id, + param_bounds, primary_param_bounds, + priors, primary_priors, + compute_log_lik = FALSE, + use_reduce_sum = FALSE) { + required_cols <- c(delay, delay_upper, n, pwindow, relative_obs_time) + missing_cols <- setdiff(required_cols, names(data)) + if (length(missing_cols) > 0) { + stop( + "Missing required columns: ", paste(missing_cols, collapse = ", "), "\n", + "Please ensure your data frame contains these columns or set the", + "corresponding arguments:\n", + "delay = '", delay, "'\n", + "delay_upper = '", delay_upper, "'\n", + "n = '", n, "'\n", + "pwindow = '", pwindow, "'\n", + "relative_obs_time = '", relative_obs_time, "'" + ) + } + + stan_data <- list( + N = nrow(data), + d = data[[delay]], + d_upper = data[[delay_upper]], + n = data[[n]], + pwindow = data[[pwindow]], + D = data[[relative_obs_time]], + dist_id = dist_id, + primary_dist_id = primary_dist_id, + n_params = length(param_bounds$lower), + n_primary_params = length(primary_param_bounds$lower), + compute_log_lik = as.integer(compute_log_lik), + use_reduce_sum = as.integer(use_reduce_sum), + param_lower_bounds = param_bounds$lower, + param_upper_bounds = param_bounds$upper, + primary_param_lower_bounds = primary_param_bounds$lower, + primary_param_upper_bounds = primary_param_bounds$upper, + prior_location = priors$location, + prior_scale = priors$scale, + primary_prior_location = primary_priors$location, + primary_prior_scale = primary_priors$scale + ) + + return(stan_data) +} diff --git a/inst/pcens_model.stan b/inst/pcens_model.stan index ca5a9d1..5f694fe 100644 --- a/inst/pcens_model.stan +++ b/inst/pcens_model.stan @@ -2,17 +2,17 @@ functions { #include primary_censored_dist.stan #include expgrowth.stan - real partial_sum(array[] int y_slice, + real partial_sum(array[] int d_slice, int start, int end, - array[] int y_upper, array[] int n, + array[] int d_upper, array[] int n, array[] int pwindow, array[] int D, int dist_id, vector params, int primary_dist_id, array[] real primary_params) { real partial_target = 0; for (i in start:end) { partial_target += n[i] * primary_censored_dist_lpmf( - y_slice[i] | dist_id, to_array_1d(params), - pwindow[i], y_upper[i], D[i], + d_slice[i] | dist_id, to_array_1d(params), + pwindow[i], d_upper[i], D[i], primary_dist_id, primary_params ); } @@ -22,8 +22,8 @@ functions { data { int N; // number of observations - array[N] int y; // observed delays - array[N] int y_upper; // observed delays upper bound + array[N] int d; // observed delays + array[N] int d_upper; // observed delays upper bound array[N] int n; // number of occurrences for each delay array[N] int pwindow; // primary censoring window array[N] int D; // maximum delay @@ -65,14 +65,14 @@ model { // Likelihood if (use_reduce_sum) { - target += reduce_sum(partial_sum, y, 1, - y_upper, n, pwindow, D, dist_id, params, + target += reduce_sum(partial_sum, d, 1, + d_upper, n, pwindow, D, dist_id, params, primary_dist_id, to_array_1d(primary_params)); } else { for (i in 1:N) { target += n[i] * primary_censored_dist_lpmf( - y[i] | dist_id, to_array_1d(params), - pwindow[i], y_upper[i], D[i], + d[i] | dist_id, to_array_1d(params), + pwindow[i], d_upper[i], D[i], primary_dist_id, to_array_1d(primary_params) ); } @@ -84,8 +84,8 @@ generated quantities { if (compute_log_lik) { for (i in 1:N) { log_lik[i] = primary_censored_dist_lpmf( - y[i] | dist_id, to_array_1d(params), - pwindow[i], y_upper[i], D[i], + d[i] | dist_id, to_array_1d(params), + pwindow[i], d_upper[i], D[i], primary_dist_id, to_array_1d(primary_params) ); } diff --git a/man/fitdistdoublecens.Rd b/man/fitdistdoublecens.Rd index 84cce41..3bd4191 100644 --- a/man/fitdistdoublecens.Rd +++ b/man/fitdistdoublecens.Rd @@ -86,6 +86,7 @@ summary(fit_norm) } \seealso{ Modelling wrappers for external fitting packages +\code{\link{pcd_as_cmdstan_data}()}, \code{\link{pcd_cmdstan_model}()} } \concept{modelhelpers} diff --git a/man/pcd_as_cmdstan_data.Rd b/man/pcd_as_cmdstan_data.Rd new file mode 100644 index 0000000..4d70e71 --- /dev/null +++ b/man/pcd_as_cmdstan_data.Rd @@ -0,0 +1,99 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pcd_cmdstan_model.R +\name{pcd_as_cmdstan_data} +\alias{pcd_as_cmdstan_data} +\title{Prepare data for primarycensoreddist Stan model} +\usage{ +pcd_as_cmdstan_data( + data, + delay = "delay", + delay_upper = "delay_upper", + n = "n", + pwindow = "pwindow", + relative_obs_time = "relative_obs_time", + dist_id, + primary_dist_id, + param_bounds, + primary_param_bounds, + priors, + primary_priors, + compute_log_lik = FALSE, + use_reduce_sum = FALSE +) +} +\arguments{ +\item{data}{A data frame containing the delay data.} + +\item{delay}{Column name for observed delays (default: "delay")} + +\item{delay_upper}{Column name for upper bound of delays +(default: "delay_upper")} + +\item{n}{Column name for count of observations (default: "n")} + +\item{pwindow}{Column name for primary window (default: "pwindow")} + +\item{relative_obs_time}{Column name for relative observation time +(default: "relative_obs_time")} + +\item{dist_id}{Integer identifying the delay distribution: +1 = Lognormal, 2 = Gamma, 3 = Weibull, 4 = Exponential, +5 = Generalized Gamma, 6 = Negative Binomial, 7 = Poisson, +8 = Bernoulli, 9 = Beta, 10 = Binomial, 11 = Categorical, 12 = Cauchy, +13 = Chi-square, 14 = Dirichlet, 15 = Gumbel, 16 = Inverse Gamma, +17 = Logistic} + +\item{primary_dist_id}{Integer identifying the primary distribution: +1 = Uniform, 2 = Exponential growth} + +\item{param_bounds}{A list with elements \code{lower} and \code{upper}, each a numeric +vector specifying bounds for the delay distribution parameters.} + +\item{primary_param_bounds}{A list with elements \code{lower} and \code{upper}, each a +numeric vector specifying bounds for the primary distribution parameters.} + +\item{priors}{A list with elements \code{location} and \code{scale}, each a numeric +vector specifying priors for the delay distribution parameters.} + +\item{primary_priors}{A list with elements \code{location} and \code{scale}, each a +numeric vector specifying priors for the primary distribution parameters.} + +\item{compute_log_lik}{Logical; compute log likelihood? (default: FALSE)} + +\item{use_reduce_sum}{Logical; use reduce_sum for performance? +(default: FALSE)} +} +\value{ +A list containing the data formatted for use with +\code{\link[=pcd_cmdstan_model]{pcd_cmdstan_model()}} +} +\description{ +This function takes in delay data and prepares it for use with the +primarycensoreddist Stan model. +} +\examples{ +\dontrun{ +data <- data.frame( + delay = c(1, 2, 3), + delay_upper = c(2, 3, 4), + n = c(10, 20, 15), + pwindow = c(1, 1, 2), + relative_obs_time = c(10, 10, 10) +) +stan_data <- pcd_as_cmdstan_data( + data, + dist_id = 1, + primary_dist_id = 1, + param_bounds = list(lower = c(0, 0), upper = c(10, 10)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(1, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)) +) +} +} +\seealso{ +Modelling wrappers for external fitting packages +\code{\link{fitdistdoublecens}()}, +\code{\link{pcd_cmdstan_model}()} +} +\concept{modelhelpers} diff --git a/man/pcd_cmdstan_model.Rd b/man/pcd_cmdstan_model.Rd index 41ad79b..c77aa57 100644 --- a/man/pcd_cmdstan_model.Rd +++ b/man/pcd_cmdstan_model.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pcd-stan-tools.R +% Please edit documentation in R/pcd_cmdstan_model.R \name{pcd_cmdstan_model} \alias{pcd_cmdstan_model} \title{Create a CmdStanModel with primarycensoreddist Stan functions} @@ -40,6 +40,7 @@ fit <- model$sample(data = stan_data) } \seealso{ Modelling wrappers for external fitting packages -\code{\link{fitdistdoublecens}()} +\code{\link{fitdistdoublecens}()}, +\code{\link{pcd_as_cmdstan_data}()} } \concept{modelhelpers} From 041568e7865d11796ceeeea2ba6811950a401a03 Mon Sep 17 00:00:00 2001 From: Sam Date: Sat, 14 Sep 2024 01:21:29 +0100 Subject: [PATCH 05/13] add model to cmdstan vignette --- R/pcd_cmdstan_model.R | 4 +-- vignettes/fitting-dists-with-stan.Rmd | 41 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/R/pcd_cmdstan_model.R b/R/pcd_cmdstan_model.R index ca96392..7245f33 100644 --- a/R/pcd_cmdstan_model.R +++ b/R/pcd_cmdstan_model.R @@ -131,9 +131,9 @@ pcd_as_cmdstan_data <- function( missing_cols <- setdiff(required_cols, names(data)) if (length(missing_cols) > 0) { stop( - "Missing required columns: ", paste(missing_cols, collapse = ", "), "\n", + "Missing required columns: ", toString(missing_cols), "\n", "Please ensure your data frame contains these columns or set the", - "corresponding arguments:\n", + " corresponding arguments:\n", "delay = '", delay, "'\n", "delay_upper = '", delay_upper, "'\n", "n = '", n, "'\n", diff --git a/vignettes/fitting-dists-with-stan.Rmd b/vignettes/fitting-dists-with-stan.Rmd index eeea97b..80c7dac 100644 --- a/vignettes/fitting-dists-with-stan.Rmd +++ b/vignettes/fitting-dists-with-stan.Rmd @@ -272,3 +272,44 @@ primarycensoreddist_fit ``` We see that the model has converged and the diagnostics look good. We also see that the posterior means are very near the true parameters and the 90% credible intervals include the true parameters. + + +# Using `pcd_cmdstan_model()` for a more efficient approach + +While the previous approach works well, `primarycensoreddist` provides a more efficient and convenient model which we can compile using `pcd_cmdstan_model()`. This approach not only saves time in model specification but also leverages within chain parallelisation to make best use of your machine's resources. Alongside this we also supply a convenience function `pcd_as_cmdstan_data()` to convert our data into a format that can be used to fit the model and supply priors, bounds, and other settings. + +Let's use this function to fit our data: + +```{r pcd-cmdstan-model, message = FALSE} +# Compile the model with multithreading support +pcd_model <- pcd_cmdstan_model(cpp_options = list(stan_threads = TRUE)) + +pcd_data <- pcd_as_cmdstan_data( + delay_counts, + delay = "observed_delay", + delay_upper = "observed_delay_upper", + relative_obs_time = "obs_time", + dist_id = 1, # Lognormal distribution + primary_dist_id = 1, # Uniform primary distribution + param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(1, 0.5), scale = c(1, 0.5)), + primary_priors = list(location = numeric(0), scale = numeric(0)), + use_reduce_sum = TRUE # use within chain parallelisation +) + +pcd_fit <- pcd_model$sample( + data = pcd_data, + chains = 4, + parallel_chains = 2, # Run 2 chains in parallel + threads_per_chain = 2, # Use 2 cores per chain + refresh = ifelse(interactive(), 50, 0), + show_messages = interactive() +) + +pcd_fit +``` + +In this model we have a generic `params` vector that contains the parameters for the delay distribution. In this case these are `mu` and `sigma` from the last example. We also have a `primary_params` vector that contains the parameters for the primary distribution. In this case this is empty as we are using a uniform distribution. + +We see again that the model has converged and the diagnostics look good. We also see that the posterior means are very near the true parameters and the 90% credible intervals include the true parameters as with the manually written model. **Note that we have set `parallel_chains = 2` and `threads_per_chain = 2` to demonstrate within chain parallelisation. Usually however you would want to use `parallel_chains = ` and then use the remainder of the available cores for within chain parallelisation by setting `threads_per_chain = ` to ensure that you make best use of your machine's resources.** From 709ae00fd068eb579cca56f7b9d63d1599df5dbb Mon Sep 17 00:00:00 2001 From: Sam Date: Sat, 14 Sep 2024 01:38:33 +0100 Subject: [PATCH 06/13] check reduce_sum usage --- inst/pcens_model.stan | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/inst/pcens_model.stan b/inst/pcens_model.stan index 5f694fe..49f71fe 100644 --- a/inst/pcens_model.stan +++ b/inst/pcens_model.stan @@ -2,16 +2,15 @@ functions { #include primary_censored_dist.stan #include expgrowth.stan - real partial_sum(array[] int d_slice, - int start, int end, - array[] int d_upper, array[] int n, + real partial_sum(array[] int dummy, int start, int end, + array[] int d, array[] int d_upper, array[] int n, array[] int pwindow, array[] int D, - int dist_id, vector params, + int dist_id, array[] real params, int primary_dist_id, array[] real primary_params) { real partial_target = 0; for (i in start:end) { partial_target += n[i] * primary_censored_dist_lpmf( - d_slice[i] | dist_id, to_array_1d(params), + d[i] | dist_id, params, pwindow[i], d_upper[i], D[i], primary_dist_id, primary_params ); @@ -43,6 +42,10 @@ data { array[n_primary_params] real primary_prior_scale; // scale parameters for primary priors } +transformed data { + array[N] int indexes = linspaced_int_array(N, 1, N); +} + parameters { vector[n_params] params; // distribution parameters vector[n_primary_params] primary_params; // primary distribution parameters @@ -65,8 +68,8 @@ model { // Likelihood if (use_reduce_sum) { - target += reduce_sum(partial_sum, d, 1, - d_upper, n, pwindow, D, dist_id, params, + target += reduce_sum(partial_sum, indexes, 1, d, + d_upper, n, pwindow, D, dist_id, to_array_1d(params), primary_dist_id, to_array_1d(primary_params)); } else { for (i in 1:N) { From 3043b3f7dbf8f81165cf29465fde1175c0bfb8c5 Mon Sep 17 00:00:00 2001 From: Sam Date: Sat, 14 Sep 2024 01:48:10 +0100 Subject: [PATCH 07/13] update spelling --- inst/WORDLIST | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/inst/WORDLIST b/inst/WORDLIST index cce1da4..6243ff7 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,17 +1,22 @@ CMD Charniga CmdStan +CmdStanModel Codecov +Dirichlet Github +Gumbel Inf Lifecycle PMF PMFs +Poisson SamuelBrand athowes cdot cens cmdstan +cmdstanr dp dprimary dprimarycensoreddist @@ -25,6 +30,7 @@ leq lfloor modelhelpers modeling +obs pcd pdist pprimarycensoreddist From 239933e0b85b7ba222e2b2ca8ab05f6bb479b858 Mon Sep 17 00:00:00 2001 From: Sam Date: Sat, 14 Sep 2024 01:49:54 +0100 Subject: [PATCH 08/13] update news --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 76dfcb4..09e1ec3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,9 @@ * Added a new function `fitdistdoublecens()` to allow for fitting of double censored and truncated data using the `fitdistrplus` package. * Added low level tests for the Stan `primary_censored_ode` function. * Rephrased the stan code to use a ODE solver rather than a numerical integration method. This allows for much faster and more stable computation of the likelihood +* Added a `CmdStan` model for fitting distributions using the `cmdstanr` package. +* Added helpers functions for working with the new `CmdStan` model and added an example to the vignette. +* Added parameter recovery tests for the new `CmdStan` model which tests the `primary_censored_dist_lpmf` function when used with NUTS based fitting. # primarycensoreddist 0.3.0 From 9d282113d45bf755fe1a0d9b5ee88582383edae3 Mon Sep 17 00:00:00 2001 From: Sam Date: Mon, 16 Sep 2024 11:41:58 +0100 Subject: [PATCH 09/13] update build ignore to help find the function --- .Rbuildignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.Rbuildignore b/.Rbuildignore index 37d7371..c8dc64f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -24,4 +24,4 @@ ^CITATION\.cff$ ^vignettes/using-stan-tools\.Rmd$ ^vignettes/fitting-dists-with-stan\.Rmd$ -^inst/pcens_model +^inst/pcens_model$ From c55dfb55acf410b659992587d42fbf74fa6db552 Mon Sep 17 00:00:00 2001 From: Sam Date: Mon, 16 Sep 2024 11:57:57 +0100 Subject: [PATCH 10/13] linting --- tests/testthat/test-pcd_as_cmdstan_data.R | 129 ++++++++++ tests/testthat/test-pcd_cmdstan_model.R | 300 ++++++++++++++++++++++ 2 files changed, 429 insertions(+) create mode 100644 tests/testthat/test-pcd_as_cmdstan_data.R create mode 100644 tests/testthat/test-pcd_cmdstan_model.R diff --git a/tests/testthat/test-pcd_as_cmdstan_data.R b/tests/testthat/test-pcd_as_cmdstan_data.R new file mode 100644 index 0000000..fe84f66 --- /dev/null +++ b/tests/testthat/test-pcd_as_cmdstan_data.R @@ -0,0 +1,129 @@ +test_that("pcd_as_cmdstan_data correctly formats data", { + data <- data.frame( + delay = c(1, 2, 3), + delay_upper = c(2, 3, 4), + n = c(10, 20, 15), + pwindow = c(1, 1, 2), + relative_obs_time = c(10, 10, 10) + ) + + dist_id <- 1 + primary_dist_id <- 1 + param_bounds <- list(lower = c(0, 0), upper = c(10, 10)) + primary_param_bounds <- list(lower = numeric(0), upper = numeric(0)) + priors <- list(location = c(1, 1), scale = c(1, 1)) + primary_priors <- list(location = numeric(0), scale = numeric(0)) + + result <- pcd_as_cmdstan_data( + data, + dist_id = dist_id, + primary_dist_id = primary_dist_id, + param_bounds = param_bounds, + primary_param_bounds = primary_param_bounds, + priors = priors, + primary_priors = primary_priors + ) + + expect_type(result, "list") + expect_identical(result$N, nrow(data)) + expect_identical(result$d, data$delay) + expect_identical(result$d_upper, data$delay_upper) + expect_identical(result$n, data$n) + expect_identical(result$pwindow, data$pwindow) + expect_identical(result$D, data$relative_obs_time) + expect_identical(result$dist_id, dist_id) + expect_identical(result$primary_dist_id, primary_dist_id) + expect_identical(result$n_params, length(param_bounds$lower)) + expect_identical(result$n_primary_params, length(primary_param_bounds$lower)) + expect_identical(result$compute_log_lik, 0L) + expect_identical(result$use_reduce_sum, 0L) + expect_identical(result$param_lower_bounds, param_bounds$lower) + expect_identical(result$param_upper_bounds, param_bounds$upper) + expect_identical( + result$primary_param_lower_bounds, primary_param_bounds$lower + ) + expect_identical( + result$primary_param_upper_bounds, primary_param_bounds$upper + ) + expect_identical(result$prior_location, priors$location) + expect_identical(result$prior_scale, priors$scale) + expect_identical(result$primary_prior_location, primary_priors$location) + expect_identical(result$primary_prior_scale, primary_priors$scale) +}) + +test_that("pcd_as_cmdstan_data handles missing columns correctly", { + data <- data.frame( + delay = c(1, 2, 3), + n = c(10, 20, 15), + pwindow = c(1, 1, 2) + ) + + expect_error( + pcd_as_cmdstan_data( + data, + dist_id = 1, + primary_dist_id = 1, + param_bounds = list(lower = c(0, 0), upper = c(10, 10)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(1, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)) + ), + "Missing required columns: delay_upper, relative_obs_time" + ) +}) + +test_that("pcd_as_cmdstan_data handles optional parameters correctly", { + data <- data.frame( + delay = c(1, 2, 3), + delay_upper = c(2, 3, 4), + n = c(10, 20, 15), + pwindow = c(1, 1, 2), + relative_obs_time = c(10, 10, 10) + ) + + result <- pcd_as_cmdstan_data( + data, + dist_id = 1, + primary_dist_id = 1, + param_bounds = list(lower = c(0, 0), upper = c(10, 10)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(1, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)), + compute_log_lik = TRUE, + use_reduce_sum = TRUE + ) + + expect_identical(result$compute_log_lik, 1L) + expect_identical(result$use_reduce_sum, 1L) +}) + +test_that("pcd_as_cmdstan_data handles custom column names correctly", { + data <- data.frame( + obs_delay = c(1, 2, 3), + obs_delay_upper = c(2, 3, 4), + count = c(10, 20, 15), + primary_window = c(1, 1, 2), + obs_time = c(10, 10, 10) + ) + + result <- pcd_as_cmdstan_data( + data, + delay = "obs_delay", + delay_upper = "obs_delay_upper", + n = "count", + pwindow = "primary_window", + relative_obs_time = "obs_time", + dist_id = 1, + primary_dist_id = 1, + param_bounds = list(lower = c(0, 0), upper = c(10, 10)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(1, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)) + ) + + expect_identical(result$d, data$obs_delay) + expect_identical(result$d_upper, data$obs_delay_upper) + expect_identical(result$n, data$count) + expect_identical(result$pwindow, data$primary_window) + expect_identical(result$D, data$obs_time) +}) diff --git a/tests/testthat/test-pcd_cmdstan_model.R b/tests/testthat/test-pcd_cmdstan_model.R new file mode 100644 index 0000000..c98abcb --- /dev/null +++ b/tests/testthat/test-pcd_cmdstan_model.R @@ -0,0 +1,300 @@ +skip_on_cran() +if (on_ci()) { + skip_on_os("windows") + skip_on_os("mac") +} +skip_if_not_installed("cmdstanr") + +test_that("pcd_cmdstan_model creates a valid CmdStanModel object", { + model <- pcd_cmdstan_model() + expect_s3_class(model, "CmdStanModel") +}) + +test_that("pcd_cmdstan_model handles custom include paths", { + custom_path <- tempdir() + model <- pcd_cmdstan_model(include_paths = custom_path) + expect_true(custom_path %in% model$include_paths()) +}) + +test_that("pcd_cmdstan_model recovers true values for simple lognormal data", { + # Simulate data + set.seed(123) + n <- 2000 + true_meanlog <- 1.5 + true_sdlog <- 0.5 + + simulated_delays <- rprimarycensoreddist( + n = n, + rdist = rlnorm, + meanlog = true_meanlog, + sdlog = true_sdlog, + pwindow = 1, + D = 10 + ) + + simulated_data <- data.frame( + delay = simulated_delays, + delay_upper = simulated_delays + 1, + n = 1, + pwindow = 1, + relative_obs_time = 10 + ) + + # Prepare data for Stan + stan_data <- pcd_as_cmdstan_data( + simulated_data, + dist_id = 1, # Lognormal + primary_dist_id = 1, # Uniform + param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(0, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)) + ) + + # Fit model + model <- pcd_cmdstan_model() + fit <- model$sample( + data = stan_data, + seed = 123, + chains = 4, + parallel_chains = 4, + refresh = 0 + ) + + # Extract posterior + posterior <- fit$draws(c("params[1]", "params[2]"), format = "df") + + # Check mean estimates + expect_equal(mean(posterior$`params[1]`), true_meanlog, tolerance = 0.1) + expect_equal(mean(posterior$`params[2]`), true_sdlog, tolerance = 0.1) + + # Check credible intervals + ci_meanlog <- quantile(posterior$`params[1]`, c(0.05, 0.95)) + ci_sdlog <- quantile(posterior$`params[2]`, c(0.05, 0.95)) + + expect_gt(true_meanlog, ci_meanlog[1]) + expect_lt(true_meanlog, ci_meanlog[2]) + expect_gt(true_sdlog, ci_sdlog[1]) + expect_lt(true_sdlog, ci_sdlog[2]) +}) + +test_that( + "pcd_cmdstan_model recovers true values for complex gamma data with + censoring", + { + # Simulate data + set.seed(456) + n <- 2000 + true_shape <- 2 + true_rate <- 0.5 + + simulated_delays <- rprimarycensoreddist( + n = n, + rdist = rgamma, + shape = true_shape, + rate = true_rate, + pwindow = 3, + D = 8, + rprimary = rexpgrowth, + rprimary_args = list(r = 0.1) + ) + + simulated_data <- data.frame( + delay = simulated_delays, + delay_upper = simulated_delays + 1, + n = 1, + pwindow = 3, + relative_obs_time = 8 + ) + + # Prepare data for Stan + stan_data <- pcd_as_cmdstan_data( + simulated_data, + dist_id = 2, # Gamma + primary_dist_id = 2, # Exponential growth + param_bounds = list(lower = c(0, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = 0, upper = Inf), + priors = list(location = c(1, 1), scale = c(1, 1)), + primary_priors = list(location = 0.1, scale = 0.1) + ) + + # Fit model + model <- pcd_cmdstan_model() + fit <- model$sample( + data = stan_data, + seed = 456, + chains = 4, + parallel_chains = 4, + refresh = 0 + ) + + # Extract posterior + posterior <- fit$draws( + c("params[1]", "params[2]", "primary_params[1]"), + format = "df" + ) + + # Check mean estimates + expect_equal(mean(posterior$`params[1]`), true_shape, tolerance = 0.2) + expect_equal(mean(posterior$`params[2]`), true_rate, tolerance = 0.1) + expect_equal(mean(posterior$`primary_params[1]`), 0.1, tolerance = 0.05) + + # Check credible intervals + ci_shape <- quantile(posterior$`params[1]`, c(0.05, 0.95)) + ci_rate <- quantile(posterior$`params[2]`, c(0.05, 0.95)) + ci_growth <- quantile(posterior$`primary_params[1]`, c(0.05, 0.95)) + + expect_gt(true_shape, ci_shape[1]) + expect_lt(true_shape, ci_shape[2]) + expect_gt(true_rate, ci_rate[1]) + expect_lt(true_rate, ci_rate[2]) + expect_gt(0.1, ci_growth[1]) + expect_lt(0.1, ci_growth[2]) + } +) + +test_that("pcd_cmdstan_model works with and without within-chain parallelization", { + # Simulate simple data + set.seed(789) + n <- 2000 + simulated_delays <- rprimarycensoreddist( + n = n, + rdist = rlnorm, + meanlog = 1, + sdlog = 0.5, + pwindow = 1, + D = 8 + ) + + simulated_data <- data.frame( + delay = simulated_delays, + delay_upper = simulated_delays + 1, + n = 1, + pwindow = 1, + relative_obs_time = 8 + ) + + # Prepare data for Stan + stan_data <- pcd_as_cmdstan_data( + simulated_data, + dist_id = 1, # Lognormal + primary_dist_id = 1, # Uniform + param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(0, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)), + use_reduce_sum = FALSE + ) + + # Fit model without within-chain parallelization + model_no_parallel <- pcd_cmdstan_model() + fit_no_parallel <- model_no_parallel$sample( + data = stan_data, + seed = 789, + chains = 2, + parallel_chains = 2, + refresh = 0 + ) + + # Prepare data for Stan with within-chain parallelization + stan_data_parallel <- pcd_as_cmdstan_data( + simulated_data, + dist_id = 1, # Lognormal + primary_dist_id = 1, # Uniform + param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(0, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)), + use_reduce_sum = TRUE + ) + + # Fit model with within-chain parallelization + model_parallel <- pcd_cmdstan_model(cpp_options = list(stan_threads = TRUE)) + fit_parallel <- model_parallel$sample( + data = stan_data_parallel, + seed = 789, + chains = 2, + parallel_chains = 2, + threads_per_chain = 2, + refresh = 0 + ) + + # Check that both fits produce similar results + summary_no_parallel <- fit_no_parallel$summary() + summary_parallel <- fit_parallel$summary() + + expect_equal(summary_no_parallel$mean, summary_parallel$mean, tolerance = 0.1) + expect_equal(summary_no_parallel$sd, summary_parallel$sd, tolerance = 0.1) +}) + + +test_that("pcd_cmdstan_model recovers parameters with swindow and pwindow of 2", { + # Set seed for reproducibility + set.seed(123) + + # Generate simulated data with swindow and pwindow of 2 + n_obs <- 1000 + true_meanlog <- 1.5 + true_sdlog <- 0.5 + pwindow <- 2 + swindow <- 2 + D <- 30 + + simulated_data <- data.frame( + delay = rprimarycensoreddist( + n = n_obs, + rdist = rlnorm, + meanlog = true_meanlog, + sdlog = true_sdlog, + pwindow = pwindow, + swindow = swindow, + D = D + ), + n = 1 + ) + simulated_data$delay_upper <- simulated_data$delay + swindow + simulated_data$pwindow <- pwindow + simulated_data$relative_obs_time <- D + + # Prepare data for Stan + stan_data <- pcd_as_cmdstan_data( + simulated_data, + dist_id = 1, # Lognormal + primary_dist_id = 1, # Uniform + param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(0, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)) + ) + + # Fit model + model <- pcd_cmdstan_model() + fit <- model$sample( + data = stan_data, + seed = 456, + chains = 4, + parallel_chains = 4, + refresh = 0 + ) + + # Extract posterior summaries + summary <- fit$summary() + + # Check if true parameters are within 95% credible intervals + expect_lt(summary$q2.5[summary$variable == "params[1]"], true_meanlog) + expect_gt(summary$q97.5[summary$variable == "params[1]"], true_meanlog) + expect_lt(summary$q2.5[summary$variable == "params[2]"], true_sdlog) + expect_gt(summary$q97.5[summary$variable == "params[2]"], true_sdlog) + + # Check if posterior means are close to true values + expect_equal( + summary$mean[summary$variable == "params[1]"], + true_meanlog, + tolerance = 0.1 + ) + expect_equal( + summary$mean[summary$variable == "params[2]"], + true_sdlog, + tolerance = 0.1 + ) +}) From 0187fbbb91cc154e800b5c3dc4f9c009e5194569 Mon Sep 17 00:00:00 2001 From: Sam Date: Mon, 16 Sep 2024 14:06:46 +0100 Subject: [PATCH 11/13] resolve linting --- tests/testthat/test-pcd_cmdstan_model.R | 267 ++++++++++++------------ 1 file changed, 138 insertions(+), 129 deletions(-) diff --git a/tests/testthat/test-pcd_cmdstan_model.R b/tests/testthat/test-pcd_cmdstan_model.R index c98abcb..93d967d 100644 --- a/tests/testthat/test-pcd_cmdstan_model.R +++ b/tests/testthat/test-pcd_cmdstan_model.R @@ -153,148 +153,157 @@ test_that( } ) -test_that("pcd_cmdstan_model works with and without within-chain parallelization", { - # Simulate simple data - set.seed(789) - n <- 2000 - simulated_delays <- rprimarycensoreddist( - n = n, - rdist = rlnorm, - meanlog = 1, - sdlog = 0.5, - pwindow = 1, - D = 8 - ) +test_that( + "pcd_cmdstan_model works with and without within-chain parallelization", + { + # Simulate simple data + set.seed(789) + n <- 2000 + simulated_delays <- rprimarycensoreddist( + n = n, + rdist = rlnorm, + meanlog = 1, + sdlog = 0.5, + pwindow = 1, + D = 8 + ) - simulated_data <- data.frame( - delay = simulated_delays, - delay_upper = simulated_delays + 1, - n = 1, - pwindow = 1, - relative_obs_time = 8 - ) + simulated_data <- data.frame( + delay = simulated_delays, + delay_upper = simulated_delays + 1, + n = 1, + pwindow = 1, + relative_obs_time = 8 + ) - # Prepare data for Stan - stan_data <- pcd_as_cmdstan_data( - simulated_data, - dist_id = 1, # Lognormal - primary_dist_id = 1, # Uniform - param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), - primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), - priors = list(location = c(0, 1), scale = c(1, 1)), - primary_priors = list(location = numeric(0), scale = numeric(0)), - use_reduce_sum = FALSE - ) + # Prepare data for Stan + stan_data <- pcd_as_cmdstan_data( + simulated_data, + dist_id = 1, # Lognormal + primary_dist_id = 1, # Uniform + param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(0, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)), + use_reduce_sum = FALSE + ) - # Fit model without within-chain parallelization - model_no_parallel <- pcd_cmdstan_model() - fit_no_parallel <- model_no_parallel$sample( - data = stan_data, - seed = 789, - chains = 2, - parallel_chains = 2, - refresh = 0 - ) + # Fit model without within-chain parallelization + model_no_parallel <- pcd_cmdstan_model() + fit_no_parallel <- model_no_parallel$sample( + data = stan_data, + seed = 789, + chains = 2, + parallel_chains = 2, + refresh = 0 + ) - # Prepare data for Stan with within-chain parallelization - stan_data_parallel <- pcd_as_cmdstan_data( - simulated_data, - dist_id = 1, # Lognormal - primary_dist_id = 1, # Uniform - param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), - primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), - priors = list(location = c(0, 1), scale = c(1, 1)), - primary_priors = list(location = numeric(0), scale = numeric(0)), - use_reduce_sum = TRUE - ) + # Prepare data for Stan with within-chain parallelization + stan_data_parallel <- pcd_as_cmdstan_data( + simulated_data, + dist_id = 1, # Lognormal + primary_dist_id = 1, # Uniform + param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(0, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)), + use_reduce_sum = TRUE + ) - # Fit model with within-chain parallelization - model_parallel <- pcd_cmdstan_model(cpp_options = list(stan_threads = TRUE)) - fit_parallel <- model_parallel$sample( - data = stan_data_parallel, - seed = 789, - chains = 2, - parallel_chains = 2, - threads_per_chain = 2, - refresh = 0 - ) + # Fit model with within-chain parallelization + model_parallel <- pcd_cmdstan_model(cpp_options = list(stan_threads = TRUE)) + fit_parallel <- model_parallel$sample( + data = stan_data_parallel, + seed = 789, + chains = 2, + parallel_chains = 2, + threads_per_chain = 2, + refresh = 0 + ) - # Check that both fits produce similar results - summary_no_parallel <- fit_no_parallel$summary() - summary_parallel <- fit_parallel$summary() + # Check that both fits produce similar results + summary_no_parallel <- fit_no_parallel$summary() + summary_parallel <- fit_parallel$summary() - expect_equal(summary_no_parallel$mean, summary_parallel$mean, tolerance = 0.1) - expect_equal(summary_no_parallel$sd, summary_parallel$sd, tolerance = 0.1) -}) + expect_equal( + summary_no_parallel$mean, summary_parallel$mean, + tolerance = 0.1 + ) + expect_equal(summary_no_parallel$sd, summary_parallel$sd, tolerance = 0.1) + } +) -test_that("pcd_cmdstan_model recovers parameters with swindow and pwindow of 2", { - # Set seed for reproducibility - set.seed(123) +test_that( + "pcd_cmdstan_model recovers parameters with swindow and pwindow of 2", + { + # Set seed for reproducibility + set.seed(123) - # Generate simulated data with swindow and pwindow of 2 - n_obs <- 1000 - true_meanlog <- 1.5 - true_sdlog <- 0.5 - pwindow <- 2 - swindow <- 2 - D <- 30 + # Generate simulated data with swindow and pwindow of 2 + n_obs <- 1000 + true_meanlog <- 1.5 + true_sdlog <- 0.5 + pwindow <- 2 + swindow <- 2 + D <- 30 - simulated_data <- data.frame( - delay = rprimarycensoreddist( - n = n_obs, - rdist = rlnorm, - meanlog = true_meanlog, - sdlog = true_sdlog, - pwindow = pwindow, - swindow = swindow, - D = D - ), - n = 1 - ) - simulated_data$delay_upper <- simulated_data$delay + swindow - simulated_data$pwindow <- pwindow - simulated_data$relative_obs_time <- D + simulated_data <- data.frame( + delay = rprimarycensoreddist( + n = n_obs, + rdist = rlnorm, + meanlog = true_meanlog, + sdlog = true_sdlog, + pwindow = pwindow, + swindow = swindow, + D = D + ), + n = 1 + ) + simulated_data$delay_upper <- simulated_data$delay + swindow + simulated_data$pwindow <- pwindow + simulated_data$relative_obs_time <- D - # Prepare data for Stan - stan_data <- pcd_as_cmdstan_data( - simulated_data, - dist_id = 1, # Lognormal - primary_dist_id = 1, # Uniform - param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), - primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), - priors = list(location = c(0, 1), scale = c(1, 1)), - primary_priors = list(location = numeric(0), scale = numeric(0)) - ) + # Prepare data for Stan + stan_data <- pcd_as_cmdstan_data( + simulated_data, + dist_id = 1, # Lognormal + primary_dist_id = 1, # Uniform + param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), + primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), + priors = list(location = c(0, 1), scale = c(1, 1)), + primary_priors = list(location = numeric(0), scale = numeric(0)) + ) - # Fit model - model <- pcd_cmdstan_model() - fit <- model$sample( - data = stan_data, - seed = 456, - chains = 4, - parallel_chains = 4, - refresh = 0 - ) + # Fit model + model <- pcd_cmdstan_model() + fit <- model$sample( + data = stan_data, + seed = 456, + chains = 4, + parallel_chains = 4, + refresh = 0 + ) - # Extract posterior summaries - summary <- fit$summary() + # Extract posterior summaries + summary <- fit$summary() - # Check if true parameters are within 95% credible intervals - expect_lt(summary$q2.5[summary$variable == "params[1]"], true_meanlog) - expect_gt(summary$q97.5[summary$variable == "params[1]"], true_meanlog) - expect_lt(summary$q2.5[summary$variable == "params[2]"], true_sdlog) - expect_gt(summary$q97.5[summary$variable == "params[2]"], true_sdlog) + # Check if true parameters are within 95% credible intervals + expect_lt(summary$q2.5[summary$variable == "params[1]"], true_meanlog) + expect_gt(summary$q97.5[summary$variable == "params[1]"], true_meanlog) + expect_lt(summary$q2.5[summary$variable == "params[2]"], true_sdlog) + expect_gt(summary$q97.5[summary$variable == "params[2]"], true_sdlog) - # Check if posterior means are close to true values - expect_equal( - summary$mean[summary$variable == "params[1]"], - true_meanlog, - tolerance = 0.1 - ) - expect_equal( - summary$mean[summary$variable == "params[2]"], - true_sdlog, - tolerance = 0.1 - ) -}) + # Check if posterior means are close to true values + expect_equal( + summary$mean[summary$variable == "params[1]"], + true_meanlog, + tolerance = 0.1 + ) + expect_equal( + summary$mean[summary$variable == "params[2]"], + true_sdlog, + tolerance = 0.1 + ) + } +) From 6ec73b0f4502ea6b1b97be4f4dab6137d1dec61a Mon Sep 17 00:00:00 2001 From: Sam Date: Mon, 16 Sep 2024 15:51:14 +0100 Subject: [PATCH 12/13] check integration testing --- tests/testthat/test-pcd_cmdstan_model.R | 122 ++++++++++++++---------- 1 file changed, 69 insertions(+), 53 deletions(-) diff --git a/tests/testthat/test-pcd_cmdstan_model.R b/tests/testthat/test-pcd_cmdstan_model.R index 93d967d..e918fa3 100644 --- a/tests/testthat/test-pcd_cmdstan_model.R +++ b/tests/testthat/test-pcd_cmdstan_model.R @@ -4,6 +4,7 @@ if (on_ci()) { skip_on_os("mac") } skip_if_not_installed("cmdstanr") +skip_if_not_installed("dplyr") test_that("pcd_cmdstan_model creates a valid CmdStanModel object", { model <- pcd_cmdstan_model() @@ -12,7 +13,10 @@ test_that("pcd_cmdstan_model creates a valid CmdStanModel object", { test_that("pcd_cmdstan_model handles custom include paths", { custom_path <- tempdir() - model <- pcd_cmdstan_model(include_paths = custom_path) + model <- pcd_cmdstan_model( + include_paths = c(custom_path, pcd_stan_path()), + force_recompile = TRUE + ) expect_true(custom_path %in% model$include_paths()) }) @@ -35,14 +39,19 @@ test_that("pcd_cmdstan_model recovers true values for simple lognormal data", { simulated_data <- data.frame( delay = simulated_delays, delay_upper = simulated_delays + 1, - n = 1, pwindow = 1, relative_obs_time = 10 ) + delay_counts <- simulated_data |> + dplyr::summarise( + n = dplyr::n(), + .by = c(pwindow, relative_obs_time, delay, delay_upper) + ) + # Prepare data for Stan stan_data <- pcd_as_cmdstan_data( - simulated_data, + delay_counts, dist_id = 1, # Lognormal primary_dist_id = 1, # Uniform param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), @@ -58,15 +67,17 @@ test_that("pcd_cmdstan_model recovers true values for simple lognormal data", { seed = 123, chains = 4, parallel_chains = 4, - refresh = 0 + refresh = 0, + show_messages = FALSE, + iter_warmup = 500 ) # Extract posterior posterior <- fit$draws(c("params[1]", "params[2]"), format = "df") # Check mean estimates - expect_equal(mean(posterior$`params[1]`), true_meanlog, tolerance = 0.1) - expect_equal(mean(posterior$`params[2]`), true_sdlog, tolerance = 0.1) + expect_equal(mean(posterior$`params[1]`), true_meanlog, tolerance = 0.05) + expect_equal(mean(posterior$`params[2]`), true_sdlog, tolerance = 0.05) # Check credible intervals ci_meanlog <- quantile(posterior$`params[1]`, c(0.05, 0.95)) @@ -93,7 +104,7 @@ test_that( rdist = rgamma, shape = true_shape, rate = true_rate, - pwindow = 3, + pwindow = 2, D = 8, rprimary = rexpgrowth, rprimary_args = list(r = 0.1) @@ -107,9 +118,15 @@ test_that( relative_obs_time = 8 ) + delay_counts <- simulated_data |> + dplyr::summarise( + n = dplyr::n(), + .by = c(pwindow, relative_obs_time, delay, delay_upper) + ) + # Prepare data for Stan stan_data <- pcd_as_cmdstan_data( - simulated_data, + delay_counts, dist_id = 2, # Gamma primary_dist_id = 2, # Exponential growth param_bounds = list(lower = c(0, 0), upper = c(Inf, Inf)), @@ -123,9 +140,10 @@ test_that( fit <- model$sample( data = stan_data, seed = 456, - chains = 4, - parallel_chains = 4, - refresh = 0 + chains = 2, + parallel_chains = 2, + refresh = 0, + show_messages = FALSE ) # Extract posterior @@ -162,7 +180,7 @@ test_that( simulated_delays <- rprimarycensoreddist( n = n, rdist = rlnorm, - meanlog = 1, + meanlog = 1.2, sdlog = 0.5, pwindow = 1, D = 8 @@ -176,31 +194,15 @@ test_that( relative_obs_time = 8 ) + delay_counts <- simulated_data |> + dplyr::summarise( + n = dplyr::n(), + .by = c(pwindow, relative_obs_time, delay, delay_upper) + ) + # Prepare data for Stan stan_data <- pcd_as_cmdstan_data( - simulated_data, - dist_id = 1, # Lognormal - primary_dist_id = 1, # Uniform - param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), - primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), - priors = list(location = c(0, 1), scale = c(1, 1)), - primary_priors = list(location = numeric(0), scale = numeric(0)), - use_reduce_sum = FALSE - ) - - # Fit model without within-chain parallelization - model_no_parallel <- pcd_cmdstan_model() - fit_no_parallel <- model_no_parallel$sample( - data = stan_data, - seed = 789, - chains = 2, - parallel_chains = 2, - refresh = 0 - ) - - # Prepare data for Stan with within-chain parallelization - stan_data_parallel <- pcd_as_cmdstan_data( - simulated_data, + delay_counts, dist_id = 1, # Lognormal primary_dist_id = 1, # Uniform param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), @@ -216,20 +218,27 @@ test_that( data = stan_data_parallel, seed = 789, chains = 2, - parallel_chains = 2, + parallel_chains = 1, threads_per_chain = 2, - refresh = 0 + refresh = 0, + show_messages = FALSE ) - # Check that both fits produce similar results - summary_no_parallel <- fit_no_parallel$summary() - summary_parallel <- fit_parallel$summary() + # Extract posterior + posterior <- fit$draws(c("params[1]", "params[2]"), format = "df") - expect_equal( - summary_no_parallel$mean, summary_parallel$mean, - tolerance = 0.1 - ) - expect_equal(summary_no_parallel$sd, summary_parallel$sd, tolerance = 0.1) + # Check mean estimates + expect_equal(mean(posterior$`params[1]`), true_meanlog, tolerance = 0.05) + expect_equal(mean(posterior$`params[2]`), true_sdlog, tolerance = 0.05) + + # Check credible intervals + ci_meanlog <- quantile(posterior$`params[1]`, c(0.05, 0.95)) + ci_sdlog <- quantile(posterior$`params[2]`, c(0.05, 0.95)) + + expect_gt(true_meanlog, ci_meanlog[1]) + expect_lt(true_meanlog, ci_meanlog[2]) + expect_gt(true_sdlog, ci_sdlog[1]) + expect_lt(true_sdlog, ci_sdlog[2]) } ) @@ -264,9 +273,15 @@ test_that( simulated_data$pwindow <- pwindow simulated_data$relative_obs_time <- D + delay_counts <- simulated_data |> + dplyr::summarise( + n = dplyr::n(), + .by = c(pwindow, relative_obs_time, delay, delay_upper) + ) + # Prepare data for Stan stan_data <- pcd_as_cmdstan_data( - simulated_data, + delay_counts, dist_id = 1, # Lognormal primary_dist_id = 1, # Uniform param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), @@ -280,19 +295,20 @@ test_that( fit <- model$sample( data = stan_data, seed = 456, - chains = 4, - parallel_chains = 4, - refresh = 0 + chains = 2, + parallel_chains = 2, + refresh = 0, + show_messages = FALSE ) # Extract posterior summaries summary <- fit$summary() # Check if true parameters are within 95% credible intervals - expect_lt(summary$q2.5[summary$variable == "params[1]"], true_meanlog) - expect_gt(summary$q97.5[summary$variable == "params[1]"], true_meanlog) - expect_lt(summary$q2.5[summary$variable == "params[2]"], true_sdlog) - expect_gt(summary$q97.5[summary$variable == "params[2]"], true_sdlog) + expect_lt(summary$q5[summary$variable == "params[1]"], true_meanlog) + expect_gt(summary$q95[summary$variable == "params[1]"], true_meanlog) + expect_lt(summary$q5[summary$variable == "params[2]"], true_sdlog) + expect_gt(summary$q95[summary$variable == "params[2]"], true_sdlog) # Check if posterior means are close to true values expect_equal( From 2fc8f8ac562362ac7ec5ac60e3aa78b29ac86269 Mon Sep 17 00:00:00 2001 From: Sam Date: Mon, 16 Sep 2024 16:07:28 +0100 Subject: [PATCH 13/13] further review integration testing --- tests/testthat/test-pcd_cmdstan_model.R | 37 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-pcd_cmdstan_model.R b/tests/testthat/test-pcd_cmdstan_model.R index e918fa3..404b812 100644 --- a/tests/testthat/test-pcd_cmdstan_model.R +++ b/tests/testthat/test-pcd_cmdstan_model.R @@ -3,6 +3,20 @@ if (on_ci()) { skip_on_os("windows") skip_on_os("mac") } + +test_that("pcd_cmdstan_model throws error when cmdstanr is not installed", { + with_mocked_bindings( + { + expect_error( + pcd_cmdstan_model(), + "Package 'cmdstanr' is required but not installed for this function" + ) + }, + requireNamespace = function(...) FALSE, + .package = "base" + ) +}) + skip_if_not_installed("cmdstanr") skip_if_not_installed("dplyr") @@ -114,7 +128,7 @@ test_that( delay = simulated_delays, delay_upper = simulated_delays + 1, n = 1, - pwindow = 3, + pwindow = 2, relative_obs_time = 8 ) @@ -131,7 +145,7 @@ test_that( primary_dist_id = 2, # Exponential growth param_bounds = list(lower = c(0, 0), upper = c(Inf, Inf)), primary_param_bounds = list(lower = 0, upper = Inf), - priors = list(location = c(1, 1), scale = c(1, 1)), + priors = list(location = c(2, 1), scale = c(0.5, 0.5)), primary_priors = list(location = 0.1, scale = 0.1) ) @@ -153,9 +167,9 @@ test_that( ) # Check mean estimates - expect_equal(mean(posterior$`params[1]`), true_shape, tolerance = 0.2) + expect_equal(mean(posterior$`params[1]`), true_shape, tolerance = 0.1) expect_equal(mean(posterior$`params[2]`), true_rate, tolerance = 0.1) - expect_equal(mean(posterior$`primary_params[1]`), 0.1, tolerance = 0.05) + expect_equal(mean(posterior$`primary_params[1]`), 0.1, tolerance = 0.1) # Check credible intervals ci_shape <- quantile(posterior$`params[1]`, c(0.05, 0.95)) @@ -172,16 +186,19 @@ test_that( ) test_that( - "pcd_cmdstan_model works with and without within-chain parallelization", + "pcd_cmdstan_model works with within-chain parallelization", { # Simulate simple data set.seed(789) n <- 2000 + true_meanlog <- 1.6 + true_sdlog <- 0.5 + simulated_delays <- rprimarycensoreddist( n = n, rdist = rlnorm, - meanlog = 1.2, - sdlog = 0.5, + meanlog = true_meanlog, + sdlog = true_sdlog, pwindow = 1, D = 8 ) @@ -207,15 +224,15 @@ test_that( primary_dist_id = 1, # Uniform param_bounds = list(lower = c(-Inf, 0), upper = c(Inf, Inf)), primary_param_bounds = list(lower = numeric(0), upper = numeric(0)), - priors = list(location = c(0, 1), scale = c(1, 1)), + priors = list(location = c(1, 0.5), scale = c(1, 1)), primary_priors = list(location = numeric(0), scale = numeric(0)), use_reduce_sum = TRUE ) # Fit model with within-chain parallelization model_parallel <- pcd_cmdstan_model(cpp_options = list(stan_threads = TRUE)) - fit_parallel <- model_parallel$sample( - data = stan_data_parallel, + fit <- model_parallel$sample( + data = stan_data, seed = 789, chains = 2, parallel_chains = 1,