Skip to content

Commit

Permalink
Issue #69: Add check_truncation (#120)
Browse files Browse the repository at this point in the history
* add check_truncation

* add unit tests

* add truncation checking to model functions

* add news and rename

* include new news item
  • Loading branch information
seabbs authored Oct 4, 2024
1 parent 44f9871 commit 76410fe
Show file tree
Hide file tree
Showing 18 changed files with 259 additions and 50 deletions.
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ S3method(primary_censored_cdf,pcens_pgamma_dunif)
S3method(primary_censored_cdf,pcens_plnorm_dunif)
export(check_dprimary)
export(check_pdist)
export(check_truncation)
export(dexpgrowth)
export(dpcens)
export(dprimarycensoreddist)
export(fitdistdoublecens)
export(new_primary_censored_dist)
export(pcd_as_cmdstan_data)
export(pcd_as_stan_data)
export(pcd_cmdstan_model)
export(pcd_load_stan_functions)
export(pcd_stan_files)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ This is the development version of `primarycensoreddist` and is not yet ready fo

* Split "Why it works" vignette into two separate vignettes, "Why it works" and "Analytic solutions for censored delay distributions".
* Removed the need to assign functions to the global environment for `fitdistdoublecens()` by using `withr`.
* Added a `check_truncation()` function to check if the truncation time is larger than the maximum observed delay. This is used in `fitdistdoublecens()` and `pcd_as_stan_data()` to ensure that the truncation time is appropriate to maximise computational efficiency.
* `pcd_as_cmdstan_data()` has been renamed to `pcd_as_stan_data()` to better reflect that it is used for `Stan` models in general rather than just the `CmdStan` models.

## Documentation

Expand Down
65 changes: 65 additions & 0 deletions R/check.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,68 @@ check_dprimary <- function(dprimary, pwindow, dprimary_args = list(),
}
return(invisible(NULL))
}

#' Check if truncation time is appropriate relative to the maximum delay
#'
#' This function checks if the truncation time D is appropriate relative to the
#' maximum delay. If D is much larger than necessary, it suggests
#' considering setting it to `Inf` for better efficiency with minimal accuracy
#' cost.
#'
#' @param delays A numeric vector of delay times
#'
#' @param D The truncation time
#'
#' @param multiplier The multiplier for the maximum delay to compare with D.
#' Default is 2.
#'
#' @return Invisible NULL. Prints a message if the condition is met.
#'
#' @export
#' @family check
#'
#' @examples
#' check_truncation(delays = c(1, 2, 3, 4), D = 10, multiplier = 2)
check_truncation <- function(delays, D, multiplier = 2) {
if (!is.numeric(delays) || !is.numeric(D) || !is.numeric(multiplier)) {
stop("All arguments must be numeric.")
}

if (D <= 0 || multiplier <= 1) {
stop(
"Invalid argument values. D must be positive and multiplier must be ",
"greater than 1."
)
}

if (is.infinite(D)) {
return(invisible(NULL))
}

# Remove NA
delays <- delays[!is.na(delays)]

if (length(delays) == 0) {
warning("No finite observed delays to check.")
return(invisible(NULL))
}

max_delay <- max(delays)
expected_D <- max_delay * multiplier

# Check if D is much larger than expected
if (D > expected_D) {
message(
sprintf(
paste0(
"The truncation time D (%g) is larger than %g times the maximum ",
"observed delay (%g). Consider setting D to Inf for better ",
"efficiency with minimal accuracy cost for this case."
),
D, multiplier, max_delay
)
)
}

invisible(NULL)
}
16 changes: 15 additions & 1 deletion R/fitdistdoublecens.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
#'
#' @param ... Additional arguments to be passed to [fitdistrplus::fitdist()].
#'
#' @param truncation_check_multiplier Numeric multiplier to use for checking
#' if the truncation time D is appropriate relative to the maximum delay.
#' Set to NULL to skip the check. Default is 2.
#'
#' @return An object of class "fitdist" as returned by fitdistrplus::fitdist.
#'
#' @export
Expand Down Expand Up @@ -56,7 +60,9 @@ fitdistdoublecens <- function(censdata, distr,
pwindow = 1, D = Inf,
dprimary = stats::dunif,
dprimary_name = NULL,
dprimary_args = list(), ...) {
dprimary_args = list(),
truncation_check_multiplier = 2,
...) {
# Check if fitdistrplus is available
if (!requireNamespace("fitdistrplus", quietly = TRUE)) {
stop(
Expand All @@ -78,6 +84,14 @@ fitdistdoublecens <- function(censdata, distr,
dprimary_name <- .extract_function_name(dprimary)
}

if (!is.null(truncation_check_multiplier)) {
check_truncation(
delays = censdata$left,
D = D,
multiplier = truncation_check_multiplier
)
}

# Get the distribution functions
pdist_name <- paste0("p", distr)
pdist <- get(pdist_name)
Expand Down
30 changes: 24 additions & 6 deletions R/pcd_cmdstan_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
#' fit <- model$sample(data = stan_data)
#' }
pcd_cmdstan_model <- function(
include_paths = primarycensoreddist::pcd_stan_path(), ...) {
include_paths = primarycensoreddist::pcd_stan_path(),
...) {
if (!requireNamespace("cmdstanr", quietly = TRUE)) {
stop("Package 'cmdstanr' is required but not installed for this function.")
}
Expand All @@ -57,7 +58,7 @@ pcd_cmdstan_model <- function(
#' @param delay Column name for observed delays (default: "delay")
#'
#' @param delay_upper Column name for upper bound of delays
#' (default: "delay_upper")
#' (default: "delay_upper")
#'
#' @param n Column name for count of observations (default: "n")
#'
Expand Down Expand Up @@ -93,8 +94,12 @@ pcd_cmdstan_model <- function(
#' @param use_reduce_sum Logical; use reduce_sum for performance?
#' (default: FALSE)
#'
#' @param truncation_check_multiplier Numeric multiplier to use for checking
#' if the truncation time D is appropriate relative to the maximum delay
#' for each unique D value. Set to NULL to skip the check. Default is 2.
#'
#' @return A list containing the data formatted for use with
#' [pcd_cmdstan_model()]
#' [pcd_cmdstan_model()]
#'
#' @export
#' @family modelhelpers
Expand All @@ -108,7 +113,7 @@ pcd_cmdstan_model <- function(
#' pwindow = c(1, 1, 2),
#' relative_obs_time = c(10, 10, 10)
#' )
#' stan_data <- pcd_as_cmdstan_data(
#' stan_data <- pcd_as_stan_data(
#' data,
#' dist_id = 1,
#' primary_dist_id = 1,
Expand All @@ -118,15 +123,16 @@ pcd_cmdstan_model <- function(
#' primary_priors = list(location = numeric(0), scale = numeric(0))
#' )
#' }
pcd_as_cmdstan_data <- function(
pcd_as_stan_data <- function(
data, delay = "delay", delay_upper = "delay_upper",
n = "n", pwindow = "pwindow",
relative_obs_time = "relative_obs_time",
dist_id, primary_dist_id,
param_bounds, primary_param_bounds,
priors, primary_priors,
compute_log_lik = FALSE,
use_reduce_sum = FALSE) {
use_reduce_sum = FALSE,
truncation_check_multiplier = 2) {
required_cols <- c(delay, delay_upper, n, pwindow, relative_obs_time)
missing_cols <- setdiff(required_cols, names(data))
if (length(missing_cols) > 0) {
Expand All @@ -142,6 +148,18 @@ pcd_as_cmdstan_data <- function(
)
}

if (!is.null(truncation_check_multiplier)) {
unique_D <- unique(data[[relative_obs_time]])
for (D in unique_D) {
delays_subset <- data[[delay]][data[[relative_obs_time]] == D]
check_truncation(
delays = delays_subset,
D = D,
multiplier = truncation_check_multiplier
)
}
}

stan_data <- list(
N = nrow(data),
d = data[[delay]],
Expand Down
3 changes: 2 additions & 1 deletion man/check_dprimary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/check_pdist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions man/check_truncation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion man/fitdistdoublecens.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 10 additions & 5 deletions man/pcd_as_cmdstan_data.Rd → man/pcd_as_stan_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/pcd_cmdstan_model.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -1,20 +1,3 @@
test_that("check_pdist works correctly for valid CDF", {
expect_silent(check_pdist(pnorm, D = 10))
expect_silent(check_pdist(punif, D = 1))
})

test_that("check_pdist throws error for invalid CDF", {
invalid_cdf <- function(x, ...) x^2 - 1 # Not bounded between 0 and 1
expect_error(
check_pdist(invalid_cdf, D = 10),
"pdist is not a valid cumulative distribution function"
)
})

test_that("check_pdist handles infinite D correctly", {
expect_silent(check_pdist(pnorm, D = Inf))
})

test_that("check_dprimary works correctly for valid PDF", {
expect_silent(check_dprimary(dunif, pwindow = 1))
})
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/test-check_pdist.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
test_that("check_pdist works correctly for valid CDF", {
expect_silent(check_pdist(pnorm, D = 10))
expect_silent(check_pdist(punif, D = 1))
})

test_that("check_pdist throws error for invalid CDF", {
invalid_cdf <- function(x, ...) x^2 - 1 # Not bounded between 0 and 1
expect_error(
check_pdist(invalid_cdf, D = 10),
"pdist is not a valid cumulative distribution function"
)
})

test_that("check_pdist handles infinite D correctly", {
expect_silent(check_pdist(pnorm, D = Inf))
})
Loading

0 comments on commit 76410fe

Please sign in to comment.