Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 213: Create functionality for passing in newdata #231

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ S3method(epidist_stancode,epidist_latent_individual)
S3method(epidist_validate,default)
S3method(epidist_validate,epidist_latent_individual)
export(add_mean_sd)
export(all_strata_newdata)
export(as_latent_individual)
export(calculate_censor_delay)
export(calculate_cohort_mean)
Expand Down Expand Up @@ -78,3 +79,4 @@ importFrom(stats,quantile)
importFrom(stats,rexp)
importFrom(stats,runif)
importFrom(stats,t.test)
importFrom(stats,update)
24 changes: 22 additions & 2 deletions R/postprocess.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ predict_delay_parameters <- function(fit, newdata = NULL, ...) {
# Every brms model has the parameter mu
lp_mu <- brms::get_dpar(pp, dpar = "mu", inv_link = TRUE)
df <- expand.grid(
"index" = seq_len(ncol(lp_mu)),
"draw" = seq_len(nrow(lp_mu))
"draw" = seq_len(nrow(lp_mu)),
"index" = seq_len(ncol(lp_mu))
)
df[["mu"]] <- as.vector(lp_mu)
for (dpar in setdiff(names(pp$dpars), "mu")) {
Expand All @@ -34,6 +34,26 @@ predict_delay_parameters <- function(fit, newdata = NULL, ...) {
#' @export
predict_dpar <- predict_delay_parameters

#' Generate newdata to predict on all unique strata in the model
#'
#' Collects every variable that appears in the formula of any distributional
#' parameter of the fitted model, looks up the unique values of each of those
#' variables in `fit$data`, and returns the full crossing of those values
#' (via [expand.grid()]). The columns `delay_central`, `obs_t`, `pwindow_upr`
#' and `swindow_upr` are then appended with placeholder values (`0` for
#' `delay_central`, `NA` for the others).
#'
#' @param fit A model fit with `epidist::epidist`
#' @return A `data.frame` with one row per combination of the unique values of
#' the model variables, followed by the placeholder columns described above.
#' If the model formulas contain no variables a single row is returned.
#' @family postprocess
#' @autoglobal
#' @export
all_strata_newdata <- function(fit) {
  bterms <- brms::brmsterms(fit$formula)
  vars <- lapply(bterms$dpars, function(x) all.vars(x$formula))
  vars <- unique(unlist(vars))

  # Fail with a clear message rather than an opaque subsetting error
  missing_vars <- setdiff(vars, names(fit$data))
  if (length(missing_vars) > 0) {
    stop(
      "Variables not found in `fit$data`: ",
      paste(missing_vars, collapse = ", "),
      call. = FALSE
    )
  }

  # `[[` always extracts a plain vector, whereas `[, var]` behaves differently
  # for data.frame, tibble and data.table inputs
  var_values <- lapply(vars, function(var) unique(fit$data[[var]]))
  names(var_values) <- vars

  if (length(var_values) == 0) {
    # With no variables expand.grid() returns zero rows, and assigning the
    # scalar placeholder columns below would then error
    newdata <- data.frame(delay_central = 0)
  } else {
    newdata <- expand.grid(var_values)
    newdata$delay_central <- 0
  }

  # TODO (from review): extract the LHS-of-equation variables from `bterms`
  # instead of hard-coding these columns, so that this function is not
  # specific to latent_individual
  newdata$obs_t <- NA
  newdata$pwindow_upr <- NA
  newdata$swindow_upr <- NA

  newdata
}

#' Convert posterior lognormal samples to long format
#'
#' @param draws ...
Expand Down
1 change: 1 addition & 0 deletions man/add_mean_sd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/add_mean_sd.default.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/add_mean_sd.gamma_samples.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/add_mean_sd.lognormal_samples.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/all_strata_newdata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/draws_to_long.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/make_relative_to_truth.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/predict_delay_parameters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/summarise_draws.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/summarise_variable.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions newdata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Prepare the simulated observations and attach a random binary `sex` column
prep_obs <- as_latent_individual(sim_obs)
prep_obs$sex <- rbinom(n = nrow(prep_obs), size = 1, prob = 0.5)

# Fit with both distributional parameters depending on `sex`
fit_sex <- epidist(
  formula = brms::bf(mu ~ 1 + sex, sigma ~ 1 + sex),
  data = prep_obs
)

draws <- posterior::as_draws_df(fit_sex$fit)

# Prediction without newdata (newdata = NULL)
pred <- predict_delay_parameters(fit_sex)

# Prediction with newdata: take the first two rows as a plain data.frame and
# keep only the columns passed on for prediction
strata_df <- prep_obs[seq_len(2), ] |>
  as.data.frame() |>
  dplyr::select(delay_central, sex, obs_t, pwindow_upr, swindow_upr)
athowes marked this conversation as resolved.
Show resolved Hide resolved

# Predict delay parameters for the rows selected in `strata_df`
pred_strata <- predict_delay_parameters(fit = fit_sex, newdata = strata_df)

# Unimplemented placeholder kept from the draft:
# extract_all_strata <- function() {
#
# }

library(ggplot2)

# Histogram of the predicted `mean`, one panel per `index`
ggplot(data = pred_strata, mapping = aes(y = mean)) +
  geom_histogram() +
  facet_grid(cols = vars(index)) +
  coord_flip()
34 changes: 34 additions & 0 deletions tests/testthat/setup.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,37 @@ sim_obs_gamma <- simulate_gillespie() |>

sim_obs_gamma <-
sim_obs_gamma[sample(seq_len(.N), sample_size, replace = FALSE)]

# What follows is data with a sex difference: rows with sex == 0 receive
# secondary events simulated with the `_m` lognormal parameters, and rows with
# sex == 1 with the `_f` lognormal parameters.

# meanlog / sdlog passed to rlnorm (via simulate_secondary) for sex == 0
meanlog_m <- 2.0
sdlog_m <- 0.3

# meanlog / sdlog passed to rlnorm (via simulate_secondary) for sex == 1
meanlog_f <- 1.3
sdlog_f <- 0.7

sim_obs_sex <- simulate_gillespie()
# Assign each row a 0/1 sex indicator drawn with probability 0.5
sim_obs_sex$sex <- rbinom(n = nrow(sim_obs_sex), size = 1, prob = 0.5)

# Secondary events for the sex == 0 rows
sim_obs_sex_m <- dplyr::filter(sim_obs_sex, sex == 0) |>
simulate_secondary(
dist = rlnorm,
meanlog = meanlog_m,
sdlog = sdlog_m
)

# Secondary events for the sex == 1 rows
sim_obs_sex_f <- dplyr::filter(sim_obs_sex, sex == 1) |>
simulate_secondary(
dist = rlnorm,
meanlog = meanlog_f,
sdlog = sdlog_f
)

# Recombine the two groups and order the rows by `case`
sim_obs_sex <- dplyr::bind_rows(sim_obs_sex_m, sim_obs_sex_f) |>
dplyr::arrange(case)

# NOTE(review): `obs_time` is not defined in this section; presumably it is set
# earlier in this setup file -- confirm
sim_obs_sex <- sim_obs_sex |>
observe_process() |>
filter_obs_by_obs_time(obs_time = obs_time)

# Keep `sample_size` rows sampled without replacement.
# NOTE(review): `.N` implies `sim_obs_sex` is a data.table at this point, and
# `sample_size` is presumably defined earlier in this file -- confirm
sim_obs_sex <- sim_obs_sex[sample(seq_len(.N), sample_size, replace = FALSE)]
58 changes: 46 additions & 12 deletions tests/testthat/test-unit-postprocess.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,60 @@
fit <- epidist(data = prep_obs, seed = 1, silent = 2)
pred <- predict_delay_parameters(fit)
expect_s3_class(pred, "data.table")
expect_named(pred, c("index", "draw", "mu", "sigma", "mean", "sd"))
expect_named(pred, c("draw", "index", "mu", "sigma", "mean", "sd"))
expect_true(all(pred$mean > 0))
expect_true(all(pred$sd > 0))
expect_equal(length(unique(pred$index)), nrow(prep_obs))
expect_equal(length(unique(pred$draw)), summary(fit)$total_ndraws)
})

test_that("predict_delay_parameters accepts newdata arguments", { # nolint: line_length_linter.
test_that("predict_delay_parameters accepts newdata arguments, all_strata_newdata works as expected, and predictions by sex recover underlying parameters", { # nolint: line_length_linter.
skip_on_cran()
set.seed(1)
prep_obs <- as_latent_individual(sim_obs)
fit <- epidist(data = prep_obs, seed = 1, silent = 2)
n <- 5
pred <- predict_delay_parameters(fit, newdata = prep_obs[1:n, ])
expect_s3_class(pred, "data.table")
expect_named(pred, c("index", "draw", "mu", "sigma", "mean", "sd"))
expect_true(all(pred$mean > 0))
expect_true(all(pred$sd > 0))
expect_equal(length(unique(pred$index)), 5)
expect_equal(length(unique(pred$draw)), summary(fit)$total_ndraws)
prep_obs_sex <- as_latent_individual(sim_obs_sex)
fit_sex <- epidist(
data = prep_obs_sex,
formula = brms::bf(mu ~ 1 + sex, sigma ~ 1 + sex),
seed = 1,
silent = 2
)

all_strata <- all_strata_newdata(fit_sex)
expect_equal(nrow(all_strata), 2)
expect_named(
all_strata,
c("sex", "delay_central", "obs_t", "pwindow_upr", "swindow_upr")
)
expect_equal(all_strata$sex, c(0, 1))

pred_sex <- predict_delay_parameters(fit_sex, newdata = all_strata)
expect_s3_class(pred_sex, "data.table")
expect_named(pred_sex, c("draw", "index", "mu", "sigma", "mean", "sd"))
expect_true(all(pred_sex$mean > 0))
expect_true(all(pred_sex$sd > 0))
expect_equal(length(unique(pred_sex$index)), nrow(all_strata))
expect_equal(length(unique(pred_sex$draw)), summary(fit_sex)$total_ndraws)

pred_sex_summary <- pred_sex |>
dplyr::group_by(index) |>
dplyr::summarise(
mu = mean(mu),
sigma = mean(sigma)
)

# Correct predictions of M
expect_equal(
as.numeric(pred_sex_summary[1, c("mu", "sigma")]),
c(meanlog_m, sdlog_m),
tolerance = 0.05
)

Check warning on line 55 in tests/testthat/test-unit-postprocess.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=tests/testthat/test-unit-postprocess.R,line=55,col=1,[trailing_whitespace_linter] Trailing whitespace is superfluous.
# Correct predictions of F
expect_equal(
as.numeric(pred_sex_summary[2, c("mu", "sigma")]),
c(meanlog_f, sdlog_f),
tolerance = 0.05
)
})

test_that("add_mean_sd.lognormal_samples works with simulated lognormal distribution parameter data", { # nolint: line_length_linter.
Expand Down
Loading