Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding in option to run jackknife standard errors for gee_test (and testing this capability) #13

Merged
merged 1 commit into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ export("%>%")
export(gee_test)
export(get_test_statistic)
export(glm_test)
export(jackknife_se)
export(robust_score_test)
import(geeasy)
import(rlang)
Expand Down
15 changes: 11 additions & 4 deletions R/gee_test.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' Generalized Estimating Equations under technical replication with robust and non-robust Wald and Rao (score) tests
#'
#' @param use_geeasy When TRUE, uses `geeasy` for gee estimation, when FALSE uses `geepack`
#' @param use_jack_se When TRUE uses jackknife standard errors (which take longer), when FALSE uses sandwich standard errors
#' @param ... Arguments that you would pass to a regular `geepack::geeglm` call. Note that for now we only have functionality for Poisson tests with log link
#'
#' @importFrom sandwich sandwich
Expand All @@ -14,10 +15,10 @@
#'
#'
#' @export
gee_test <- function(use_geeasy = TRUE, ...) {

gee_test <- function(use_geeasy = TRUE, use_jack_se = FALSE, ...) {
cl_orig <- match.call()
cl_orig <- call_modify(cl_orig, use_geeasy = zap())
cl_orig <- call_modify(cl_orig, use_geeasy = zap(), use_jack_se = zap())

cl <- cl_orig
if (use_geeasy) {
Expand Down Expand Up @@ -78,7 +79,13 @@ gee_test <- function(use_geeasy = TRUE, ...) {
colnames(output)[3] <- "Robust Wald p" ## "Pr(>|W|)"
colnames(output)[2] <- "Robust Std Error" ## "Std. Error"


# get jackknife standard errors if needed
if (use_jack_se) {
output[, 2] <- jackknife_se(object = gee_result,
dat = eval(cl$data, envir = rlang::caller_env())[the_reorder, ],
id = eval(cl$id, envir = rlang::caller_env())[the_reorder])
}

#### run robust score tests
pp <- nrow(output)
robust_score_p <- rep(NA, length = pp)
Expand Down
37 changes: 37 additions & 0 deletions R/jackknife_se.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#' Jackknife standard errors
#'
#' @param object The fitted object under the alternative.
#' @param dat The data used to fit the model
#' @param id Observations with the same id are in the same cluster. If not included, independence between
#' observations is assumed.
#'
#' @export
jackknife_se <- function(object, dat, id = NULL){
if (is.null(id)) {
id <- 1:nrow(dat)
}
if (!is.numeric(id)) {
id <- as.factor(id)
}
unique_id <- unique(id)
parm <- sapply(unique_id, update_model, object, dat, id)
parm <- t(parm)
parm.mean <- apply(parm, 2, mean)

parm.cent <- sapply(1:nrow(parm),
function(i) {
parm[i, ] - parm.mean
})
parm.cent <- t(parm.cent)

jack.var <- ((length(unique_id) - 1) / length(unique_id)) * t(parm.cent) %*% parm.cent
jack.se <- sqrt(diag(jack.var))
jack.se
}

update_model <- function(id_i, object, dat, id) {
remove_ind <- which(id == id_i)
dat.i <- dat[-remove_ind, ]
object$call <- call_modify(object$call, id = id[-remove_ind])
coef(stats::update(object, data = dat.i))
}
4 changes: 3 additions & 1 deletion man/gee_test.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/jackknife_se.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 28 additions & 15 deletions tests/testthat/test-replicates.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
test_that("replicates work", {
set.seed(1)
data(cars)
cars2 <- cars
cars2$wts <- log(rnorm(nrow(cars), 20))
cars2$batch1 <- rep(LETTERS[1:5], each = 10)
cars2$batch2 <- rep(LETTERS[6:10], 10)
cars2$batch3 <- rep(1:5, each = 10)

set.seed(1)
data(cars)
cars2 <- cars
cars2$wts <- log(rnorm(nrow(cars), 20))
cars2$batch1 <- rep(LETTERS[1:5], each = 10)
cars2$batch2 <- rep(LETTERS[6:10], 10)
cars2$batch3 <- rep(1:5, each = 10)
test_that("replicates work", {

### fails in test unless `cars` is loaded?
gee_test(formula = dist ~ speed,
Expand Down Expand Up @@ -86,13 +86,6 @@ test_that("replicates work", {
})

test_that("geeasy and geepack give similar results", {
set.seed(1)
data(cars)
cars2 <- cars
cars2$wts <- log(rnorm(nrow(cars), 20))
cars2$batch1 <- rep(LETTERS[1:5], each = 10)
cars2$batch2 <- rep(LETTERS[6:10], 10)
cars2$batch3 <- rep(1:5, each = 10)

geeasy_test <- gee_test(formula = dist ~ speed,
data = cars2,
Expand All @@ -109,3 +102,23 @@ test_that("geeasy and geepack give similar results", {
expect_true((geeasy_test$Estimate[1] - geepack_test$Estimate[1])/geeasy_test$Estimate[1] < 0.01)
})

test_that("jackknife standard errors work", {
cars2_obj <- geelm(formula = dist ~ speed, data = cars2, id = batch3,
family = poisson(link = "log"), offset = wts, corstr = "exchangeable")

jack_se <- jackknife_se(cars2_obj, cars2)
jack_se_cluster <- jackknife_se(cars2_obj, cars2, id = cars2$batch3)
sand_se <- summary(cars2_obj)$coef$Std.err
# expect that non-clustered se's are closer to robust se's because this data has no inherent clustering
expect_true(sum((jack_se - sand_se)^2) < sum((jack_se_cluster - sand_se)^2))

gee_res <- gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
offset = wts,
use_jack_se = TRUE)
# expect that gee test robust standard errors are equal to those directly from jackknife function
expect_true(all.equal(gee_res$`Robust Std Error`[1:2], as.vector(jack_se_cluster)))
})

Loading