diff --git a/NAMESPACE b/NAMESPACE index db7229f..4bf2f48 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export("%>%") export(gee_test) export(get_test_statistic) export(glm_test) +export(jackknife_se) export(robust_score_test) import(geeasy) import(rlang) diff --git a/R/gee_test.R b/R/gee_test.R index 6b03dd4..4c0fdfb 100644 --- a/R/gee_test.R +++ b/R/gee_test.R @@ -1,6 +1,7 @@ #' Generalized Estimating Equations under technical replication with robust and non-robust Wald and Rao (score) tests #' #' @param use_geeasy When TRUE, uses `geeasy` for gee estimation, when FALSE uses `geepack` +#' @param use_jack_se When TRUE uses jackknife standard errors (which take longer), when FALSE uses sandwich standard errors #' @param ... Arguments that you would pass to a regular `geepack::geeglm` call. Note that for now we only have functionality for Poisson tests with log link #' #' @importFrom sandwich sandwich @@ -14,10 +15,10 @@ #' #' #' @export -gee_test <- function(use_geeasy = TRUE, ...) { - +gee_test <- function(use_geeasy = TRUE, use_jack_se = FALSE, ...) { + cl_orig <- match.call() - cl_orig <- call_modify(cl_orig, use_geeasy = zap()) + cl_orig <- call_modify(cl_orig, use_geeasy = zap(), use_jack_se = zap()) cl <- cl_orig if (use_geeasy) { @@ -78,7 +79,13 @@ gee_test <- function(use_geeasy = TRUE, ...) { colnames(output)[3] <- "Robust Wald p" ## "Pr(>|W|)" colnames(output)[2] <- "Robust Std Error" ## "Std. Error" - + # get jackknife standard errors if needed + if (use_jack_se) { + output[, 2] <- jackknife_se(object = gee_result, + dat = eval(cl$data, envir = rlang::caller_env())[the_reorder, ], + id = eval(cl$id, envir = rlang::caller_env())[the_reorder]) + } + #### run robust score tests pp <- nrow(output) robust_score_p <- rep(NA, length = pp) diff --git a/R/jackknife_se.R b/R/jackknife_se.R new file mode 100644 index 0000000..6d1a599 --- /dev/null +++ b/R/jackknife_se.R @@ -0,0 +1,37 @@ +#' Jackknife standard errors +#' +#' @param object The fitted object under the alternative. +#' @param dat The data used to fit the model +#' @param id Observations with the same id are in the same cluster. If not included, independence between +#' observations is assumed. +#' +#' @export +jackknife_se <- function(object, dat, id = NULL){ + if (is.null(id)) { + id <- 1:nrow(dat) + } + if (!is.numeric(id)) { + id <- as.factor(id) + } + unique_id <- unique(id) + parm <- sapply(unique_id, update_model, object, dat, id) + parm <- t(parm) + parm.mean <- apply(parm, 2, mean) + + parm.cent <- sapply(1:nrow(parm), + function(i) { + parm[i, ] - parm.mean + }) + parm.cent <- t(parm.cent) + + jack.var <- ((length(unique_id) - 1) / length(unique_id)) * t(parm.cent) %*% parm.cent + jack.se <- sqrt(diag(jack.var)) + jack.se +} + +update_model <- function(id_i, object, dat, id) { + remove_ind <- which(id == id_i) + dat.i <- dat[-remove_ind, ] + object$call <- call_modify(object$call, id = id[-remove_ind]) + coef(stats::update(object, data = dat.i)) +} diff --git a/man/gee_test.Rd b/man/gee_test.Rd index 09d82db..f9d6491 100644 --- a/man/gee_test.Rd +++ b/man/gee_test.Rd @@ -4,11 +4,13 @@ \alias{gee_test} \title{Generalized Estimating Equations under technical replication with robust and non-robust Wald and Rao (score) tests} \usage{ -gee_test(use_geeasy = TRUE, ...) +gee_test(use_geeasy = TRUE, use_jack_se = FALSE, ...) } \arguments{ \item{use_geeasy}{When TRUE, uses \code{geeasy} for gee estimation, when FALSE uses \code{geepack}} +\item{use_jack_se}{When TRUE uses jackknife standard errors (which take longer), when FALSE uses sandwich standard errors} + \item{...}{Arguments that you would pass to a regular \code{geepack::geeglm} call. Note that for now we only have functionality for Poisson tests with log link} } \description{ diff --git a/man/jackknife_se.Rd b/man/jackknife_se.Rd new file mode 100644 index 0000000..de45d8b --- /dev/null +++ b/man/jackknife_se.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jackknife_se.R +\name{jackknife_se} +\alias{jackknife_se} +\title{Jackknife standard errors} +\usage{ +jackknife_se(object, dat, id = NULL) +} +\arguments{ +\item{object}{The fitted object under the alternative.} + +\item{dat}{The data used to fit the model} + +\item{id}{Observations with the same id are in the same cluster. If not included, independence between +observations is assumed.} +} +\description{ +Jackknife standard errors +} diff --git a/tests/testthat/test-replicates.R b/tests/testthat/test-replicates.R index ca331cc..1c683f4 100644 --- a/tests/testthat/test-replicates.R +++ b/tests/testthat/test-replicates.R @@ -1,12 +1,12 @@ -test_that("replicates work", { +set.seed(1) +data(cars) +cars2 <- cars +cars2$wts <- log(rnorm(nrow(cars), 20)) +cars2$batch1 <- rep(LETTERS[1:5], each = 10) +cars2$batch2 <- rep(LETTERS[6:10], 10) +cars2$batch3 <- rep(1:5, each = 10) - set.seed(1) - data(cars) - cars2 <- cars - cars2$wts <- log(rnorm(nrow(cars), 20)) - cars2$batch1 <- rep(LETTERS[1:5], each = 10) - cars2$batch2 <- rep(LETTERS[6:10], 10) - cars2$batch3 <- rep(1:5, each = 10) +test_that("replicates work", { ### fails in test unless `cars` is loaded? gee_test(formula = dist ~ speed, @@ -86,13 +86,6 @@ test_that("replicates work", { }) test_that("geeasy and geepack give similar results", { - set.seed(1) - data(cars) - cars2 <- cars - cars2$wts <- log(rnorm(nrow(cars), 20)) - cars2$batch1 <- rep(LETTERS[1:5], each = 10) - cars2$batch2 <- rep(LETTERS[6:10], 10) - cars2$batch3 <- rep(1:5, each = 10) geeasy_test <- gee_test(formula = dist ~ speed, data = cars2, @@ -109,3 +102,23 @@ test_that("geeasy and geepack give similar results", { expect_true((geeasy_test$Estimate[1] - geepack_test$Estimate[1])/geeasy_test$Estimate[1] < 0.01) }) +test_that("jackknife standard errors work", { + cars2_obj <- geelm(formula = dist ~ speed, data = cars2, id = batch3, + family = poisson(link = "log"), offset = wts, corstr = "exchangeable") + + jack_se <- jackknife_se(cars2_obj, cars2) + jack_se_cluster <- jackknife_se(cars2_obj, cars2, id = cars2$batch3) + sand_se <- summary(cars2_obj)$coef$Std.err + # expect that non-clustered se's are closer to robust se's because this data has no inherent clustering + expect_true(sum((jack_se - sand_se)^2) < sum((jack_se_cluster - sand_se)^2)) + + gee_res <- gee_test(formula = dist ~ speed, + data = cars2, + id = batch3, + family=poisson(link="log"), + offset = wts, + use_jack_se = TRUE) + # expect that gee test robust standard errors are equal to those directly from jackknife function + expect_true(all.equal(gee_res$`Robust Std Error`[1:2], as.vector(jack_se_cluster))) +}) +