Update gee_test() to use geeasy package instead of geepack #9

Merged · 6 commits · Apr 11, 2024
3 changes: 3 additions & 0 deletions .gitignore
@@ -4,3 +4,6 @@
.Rbuildignore
.Rproj.user
.Rproj
*.Rproj
README.Rmd
.gitignore
3 changes: 2 additions & 1 deletion DESCRIPTION
@@ -18,4 +18,5 @@ Imports:
geepack,
magrittr,
rlang,
sandwich
sandwich,
geeasy
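
Since `geeasy` is now listed in Imports, it must be installed to build or use the package from this branch. A minimal sketch (not part of the diff; assumes `geeasy` is available from CRAN):

```r
# install the new dependency only if it is missing
if (!requireNamespace("geeasy", quietly = TRUE)) {
  install.packages("geeasy")
}
```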
1 change: 1 addition & 0 deletions NAMESPACE
@@ -5,6 +5,7 @@ export(gee_test)
export(get_test_statistic)
export(glm_test)
export(robust_score_test)
import(geeasy)
import(rlang)
importFrom(geepack,geeglm)
importFrom(magrittr,"%>%")
63 changes: 42 additions & 21 deletions R/gee_test.R
@@ -1,24 +1,31 @@
#' Generalized Estimating Equations under technical replication with robust and non-robust Wald and Rao (score) tests
#'
#' @param use_geeasy When TRUE (the default), uses `geeasy` for GEE estimation; when FALSE, uses `geepack`.
#' @param ... Arguments that you would pass to a regular `geepack::geeglm` call. Note that for now we only have functionality for Poisson tests with log link.
#'
#' @importFrom sandwich sandwich
#' @importFrom stats coef glm pnorm
#' @importFrom rlang call_match caller_env
#' @importFrom geepack geeglm
#' @import geeasy
#'
#' @examples
#' # TODO
#'
#'
#' @export
gee_test <- function(...) {
gee_test <- function(use_geeasy = TRUE, ...) {

cl_orig <- match.call()
cl_orig <- call_modify(cl_orig, use_geeasy = zap())

cl <- cl_orig
cl[1] <- call("geeglm")

if (use_geeasy) {
cl[1] <- call("geelm")
} else {
cl[1] <- call("geeglm")
}

if (is.null(cl$id)) {
stop("Missing replicates information (`id`). If no replicates, use `glm_test()`.")
}
@@ -34,11 +41,13 @@ gee_test <- function(...) {

## enforce robust Wald using "std.err" = "san.se"
## Documentation for geepack suggests bias can be substantial for sandwich se's with small number of clusters
## so choose approximate jackknife as std.err="jack"
## so choose approximate jackknife as std.err="jack" when using geepack
cl <- call_modify(cl,
"id" = as.factor(eval(cl$data, envir = rlang::caller_env())[[cl$id]]),
"corstr" = "exchangeable",
"std.err" = "jack")
"corstr" = "exchangeable")
if (!use_geeasy) {
cl <- call_modify(cl, "std.err" = "jack")
}
the_reorder <- order(eval(cl$data, envir = rlang::caller_env())[[id_col]])
cl <- call_modify(cl,
"data" = eval(cl$data, envir = rlang::caller_env())[the_reorder, ],
@@ -47,25 +56,25 @@

### fit the glm, ignoring warnings about non-integer inputs, as we take an estimating equations mindset
withCallingHandlers({
geeglm_result <- eval(cl, envir = rlang::caller_env()) ### is the issue here??
gee_result <- eval(cl, envir = rlang::caller_env())
}, warning=function(w) {
if (startsWith(conditionMessage(w), "non-integer x"))
invokeRestart("muffleWarning")
})


if (geeglm_result$family$family != "poisson") {
if (gee_result$family$family != "poisson") {
stop(paste("Amy has only implemented this for Poisson families.\n",
"You requested", geeglm_result$family$family, "\n",
"You requested", gee_result$family$family, "\n",
"Please open a GitHub issue if you're interested in other families."))
}
if (geeglm_result$family$link != "log") {
if (gee_result$family$link != "log") {
stop(paste("Amy has only implemented this for Poisson families with log link.\n",
"You requested link", geeglm_result$family$link, "\n",
"You requested link", gee_result$family$link, "\n",
"Please open a GitHub issue if you're interested in other link functions"))
}

output <- coef(summary(geeglm_result))[, -3] ## "Wald"
output <- coef(summary(gee_result))[, -3] ## "Wald"
colnames(output)[3] <- "Robust Wald p" ## "Pr(>|W|)"
colnames(output)[2] <- "Robust Std Error" ## "Std. Error"

@@ -75,21 +84,33 @@
robust_score_p <- rep(NA, length = pp)

for (p_marginal in 1:pp) {

robust_score_p[p_marginal] <- robust_score_test(glm_object = geeglm_result,
if (use_geeasy) {
id <- gee_result$id
} else {
id <- gee_result$geese$id
}
robust_score_p[p_marginal] <- robust_score_test(glm_object = gee_result,
call_to_model = cl,
param = p_marginal,
id = geeglm_result$geese$id)
id = id)
}

output <- cbind(output, "Robust Score p" = robust_score_p)

output <- output[, c("Estimate","Robust Std Error", "Robust Wald p", "Robust Score p")]

output <- rbind(output,
"correlation:alpha" = c(geeglm_result$geese$alpha, # "Estimate"
geeglm_result$geese$valpha.ajs, # "Robust Std Error"
NA, # "Robust Wald p"
NA)) # "Robust Score p"

if (use_geeasy) {
output <- rbind(output,
"correlation:alpha" = c(gee_result$geese$alpha, # "Estimate"
NA, # no estimate of "Robust Std Error" from geeasy
NA, # "Robust Wald p"
NA)) # "Robust Score p"
} else {
output <- rbind(output,
"correlation:alpha" = c(gee_result$geese$alpha, # "Estimate"
gee_result$geese$valpha.ajs, # "Robust Std Error"
NA, # "Robust Wald p"
NA)) # "Robust Score p"
}
output
}
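
For orientation (not part of the diff), here is a minimal sketch of how the updated interface is exercised, mirroring the `cars`-based setup used in the tests below. The new `use_geeasy` flag switches the backend between `geeasy::geelm` (the default) and `geepack::geeglm`:

```r
library(raoBust)

set.seed(1)
cars2 <- cars
cars2$wts <- log(rnorm(nrow(cars), 20))
cars2$batch <- rep(1:5, each = 10)   # five clusters of technical replicates

# default backend: geeasy::geelm
fit_geeasy <- gee_test(formula = dist ~ speed, data = cars2, id = batch,
                       family = poisson(link = "log"), offset = wts)

# previous behaviour: geepack::geeglm with approximate-jackknife standard errors
fit_geepack <- gee_test(use_geeasy = FALSE,
                        formula = dist ~ speed, data = cars2, id = batch,
                        family = poisson(link = "log"), offset = wts)

# point estimates from the two backends should agree closely
fit_geeasy[, "Estimate"]
fit_geepack[, "Estimate"]
```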
5 changes: 5 additions & 0 deletions README.Rmd
@@ -15,6 +15,11 @@ knitr::opts_chunk$set(

# raoBust: Robust Rao (score) tests for Generalized Linear Models

<!-- badges: start -->
[![R-CMD-check](https://github.com/statdivlab/raoBust/workflows/R-CMD-check/badge.svg)](https://github.com/statdivlab/raoBust/actions)
[![codecov](https://codecov.io/github/statdivlab/raoBust/coverage.svg?branch=main)](https://app.codecov.io/github/statdivlab/raoBust)
<!-- badges: end -->

`raoBust`, at its core, gives you all the important information from `glm()`, but also with model misspecification-robust Rao tests (also called score tests) and Wald tests.

Robust score tests have outstanding error rate performance in small samples, **and** when your data is not drawn from a parametric family (i.e., always). It's shocking to me (Amy) how well they perform. They have a reputation for being conservative in small samples, but I would argue that this is a *very good thing*.
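
To make the "robust Wald" idea concrete, here is a rough sketch of the sandwich-variance construction for a Poisson GLM (an illustration only, not part of this diff and not the package's internal code):

```r
library(sandwich)

fit <- glm(dist ~ speed, data = cars, family = poisson(link = "log"))

est       <- coef(fit)
robust_se <- sqrt(diag(sandwich(fit)))   # model-misspecification-robust (sandwich) variance
robust_p  <- 2 * pnorm(abs(est / robust_se), lower.tail = FALSE)

cbind(Estimate = est, `Robust Std Error` = robust_se, `Robust Wald p` = robust_p)
```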
4 changes: 3 additions & 1 deletion man/gee_test.Rd

Some generated files are not rendered by default.

42 changes: 33 additions & 9 deletions tests/testthat/test-replicates.R
@@ -9,7 +9,7 @@ test_that("replicates work", {
cars2$batch3 <- rep(1:5, each = 10)

### fails in test unless `cars` is loaded?
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
@@ -19,12 +19,12 @@
### Should be robust to ordering

expect_identical(
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
offset = wts),
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2,
id = batch1,
family=poisson(link="log"),
@@ -33,21 +33,21 @@

# devtools::load_all()
expect_identical(
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2,
id = batch2,
family=poisson(link="log"),
offset = wts)
,
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2[cars2$batch2 %>% order, ],
id = batch2,
family=poisson(link="log"),
offset = wts)
)

expect_true({
is.data.frame(gee_test(dist ~ speed,
is.data.frame(gee_test(formula = dist ~ speed,
data = cars2,
id = batch1,
family=poisson(link="log"),
@@ -56,7 +56,7 @@

#### check gives warning if weights provided
expect_warning(
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
@@ -69,19 +69,43 @@ test_that("replicates work", {
cars3 <- rbind(cars, cars, cars)
cars3$dist <- cars3$dist + sample(-2:2, replace=TRUE, size = length(cars3$dist))
cars3$batch <- 1:length(cars$dist)
expect_gt(gee_test(dist ~ speed,
expect_gt(gee_test(formula = dist ~ speed,
data = cars3,
id = batch,
family=poisson(link="log"))[3, "Estimate"], 0.9)


#### check I've implemented score tests with clusters
expect_false(
any(is.na(gee_test(dist ~ speed,
any(is.na(gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
offset = wts)[["Robust Score p"]][1:2]))
)
})

test_that("geeasy and geepack give similar results", {
set.seed(1)
data(cars)
cars2 <- cars
cars2$wts <- log(rnorm(nrow(cars), 20))
cars2$batch1 <- rep(LETTERS[1:5], each = 10)
cars2$batch2 <- rep(LETTERS[6:10], 10)
cars2$batch3 <- rep(1:5, each = 10)

geeasy_test <- gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
offset = wts)

geepack_test <- gee_test(use_geeasy = FALSE,
formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
offset = wts)
expect_true(abs((geeasy_test$Estimate[1] - geepack_test$Estimate[1])/geeasy_test$Estimate[1]) < 0.01)
})

2 changes: 1 addition & 1 deletion tests/testthat/test-t1e-gee.R
@@ -19,7 +19,7 @@ test_that("nominal level generalized score test for n = 100 under correct model

expect_silent({
for (j in 1:nsim) {
ps[j] <- gee_test(yy ~ covariate1,
ps[j] <- gee_test(formula = yy ~ covariate1,
data = simulate_null_data(),
id = batch,
family = poisson(link = "log"))[2, "Robust Score p"]