Update gee_test() to use geeasy package instead of geepack #9

Merged · 6 commits · Apr 11, 2024
3 changes: 3 additions & 0 deletions .gitignore
@@ -4,3 +4,6 @@
.Rbuildignore
.Rproj.user
.Rproj
*.Rproj
README.Rmd
.gitignore
3 changes: 2 additions & 1 deletion DESCRIPTION
@@ -18,4 +18,5 @@ Imports:
geepack,
magrittr,
rlang,
sandwich
sandwich,
geeasy
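
Since `geeasy` is now listed in Imports, it must be installed to build or use the package from this branch. A minimal sketch (not part of the diff; assumes `geeasy` is available from CRAN):

```r
# install the new dependency only if it is missing
if (!requireNamespace("geeasy", quietly = TRUE)) {
  install.packages("geeasy")
}
```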
1 change: 1 addition & 0 deletions NAMESPACE
@@ -5,6 +5,7 @@ export(gee_test)
export(get_test_statistic)
export(glm_test)
export(robust_score_test)
import(geeasy)
import(rlang)
importFrom(geepack,geeglm)
importFrom(magrittr,"%>%")
63 changes: 42 additions & 21 deletions R/gee_test.R
@@ -1,24 +1,31 @@
#' Generalized Estimating Equations under technical replication with robust and non-robust Wald and Rao (score) tests
#'
#' @param use_geeasy When TRUE (the default), uses `geeasy` for GEE estimation; when FALSE, uses `geepack`.
#' @param ... Arguments that you would pass to a regular `geepack::geeglm` call. Note that for now we only have functionality for Poisson tests with log link.
#'
#' @importFrom sandwich sandwich
#' @importFrom stats coef glm pnorm
#' @importFrom rlang call_match caller_env
#' @importFrom geepack geeglm
#' @import geeasy
#'
#' @examples
#' # TODO
#'
#'
#' @export
gee_test <- function(...) {
gee_test <- function(use_geeasy = TRUE, ...) {

cl_orig <- match.call()
cl_orig <- call_modify(cl_orig, use_geeasy = zap())

cl <- cl_orig
cl[1] <- call("geeglm")

if (use_geeasy) {
cl[1] <- call("geelm")
} else {
cl[1] <- call("geeglm")
}

if (is.null(cl$id)) {
stop("Missing replicates information (`id`). If no replicates, use `glm_test()`.")
}
@@ -34,11 +41,13 @@ gee_test <- function(...) {

## enforce robust Wald using "std.err" = "san.se"
## Documentation for geepack suggests bias can be substantial for sandwich se's with small number of clusters
## so choose approximate jackknife as std.err="jack"
## so choose approximate jackknife as std.err="jack" when using geepack
cl <- call_modify(cl,
"id" = as.factor(eval(cl$data, envir = rlang::caller_env())[[cl$id]]),
"corstr" = "exchangeable",
"std.err" = "jack")
"corstr" = "exchangeable")
if (!use_geeasy) {
cl <- call_modify(cl, "std.err" = "jack")
}
the_reorder <- order(eval(cl$data, envir = rlang::caller_env())[[id_col]])
cl <- call_modify(cl,
"data" = eval(cl$data, envir = rlang::caller_env())[the_reorder, ],
@@ -47,25 +56,25 @@

### fit the glm, ignoring warnings about non-integer inputs, as we take an estimating equations mindset
withCallingHandlers({
geeglm_result <- eval(cl, envir = rlang::caller_env()) ### is the issue here??
gee_result <- eval(cl, envir = rlang::caller_env())
}, warning=function(w) {
if (startsWith(conditionMessage(w), "non-integer x"))
invokeRestart("muffleWarning")
})


if (geeglm_result$family$family != "poisson") {
if (gee_result$family$family != "poisson") {
stop(paste("Amy has only implemented this for Poisson families.\n",
"You requested", geeglm_result$family$family, "\n",
"You requested", gee_result$family$family, "\n",
"Please open a GitHub issue if you're interested in other families."))
}
if (geeglm_result$family$link != "log") {
if (gee_result$family$link != "log") {
stop(paste("Amy has only implemented this for Poisson families with log link.\n",
"You requested link", geeglm_result$family$link, "\n",
"You requested link", gee_result$family$link, "\n",
"Please open a GitHub issue if you're interested in other link functions"))
}

output <- coef(summary(geeglm_result))[, -3] ## "Wald"
output <- coef(summary(gee_result))[, -3] ## "Wald"
colnames(output)[3] <- "Robust Wald p" ## "Pr(>|W|)"
colnames(output)[2] <- "Robust Std Error" ## "Std. Error"

@@ -75,21 +84,33 @@
robust_score_p <- rep(NA, length = pp)

for (p_marginal in 1:pp) {

robust_score_p[p_marginal] <- robust_score_test(glm_object = geeglm_result,
if (use_geeasy) {
id <- gee_result$id
} else {
id <- gee_result$geese$id
}
robust_score_p[p_marginal] <- robust_score_test(glm_object = gee_result,
call_to_model = cl,
param = p_marginal,
id = geeglm_result$geese$id)
id = id)
}

output <- cbind(output, "Robust Score p" = robust_score_p)

output <- output[, c("Estimate","Robust Std Error", "Robust Wald p", "Robust Score p")]

output <- rbind(output,
"correlation:alpha" = c(geeglm_result$geese$alpha, # "Estimate"
geeglm_result$geese$valpha.ajs, # "Robust Std Error"
NA, # "Robust Wald p"
NA)) # "Robust Score p"

if (use_geeasy) {
output <- rbind(output,
"correlation:alpha" = c(gee_result$geese$alpha, # "Estimate"
NA, # no estimate of "Robust Std Error" from geeasy
NA, # "Robust Wald p"
NA)) # "Robust Score p"
} else {
output <- rbind(output,
"correlation:alpha" = c(gee_result$geese$alpha, # "Estimate"
gee_result$geese$valpha.ajs, # "Robust Std Error"
NA, # "Robust Wald p"
NA)) # "Robust Score p"
}
output
}
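
For orientation (not part of the diff), here is a minimal sketch of how the updated interface is exercised, mirroring the `cars`-based setup used in the tests below. The new `use_geeasy` flag switches the backend between `geeasy::geelm` (the default) and `geepack::geeglm`:

```r
library(raoBust)

set.seed(1)
cars2 <- cars
cars2$wts <- log(rnorm(nrow(cars), 20))
cars2$batch <- rep(1:5, each = 10)   # five clusters of technical replicates

# default backend: geeasy::geelm
fit_geeasy <- gee_test(formula = dist ~ speed, data = cars2, id = batch,
                       family = poisson(link = "log"), offset = wts)

# previous behaviour: geepack::geeglm with approximate-jackknife standard errors
fit_geepack <- gee_test(use_geeasy = FALSE,
                        formula = dist ~ speed, data = cars2, id = batch,
                        family = poisson(link = "log"), offset = wts)

# point estimates from the two backends should agree closely
fit_geeasy[, "Estimate"]
fit_geepack[, "Estimate"]
```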
5 changes: 5 additions & 0 deletions README.Rmd
@@ -15,6 +15,11 @@ knitr::opts_chunk$set(

# raoBust: Robust Rao (score) tests for Generalized Linear Models

<!-- badges: start -->
[![R-CMD-check](https://github.com/statdivlab/raoBust/workflows/R-CMD-check/badge.svg)](https://github.com/statdivlab/raoBust/actions)
[![codecov](https://codecov.io/github/statdivlab/raoBust/coverage.svg?branch=main)](https://app.codecov.io/github/statdivlab/raoBust)
<!-- badges: end -->

`raoBust`, at its core, gives you all the important information from `glm()`, but also with model misspecification-robust Rao tests (also called score tests) and Wald tests.

Robust score tests have outstanding error rate performance in small samples, **and** when your data is not drawn from a parametric family (i.e., always). It's shocking to me (Amy) how well they perform. They have a reputation for being conservative in small samples, but I would argue that this is a *very good thing*.
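
To make the "robust Wald" idea concrete, here is a rough sketch of the sandwich-variance construction for a Poisson GLM (an illustration only, not part of this diff and not the package's internal code):

```r
library(sandwich)

fit <- glm(dist ~ speed, data = cars, family = poisson(link = "log"))

est       <- coef(fit)
robust_se <- sqrt(diag(sandwich(fit)))   # model-misspecification-robust (sandwich) variance
robust_p  <- 2 * pnorm(abs(est / robust_se), lower.tail = FALSE)

cbind(Estimate = est, `Robust Std Error` = robust_se, `Robust Wald p` = robust_p)
```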
4 changes: 3 additions & 1 deletion man/gee_test.Rd

Some generated files are not rendered by default.

42 changes: 33 additions & 9 deletions tests/testthat/test-replicates.R
@@ -9,7 +9,7 @@ test_that("replicates work", {
cars2$batch3 <- rep(1:5, each = 10)

### fails in test unless `cars` is loaded?
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
@@ -19,12 +19,12 @@
### Should be robust to ordering

expect_identical(
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
offset = wts),
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2,
id = batch1,
family=poisson(link="log"),
@@ -33,21 +33,21 @@

# devtools::load_all()
expect_identical(
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2,
id = batch2,
family=poisson(link="log"),
offset = wts)
,
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2[cars2$batch2 %>% order, ],
id = batch2,
family=poisson(link="log"),
offset = wts)
)

expect_true({
is.data.frame(gee_test(dist ~ speed,
is.data.frame(gee_test(formula = dist ~ speed,
data = cars2,
id = batch1,
family=poisson(link="log"),
@@ -56,7 +56,7 @@

#### check gives warning if weights provided
expect_warning(
gee_test(dist ~ speed,
gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
@@ -69,19 +69,43 @@ test_that("replicates work", {
cars3 <- rbind(cars, cars, cars)
cars3$dist <- cars3$dist + sample(-2:2, replace=TRUE, size = length(cars3$dist))
cars3$batch <- 1:length(cars$dist)
expect_gt(gee_test(dist ~ speed,
expect_gt(gee_test(formula = dist ~ speed,
data = cars3,
id = batch,
family=poisson(link="log"))[3, "Estimate"], 0.9)


#### check I've implemented score tests with clusters
expect_false(
any(is.na(gee_test(dist ~ speed,
any(is.na(gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
offset = wts)[["Robust Score p"]][1:2]))
)
})

test_that("geeasy and geepack give similar results", {
set.seed(1)
data(cars)
cars2 <- cars
cars2$wts <- log(rnorm(nrow(cars), 20))
cars2$batch1 <- rep(LETTERS[1:5], each = 10)
cars2$batch2 <- rep(LETTERS[6:10], 10)
cars2$batch3 <- rep(1:5, each = 10)

geeasy_test <- gee_test(formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
offset = wts)

geepack_test <- gee_test(use_geeasy = FALSE,
formula = dist ~ speed,
data = cars2,
id = batch3,
family=poisson(link="log"),
offset = wts)
expect_true(abs((geeasy_test$Estimate[1] - geepack_test$Estimate[1])/geeasy_test$Estimate[1]) < 0.01)
})

2 changes: 1 addition & 1 deletion tests/testthat/test-t1e-gee.R
@@ -19,7 +19,7 @@ test_that("nominal level generalized score test for n = 100 under correct model

expect_silent({
for (j in 1:nsim) {
ps[j] <- gee_test(yy ~ covariate1,
ps[j] <- gee_test(formula = yy ~ covariate1,
data = simulate_null_data(),
id = batch,
family = poisson(link = "log"))[2, "Robust Score p"]