Merge pull request #100 from svteichman/return_score_stat_info

add option to return score test components
statdivlab · Nov 22, 2024 · a7de668 · a7de668
2 parents 91336cf + 4acedce
commit a7de668
Show file tree

Hide file tree

Showing 7 changed files with 66 additions and 11 deletions.
diff --git a/R/emuFit.R b/R/emuFit.R
@@ -78,6 +78,7 @@
 #' @param trackB logical: should values of B be recorded across optimization
 #' iterations and be returned? Primarily used for debugging. Default is FALSE.
 #' @param return_nullB logical: should values of B under null hypothesis be returned. Primarily used for debugging. Default is FALSE. 
+#' @param return_score_components logical: should components of score statistic be returned? Primarily used for debugging. Default is FALSE.
 #' @param return_both_score_pvals logical: should score p-values be returned using both
 #' information matrix computed from full model fit and from null model fits? Default is
 #' FALSE. This parameter is used for simulations - in any applied analysis, type of
@@ -156,6 +157,7 @@ emuFit <- function(Y,
                    max_step = 1,
                    trackB = FALSE,
                    return_nullB = FALSE,
+                   return_score_components = FALSE,
                    return_both_score_pvals = FALSE,
                    remove_zero_comparison_pvals = 0.01,
                    unobserved_taxon_error = TRUE) {
@@ -485,6 +487,10 @@ and the corresponding gradient function to constraint_grad_fn.")
                                          gap = NA,
                                          converged = NA)
 
+    if (return_score_components) {
+      score_components <- vector(mode = "list", length = nrow(test_kj))
+    }
+
     if (return_both_score_pvals) {
       colnames(coefficients)[colnames(coefficients) == "pval"] <-
         "score_pval_full_info"
@@ -545,6 +551,10 @@ and the corresponding gradient function to constraint_grad_fn.")
                                 return_both_score_pvals = return_both_score_pvals,
                                 cluster = cluster)
 
+      if (return_score_components & !(is.null(test_result))) {
+        score_components[[test_ind]] <- test_result$score_pieces
+      }
+
       if (is.null(test_result)) {
         if (return_nullB) {
           nullB_list[[test_ind]] <- NA
@@ -593,7 +603,7 @@ and the corresponding gradient function to constraint_grad_fn.")
                                            p = p, 
                                            I_inv=I_inv,
                                            Dy = just_wald_things$Dy,
-                                           cluster = cluster)
+                                           cluster = cluster)$score_stat
 
 
           which_row <- which((as.numeric(coefficients$k) == as.numeric(test_kj$k[test_ind]))&
@@ -745,6 +755,9 @@ and the corresponding gradient function to constraint_grad_fn.")
       warning("Optimization for estimation under the null for robust score tests failed to converge for some tests. See 'null_estimation_unconverged' within the returned emuFit object for which tests are affected by this.")
     }
   }
+  if (run_score_tests & return_score_components) {
+    results$score_components <- score_components
+  }
 
   return(structure(results, class = "emuFit"))
 }

diff --git a/R/get_score_stat.R b/R/get_score_stat.R
@@ -105,5 +105,5 @@ get_score_stat <- function(Y,
   inside <- Matrix::crossprod(I_inv_H, Dy) %*% I_inv_H
   score_stat <- as.numeric(as.matrix(outside^2/inside))*score_adj
 
-  return(as.numeric(score_stat))
+  return(list(score_stat = as.numeric(score_stat), outside = outside, inside = inside, score = score))
 }
diff --git a/R/score_test.R b/R/score_test.R
@@ -186,7 +186,7 @@ retrying with smaller penalty scaling parameter tau and larger inner_maxit.")
     #indexes in long format corresponding to the j_constr-th col of B
   #get score stat
   indexes_to_remove <- (j_ref - 1)*p + 1:p
-  score_stat <- try(
+  score_res <- try(
     get_score_stat(Y = Y,
                  X_cup = X_cup,
                  X = X,
@@ -202,6 +202,11 @@ retrying with smaller penalty scaling parameter tau and larger inner_maxit.")
                  I_inv = I_inv,
                  Dy = Dy,
                  cluster = cluster))
+  if (inherits(score_res, "try-error")) {
+    score_stat <- score_res
+  } else {
+    score_stat <- score_res$score_stat
+  }
 
   if(!return_both_score_pvals){ #typically we want only one score p-value
                                 #(using only one version of information matrix)
@@ -210,6 +215,7 @@ retrying with smaller penalty scaling parameter tau and larger inner_maxit.")
       score_stat <- NA
     }
   return(list("score_stat" = score_stat,
+              "score_pieces" = score_res, 
               "pval" = pchisq(score_stat,1,lower.tail = FALSE),
               "log_pval" = pchisq(score_stat,1,lower.tail = FALSE, log.p = TRUE),
               "niter" = constrained_fit$niter,
@@ -226,7 +232,7 @@ retrying with smaller penalty scaling parameter tau and larger inner_maxit.")
   } else{ 
     #for simulations -- if we want to return both the score p-value using
     #information from full model fit and from null model
-      score_stat_with_null_info <-
+      score_res_with_null_info <-
         get_score_stat(Y = Y,
                        X_cup = X_cup,
                        X = X,
@@ -241,17 +247,23 @@ retrying with smaller penalty scaling parameter tau and larger inner_maxit.")
                        p = p,
                        I_inv = NULL,
                        Dy = Dy)
-
+      if (inherits(score_res_with_null_info, "try-error")) {
+        score_stat_with_null_info <- score_res_with_null_info
+      } else {
+        score_stat_with_null_info <- score_res_with_null_info$score_stat
+      }
       score_stat_with_null_info <- score_stat_with_null_info
       if (inherits(score_stat_with_null_info, "try-error")) {
         warning("one of the score statistics for test of k = ", k_constr, " and j = ", j_constr, " cannot be computed, likely because the information matrix is computationally singular.")
         score_stat_with_null_info <- NA
       }
 
       return(list("score_stat" = score_stat,
+                  "score_pieces" = score_res,
                   "pval" = pchisq(score_stat,1,lower.tail = FALSE),
                   "log_pval" = pchisq(score_stat,1,lower.tail = FALSE, log.p = TRUE),
                   "score_stat_null_info" = score_stat_with_null_info,
+                  "score_pieces_null_info" = score_res_with_null_info,
                   "pval_null_info" = pchisq(score_stat_with_null_info,1,lower.tail = FALSE),
                   "log_pval_null_info" = pchisq(score_stat_with_null_info,1,lower.tail = FALSE,log.p = TRUE),
                   "niter" = constrained_fit$niter,

diff --git a/man/emuFit.Rd b/man/emuFit.Rd
diff --git a/tests/testthat/test-emuFit.R b/tests/testthat/test-emuFit.R
@@ -640,3 +640,30 @@ test_that("emuFit works with fitted objects passed in", {
   })
 
 })
+
+test_that("emuFit has 'score_components' object when `return_score_componments = T`", {
+  mod <- emuFit(Y = Y,
+                X = X,
+                verbose = FALSE,
+                B_null_tol = 1e-2,
+                tolerance = 0.01,
+                tau = 2,
+                return_wald_p = FALSE,
+                compute_cis = FALSE,
+                run_score_tests = TRUE, 
+                test_kj = data.frame(k = 2, j = 1),
+                return_score_components = T)
+  expect_true("score_components" %in% names(mod))
+  mod <- emuFit(Y = Y,
+                X = X,
+                verbose = FALSE,
+                B_null_tol = 1e-2,
+                tolerance = 0.01,
+                tau = 2,
+                return_wald_p = FALSE,
+                compute_cis = FALSE,
+                run_score_tests = TRUE, 
+                test_kj = data.frame(k = 2, j = 1),
+                return_score_components = F)
+  expect_false("score_components" %in% names(mod))
+})
diff --git a/tests/testthat/test-get_score_stat.R b/tests/testthat/test-get_score_stat.R
@@ -96,7 +96,7 @@ test_that("Robust score statistic is invariant to reference taxon", {
                  n = n,
                  p = p,
                  I = NULL,
-                 Dy = NULL)
+                 Dy = NULL)$score_stat
     )
 
   expect_true(sd(score_stats)<1e-5)
@@ -193,7 +193,7 @@ under null when Poisson assumption is met", {
                  c1 = c1,
                  maxit = maxit,
                  inner_maxit = inner_maxit,
-                 verbose = FALSE)
+                 verbose = FALSE)$score_stat
 
     j_ref <- 5
 
@@ -212,7 +212,7 @@ under null when Poisson assumption is met", {
                                        check_influence = FALSE,
                                        I = NULL,
                                        Dy = NULL,
-                                       model_based = FALSE)
+                                       model_based = FALSE)$score_stat
 
     x <- seq(-10,5,0.01)
     # hist(log(score_stats[1:sim]),breaks = 20,freq = FALSE)
@@ -329,7 +329,7 @@ test_that("model-based score statistic is invariant to reference taxon", {
                                        n = n,
                                        p = p,
                                        I = NULL,
-                                       Dy = NULL)
+                                       Dy = NULL)$score_stat
     })
 
    expect_true(sd(score_stats)<1e-8)

diff --git a/tests/testthat/test-score_test.R b/tests/testthat/test-score_test.R
@@ -87,7 +87,7 @@ we do *not* get same results if we use incorrect info", {
                      J = J,
                      n = n,
                      p = p,
-                     I_inv = I_inv)
+                     I_inv = I_inv)$score_stat
 
     score_stat_with_other_mat <-
       get_score_stat(Y = Y_aug,
@@ -102,7 +102,7 @@ we do *not* get same results if we use incorrect info", {
                      J = J,
                      n = n,
                      p = p,
-                     I_inv = diag(rep(1,18)))
+                     I_inv = diag(rep(1,18)))$score_stat
 
     expect_equal(score_stat_with_I_inv,score_test_as_is$score_stat)
     expect_true(score_stat_with_other_mat !=score_test_as_is$score_stat)