Commit

Merge pull request #24 from svteichman/main
Add an option to use the pseudo-inverse in place of the inverse when the matrix being inverted is singular or computationally singular
svteichman authored Aug 2, 2024
2 parents afc2cda + 56855af commit 5d42cd1
Showing 9 changed files with 233 additions and 31 deletions.
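As background for this change (a minimal sketch, not part of the changed files; the small matrix below is purely illustrative): when the matrix to be inverted is singular or computationally singular, solve() throws an error, while MASS::ginv() still returns the Moore-Penrose pseudo-inverse, so a usable quantity can still be computed.

info_mat <- matrix(c(1, 1, 1, 1), nrow = 2)   # exactly singular 2x2 matrix
score <- c(1, 2)
try(solve(info_mat) %*% score)                # error: system is exactly singular
MASS::ginv(info_mat) %*% score                # pseudo-inverse still gives a result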
3 changes: 2 additions & 1 deletion DESCRIPTION
@@ -23,4 +23,5 @@ Imports:
rlang,
sandwich,
geeasy,
Matrix
Matrix,
MASS
16 changes: 14 additions & 2 deletions R/multinom_fisher_scoring.R
@@ -12,12 +12,13 @@
#' @param stepSize The size of the step to take during the parameter update step.
#' @param arm_c Control parameter for checking Armijo condition.
#' @param maxit Maximum number of iterations for Fisher scoring. Defaults to 250.
#' @param pseudo_inv If TRUE, use the pseudo-inverse of the Fisher information matrix for the update (in case the Fisher information matrix is computationally singular). Defaults to FALSE.
#' @return The optimal beta values under the null or alternative model.
#'
#' @author Shirley Mathur
#'
#' @export
multinom_fisher_scoring <- function(beta, X, Y, null = TRUE, strong = FALSE, null_j = NULL, tol = 1e-5, stepSize = 0.5, arm_c = 0.5, maxit = 250) {
multinom_fisher_scoring <- function(beta, X, Y, null = TRUE, strong = FALSE, null_j = NULL, tol = 1e-5, stepSize = 0.5, arm_c = 0.5, maxit = 250, pseudo_inv = FALSE) {

# check that if strong is FALSE, j is provided
if (null & !strong) {
@@ -69,7 +70,18 @@ multinom_fisher_scoring <- function(beta, X, Y, null = TRUE, strong = FALSE, nul
info_mat <- info_mat[optim_indices, optim_indices]

#compute step direction
step_dir <- solve(info_mat) %*% score
if (!pseudo_inv) {
step_dir <- tryCatch({solve(info_mat) %*% score},
error = function(cond) {
print(cond)
return(NA)
})
} else {
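# pseudo_inv = TRUE: try solve() first, then fall back to the Moore-Penrose pseudo-inverse (MASS::ginv) on error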
step_dir <- tryCatch({solve(info_mat) %*% score},
error = function(cond) {
return(MASS::ginv(info_mat) %*% score)
})
}

#first get acceptable update
accepted <- FALSE
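For readability, here is the fallback pattern added above, shown in isolation (a self-contained sketch; safe_step_dir, info_mat, and score are illustrative names, not identifiers from the package):

safe_step_dir <- function(info_mat, score, pseudo_inv = FALSE) {
  if (!pseudo_inv) {
    # default behaviour: report the error and return NA
    tryCatch(solve(info_mat) %*% score,
             error = function(cond) { print(cond); NA })
  } else {
    # pseudo_inv = TRUE: fall back to the Moore-Penrose pseudo-inverse
    tryCatch(solve(info_mat) %*% score,
             error = function(cond) MASS::ginv(info_mat) %*% score)
  }
}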
5 changes: 3 additions & 2 deletions R/multinom_penalized_estimation.R
@@ -13,14 +13,15 @@
#' @param arm_c Control parameter for checking Armijo condition, used in the MLE step.
#' @param maxit Maximum number of iterations for augmentation algorithm. Defaults to 250.
#' @param maxit_fs Maximum number of iterations for Fisher scoring. Defaults to 5.
#' @param pseudo_inv If TRUE, use the pseudo-inverse of the Fisher information matrix for the update (in case the Fisher information matrix is computationally singular). Defaults to FALSE.
#' @return The optimal beta values under the null or alternative model.
#'
#' @author Sarah Teichman
#'
#' @export
multinom_penalized_estimation <- function(beta, X, Y, null = TRUE, strong = FALSE, null_j = NULL,
tol = 1e-5, stepSize = 0.5, arm_c = 0.5,
maxit = 250, maxit_fs = 5) {
maxit = 250, maxit_fs = 5, pseudo_inv = FALSE) {

# check that if strong is FALSE, j is provided
if (null & !strong) {
@@ -63,7 +64,7 @@ multinom_penalized_estimation <- function(beta, X, Y, null = TRUE, strong = FALS
beta_curr <- multinom_fisher_scoring(beta = beta_old, X = X, Y = Y_curr,
null = null, strong = strong, null_j = null_j,
tol = tol, stepSize = stepSize, arm_c = arm_c,
maxit = maxit_fs)
maxit = maxit_fs, pseudo_inv = pseudo_inv)

# check for convergence
B_diff <- max(abs(beta_curr - beta_old))
67 changes: 45 additions & 22 deletions R/multinom_test.R
@@ -9,6 +9,7 @@
#' Default is FALSE.
#' @param j If `strong` is FALSE, this argument must be supplied. This gives the category \eqn{j} in the weak null hypothesis that \eqn{\beta_j = 0}.
#' @param penalty If TRUE, a Firth penalty will be applied to estimation under the alternative and under the null. Defaults to FALSE (ask Amy for her preference)
#' @param pseudo_inv If TRUE, use the pseudo-inverse for the inverted portions of the robust score test statistic to avoid issues with nearly singular matrices. Defaults to FALSE.
#'
#' @return A list including the robust score test statistic for the specified hypothesis test, the p-value,
#' estimated parameters under the null hypothesis, and estimated parameters under the alternative hypothesis.
@@ -19,7 +20,7 @@
#'
#' @export
multinom_test <- function(X = NULL, Y, formula = NULL, data = NULL,
strong = FALSE, j = NULL, penalty = FALSE) {
strong = FALSE, j = NULL, penalty = FALSE, pseudo_inv = FALSE) {

# if X is null and formula and data are provided, get design matrix
if (is.null(X)) {
@@ -69,9 +70,9 @@ covariates in formula must be provided.")
beta_init <- matrix(1, nrow = p + 1, ncol = J-1)
beta_init[(2:(p+1)), j] <- 0
if (!penalty) {
beta_null1mle <- multinom_fisher_scoring(beta_init, X, Y, strong = FALSE, null_j = j)
beta_null1mle <- multinom_fisher_scoring(beta_init, X, Y, strong = FALSE, null_j = j, pseudo_inv = pseudo_inv)
} else {
beta_null1mle <- multinom_penalized_estimation(beta_init, X, Y, strong = FALSE, null_j = j)
beta_null1mle <- multinom_penalized_estimation(beta_init, X, Y, strong = FALSE, null_j = j, pseudo_inv = pseudo_inv)
}

#jacobian of function of parameter h(\beta) = 0
@@ -102,13 +103,24 @@ covariates in formula must be provided.")
the_df <- p

#compute statistic!
T_GS<- tryCatch({as.numeric(t(S1) %*% solve(I1) %*% t(H1) %*%
(solve(H1 %*% solve(I1) %*% D1 %*% solve(I1) %*% t(H1))) %*%
H1 %*% solve(I1) %*% S1)},
error = function(cond) {
print(cond)
return(NA)
})
if (!pseudo_inv) {
T_GS<- tryCatch({as.numeric(t(S1) %*% solve(I1) %*% t(H1) %*%
(solve(H1 %*% solve(I1) %*% D1 %*% solve(I1) %*% t(H1))) %*%
H1 %*% solve(I1) %*% S1)},
error = function(cond) {
print(cond)
return(NA)
})
} else {
T_GS<- tryCatch({as.numeric(t(S1) %*% MASS::ginv(I1) %*% t(H1) %*%
(MASS::ginv(H1 %*% MASS::ginv(I1) %*% D1 %*% MASS::ginv(I1) %*% t(H1))) %*%
H1 %*% MASS::ginv(I1) %*% S1)},
error = function(cond) {
print(cond)
return(NA)
})
}


} else {
#initialize value for beta_k0 for all k = 1, \dots, J-1
@@ -124,9 +136,9 @@ covariates in formula must be provided.")
beta_init <- matrix(0, nrow = p + 1, ncol = J-1)
beta_init[1,] <- 1
if (!penalty) {
beta_null2mle <- multinom_fisher_scoring(beta_init, X, Y, strong = TRUE)
beta_null2mle <- multinom_fisher_scoring(beta_init, X, Y, strong = TRUE, pseudo_inv = pseudo_inv)
} else {
beta_null2mle <- multinom_penalized_estimation(beta_init, X, Y, strong = TRUE)
beta_null2mle <- multinom_penalized_estimation(beta_init, X, Y, strong = TRUE, pseudo_inv = pseudo_inv)
}

## AW TODO below not needed?
@@ -158,25 +170,36 @@ covariates in formula must be provided.")
the_df <- p*(J-1)

#compute statistic!
T_GS <- tryCatch({as.numeric(t(S2) %*% solve(I2) %*% t(H2) %*%
(solve(H2 %*% solve(I2) %*% D2 %*% solve(I2) %*% t(H2))) %*%
H2 %*% solve(I2) %*% S2)},
error = function(cond) {
warning("Test statistic cannot be calculated due to the error printed above.")
print(cond)
return(NA)
if (!pseudo_inv) {
T_GS <- tryCatch({as.numeric(t(S2) %*% solve(I2) %*% t(H2) %*%
(solve(H2 %*% solve(I2) %*% D2 %*% solve(I2) %*% t(H2))) %*%
H2 %*% solve(I2) %*% S2)},
error = function(cond) {
warning("Test statistic cannot be calculated due to the error printed above.")
print(cond)
return(NA)
})
} else {
T_GS <- tryCatch({as.numeric(t(S2) %*% MASS::ginv(I2) %*% t(H2) %*%
(MASS::ginv(H2 %*% MASS::ginv(I2) %*% D2 %*% MASS::ginv(I2) %*% t(H2))) %*%
H2 %*% MASS::ginv(I2) %*% S2)},
error = function(cond) {
warning("Test statistic cannot be calculated due to the error printed above.")
print(cond)
return(NA)
})
}



}

#compute mle under alternative
beta_alt <- matrix(-0.02, nrow = p + 1, ncol = J-1)
mle_alt <- multinom_fisher_scoring(beta_alt, X, Y, null = FALSE)
if (!penalty) {
mle_alt <- multinom_fisher_scoring(beta_alt, X, Y, null = FALSE)
mle_alt <- multinom_fisher_scoring(beta_alt, X, Y, null = FALSE, pseudo_inv = pseudo_inv)
} else {
mle_alt <- multinom_penalized_estimation(beta_alt, X, Y, null = FALSE)
mle_alt <- multinom_penalized_estimation(beta_alt, X, Y, null = FALSE, pseudo_inv = pseudo_inv)
}

return(list("test_stat" = T_GS,
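For reference, the quantity computed in the two T_GS branches above is the robust score statistic; in the notation of the code (S the score-type vector, I the information-type matrix, D and H the matrices constructed just before the statistic, with H the Jacobian of the constraint h(\beta) = 0), it is

T_{GS} = S^\top I^{-1} H^\top \left( H I^{-1} D I^{-1} H^\top \right)^{-1} H I^{-1} S,

and with pseudo_inv = TRUE each matrix inverse in this expression is replaced by the Moore-Penrose pseudo-inverse computed with MASS::ginv().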
5 changes: 4 additions & 1 deletion man/multinom_fisher_scoring.Rd

Some generated files are not rendered by default.

5 changes: 4 additions & 1 deletion man/multinom_penalized_estimation.Rd

Some generated files are not rendered by default.

5 changes: 4 additions & 1 deletion man/multinom_test.Rd

Some generated files are not rendered by default.

152 changes: 152 additions & 0 deletions tests/testthat/test-multinom_pseudo_inv.R
@@ -0,0 +1,152 @@
nsim <- 100

test_that("using the inverse and pseudo inverse give the same results when an inverse exists", {

nn <- 10
stat <- rep(NA, nsim)
stat_alt <- rep(NA, nsim)

for (sim in 1:nsim) {
set.seed(sim)
df <- simulate_data_mult(nn = nn, strong=TRUE)
result <- suppressWarnings(multinom_test(df$X, df$Y, strong=TRUE))
result_alt <- suppressWarnings(multinom_test(df$X, df$Y, strong=TRUE, pseudo_inv = TRUE))
stat[sim] <- result$test_stat
stat_alt[sim] <- result_alt$test_stat
}

expect_true(all.equal(stat, stat_alt, tol = 1e-03))

})

test_that("the test stat with the pseudo inverse controls t1e when the inverse is computationally singular for strong hypothesis", {

nn <- 10
p <- rep(NA, nsim)
p_alt <- rep(NA, nsim)

for (sim in 1:nsim) {
set.seed(sim)
df <- simulate_data_mult(nn = nn, strong=TRUE, ms = 10)
df$Y[, 1] <- 0
df$Y[, 3] <- 0
result <- suppressWarnings(multinom_test(df$X, df$Y, strong=TRUE))
result_alt <- suppressWarnings(multinom_test(df$X, df$Y, strong=TRUE, pseudo_inv = TRUE))
p[sim] <- result$p
p_alt[sim] <- result_alt$p
}

expect_true(all.equal(p[!is.na(p)], p_alt[!is.na(p)], tol = 0.005))
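# Monte Carlo check: under the null, the empirical rejection rate should not exceed the nominal
# level by more than roughly 1.96 binomial standard errors, sqrt(alpha * (1 - alpha) / nsim)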
expect_true(mean(p_alt < 0.05) < 0.05 + 1.96*sqrt(0.05 * 0.95/nsim))
expect_true(mean(p_alt < 0.10) < 0.1 + 1.96*sqrt(0.1 * 0.9/nsim))

})

test_that("the test stat with the pseudo inverse controls t1e when the inverse is computationally singular for weak hypothesis", {

# I'm having trouble constructing cases that produce computationally singular errors
# for the weak null

nn <- 6
p <- rep(NA, nsim)
p_alt <- rep(NA, nsim)

for (sim in 1:nsim) {
set.seed(sim)
df <- simulate_data_mult(nn = nn, strong=FALSE, jj_null = 2, ms = 100)
df$Y[, 1] <- 0
df$Y[, 3] <- 0
df$Y[, 4] <- 0
result <- suppressWarnings(multinom_test(df$X, df$Y, strong=FALSE, j = 2))
result_alt <- suppressWarnings(multinom_test(df$X, df$Y, strong=FALSE, j = 2, pseudo_inv = TRUE))
p[sim] <- result$p
p_alt[sim] <- result_alt$p
}

expect_true(all.equal(p[!is.na(p)], p_alt[!is.na(p)], tol = 0.0001))
expect_true(mean(p_alt < 0.05) < 0.05 + 1.96*sqrt(0.05 * 0.95/nsim))
expect_true(mean(p_alt < 0.10) < 0.1 + 1.96*sqrt(0.1 * 0.9/nsim))

})

test_that("power increases with signal for the test stat with the pseudo inverse when the inverse is computationally singular for strong hypothesis", {

nn <- 20
p <- rep(NA, nsim)
p_alt <- rep(NA, nsim)

for (sim in 1:nsim) {
set.seed(sim)
df <- simulate_data_mult(nn = nn, null = FALSE, ms = 10)
df$Y[, 1] <- 0
df$Y[, 3] <- 0
result <- suppressWarnings(multinom_test(df$X, df$Y, strong=TRUE))
result_alt <- suppressWarnings(multinom_test(df$X, df$Y, strong=TRUE, pseudo_inv = TRUE))
p[sim] <- result$p
p_alt[sim] <- result_alt$p
}

expect_true(all.equal(p[!is.na(p)], p_alt[!is.na(p)], tol = 0.05))

nn <- 20
p2 <- rep(NA, nsim)
p_alt2 <- rep(NA, nsim)

for (sim in 1:nsim) {
set.seed(sim)
df <- simulate_data_mult(nn = nn, null = FALSE, ms = 10, alt_magnitude = 3)
df$Y[, 1] <- 0
df$Y[, 3] <- 0
result <- suppressWarnings(multinom_test(df$X, df$Y, strong=TRUE))
result_alt <- suppressWarnings(multinom_test(df$X, df$Y, strong=TRUE, pseudo_inv = TRUE))
p2[sim] <- result$p
p_alt2[sim] <- result_alt$p
}

# there are some bigger differences for this set
#expect_true(all.equal(p2[!is.na(p2)], p_alt2[!is.na(p2)], tol = 0.3))
expect_true(mean(p_alt2 < 0.05) > mean(p_alt < 0.05))

})

test_that("power increases with signal for the test stat with the pseudo inverse when the inverse is computationally singular for strong hypothesis", {

nn <- 20
p <- rep(NA, nsim)
p_alt <- rep(NA, nsim)

for (sim in 1:nsim) {
set.seed(sim)
df <- simulate_data_mult(nn = nn, null = FALSE, ms = 100)
df$Y[, 1] <- 0
df$Y[, 3] <- 0
df$Y[, 4] <- 0
result <- suppressWarnings(multinom_test(df$X, df$Y, strong=FALSE, j = 2))
result_alt <- suppressWarnings(multinom_test(df$X, df$Y, strong=FALSE, j = 2, pseudo_inv = TRUE))
p[sim] <- result$p
p_alt[sim] <- result_alt$p
}

expect_true(all.equal(p[!is.na(p)], p_alt[!is.na(p)], tol = 0.005))

nn <- 20
p2 <- rep(NA, nsim)
p_alt2 <- rep(NA, nsim)

for (sim in 1:nsim) {
set.seed(sim)
df <- simulate_data_mult(nn = nn, null = FALSE, ms = 100, alt_magnitude = 3)
df$Y[, 1] <- 0
df$Y[, 3] <- 0
df$Y[, 4] <- 0
result <- suppressWarnings(multinom_test(df$X, df$Y, strong=FALSE, j = 2))
result_alt <- suppressWarnings(multinom_test(df$X, df$Y, strong=FALSE, j = 2, pseudo_inv = TRUE))
p2[sim] <- result$p
p_alt2[sim] <- result_alt$p
}

# there are some bigger differences for this set
expect_true(all.equal(p2[!is.na(p2)], p_alt2[!is.na(p2)], tol = 0.001))
expect_true(mean(p_alt2 < 0.05) > mean(p_alt < 0.05))

})