From 569ffd11494b0de1c04ca0a5c0a179b5a0e45526 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Tue, 12 Sep 2023 22:19:40 +0200 Subject: [PATCH] fix CI inversion bug #138, version 0.14.3 --- DESCRIPTION | 2 +- NEWS.md | 5 +++ R/boot_algo_fastnwild.R | 93 +++++++++++++++++++++++++---------------- R/invert_p_val.R | 21 +++++----- 4 files changed, 73 insertions(+), 48 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a91f73a1..abb9d107 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: fwildclusterboot Title: Fast Wild Cluster Bootstrap Inference for Linear Models -Version: 0.14.2 +Version: 0.14.3 Authors@R: c( person("Alexander", "Fischer", , "alexander-fischer1801@t-online.de", role = c("aut", "cre")), person("David", "Roodman", role = "aut"), diff --git a/NEWS.md b/NEWS.md index c2b10fe1..91ed8afd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# fwildclusterboot 0.14.3 + +- Fix a bug with CI inversion when `r` was set to be close to the estimated parameter. (CI inversion failed). See [#138](https://github.com/s3alfisc/fwildclusterboot/issues/138). Thanks to Achim Zeileis & team for raising this issue! + + # fwildclusterboot 0.14.2 Minor fixes for CRAN release. diff --git a/R/boot_algo_fastnwild.R b/R/boot_algo_fastnwild.R index 4889a4c8..aeb8b699 100644 --- a/R/boot_algo_fastnwild.R +++ b/R/boot_algo_fastnwild.R @@ -13,9 +13,9 @@ boot_algo_fastnwild <- small_sample_correction, conf_int, maxiter, - tol, + tol, sampling) { - + #' Fast wild cluster bootstrap algorithm #' #' function that implements the fast bootstrap algorithm as described @@ -63,11 +63,11 @@ boot_algo_fastnwild <- #' finding procedure to find the confidence interval. #' 10 by default. #' @param sampling 'dqrng' or 'standard'. If 'dqrng', the 'dqrng' package is - #' used for random number generation (when available). If 'standard', - #' functions from the 'stats' package are used when available. - #' This argument is mostly a convenience to control random number generation in - #' a wrapper package around `fwildclusterboot`, `wildrwolf`. - #' I recommend to use the fast' option. + #' used for random number generation (when available). If 'standard', + #' functions from the 'stats' package are used when available. + #' This argument is mostly a convenience to control random number generation in + #' a wrapper package around `fwildclusterboot`, `wildrwolf`. + #' I recommend to use the fast' option. #' @return A list of ... #' @importFrom collapse fsum GRP #' @importFrom stats as.formula coef model.matrix model.response @@ -99,13 +99,13 @@ boot_algo_fastnwild <- N_G_bootcluster <- length(unique(bootcluster[[1]])) v <- get_weights( - type = type, - full_enumeration = full_enumeration, - N_G_bootcluster = N_G_bootcluster, - boot_iter = boot_iter, + type = type, + full_enumeration = full_enumeration, + N_G_bootcluster = N_G_bootcluster, + boot_iter = boot_iter, sampling = sampling ) - + # prepare "key" for use with collapse::fsum() g <- collapse::GRP(bootcluster[[1]], call = FALSE) @@ -143,17 +143,17 @@ boot_algo_fastnwild <- # dim = N_G x k Q1 <- collapse::fsum(WX * as.vector(Q), g) - + P2_bootcluster <- vec2mat( x = as.vector(WXARP), group_id = g$group.id ) - + Q2_bootcluster <- vec2mat( x = as.vector(WXARQ), group_id = g$group.id ) - + # P2_bootcluster <- Matrix::t(Matrix.utils::aggregate.Matrix( # # see notes; formerly diag_XinvXXRuS_a # Matrix::Diagonal( @@ -305,28 +305,46 @@ boot_algo_fastnwild <- if (is.null(conf_int) || conf_int == TRUE) { # guess for standard errors - if (impose_null == TRUE) { - # should always be positive, point_estimate and t_stat need to have same - # sign, abs for security - se_guess <- abs(point_estimate / t_stat) - } else if (impose_null == FALSE) { - se_guess <- abs((point_estimate - r) / t_stat) - } - - - conf_int <- invert_p_val( - ABCD = ABCD, - small_sample_correction = small_sample_correction, - boot_iter = boot_iter, - point_estimate = point_estimate, - se_guess = se_guess, - clustid = clustid, - sign_level = sign_level, - vcov_sign = vcov_sign, - impose_null = impose_null, - p_val_type = p_val_type, - maxiter = maxiter, - tol = tol + + #browser() + conf_int <- tryCatch({ + + if (impose_null == TRUE) { + # should always be positive, point_estimate and t_stat need to have same + # sign, abs for security + pval_peak <- abs(point_estimate) + } else if (impose_null == FALSE) { + pval_peak <- abs((point_estimate - r)) + } + + invert_p_val( + ABCD = ABCD, + small_sample_correction = small_sample_correction, + boot_iter = boot_iter, + point_estimate = point_estimate, + pval_peak = pval_peak, + clustid = clustid, + sign_level = sign_level, + vcov_sign = vcov_sign, + impose_null = impose_null, + p_val_type = p_val_type, + maxiter = maxiter, + tol = tol + ) + + }, + error = function(e){ + message("Unfortunately, confidence inversion failed due to poorly chosen starting values.", + "Please let the maintainer know about this by opening an issue on GitHub.", + "No confidence interval is computed.") + conf_int <- list( + conf_int = NA, + p_test_vals = NA, + test_vals = NA + ) + conf_int + + } ) } else { conf_int <- list( @@ -336,6 +354,7 @@ boot_algo_fastnwild <- ) } + res <- list( p_val = p_val, conf_int = conf_int$conf_int, diff --git a/R/invert_p_val.R b/R/invert_p_val.R index 80ec1648..57ca169b 100644 --- a/R/invert_p_val.R +++ b/R/invert_p_val.R @@ -7,7 +7,7 @@ #' @param boot_iter An integer. Number of bootstrap iterations #' @param point_estimate A scalar. Point estimate of the coefficient #' of interest from the regression model -#' @param se_guess A scalar vector of dimension 2. A guess of the standard +#' @param pval_peak A scalar vector of dimension 2. A guess of the standard #' error that initiates the p-value inversion. #' @param clustid A vector with the clusters #' @param sign_level A numeric between 0 and 1. Sets to confidence level: @@ -33,7 +33,7 @@ invert_p_val <- function(ABCD, small_sample_correction, boot_iter, point_estimate, - se_guess, + pval_peak, clustid, sign_level, vcov_sign, @@ -42,7 +42,7 @@ invert_p_val <- function(ABCD, tol, maxiter) { check_arg(point_estimate, "numeric scalar") - check_arg(se_guess, "numeric scalar") + check_arg(pval_peak, "numeric scalar") check_arg(clustid, "data.frame") check_arg(sign_level, "numeric scalar") # check_arg(vcov_sign) @@ -107,19 +107,20 @@ invert_p_val <- function(ABCD, # define functions to find boundaries separately get_start_vals <- function(point_estimate, - se_guess, + pval_peak, sign_level, upper) { #' @param point_estimate the point estimate of the model - #' @param se_guess A guess for the standard deviation + #' @param pval_peak A guess for the standard deviation #' @param sign_level the significance / 1-coverage level of the confidence #' interval #' @param upper logical. Should the upper or lower starting value be #' searched for? #' @noRd + check <- FALSE - inflate_se <- c(2^(0:100 / 2)) + inflate_se <- (1:100 / 10) ** 2 len_inflate <- length(inflate_se) j <- 1 @@ -135,10 +136,10 @@ invert_p_val <- function(ABCD, # start guesses by taking confidence interval guess times inflation factor if (upper == TRUE) { starting_vals <- - as.numeric(point_estimate + inflate_se[j] * se_guess) + as.numeric(point_estimate + inflate_se[j] * pval_peak) } else if (upper == FALSE) { starting_vals <- - as.numeric(point_estimate - inflate_se[j] * se_guess) + as.numeric(point_estimate - inflate_se[j] * pval_peak) } # find starting value @@ -166,7 +167,7 @@ invert_p_val <- function(ABCD, start_vals <- lapply(c(TRUE, FALSE), function(x) { get_start_vals( point_estimate = point_estimate, - se_guess = se_guess, + pval_peak = pval_peak, sign_level = sign_level, upper = x ) @@ -275,4 +276,4 @@ invert_p_val <- function(ABCD, ) res_all -} \ No newline at end of file +}