Skip to content

Commit

Permalink
fix CI inversion bug #138, version 0.14.3
Browse files Browse the repository at this point in the history
  • Loading branch information
s3alfisc committed Sep 12, 2023
1 parent ecc6d5b commit 569ffd1
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 48 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: fwildclusterboot
Title: Fast Wild Cluster Bootstrap Inference for Linear Models
Version: 0.14.2
Version: 0.14.3
Authors@R: c(
person("Alexander", "Fischer", , "[email protected]", role = c("aut", "cre")),
person("David", "Roodman", role = "aut"),
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# fwildclusterboot 0.14.3

- Fix a bug with CI inversion when `r` was set to be close to the estimated parameter. (CI inversion failed). See [#138](https://github.com/s3alfisc/fwildclusterboot/issues/138). Thanks to Achim Zeileis & team for raising this issue!


# fwildclusterboot 0.14.2

Minor fixes for CRAN release.
Expand Down
93 changes: 56 additions & 37 deletions R/boot_algo_fastnwild.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ boot_algo_fastnwild <-
small_sample_correction,
conf_int,
maxiter,
tol,
tol,
sampling) {

#' Fast wild cluster bootstrap algorithm
#'
#' function that implements the fast bootstrap algorithm as described
Expand Down Expand Up @@ -63,11 +63,11 @@ boot_algo_fastnwild <-
#' finding procedure to find the confidence interval.
#' 10 by default.
#' @param sampling 'dqrng' or 'standard'. If 'dqrng', the 'dqrng' package is
#' used for random number generation (when available). If 'standard',
#' functions from the 'stats' package are used when available.
#' This argument is mostly a convenience to control random number generation in
#' a wrapper package around `fwildclusterboot`, `wildrwolf`.
#' I recommend to use the fast' option.
#' used for random number generation (when available). If 'standard',
#' functions from the 'stats' package are used when available.
#' This argument is mostly a convenience to control random number generation in
#' a wrapper package around `fwildclusterboot`, `wildrwolf`.
#' I recommend to use the fast' option.
#' @return A list of ...
#' @importFrom collapse fsum GRP
#' @importFrom stats as.formula coef model.matrix model.response
Expand Down Expand Up @@ -99,13 +99,13 @@ boot_algo_fastnwild <-
N_G_bootcluster <- length(unique(bootcluster[[1]]))

v <- get_weights(
type = type,
full_enumeration = full_enumeration,
N_G_bootcluster = N_G_bootcluster,
boot_iter = boot_iter,
type = type,
full_enumeration = full_enumeration,
N_G_bootcluster = N_G_bootcluster,
boot_iter = boot_iter,
sampling = sampling
)

# prepare "key" for use with collapse::fsum()
g <- collapse::GRP(bootcluster[[1]], call = FALSE)

Expand Down Expand Up @@ -143,17 +143,17 @@ boot_algo_fastnwild <-
# dim = N_G x k
Q1 <-
collapse::fsum(WX * as.vector(Q), g)

P2_bootcluster <- vec2mat(
x = as.vector(WXARP),
group_id = g$group.id
)

Q2_bootcluster <- vec2mat(
x = as.vector(WXARQ),
group_id = g$group.id
)

# P2_bootcluster <- Matrix::t(Matrix.utils::aggregate.Matrix(
# # see notes; formerly diag_XinvXXRuS_a
# Matrix::Diagonal(
Expand Down Expand Up @@ -305,28 +305,46 @@ boot_algo_fastnwild <-

if (is.null(conf_int) || conf_int == TRUE) {
# guess for standard errors
if (impose_null == TRUE) {
# should always be positive, point_estimate and t_stat need to have same
# sign, abs for security
se_guess <- abs(point_estimate / t_stat)
} else if (impose_null == FALSE) {
se_guess <- abs((point_estimate - r) / t_stat)
}


conf_int <- invert_p_val(
ABCD = ABCD,
small_sample_correction = small_sample_correction,
boot_iter = boot_iter,
point_estimate = point_estimate,
se_guess = se_guess,
clustid = clustid,
sign_level = sign_level,
vcov_sign = vcov_sign,
impose_null = impose_null,
p_val_type = p_val_type,
maxiter = maxiter,
tol = tol

#browser()
conf_int <- tryCatch({

if (impose_null == TRUE) {
# should always be positive, point_estimate and t_stat need to have same
# sign, abs for security
pval_peak <- abs(point_estimate)
} else if (impose_null == FALSE) {
pval_peak <- abs((point_estimate - r))
}

invert_p_val(
ABCD = ABCD,
small_sample_correction = small_sample_correction,
boot_iter = boot_iter,
point_estimate = point_estimate,
pval_peak = pval_peak,
clustid = clustid,
sign_level = sign_level,
vcov_sign = vcov_sign,
impose_null = impose_null,
p_val_type = p_val_type,
maxiter = maxiter,
tol = tol
)

},
error = function(e){
message("Unfortunately, confidence inversion failed due to poorly chosen starting values.",
"Please let the maintainer know about this by opening an issue on GitHub.",
"No confidence interval is computed.")
conf_int <- list(
conf_int = NA,
p_test_vals = NA,
test_vals = NA
)
conf_int

}
)
} else {
conf_int <- list(
Expand All @@ -336,6 +354,7 @@ boot_algo_fastnwild <-
)
}


res <- list(
p_val = p_val,
conf_int = conf_int$conf_int,
Expand Down
21 changes: 11 additions & 10 deletions R/invert_p_val.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @param boot_iter An integer. Number of bootstrap iterations
#' @param point_estimate A scalar. Point estimate of the coefficient
#' of interest from the regression model
#' @param se_guess A scalar vector of dimension 2. A guess of the standard
#' @param pval_peak A scalar vector of dimension 2. A guess of the standard
#' error that initiates the p-value inversion.
#' @param clustid A vector with the clusters
#' @param sign_level A numeric between 0 and 1. Sets to confidence level:
Expand All @@ -33,7 +33,7 @@ invert_p_val <- function(ABCD,
small_sample_correction,
boot_iter,
point_estimate,
se_guess,
pval_peak,
clustid,
sign_level,
vcov_sign,
Expand All @@ -42,7 +42,7 @@ invert_p_val <- function(ABCD,
tol,
maxiter) {
check_arg(point_estimate, "numeric scalar")
check_arg(se_guess, "numeric scalar")
check_arg(pval_peak, "numeric scalar")
check_arg(clustid, "data.frame")
check_arg(sign_level, "numeric scalar")
# check_arg(vcov_sign)
Expand Down Expand Up @@ -107,19 +107,20 @@ invert_p_val <- function(ABCD,

# define functions to find boundaries separately
get_start_vals <- function(point_estimate,
se_guess,
pval_peak,
sign_level,
upper) {
#' @param point_estimate the point estimate of the model
#' @param se_guess A guess for the standard deviation
#' @param pval_peak A guess for the standard deviation
#' @param sign_level the significance / 1-coverage level of the confidence
#' interval
#' @param upper logical. Should the upper or lower starting value be
#' searched for?
#' @noRd


check <- FALSE
inflate_se <- c(2^(0:100 / 2))
inflate_se <- (1:100 / 10) ** 2
len_inflate <- length(inflate_se)
j <- 1

Expand All @@ -135,10 +136,10 @@ invert_p_val <- function(ABCD,
# start guesses by taking confidence interval guess times inflation factor
if (upper == TRUE) {
starting_vals <-
as.numeric(point_estimate + inflate_se[j] * se_guess)
as.numeric(point_estimate + inflate_se[j] * pval_peak)
} else if (upper == FALSE) {
starting_vals <-
as.numeric(point_estimate - inflate_se[j] * se_guess)
as.numeric(point_estimate - inflate_se[j] * pval_peak)
}

# find starting value
Expand Down Expand Up @@ -166,7 +167,7 @@ invert_p_val <- function(ABCD,
start_vals <- lapply(c(TRUE, FALSE), function(x) {
get_start_vals(
point_estimate = point_estimate,
se_guess = se_guess,
pval_peak = pval_peak,
sign_level = sign_level,
upper = x
)
Expand Down Expand Up @@ -275,4 +276,4 @@ invert_p_val <- function(ABCD,
)

res_all
}
}

0 comments on commit 569ffd1

Please sign in to comment.