Skip to content

Commit

Permalink
#14 boottest() now throws an error whenever variables used as fixed e…
Browse files Browse the repository at this point in the history
…ffects in either `felm()` or `feols()` are not factor variables in the original data.

associated tests in inst/tinytest/tests_numeric_fe_clusters.R
  • Loading branch information
s3alfisc committed Sep 10, 2021
1 parent 1cb4293 commit 97b9b70
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 1 deletion.
47 changes: 46 additions & 1 deletion R/preprocess2.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) {
# combine fixed effects in formula with main formula
# note: you get a warning here if rhs = 2 is empty (no fixed effect specified via fml)
formula_coef_fe <- suppressWarnings(formula(Formula::as.Formula(formula), lhs = 1, rhs = c(1, 2), collapse = TRUE))



formula <- formula_coef_fe

if (!is.null(eval(of$fixef))) {
Expand Down Expand Up @@ -82,11 +84,27 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) {
of[[1L]] <- quote(stats::model.frame)
# of is a data.frame that contains all variables: depvar, X, fixed effects and clusters specified
# in feols and via fe argument

of <- eval(of, parent.frame())

# check if one of the fixed effects or cluster variables is not of
# type character or factor
fixedid <- object$fixef_vars
#to_char <- union(fixedid, cluster)
to_char <- fixedid
j <- !(sapply(data.frame(of[,c(fixedid)]), class) %in% c("character", "factor"))
if(sum(j) > 0){
stop(paste("The fixed effects variable(s)", paste(to_char[j], collapse = " & "), "is/are not factor variables.`feols()` changes the types of fixed effects internally, but this is currently not supported in `boottest()`, but will be implemented in the future. To run the bootstrap, you will have to change the type of all fixed effects variables to factor prior to estimation with `feols()`."))
}
# # change fixed effects variables and cluster variables to character

# of[,to_char] <- sapply(of[,to_char], as.character)
# # sapply(of, class)

N_model <- object$nobs
model_param_names <- c(names(coef(object)), object$fixef)
} else if (class(object) == "felm") {

of <- object$call
o <- match(c("formula", "data", "weights"), names(of), 0L)
# keep only required arguments
Expand Down Expand Up @@ -133,6 +151,21 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) {
# names(of$fml) <- "formula"
of <- eval(of, parent.frame())

# check if one of the fixed effects or cluster variables is not of
# type character or factor
fixedid <- names(object$fe)
#to_char <- union(fixedid, cluster)
to_char <- fixedid
j <- !(sapply(data.frame(of[,c(fixedid)]), class) %in% c("character", "factor"))
if(sum(j) > 0){
stop(paste("The fixed effects variable(s)", paste(to_char[j], collapse = " & "), "is/are not factor variables.`lfe()` changes the types of fixed effects internally, but this is currently not supported in `boottest()`, but will be implemented in the future. To run the bootstrap, you will have to change the type of all fixed effects variables to factor prior to estimation with `felm()`."))
}
# # change fixed effects variables and cluster variables to character
# fixedid <- names(object$fe)
# to_char <- union(fixedid, cluster)
# of[,to_char] <- sapply(of[,to_char], as.character)
# # sapply(of, class)

N_model <- object$N
model_param_names <- rownames(coef(object))
} else if (class(object) == "lm") {
Expand Down Expand Up @@ -163,6 +196,14 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) {
of <- eval(of, parent.frame())
# print(of)

# # set cluster variables to character
# to_char <- cluster
# of[,to_char] <- sapply(of[,to_char], as.character)
# # sapply(of, class)




N_model <- length(residuals(object))
model_param_names <- names(coef(object))

Expand Down Expand Up @@ -226,6 +267,7 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) {
# Step 3: assign Y, X, weights, fixed_effects, W etc.

model_frame <- of

Y <- model.response(model_frame)

# check if there are no factor variables in the covariates and fixed effects after deletion of fe variable
Expand All @@ -239,10 +281,12 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) {
#
# X: need to delete clusters
X <- model.matrix(formula_coef_fe, model_frame)

if (!is.null(fe)) {
# note: simply update(..., -1) does not work - intercept is dropped, but all levels of other fe are kept
X <- X[, -which(colnames(X) == "(Intercept)")]
}

k <- dim(X)[2]
weights <- as.vector(model.weights(of))

Expand All @@ -261,12 +305,13 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) {

fixed_effect_W <- fixed_effect[, 1]
if (is.null(weights)) {
levels(fixed_effect_W) <- 1 / table(fixed_effect)
levels(fixed_effect_W) <- (1 / table(fixed_effect)) # because duplicate levels are forbidden
} else if (!is.null(weights)) {
stop("Currently, boottest() does not jointly support regression weights / WLS and fixed effects. If you want to use
boottest() for inference based on WLS, please set fe = NULL.")
# levels(fixed_effect_W) <- 1 / table(fixed_effect)
}

W <- Matrix::Diagonal(N, as.numeric(as.character(fixed_effect_W)))
n_fe <- length(unique(fixed_effect[, 1]))
}
Expand Down
3 changes: 3 additions & 0 deletions inst/tinytest/test_bootcluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
# ---------------------------------------------------------------------------------------------- #
# Part A1: no fixed effect in model

library(lfe)
library(fixest)

lm_fit <- lm(proposition_vote ~ treatment + ideology1 + log_income + as.factor(Q1_immigration) ,
data = fwildclusterboot:::create_data(N = 10000, N_G1 = 20, icc1 = 0.01, N_G2 = 10, icc2 = 0.01, numb_fe1 = 10, numb_fe2 = 10, seed = 1234))
feols_fit <- feols(proposition_vote ~ treatment + ideology1 + log_income + as.factor(Q1_immigration),
Expand Down
85 changes: 85 additions & 0 deletions inst/tinytest/test_numeric_fe_clusters.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# test issue https://github.com/s3alfisc/fwildclusterboot/issues/14
# raised by Timoth?e
# Test 1: one cluster variable is numeric
# Test 2: one fixed effect is numeric
# Test 3: both fixed effect and cluster variables are numeric

data(voters)

to_char <- c("Q1_immigration", "Q2_defense", "group_id1")
sapply(voters[, to_char], class)


# Test 1: one cluster variable is numeric
voters_1 <- voters
voters_1$Q2_defense <- as.numeric(voters_1$Q2_defense)
sapply(voters_1[, to_char], class)

feols_fit <- feols(proposition_vote ~ treatment + log_income | Q2_defense , data = voters)
feols_fit_2 <- feols(proposition_vote ~ treatment + log_income | Q2_defense, data = voters_1)
lfe_fit <- lfe::felm(proposition_vote ~ treatment + log_income | Q2_defense, data = voters_1)

# bootstrap inference via boottest()
# expect_no_error
boottest(feols_fit,
clustid = c("Q1_immigration","Q2_defense"),
B = 9999,
param = "treatment",
bootcluster='min')

expect_error(boottest(feols_fit_2,
clustid = c("Q1_immigration","Q2_defense"),
B = 9999,
param = "treatment",
bootcluster='min'))

expect_error(boottest(lfe_fit,
clustid = c("Q1_immigration","Q2_defense"),
B = 9999,
param = "treatment",
bootcluster='min'))



# are results the same no matter the type of the clustering variable?

run <- FALSE
if(run){
voters_2 <- voters
voters_2$group_id1 <- as.numeric(voters_2$group_id1)

feols_fit <- feols(proposition_vote ~ treatment + log_income , data = voters)
feols_fit_2 <- feols(proposition_vote ~ treatment + log_income , data = voters_2)
lfe_fit <- lfe::felm(proposition_vote ~ treatment + log_income , data = voters)
lfe_fit_2 <- lfe::felm(proposition_vote ~ treatment + log_income , data = voters_2)

feols_boot <-
boottest(feols_fit,
clustid = c("Q1_immigration","Q2_defense"),
B = 9999,
param = "treatment",
bootcluster='min')
feols_boot_2 <-
boottest(feols_fit_2,
clustid = c("Q1_immigration","Q2_defense"),
B = 9999,
param = "treatment",
bootcluster='min')
lfe_boot <-
boottest(lfe_fit,
clustid = c("Q1_immigration","Q2_defense"),
B = 9999,
param = "treatment",
bootcluster='min')
lfe_boot_2 <-
boottest(lfe_fit_2,
clustid = c("Q1_immigration","Q2_defense"),
B = 9999,
param = "treatment",
bootcluster='min')
expect_equal(feols_boot$p_val, feols_boot_2$p_val)
expect_equal(feols_boot$p_val, lfe_boot$p_val)
expect_equal(lfe_boot$p_val, lfe_boot_2$p_val)


}

0 comments on commit 97b9b70

Please sign in to comment.