diff --git a/R/preprocess2.R b/R/preprocess2.R index 93832024..19c2cc7e 100644 --- a/R/preprocess2.R +++ b/R/preprocess2.R @@ -41,7 +41,9 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) { # combine fixed effects in formula with main formula # note: you get a warning here if rhs = 2 is empty (no fixed effect specified via fml) formula_coef_fe <- suppressWarnings(formula(Formula::as.Formula(formula), lhs = 1, rhs = c(1, 2), collapse = TRUE)) + + formula <- formula_coef_fe if (!is.null(eval(of$fixef))) { @@ -82,11 +84,27 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) { of[[1L]] <- quote(stats::model.frame) # of is a data.frame that contains all variables: depvar, X, fixed effects and clusters specified # in feols and via fe argument + of <- eval(of, parent.frame()) + # check if one of the fixed effects or cluster variables is not of + # type character or factor + fixedid <- object$fixef_vars + #to_char <- union(fixedid, cluster) + to_char <- fixedid + j <- !(sapply(data.frame(of[,c(fixedid)]), class) %in% c("character", "factor")) + if(sum(j) > 0){ + stop(paste("The fixed effects variable(s)", paste(to_char[j], collapse = " & "), "is/are not factor variables.`feols()` changes the types of fixed effects internally, but this is currently not supported in `boottest()`, but will be implemented in the future. To run the bootstrap, you will have to change the type of all fixed effects variables to factor prior to estimation with `feols()`.")) + } + # # change fixed effects variables and cluster variables to character + + # of[,to_char] <- sapply(of[,to_char], as.character) + # # sapply(of, class) + N_model <- object$nobs model_param_names <- c(names(coef(object)), object$fixef) } else if (class(object) == "felm") { + of <- object$call o <- match(c("formula", "data", "weights"), names(of), 0L) # keep only required arguments @@ -133,6 +151,21 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) { # names(of$fml) <- "formula" of <- eval(of, parent.frame()) + # check if one of the fixed effects or cluster variables is not of + # type character or factor + fixedid <- names(object$fe) + #to_char <- union(fixedid, cluster) + to_char <- fixedid + j <- !(sapply(data.frame(of[,c(fixedid)]), class) %in% c("character", "factor")) + if(sum(j) > 0){ + stop(paste("The fixed effects variable(s)", paste(to_char[j], collapse = " & "), "is/are not factor variables.`lfe()` changes the types of fixed effects internally, but this is currently not supported in `boottest()`, but will be implemented in the future. To run the bootstrap, you will have to change the type of all fixed effects variables to factor prior to estimation with `felm()`.")) + } + # # change fixed effects variables and cluster variables to character + # fixedid <- names(object$fe) + # to_char <- union(fixedid, cluster) + # of[,to_char] <- sapply(of[,to_char], as.character) + # # sapply(of, class) + N_model <- object$N model_param_names <- rownames(coef(object)) } else if (class(object) == "lm") { @@ -163,6 +196,14 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) { of <- eval(of, parent.frame()) # print(of) + # # set cluster variables to character + # to_char <- cluster + # of[,to_char] <- sapply(of[,to_char], as.character) + # # sapply(of, class) + + + + N_model <- length(residuals(object)) model_param_names <- names(coef(object)) @@ -226,6 +267,7 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) { # Step 3: assign Y, X, weights, fixed_effects, W etc. model_frame <- of + Y <- model.response(model_frame) # check if there are no factor variables in the covariates and fixed effects after deletion of fe variable @@ -239,10 +281,12 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) { # # X: need to delete clusters X <- model.matrix(formula_coef_fe, model_frame) + if (!is.null(fe)) { # note: simply update(..., -1) does not work - intercept is dropped, but all levels of other fe are kept X <- X[, -which(colnames(X) == "(Intercept)")] } + k <- dim(X)[2] weights <- as.vector(model.weights(of)) @@ -261,12 +305,13 @@ preprocess2 <- function(object, cluster, fe, param, bootcluster, na_omit) { fixed_effect_W <- fixed_effect[, 1] if (is.null(weights)) { - levels(fixed_effect_W) <- 1 / table(fixed_effect) + levels(fixed_effect_W) <- (1 / table(fixed_effect)) # because duplicate levels are forbidden } else if (!is.null(weights)) { stop("Currently, boottest() does not jointly support regression weights / WLS and fixed effects. If you want to use boottest() for inference based on WLS, please set fe = NULL.") # levels(fixed_effect_W) <- 1 / table(fixed_effect) } + W <- Matrix::Diagonal(N, as.numeric(as.character(fixed_effect_W))) n_fe <- length(unique(fixed_effect[, 1])) } diff --git a/inst/tinytest/test_bootcluster.R b/inst/tinytest/test_bootcluster.R index 54e15bf4..37fe8442 100644 --- a/inst/tinytest/test_bootcluster.R +++ b/inst/tinytest/test_bootcluster.R @@ -13,6 +13,9 @@ # ---------------------------------------------------------------------------------------------- # # Part A1: no fixed effect in model +library(lfe) +library(fixest) + lm_fit <- lm(proposition_vote ~ treatment + ideology1 + log_income + as.factor(Q1_immigration) , data = fwildclusterboot:::create_data(N = 10000, N_G1 = 20, icc1 = 0.01, N_G2 = 10, icc2 = 0.01, numb_fe1 = 10, numb_fe2 = 10, seed = 1234)) feols_fit <- feols(proposition_vote ~ treatment + ideology1 + log_income + as.factor(Q1_immigration), diff --git a/inst/tinytest/test_numeric_fe_clusters.R b/inst/tinytest/test_numeric_fe_clusters.R new file mode 100644 index 00000000..606137d0 --- /dev/null +++ b/inst/tinytest/test_numeric_fe_clusters.R @@ -0,0 +1,85 @@ +# test issue https://github.com/s3alfisc/fwildclusterboot/issues/14 +# raised by Timoth?e +# Test 1: one cluster variable is numeric +# Test 2: one fixed effect is numeric +# Test 3: both fixed effect and cluster variables are numeric + +data(voters) + +to_char <- c("Q1_immigration", "Q2_defense", "group_id1") +sapply(voters[, to_char], class) + + +# Test 1: one cluster variable is numeric +voters_1 <- voters +voters_1$Q2_defense <- as.numeric(voters_1$Q2_defense) +sapply(voters_1[, to_char], class) + +feols_fit <- feols(proposition_vote ~ treatment + log_income | Q2_defense , data = voters) +feols_fit_2 <- feols(proposition_vote ~ treatment + log_income | Q2_defense, data = voters_1) +lfe_fit <- lfe::felm(proposition_vote ~ treatment + log_income | Q2_defense, data = voters_1) + +# bootstrap inference via boottest() +# expect_no_error +boottest(feols_fit, + clustid = c("Q1_immigration","Q2_defense"), + B = 9999, + param = "treatment", + bootcluster='min') + +expect_error(boottest(feols_fit_2, + clustid = c("Q1_immigration","Q2_defense"), + B = 9999, + param = "treatment", + bootcluster='min')) + +expect_error(boottest(lfe_fit, + clustid = c("Q1_immigration","Q2_defense"), + B = 9999, + param = "treatment", + bootcluster='min')) + + + +# are results the same no matter the type of the clustering variable? + +run <- FALSE +if(run){ + voters_2 <- voters + voters_2$group_id1 <- as.numeric(voters_2$group_id1) + + feols_fit <- feols(proposition_vote ~ treatment + log_income , data = voters) + feols_fit_2 <- feols(proposition_vote ~ treatment + log_income , data = voters_2) + lfe_fit <- lfe::felm(proposition_vote ~ treatment + log_income , data = voters) + lfe_fit_2 <- lfe::felm(proposition_vote ~ treatment + log_income , data = voters_2) + + feols_boot <- + boottest(feols_fit, + clustid = c("Q1_immigration","Q2_defense"), + B = 9999, + param = "treatment", + bootcluster='min') + feols_boot_2 <- + boottest(feols_fit_2, + clustid = c("Q1_immigration","Q2_defense"), + B = 9999, + param = "treatment", + bootcluster='min') + lfe_boot <- + boottest(lfe_fit, + clustid = c("Q1_immigration","Q2_defense"), + B = 9999, + param = "treatment", + bootcluster='min') + lfe_boot_2 <- + boottest(lfe_fit_2, + clustid = c("Q1_immigration","Q2_defense"), + B = 9999, + param = "treatment", + bootcluster='min') + expect_equal(feols_boot$p_val, feols_boot_2$p_val) + expect_equal(feols_boot$p_val, lfe_boot$p_val) + expect_equal(lfe_boot$p_val, lfe_boot_2$p_val) + + +}