diff --git a/DESCRIPTION b/DESCRIPTION index 3fa39b5..770f487 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: did Title: Treatment Effects with Multiple Periods and Groups -Version: 2.2.0.907 +Version: 2.2.0.908 Authors@R: c(person("Brantly", "Callaway", email = "brantly.callaway@uga.edu", role = c("aut", "cre")), person("Pedro H. C.", "Sant'Anna", email="pedro.h.santanna@vanderbilt.edu", role = c("aut"))) URL: https://bcallaway11.github.io/did/, https://github.com/bcallaway11/did/ Description: The standard Difference-in-Differences (DID) setup involves two periods and two groups -- a treated group and untreated group. Many applications of DID methods involve more than two periods and have individuals that are treated at different points in time. This package contains tools for computing average treatment effect parameters in Difference in Differences setups with more than two periods and with variation in treatment timing using the methods developed in Callaway and Sant'Anna (2021). The main parameters are group-time average treatment effects which are the average treatment effect for a particular group at a particular time. These can be aggregated into a fewer number of treatment effect parameters, and the package deals with the cases where there is selective treatment timing, dynamic treatment effects, calendar time effects, or combinations of these. There are also functions for testing the Difference in Differences assumption, and plotting group-time average treatment effects. 
diff --git a/NEWS.md b/NEWS.md index 126916d..eb36319 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # did 2.2.0 - * Code improvements that made the package much faster and memory efficient + * Code improvements that made the package faster and more memory efficient * Improved automated testing and regression testing diff --git a/R/pre_process_did.R b/R/pre_process_did.R index 0946e29..71de568 100644 --- a/R/pre_process_did.R +++ b/R/pre_process_did.R @@ -52,9 +52,20 @@ pre_process_did <- function(yname, # make sure gname is numeric if (! (is.numeric(data[, gname])) ) stop("data[, gname] must be numeric") - # put in blank xformla if no covariates + # put in blank xformla if no covariates; otherwise check that all xformla variables are in data if (is.null(xformla)) { xformla <- ~1 + } else { + # extract variable names from the formula + formula_vars <- all.vars(xformla) + + # identify variables in xformla not in data + missing_vars <- setdiff(formula_vars, names(data)) + + # error checking for missing variables in data + if (length(missing_vars) > 0) { + stop(paste("The following variables are not in data:", paste(missing_vars, collapse = ", ")), call. = FALSE) + } } # drop irrelevant columns from data diff --git a/tests/testthat/test-user_bug_fixes.R b/tests/testthat/test-user_bug_fixes.R index 4954b20..c7ff635 100644 --- a/tests/testthat/test-user_bug_fixes.R +++ b/tests/testthat/test-user_bug_fixes.R @@ -146,3 +146,15 @@ test_that("0 pre-treatment estimates when outcomes are 0", { res_idx <- which(res$group==9 & res$t==7) expect_equal(res$att[res_idx],0) }) + +test_that("variables not in dataset", { + sp <- did::reset.sim(time.periods=3) + data <- build_sim_dataset(sp) + + X2 <- factor(data$cluster) + + expect_error(att_gt(yname="Y", xformla=~X2, data=data, tname="period", idname="id", control_group="notyettreated", + gname="G", est_method="dr", clustervars="cluster"), " variables are not in data") + +}) +