add test for varnames in dataset

bcallaway11 · Nov 5, 2024 · 8a41dba · 8a41dba
1 parent 97f4486
commit 8a41dba
Show file tree

Hide file tree

Showing 4 changed files with 26 additions and 3 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: did
 Title: Treatment Effects with Multiple Periods and Groups
-Version: 2.2.0.907
+Version: 2.2.0.908
 Authors@R: c(person("Brantly", "Callaway", email = "[email protected]", role = c("aut", "cre")), person("Pedro H. C.", "Sant'Anna", email="[email protected]", role = c("aut")))
 URL: https://bcallaway11.github.io/did/, https://github.com/bcallaway11/did/
 Description: The standard Difference-in-Differences (DID) setup involves two periods and two groups -- a treated group and untreated group.  Many applications of DID methods involve more than two periods and have individuals that are treated at different points in time.  This package contains tools for computing average treatment effect parameters in Difference in Differences setups with more than two periods and with variation in treatment timing using the methods developed in Callaway and Sant'Anna (2021) <doi:10.1016/j.jeconom.2020.12.001>.  The main parameters are group-time average treatment effects which are the average treatment effect for a particular group at a particular time.  These can be aggregated into a fewer number of treatment effect parameters, and the package deals with the cases where there is selective treatment timing, dynamic treatment effects, calendar time effects, or combinations of these.  There are also functions for testing the Difference in Differences assumption, and plotting group-time average treatment effects.

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,6 @@
 # did 2.2.0
 
-  * Code improvements that made the package much faster and memory efficient
+  * Code improvements that made the package faster and more memory efficient
 
   * Improved automated testing and regression testing
 

diff --git a/R/pre_process_did.R b/R/pre_process_did.R
@@ -52,9 +52,20 @@ pre_process_did <- function(yname,
   #  make sure gname is numeric
   if (! (is.numeric(data[, gname])) ) stop("data[, gname] must be numeric")
 
-  # put in blank xformla if no covariates
+  # put in blank xformla if no covariates or check whether all variables are in data
   if (is.null(xformla)) {
     xformla <- ~1
+  } else {
+    # extract variable names from the formula
+    formula_vars <- all.vars(xformla)
+
+    # identify variables in xformla not in data
+    missing_vars <- setdiff(formula_vars, names(data))
+
+    # error checking for missing variables in data
+    if (length(missing_vars) > 0) {
+      stop(paste("The following variables are not in data:", paste(missing_vars, collapse = ", ")), call. = FALSE)
+    }
   }
 
   # drop irrelevant columns from data

diff --git a/tests/testthat/test-user_bug_fixes.R b/tests/testthat/test-user_bug_fixes.R
@@ -146,3 +146,15 @@ test_that("0 pre-treatment estimates when outcomes are 0", {
   res_idx <- which(res$group==9 & res$t==7)
   expect_equal(res$att[res_idx],0)
 })
+
+test_that("error when xformla variables are not in dataset", {
+  sp <- did::reset.sim(time.periods=3)
+  data <- build_sim_dataset(sp)
+
+  X2  <- factor(data$cluster)
+
+  expect_error(att_gt(yname="Y", xformla=~X2, data=data, tname="period", idname="id", control_group="notyettreated",
+                      gname="G", est_method="dr", clustervars="cluster"), " variables are not in data")
+
+})
+