-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmedoutcon.R
344 lines (335 loc) · 14.9 KB
/
medoutcon.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
#' Efficient estimation of natural and interventional (in)direct effects
#'
#' @param W A \code{matrix}, \code{data.frame}, or similar object corresponding
#' to a set of baseline covariates.
#' @param A A \code{numeric} vector corresponding to a treatment variable. The
#' parameter of interest is defined as a location shift of this quantity.
#' @param Z A \code{numeric} vector corresponding to an intermediate confounder
#' affected by treatment (on the causal pathway between the intervention A,
#' mediators M, and outcome Y, but unaffected itself by the mediators). When
#' set to \code{NULL}, the natural (in)direct effects are estimated.
#' @param R A \code{logical} vector indicating whether a sampled observation's
#' mediator was measured via a two-phase sampling design. Defaults to a
#' vector of ones, indicating that two-phase sampling was not performed.
#' @param M A \code{numeric} vector, \code{matrix}, \code{data.frame}, or
#' similar corresponding to a set of mediators (on the causal pathway between
#' the intervention A and the outcome Y).
#' @param Y A \code{numeric} vector corresponding to an outcome variable.
#' @param obs_weights A \code{numeric} vector of observation-level weights. The
#' default is to give all observations equal weighting.
#' @param svy_weights A \code{numeric} vector of observation-level weights that
#' have been computed externally, such as survey sampling weights. Such
#' weights are used in the construction of re-weighted efficient estimators.
#' @param two_phase_weights A \code{numeric} vector of known observation-level
#' weights corresponding to the inverse probability of the mediator being
#' measured. Defaults to a vector of ones.
#' @param effect A \code{character} indicating whether to compute the direct or
#' the indirect effect as discussed in <https://arxiv.org/abs/1912.09936>.
#' This is ignored when the argument \code{contrast} is provided. By default,
#' the direct effect is estimated.
#' @param contrast A \code{numeric} double indicating the two values of the
#' intervention \code{A} to be compared. The default value of \code{NULL} has
#' no effect, as the value of the argument \code{effect} is instead used to
#' define the contrasts. To override \code{effect}, provide a \code{numeric}
#' double vector, giving the values of a' and a*, e.g., \code{c(0, 1)}.
#' @param g_learners A \code{\link[sl3]{Stack}} object, or other learner class
#' (inheriting from \code{\link[sl3]{Lrnr_base}}), containing instantiated
#' learners from \pkg{sl3}; used to fit a model for the propensity score.
#' @param h_learners A \code{\link[sl3]{Stack}} object, or other learner class
#' (inheriting from \code{\link[sl3]{Lrnr_base}}), containing instantiated
#' learners from \pkg{sl3}; used to fit a model for a parameterization of the
#' propensity score that conditions on the mediators.
#' @param b_learners A \code{\link[sl3]{Stack}} object, or other learner class
#' (inheriting from \code{\link[sl3]{Lrnr_base}}), containing instantiated
#' learners from \pkg{sl3}; used to fit a model for the outcome regression.
#' @param q_learners A \code{\link[sl3]{Stack}} object, or other learner class
#' (inheriting from \code{\link[sl3]{Lrnr_base}}), containing instantiated
#' learners from \pkg{sl3}; used to fit a model for a nuisance regression of
#' the intermediate confounder, conditioning on the treatment and potential
#' baseline covariates.
#' @param r_learners A \code{\link[sl3]{Stack}} object, or other learner class
#' (inheriting from \code{\link[sl3]{Lrnr_base}}), containing instantiated
#' learners from \pkg{sl3}; used to fit a model for a nuisance regression of
#' the intermediate confounder, conditioning on the mediators, the treatment,
#' and potential baseline confounders.
#' @param u_learners A \code{\link[sl3]{Stack}} object, or other learner class
#' (inheriting from \code{\link[sl3]{Lrnr_base}}), containing instantiated
#' learners from \pkg{sl3}; used to fit a pseudo-outcome regression required
#' for in the efficient influence function.
#' @param v_learners A \code{\link[sl3]{Stack}} object, or other learner class
#' (inheriting from \code{\link[sl3]{Lrnr_base}}), containing instantiated
#' learners from \pkg{sl3}; used to fit a pseudo-outcome regression required
#' for in the efficient influence function.
#' @param d_learners A \code{\link[sl3]{Stack}} object, or other learner class
#' (inheriting from \code{\link[sl3]{Lrnr_base}}), containing instantiated
#' learners from \pkg{sl3}; used to fit an initial efficient influence
#' function regression when computing the efficient influence function in a
#' two-phase sampling design.
#' @param estimator The desired estimator of the direct or indirect effect (or
#' contrast-specific parameter) to be computed. Both an efficient one-step
#' estimator using cross-fitting and a cross-validated targeted minimum loss
#' estimator (TMLE) are available. The default is the TML estimator.
#' @param estimator_args A \code{list} of extra arguments to be passed (via
#' \code{...}) to the function call for the specified estimator. The default
#' is chosen so as to allow the number of folds used to compute the one-step
#' or TML estimators to be easily adjusted. In the case of the TML estimator,
#' the number of update (fluctuation) iterations is limited, and a tolerance
#' is included for the updates introduced by tilting (fluctuation) models.
#' @param g_bounds A \code{numeric} vector containing two values, the first
#' being the minimum allowable estimated propensity score value and the
#' second being the maximum allowable for estimated propensity scores.
#' Defaults to \code{c(0.001, 0.999)}.
#'
#' @importFrom data.table as.data.table setnames set
#' @importFrom sl3 Lrnr_glm_fast Lrnr_hal9001
#' @importFrom stats var
#'
#' @export
#'
#' @examples
#' # here, we show one-step and TML estimates of the interventional direct
#' # effect; the indirect effect can be evaluated by a straightforward change
#' # to the penultimate argument. the natural direct and indirect effects can
#' # be evaluated by omitting the argument Z (inappropriate in this example).
#' # create data: covariates W, exposure A, post-exposure-confounder Z,
#' # mediator M, outcome Y
#' n_obs <- 200
#' w_1 <- rbinom(n_obs, 1, prob = 0.6)
#' w_2 <- rbinom(n_obs, 1, prob = 0.3)
#' w <- as.data.frame(cbind(w_1, w_2))
#' a <- as.numeric(rbinom(n_obs, 1, plogis(rowSums(w) - 2)))
#' z <- rbinom(n_obs, 1, plogis(rowMeans(-log(2) + w - a) + 0.2))
#' m_1 <- rbinom(n_obs, 1, plogis(rowSums(log(3) * w + a - z)))
#' m_2 <- rbinom(n_obs, 1, plogis(rowSums(w - a - z)))
#' m <- as.data.frame(cbind(m_1, m_2))
#' y <- rbinom(n_obs, 1, plogis(1 / (rowSums(w) - z + a + rowSums(m))))
#'
#' # one-step estimate of the interventional direct effect
#' os_de <- medoutcon(
#' W = w, A = a, Z = z, M = m, Y = y,
#' effect = "direct",
#' estimator = "onestep"
#' )
#'
#' # TML estimate of the interventional direct effect
#' # NOTE: improved variance estimate and de-biasing from targeting procedure
#' tmle_de <- medoutcon(
#' W = w, A = a, Z = z, M = m, Y = y,
#' effect = "direct",
#' estimator = "tmle"
#' )
medoutcon <- function(W,
A,
Z,
M,
Y,
R = rep(1, length(Y)),
obs_weights = rep(1, length(Y)),
svy_weights = NULL,
two_phase_weights = rep(1, length(Y)),
effect = c("direct", "indirect", "pm"),
contrast = NULL,
g_learners = sl3::Lrnr_glm_fast$new(),
h_learners = sl3::Lrnr_glm_fast$new(),
b_learners = sl3::Lrnr_glm_fast$new(),
q_learners = sl3::Lrnr_glm_fast$new(),
r_learners = sl3::Lrnr_glm_fast$new(),
u_learners = sl3::Lrnr_hal9001$new(),
v_learners = sl3::Lrnr_hal9001$new(),
d_learners = sl3::Lrnr_glm_fast$new(),
estimator = c("tmle", "onestep"),
estimator_args = list(
cv_folds = 5L, max_iter = 5L,
tiltmod_tol = 5
),
g_bounds = c(0.01, 0.99)) {
# set defaults
estimator <- match.arg(estimator)
estimator_args <- unlist(estimator_args, recursive = FALSE)
est_args_os <- estimator_args[names(estimator_args) %in%
names(formals(est_onestep))]
est_args_tmle <- estimator_args[names(estimator_args) %in%
names(formals(est_tml))]
# set constant Z for estimation of the natural (in)direct effects
if (is.null(Z)) {
Z <- rep(1, length(Y))
effect_type <- "natural"
} else {
effect_type <- "interventional"
}
# construct input data structure
data <- data.table::as.data.table(cbind(
Y, M, R, Z, A, W, obs_weights,
two_phase_weights
))
w_names <- paste("W", seq_len(dim(data.table::as.data.table(W))[2]),
sep = "_"
)
m_names <- paste("M", seq_len(dim(data.table::as.data.table(M))[2]),
sep = "_"
)
data.table::setnames(data, c(
"Y", m_names, "R", "Z", "A", w_names,
"obs_weights", "two_phase_weights"
))
# bound outcome Y in unit interval
min_y <- min(data[["Y"]])
max_y <- max(data[["Y"]])
data.table::set(data, j = "Y", value = scale_to_unit(data[["Y"]]))
# need to loop over different contrasts to construct direct/indirect effects
if (is.null(contrast)) {
if (effect != "pm") {
# select appropriate component for direct vs indirect effects
is_effect_direct <- (effect == "direct")
contrast_grid <- list(switch(2 - is_effect_direct,
c(0, 0),
c(1, 1)
))
# term needed in the decomposition for both effects
contrast_grid[[2]] <- c(1, 0)
} else {
contrast_grid <- list(
c(1, 1), c(0, 0), c(1, 0)
)
}
} else {
# otherwise, simply estimate for the user-given contrast
contrast_grid <- list(contrast)
effect <- NULL
}
est_params <- lapply(contrast_grid, function(contrast) {
if (estimator == "onestep") {
# EFFICIENT ONE-STEP ESTIMATOR
onestep_est_args <- list(
data = data,
contrast = contrast,
g_learners = g_learners,
h_learners = h_learners,
b_learners = b_learners,
q_learners = q_learners,
r_learners = r_learners,
u_learners = u_learners,
v_learners = v_learners,
d_learners = d_learners,
w_names = w_names,
m_names = m_names,
y_bounds = c(min_y, max_y),
effect_type = effect_type,
svy_weights = svy_weights,
g_bounds = g_bounds
)
onestep_est_args <- unlist(list(onestep_est_args, est_args_os),
recursive = FALSE
)
est_out <- do.call(est_onestep, onestep_est_args)
} else if (estimator == "tmle") {
# TARGETED MINIMUM LOSS ESTIMATOR
tmle_est_args <- list(
data = data,
contrast = contrast,
g_learners = g_learners,
h_learners = h_learners,
b_learners = b_learners,
q_learners = q_learners,
r_learners = r_learners,
u_learners = u_learners,
v_learners = v_learners,
d_learners = d_learners,
w_names = w_names,
m_names = m_names,
y_bounds = c(min_y, max_y),
effect_type = effect_type,
svy_weights = svy_weights,
g_bounds = g_bounds
)
tmle_est_args <- unlist(list(tmle_est_args, est_args_tmle),
recursive = FALSE
)
est_out <- do.call(est_tml, tmle_est_args)
}
# lazily create output as classed list
est_out$outcome <- as.numeric(Y)
class(est_out) <- "medoutcon"
return(est_out)
})
# put effects together
if (is.null(contrast) && (effect == "direct")) {
# compute parameter estimate, influence function, and variances
de_theta_est <- est_params[[2]]$theta - est_params[[1]]$theta
de_eif_est <- est_params[[2]]$eif - est_params[[1]]$eif
de_var_est <- stats::var(de_eif_est) / nrow(data)
# construct output in same style as for contrast-specific parameter
de_est_out <- list(
theta = de_theta_est,
var = de_var_est,
eif = de_eif_est,
type = estimator,
param = paste("direct", effect_type, sep = "_"),
outcome = as.numeric(Y)
)
class(de_est_out) <- "medoutcon"
return(de_est_out)
} else if (is.null(contrast) && (effect == "indirect")) {
# compute parameter estimate, influence function, and variances
ie_theta_est <- est_params[[1]]$theta - est_params[[2]]$theta
ie_eif_est <- est_params[[1]]$eif - est_params[[2]]$eif
ie_var_est <- stats::var(ie_eif_est) / nrow(data)
# construct output in same style as for contrast-specific parameter
ie_est_out <- list(
theta = ie_theta_est,
var = ie_var_est,
eif = ie_eif_est,
type = estimator,
param = paste("indirect", effect_type, sep = "_"),
outcome = as.numeric(Y)
)
class(ie_est_out) <- "medoutcon"
return(ie_est_out)
} else if (is.null(contrast) && (effect == "pm")) {
# compute parameter estimate, influence function, and variances
pm_theta_est <- 1 - log(est_params[[3]]$theta / est_params[[2]]$theta) /
log(est_params[[1]]$theta / est_params[[2]]$theta)
pm_eif_est <- -est_params[[3]]$eif /
(est_params[[3]]$theta_plugin * log(est_params[[1]]$theta_plugin /
est_params[[2]]$theta_plugin)) +
est_params[[2]]$eif * (
(log(est_params[[1]]$theta_plugin / est_params[[2]]$theta_plugin) -
log(est_params[[3]]$theta_plugin / est_params[[2]]$theta_plugin)) /
(est_params[[2]]$theta_plugin *
(log(est_params[[1]]$theta_plugin /
est_params[[2]]$theta_plugin))^2)) +
est_params[[1]]$eif * log(est_params[[3]]$theta_plugin /
est_params[[2]]$theta_plugin) /
(est_params[[1]]$theta_plugin * (log(est_params[[1]]$theta_plugin /
est_params[[2]]$theta_plugin))^2)
pm_var_est <- stats::var(pm_eif_est) / nrow(data)
# construct output in same style as for contrast-specific parameter
pm_est_out <- list(
theta = pm_theta_est,
var = pm_var_est,
eif = pm_eif_est,
type = estimator,
param = paste("pm", effect_type, sep = "_"),
outcome = as.numeric(Y),
contrast_results = list(
contrast_1_1_mean = est_params[[1]]$theta,
contrast_1_1_eif = est_params[[1]]$eif,
contrast_0_0_mean = est_params[[2]]$theta,
contrast_0_0_eif = est_params[[2]]$eif,
contrast_1_0_mean = est_params[[3]]$theta,
contrast_1_0_eif = est_params[[3]]$eif
)
)
class(pm_est_out) <- "medoutcon"
return(pm_est_out)
} else {
est_out <- unlist(est_params, recursive = FALSE)
est_out$param <- paste0(
"tsm(", "a'=", contrast[1], ",",
"a*=", contrast[2], ")"
)
est_out$.contrast <- contrast
class(est_out) <- "medoutcon"
return(est_out)
}
}