-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathAlternative_Outcome_Approach.R
193 lines (165 loc) · 5.07 KB
/
Alternative_Outcome_Approach.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#------Bayesian IPW Modeling for Causal Inference in a Bayesian Framework-------
#-Author: A. Jordan Nafa----------------------------Created: November 18, 2021-#
#-R Version: 4.1.0----------------------------------Revised: December 22, 2021-#
# Set Project Options----
options(
digits = 4, # Significant figures output
scipen = 999, # Disable scientific notation
repos = getOption("repos")["CRAN"],
brms.backend = "cmdstanr"
)
# Load the Required libraries----
pacman::p_load(
"tidyverse",
"data.table",
"dtplyr",
"datawizard",
"brms",
"cmdstanr",
"tidybayes",
"ggdag"
)
# Load the helper functions----
source("scripts/00_Helper_Functions.R")
# Base Directory for the model objects
models_dir <- "analyses/models/"
# Base Directory for the stan files
stan_dir <- "analyses/stan/"
# Read in the IPw Matrix from the treatment model
ipw_matrix <- read_rds("data/ipw_matrix.rds")
# Load the Mosquito Nets Data
nets <- read_rds("data/mosquito_nets.rds")
#------------------------------------------------------------------------------#
#-----------------------Specifying the Outcome Model----------------------------
#------------------------------------------------------------------------------#
# We can specify the outcome model almost entirely in brms as follows. First
# We define the formula and specify priors
# Specify the formula for the outcome model----
outcome_bf <- bf(
malaria_risk | weights(weights) ~ net_num,
family = brmsfamily("gaussian", "identity") # Normal Regression Model
)
# Specify some weakly informative priors for the model parameters----
outcome_priors <-
prior("student_t(3, 0, 2.5)", class = "Intercept") +
prior("normal(0, 2.5)", class = "b")
# Next, we'll define the stanvars objects for the custom parameters
# and data needed for the IPW weights
# Defining the IPw Matrix in the data block
data_vars <- stanvar(
x = as.matrix(ipw_matrix),
name = "weights",
scode = "int<lower = 1> L;
matrix[N, L] IPW;",
block = "data"
)
# Defining the weights matrix in the model block
model_vars <- stanvar(
scode = "// Weights for the Current Iteration
int M = get_iter();
vector[N] weights = IPW[, M];",
block = "model",
position = "start"
)
# Defining the iteration update function in generated quantities
gq_vars <- stanvar(
scode = "add_iter();",
block = "genquant"
)
# Adding the iteration counter functions
fun_vars <- stanvar(
scode = "void add_iter();
int get_iter();",
block = "functions"
)
# Generate Stan Code for the Model
make_stancode(
formula = outcome_bf,
data = nets,
prior = outcome_priors,
sample_prior = "no",
data2 = list(L = 8000, weights = as.matrix(ipw_matrix)),
stanvars = c(data_vars, model_vars, gq_vars, fun_vars),
)
model_code <- "// generated with brms 2.16.2
functions {
void add_iter();
int get_iter();
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
// vector<lower=0>[N] weights; // model weights
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int prior_only; // should the likelihood be ignored?
int<lower = 1> L;
matrix[N, L] IPW;
}
transformed data {
int Kc = K - 1;
matrix[N, Kc] Xc; // centered version of X without an intercept
vector[Kc] means_X; // column means of X before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
}
parameters {
vector[Kc] b; // population-level effects
real Intercept; // temporary intercept for centered predictors
real<lower=0> sigma; // dispersion parameter
}
transformed parameters {
}
model {
// Weights for the Current Iteration
int M = get_iter();
vector[N] weights = IPW[, M];
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = Intercept + Xc * b;
for (n in 1:N) {
target += weights[n] * (normal_lpdf(Y[n] | mu[n], sigma));
}
}
// priors including constants
target += normal_lpdf(b | 0, 2.5);
target += student_t_lpdf(Intercept | 3, 0, 2.5);
target += student_t_lpdf(sigma | 3, 0, 14.8)
- 1 * student_t_lccdf(0 | 3, 0, 14.8);
}
generated quantities {
// actual population-level intercept
real b_Intercept = Intercept - dot_product(means_X, b);
add_iter();
}"
# Compile the stan model
ipw_outcome_model <- rstan::stan_model(
model_name = "Outcome_IPW_Model",
model_code = model_code,
allow_undefined = TRUE,
includes = paste0('\n#include "', file.path(getwd(), 'analyses/stan/iterfuns.hpp'), '"\n'),
save_dso = TRUE
)
# Define the data for the stan model
stan_data_ls <- list(
N = nrow(nets),
Y = nets$malaria_risk,
K = 2,
X = model.matrix(~ net_num, data = nets),
L = 8000,
IPW = as.matrix(ipw_matrix),
prior_only = 0
)
# fit the model using rstan
samp_outcome_model <- rstan::sampling(
ipw_outcome_model,
data = stan_data_ls,
chains = 6,
iter = 8000,
seed = 1234,
cores = 6
)
# @ todo write a brms wrapper function