-
-
Notifications
You must be signed in to change notification settings - Fork 477
/
Copy path16.3_FittingAndUnderstangingAVaryingInterceptMultilevelModelUsingRStan.R
89 lines (72 loc) · 3.17 KB
/
16.3_FittingAndUnderstangingAVaryingInterceptMultilevelModelUsingRStan.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
library(rstan)
library(ggplot2)
## Data
source("radon.data.R", echo = TRUE)
## Classical complete pooling regression
lm.pooled <- lm (y ~ x)
summary(lm.pooled)
## Classical no pooling regression
# with the constant term
lm.unpooled.0 <- lm (y ~ x + factor(county))
summary(lm.unpooled.0)
# without the constant term
lm.unpooled <- lm (y ~ x + factor(county) - 1)
summary(lm.unpooled)
## Call Stan from R
radon.data <- c("N", "J", "y", "x", "county")
# with 10 iterations
radon.1.sf <- stan(file='radon.1.stan', data=radon.data, iter = 10, chains=4)
# with 500 iterations
radon.1.sf <- stan(file='radon.1.stan', data=radon.data, iter = 500, chains=4)
plot(radon.1.sf) # to get a plot similar to Figure 16.1
print(radon.1.sf) # to display the results in the R console
## Summarizing classical and multilevel inferences graphically
# choose countries
display8 <- c(36, 1, 35, 21, 14, 71, 61, 70) # counties to be displayed
radon.ggdf <- subset(data.frame(y, x, county), county %in% display8)
radon.ggdf$county.name <- county_name[radon.ggdf$county]
radon.ggdf$county.name <- factor(radon.ggdf$county.name,
levels=county_name[display8])
# pull out parameter estimates from classical fits
a.pooled <- coef(lm.pooled)[1] # complete-pooling intercept
radon.ggdf$a.pooled <- a.pooled
radon.ggdf$b.pooled <- coef(lm.pooled)[2] # complete-pooling slope
a.nopooled <- coef(lm.unpooled)[2:(J+1)] # no-pooling vector of intercepts
radon.ggdf$a.nopooled <- a.nopooled[radon.ggdf$county]
radon.ggdf$b.nopooled <- coef(lm.unpooled)[1] # no-pooling slope
# compute medians from Stan fit
sims <- extract(radon.1.sf)
a <- sims$a
b <- sims$b
a.multilevel <- rep(NA, J)
for (j in 1:J)
a.multilevel[j] <- median(a[,j])
radon.ggdf$a.multilevel <- a.multilevel[radon.ggdf$county]
radon.ggdf$b.multilevel <- median(b)
# make the plot in Figure 12.4
p1 <- ggplot(radon.ggdf, aes(x, y)) +
geom_jitter(position = position_jitter(width = 0.05, height = 0)) +
geom_abline(aes(intercept = a.pooled, slope = b.pooled), linetype = "dashed") +
geom_abline(aes(intercept = a.nopooled, slope = b.nopooled), size = 0.25) +
geom_abline(aes(intercept = a.multilevel, slope = b.multilevel)) +
scale_x_continuous("floor", breaks=c(0,1), labels=c("0", "1")) +
ylab("log radon level") +
facet_wrap(~ county.name, ncol = 4)
print(p1)
# displaying estimates and uncertainties and plot in Figure 12.3b
a.sd <- rep(NA, J)
for (j in 1:J)
a.sd[j] <- sd(a[,j])
estimates.ggdf <- data.frame(sample.size = as.vector(table(county)),
a.multilevel, a.sd)
dev.new()
p2 <- ggplot(estimates.ggdf, aes(x = sample.size, y = a.multilevel)) +
geom_pointrange(aes(ymin = a.multilevel - a.sd, ymax = a.multilevel + a.sd),
position = position_jitter(width = 0.1, height = 0)) +
geom_hline(yintercept = a.pooled, size = 0.5) +
scale_x_continuous("sample size in country j",
trans = "log", breaks = c(1, 3, 10, 30, 100)) +
scale_y_continuous(expression(paste("intercept, ", alpha[j],
" (multilevel inference)")),
limits = c(0, 3))
print(p2)