-
-
Notifications
You must be signed in to change notification settings - Fork 477
/
Copy path5.1_LogisticRegressionWithOnePredictor.R
61 lines (50 loc) · 2.11 KB
/
5.1_LogisticRegressionWithOnePredictor.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
library(rstan)
library(ggplot2)
### Data
source("nes1992_vote.data.R", echo = TRUE)
### Logistic model: vote ~ income
data.list <- c("N", "vote", "income")
nes_logit.sf <- stan(file='nes_logit.stan', data=data.list,
iter=1000, chains=4)
print(nes_logit.sf, pars = c("beta", "lp__"))
### Figures
beta.post <- extract(nes_logit.sf, "beta")$beta
beta.mean <- colMeans(beta.post)
# Figure 5.1 (a)
len <- 20
x <- seq(1, 5, length.out = len)
y <- 1 / (1 + exp(- beta.mean[1] - beta.mean[2] * x))
nes_vote.ggdf.1 <- data.frame(x, y)
p1 <- ggplot(data.frame(income, vote), aes(x = income, y = vote)) +
geom_jitter(position = position_jitter(height = 0.04, width = 0.4)) +
geom_line(aes(x, y), data = nes_vote.ggdf.1, size = 2) +
stat_function(fun = function(x)
1 / (1 + exp(- beta.mean[1] - beta.mean[2] * x))) +
scale_x_continuous("Income", limits = c(-2, 8), breaks = seq(1, 5),
labels = c("1\n(poor)", "2", "3", "4", "5\n(rich)")) +
scale_y_continuous("Pr(Republican Vote)", limits = c(-0.05, 1.05),
breaks = seq(0, 1, 0.2))
print(p1)
# Figure 5.1 (b)
dev.new()
n <- 20
ndx <- sample(nrow(beta.post), n)
min.x <- 0.5
max.x <- 5.5
x <- seq(min.x, max.x, length.out = len)
nes_vote.ggdf.2 <- data.frame(c(), c(), c()) # empty data frame
for (i in ndx) {
y <- 1 / (1 + exp(- beta.post[i, 1] - beta.post[i, 2] * x))
nes_vote.ggdf.2 <- rbind(nes_vote.ggdf.2,
data.frame(id = rep(i, len), x, y))
}
p2 <- ggplot(data.frame(income, vote), aes(x = income, y = vote)) +
geom_jitter(position = position_jitter(height = .04, width = .4)) +
geom_line(aes(x, y, group = id), data = nes_vote.ggdf.2, alpha = 0.1) +
geom_line(aes(x, y = 1 / (1 + exp(- beta.mean[1] - beta.mean[2] * x))),
data = nes_vote.ggdf.2) +
scale_x_continuous("Income", limits = c(min.x, max.x), breaks = seq(1, 5),
labels = c("1\n(poor)", "2", "3", "4", "5\n(rich)")) +
scale_y_continuous("Pr(Republican Vote)", limits = c(-0.05, 1.05),
breaks = seq(0, 1, 0.2))
print(p2)