# wage_tracker.R
# Copied from a GitHub file view: 139 lines (123 loc), 4.95 KB.
# The bare integers that follow are line-number gutter residue, not code.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Atlanta Fed replication
library(lubridate)
library(zoo)
library(bigvis)
# Handle on the `raw_microdata` table of the local MySQL database
# `cass_cps_microdata`. Rows are only pulled into R by a later collect().
# `user` and `password` are not defined in this script; they must already
# exist in the calling environment.
# NOTE(review): src_mysql() is deprecated in recent dplyr/dbplyr releases -
# confirm the installed version still provides it.
cps_raw <- tbl(
  src_mysql(
    dbname = "cass_cps_microdata",
    host = "127.0.0.1",
    user = user,
    password = password
  ),
  "raw_microdata"
)
# Want only ORG
# Keep the rows flagged PRERELG == 1, restrict to the columns used further
# down, and collect() the result into a local data frame.
wage_tracker <- cps_raw %>%
  filter(PRERELG == 1) %>%
  select(
    # identifiers
    date, personid,
    # weights
    PWORWGT, PWLGWGT,
    PRNAGWS, PRFTLF,
    # earnings, hours and their flags (inputs to the wage calculation)
    PRERNHLY, PTHR, PEERNHRO, PRERNWA, PTWK, PEHRACT1,
    PRWERNAL, PRHERNAL,
    # demographics used for match validation and the breakdown plots
    PESEX, PRTAGE, PTDTRACE, PEHSPNON, PEEDUCA
  ) %>%
  collect()
# Now construct flows
# Every person-month is used twice: as the earlier observation ("month 1",
# suffix .x after the join) and as the observation one year later ("month 2",
# suffix .y). Both sides need exactly the same derived columns, so they are
# built by one helper rather than two hand-copied mutate() calls.

# Add the parsed date, the join key, a race label and an hourly wage.
#   df     - data frame holding the columns selected into `wage_tracker`
#   offset - lubridate period added to `date` to form `flow_date`
# Returns `df` with `date` parsed by ymd() and the columns `flow_date`,
# `race` and `wage` appended, in that order.
prepare_flow_side <- function(df, offset = years(0)) {
  df %>%
    mutate(
      date = ymd(date),
      flow_date = date + offset,
      # PEHSPNON is tested first, so it takes precedence over PTDTRACE
      race = case_when(
        PEHSPNON == 1 ~ "Hispanic",
        PTDTRACE == 1 ~ "White",
        PTDTRACE == 2 ~ "Black",
        TRUE ~ "Other"
      ),
      # The first branch that matches wins; rows matching none get wage = NA.
      # NOTE(review): the *AL / PTWK tests presumably exclude allocated and
      # top-coded earnings - confirm against the CPS data dictionary.
      wage = case_when(
        PRHERNAL != 1 & PRERNHLY < 9999 & PRERNHLY > 0 ~ as.double(PRERNHLY),
        PRWERNAL != 1 & PTWK != 1 & PEERNHRO > 0 ~ PRERNWA / PEERNHRO,
        PRWERNAL != 1 & PTWK != 1 & PEHRACT1 > 0 ~ PRERNWA / PEHRACT1
      )
    )
}

# Month1
# flow_date is pushed one year ahead so it lines up with the month-2 date.
# NOTE(review): the 2018-03-01 cutoff is hard-coded; it presumably marks the
# last month that has a follow-up observation - update when data are extended.
wage_flow <- wage_tracker %>%
  prepare_flow_side(offset = years(1)) %>%
  filter(date <= ymd("2018-03-01"))

# Month2 and merge
# Month 1 is the left table, so its columns carry .x and month 2's carry .y.
wage_flow <- wage_tracker %>%
  prepare_flow_side() %>%
  inner_join(wage_flow, .,
             by = c("flow_date", "personid"),
             suffix = c(".x", ".y"))
# Drop bad matches
# A matched pair survives only when sex and the derived race label agree
# across the two observations, age rose by between 0 and 2 years, and both
# wages are at least 213. Rows where any test is NA are dropped by filter().
# NOTE(review): 213 is presumably in cents ($2.13) - confirm the wage units.
wage_flow <- filter(
  wage_flow,
  PESEX.x == PESEX.y & race.x == race.y,
  PRTAGE.y >= PRTAGE.x & PRTAGE.y <= PRTAGE.x + 2,
  wage.x >= 213 & wage.y >= 213
)
# Headline series: for every flow_date, the median year-over-year wage change
# across matched individuals, weighted and unweighted.
tracker <- wage_flow %>%
  # Repeats the wage floor applied when bad matches were dropped, so this
  # step does not depend on that filter having run.
  filter(wage.x >= 213, wage.y >= 213) %>%
  mutate(
    change = wage.y / wage.x - 1,
    # NOTE(review): dividing by 10000 presumably undoes four implied decimal
    # places in the stored weight - confirm.
    weight = PWORWGT.y / 10000
  ) %>%
  group_by(flow_date) %>%
  summarize(
    # TRUE rather than T: T is an ordinary variable that can be reassigned
    weighted_change = weighted.median(change, weight, na.rm = TRUE),
    unweighted_change = median(change, na.rm = TRUE)
  )
# Smooth the unweighted series with a trailing 3-row mean, open the smoothed
# table in the data viewer (the tee pipe passes it on unchanged) and plot it.
# Assumes `tracker` is ordered by flow_date with no missing months.
tracker %>%
  # fill = NA replaces the deprecated na.pad = TRUE; the leading rows that
  # lack a full window become NA and are removed on the next line
  mutate(roll = rollmean(unweighted_change, 3, fill = NA, align = "right")) %>%
  filter(!is.na(roll)) %T>%
  View("tracker") %>%
  ggplot(aes(flow_date, roll)) +
  geom_line()
# Unweighted median year-over-year wage change per flow_date and month-1
# race label.
race_tracker <- wage_flow %>%
  # Repeats the wage floor applied when bad matches were dropped.
  filter(wage.x >= 213, wage.y >= 213) %>%
  mutate(change = wage.y / wage.x - 1) %>%
  group_by(flow_date, race.x) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  summarize(change = median(change, na.rm = TRUE))
# Trailing 12-row mean of the median change within each race group, plotted
# as one line per group. Assumes rows are ordered by flow_date within group
# and that no group has missing months.
race_tracker %>%
  group_by(race = race.x) %>%
  # fill = NA replaces the deprecated na.pad = TRUE
  mutate(roll = rollmean(change, 12, fill = NA, align = "right")) %>%
  filter(!is.na(roll)) %>%
  ggplot(aes(flow_date, roll, colour = race)) +
  geom_line()
# Median year-over-year wage change by month-1 sex code, smoothed with a
# trailing 12-row mean and plotted as one line per code.
wage_flow %>%
  # Repeats the wage floor applied when bad matches were dropped.
  filter(wage.x >= 213, wage.y >= 213) %>%
  mutate(change = wage.y / wage.x - 1) %>%
  group_by(flow_date, PESEX.x) %>%
  summarize(change = median(change, na.rm = TRUE)) %>%
  # Named `sex` (previously `race`, a copy-paste leftover) so the legend
  # title matches what is plotted; factor() gives a discrete colour scale.
  group_by(sex = factor(PESEX.x)) %>%
  # fill = NA replaces the deprecated na.pad = TRUE
  mutate(roll = rollmean(change, 12, fill = NA, align = "right")) %>%
  filter(!is.na(roll)) %>%
  ggplot(aes(flow_date, roll, colour = sex)) +
  geom_line()
# Median year-over-year wage change for "white" versus everyone else
# (by month-1 race label), smoothed over 12 rows and expressed in percent.
wage_flow %>%
  # Repeats the wage floor applied when bad matches were dropped.
  filter(wage.x >= 213, wage.y >= 213) %>%
  mutate(
    change = wage.y / wage.x - 1,
    # every label other than "White" (Hispanic, Black, Other) is pooled
    white = case_when(
      race.x == "White" ~ "white",
      TRUE ~ "nonwhite"
    )
  ) %>%
  group_by(flow_date, white) %>%
  summarize(change = median(change, na.rm = TRUE)) %>%
  group_by(race = factor(white)) %>%
  mutate(
    # fill = NA replaces the deprecated na.pad = TRUE
    roll = rollmean(change, 12, fill = NA, align = "right"),
    # convert to percent and keep two significant digits
    roll = signif(roll * 100, 2)
  ) %>%
  filter(!is.na(roll)) %>%
  ggplot(aes(flow_date, roll, colour = race)) +
  geom_line()
# Median year-over-year wage change by month-2 education band for people
# aged 25 or older in month 1, smoothed over 12 rows, in percent.
wage_flow %>%
  # Repeats the wage floor applied when bad matches were dropped.
  filter(wage.x >= 213, wage.y >= 213, PRTAGE.x >= 25) %>%
  mutate(
    change = wage.y / wage.x - 1,
    # NOTE(review): "HS" takes every code <= 39, which would include any
    # negative/missing codes - confirm against the PEEDUCA code list.
    ed = case_when(
      PEEDUCA.y <= 39 ~ "HS",
      PEEDUCA.y %in% 40:42 ~ "some college",
      PEEDUCA.y >= 43 ~ "BA"
    )
  ) %>%
  group_by(flow_date, ed) %>%
  summarize(change = median(change, na.rm = TRUE)) %>%
  # Grouped and coloured by `ed` itself; the old alias `race` was a
  # copy-paste leftover that mislabelled the legend.
  group_by(ed) %>%
  mutate(
    # fill = NA replaces the deprecated na.pad = TRUE
    roll = rollmean(change, 12, fill = NA, align = "right"),
    # convert to percent and keep two significant digits
    roll = signif(roll * 100, 2)
  ) %>%
  filter(!is.na(roll)) %>%
  ggplot(aes(flow_date, roll, colour = ed)) +
  geom_line()
# Median year-over-year wage change by month-1 age band, smoothed over 12
# rows, in percent.
wage_flow %>%
  # Repeats the wage floor applied when bad matches were dropped.
  filter(wage.x >= 213, wage.y >= 213) %>%
  mutate(
    change = wage.y / wage.x - 1,
    # cut() intervals are right-closed: (0,24], (24,54], (54,100];
    # ages outside (0,100] become NA
    age = cut(
      PRTAGE.x,
      breaks = c(0, 24, 54, 100),
      labels = c("young", "prime", "older")
    )
  ) %>%
  group_by(flow_date, age) %>%
  summarize(change = median(change, na.rm = TRUE)) %>%
  # Grouped and coloured by `age` itself; the old alias `race` was a
  # copy-paste leftover that mislabelled the legend.
  group_by(age) %>%
  mutate(
    # fill = NA replaces the deprecated na.pad = TRUE
    roll = rollmean(change, 12, fill = NA, align = "right"),
    # convert to percent and keep two significant digits
    roll = signif(roll * 100, 2)
  ) %>%
  filter(!is.na(roll)) %>%
  ggplot(aes(flow_date, roll, colour = age)) +
  geom_line()