-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrIPFP - Map_CES_COICOP.R
172 lines (129 loc) · 7.93 KB
/
rIPFP - Map_CES_COICOP.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Load a table with code number and item names used for our DB
# Issue: BRA - Need to consolidate survey 'code'
# - There are non-food items in food DB (e.g. Coal).
# ZAF - Need to consolidate survey 'code'
#############
### India ###
#############
### Read code-item name in DB mapping for IND
# Theoretically ITEM_DLE could be identical to Surv_Heading, but ITEM_DLE is somehow modified.
wb <- XLConnect::loadWorkbook("../Bridging/CES-COICOP/IND NSS 68 2011-2012 CE Codes.xlsx")
code_item_IND <- XLConnect::readWorksheet(wb, "NSS68", header=TRUE, forceConversion=T)
names(code_item_IND) <- c("CODE", "ITEM_DLE", "UNIT")
code_item_IND$UNIT <- NULL
# code_item_IND <- code_item_IND[match(unique(code_item_IND$ITEM_DLE), code_item_IND$ITEM_DLE),] # There are duplicate ITEM_DLEs because they are input in two units (e.g. weight & expenditure)
### Fix WB mis-mappings
# WB assigned "CES 291: Biscuits, chocolates, etc" to "ICP 2: UNBR Food".
# Sending it to "ICP 36:Confectionery, chocolate and ice cream"
IND_WB$ICP_SEQ[IND_WB$CODE==291] <- 36
# WB assigned "CES 295: sauce, jam, jelly (gm)".
# Sending it to "ICP 35:Jams, marmalades and honey"
IND_WB$ICP_SEQ[IND_WB$CODE==295] <- 35
# WB assigned "CES 196 "others: birds, crab, oyster, tortoise, etc. to "ICP 9: UNBR Meat".
# Sending it to "ICP 14:Other meats and meat preparations"
IND_WB$ICP_SEQ[IND_WB$CODE==196] <- 14
# WB assigned "CES 274-276" to "ICP 39: Coffee, tea and cocoa".
# They should be "ICP 40: Mineral waters, soft drinks, fruit and vegetable juices"
# 274 "mineral water (litre)"
# 275 "cold beverages: bottled/canned (litre)"
# 276 "fruit juice and shake (litre)"
IND_WB$ICP_SEQ[IND_WB$CODE>=274 & IND_WB$CODE<=276] <- 40
# WB assigned "CES 334/335: Kerosene" to "ICP 64: Gas".
# They should be "ICP 65: Other fuel"
IND_WB$ICP_SEQ[IND_WB$CODE %in% 334:335] <- 65
# WB assigned "CES 435/437" to "ICP 131: Cultural service".
# They should be "ICP 119: Audio-visual, photographic and information processing equipment"
# 435 "photography"
# 436 "VCD/ DVD hire (incl. instrument)"
# 437 "cable TV"
IND_WB$ICP_SEQ[IND_WB$CODE %in% 435:437] <- 119
### Linking NTNU COICOP with WB ICP
# XXX_WB has original survey code, ICP seq number, and survey heading (except for BRA where 'heading' is missing).
# icp_ntnu is for ICP seq number, NTNU-COICOP seq number, and ICP heading.
IND_map <- merge(IND_WB, icp_ntnu, by="ICP_SEQ")
IND_map <- IND_map[c("CODE", "Surv_Heading", "ICP_SEQ", "COICOP1", "COICOP2", "ICP_Heading", "NTNU_109")]
# IND_map$ORD <- 1:dim(IND_map)[1] # Is this necessary?
IND_map <- merge(IND_map, code_item_IND, by="CODE")
IND_map <- IND_map[order(IND_map$CODE),]
### Need to remove fuel items for direct link CES-EXIO
IND_map <- IND_map %>%
filter(!((COICOP1 == "04" & COICOP2 > "50") |
(COICOP1 == "07" & COICOP2 == "22" &
!grepl("Lubri", Surv_Heading, ignore.case = TRUE))))
### Create CES to ICP bridge matrices
CES_ICP_IND <- matrix(0, length(IND_map$CODE), max(IND_map$ICP_SEQ))
CES_ICP_IND[cbind(1:length(IND_map$CODE), IND_map$ICP_SEQ)] <- 1
row.names(CES_ICP_IND) <- IND_map$CODE
#################
### Indonesia ###
#################
### Read code-item mapping for IDN
wb <- XLConnect::loadWorkbook("../Bridging/CES-COICOP/IDN NSES July 2008 CE Codes.xlsx")
code_item_IDN <- XLConnect::readWorksheet(wb, "Codes", header=TRUE, forceConversion=T)
names(code_item_IDN) <- c("CODE", "ITEM_DLE", "UNIT")
code_item_IDN$UNIT <- NULL
# code_item_IDN <- code_item_IDN[match(unique(code_item_IDN$ITEM_DLE), code_item_IDN$ITEM_DLE),] # There are duplicate ITEM_DLEs because they are input in two units (e.g. weight & expenditure) -> Not necessary because merge will anyhow remove these.
# WB removed all ceremonial spendings from the consumption. I put those back at '151: other services' under ICP.
# IDN_WB$ICP_SEQ[c(338, 340, 342, 343)] <- 151
### Linking NTNU COICOP with WB ICP
# XXX_WB has original survey code, ICP seq number, and survey heading (except for BRA where 'heading' is missing).
# icp_ntnu is for ICP seq number, NTNU-COICOP seq number, and ICP heading.
IDN_map <- merge(IDN_WB, icp_ntnu, by="ICP_SEQ")
IDN_map <- IDN_map[c("CODE", "Surv_Heading", "ICP_SEQ", "COICOP1", "COICOP2", "ICP_Heading", "NTNU_109")]
# IDN_map$ORD <- 1:dim(IDN_map)[1]
IDN_map <- merge(IDN_map, code_item_IDN, by="CODE")
IDN_map <- IDN_map[order(IDN_map$CODE),]
### Need to remove fuel items for direct link CES-EXIO
IDN_map <- IDN_map %>%
filter(!((COICOP1 == "04" & COICOP2 > "50") |
(COICOP1 == "07" & COICOP2 == "22" &
!grepl("Lubri", Surv_Heading, ignore.case = TRUE))))
### Create CES to ICP bridge matrices
CES_ICP_IDN <- matrix(0, length(IDN_map$CODE), max(IDN_map$ICP_SEQ))
CES_ICP_IDN[cbind(1:length(IDN_map$CODE), IDN_map$ICP_SEQ)] <- 1
row.names(CES_ICP_IDN) <- IDN_map$CODE
####################
### South Africa ###
####################
# Load ZAF_FOOD.raw and ZAF_OTH.raw
load(file="./Saved tables/ZAF_FOOD_All.Rda")
load(file="./Saved tables/ZAF_OTH_All.Rda")
### Read code-item mapping for ZAF
wb <- XLConnect::loadWorkbook("../Bridging/CES-COICOP/ZAF IES 2010-2011 CE Codes.xlsx")
code_item_ZAF <- XLConnect::readWorksheet(wb, "Sheet1", header=TRUE, forceConversion=T) %>% mutate(code=as.numeric(code))
code_item_ZAF <- rbind(ZAF_FOOD.raw %>% select(code, item) %>% distinct(),
ZAF_OTH.raw %>% select(code, item) %>% distinct()) %>% arrange(code) %>% mutate(code=as.numeric(code))
names(code_item_ZAF) <- c("CODE", "ITEM_DLE")
### Linking NTNU COICOP with WB ICP
# XXX_WB has original survey code, ICP seq number, and survey heading (except for BRA where 'heading' is missing).
# icp_ntnu is for ICP seq number, NTNU-COICOP seq number, and ICP heading.
# WB uses 66111100 instead of 66111101 (used in the survey) for 'own production' goods. (Unknown reason)
ZAF_WB0 <- ZAF_WB %>% mutate_cond(CODE > 66000000, CODE=CODE+1) %>%
mutate_cond(CODE==12711301, ICP_SEQ=151) %>% # Funeral expense
mutate_cond(CODE==12711400, ICP_SEQ=151) %>% # Gravestone
mutate_cond(CODE==7231100, ICP_SEQ=104) %>% # Maintenance and lubrication services
mutate_cond(CODE==4541301, ICP_SEQ=81) %>% # Candles = "Non-durable household goods" (think it is right to have this in fuel category as WB did, but for the ease for now.)
mutate_cond(CODE==4541710, ICP_SEQ=81) %>% # Other household fuel as "Non-durable household goods" under coicop
mutate(Surv_Heading=as.character(Surv_Heading)) %>%
rbind(list(66311801, 127, "Purchases of herding dogs")) %>%
rbind(list(88888888, 81, "Unclassified Diary Items except food"))
ZAF_map <- ZAF_WB0 %>% left_join(icp_ntnu %>% select(-NTNU_109), by="ICP_SEQ") # Do not need COICOP values any more from icp_ntnu
ZAF_map <- ZAF_map %>% select(CODE, Surv_Heading, ICP_SEQ, ICP_Heading, COICOP1, COICOP2) %>%
mutate_cond(CODE==4541301, COICOP1=5, COICOP2=61) %>% # Candles = "Non-durable household goods" (think it is right to have this in fuel category as WB did, but for the ease for now.)
mutate_cond(CODE==4541710, COICOP1=5, COICOP2=61) # Other household fuel as "Non-durable household goods" under coicop
ZAF_map <- ZAF_map %>% left_join(code_item_ZAF, by="CODE")
ZAF_map <- ZAF_map %>% arrange(CODE) %>% filter(!is.na(ICP_SEQ) & !is.na(ITEM_DLE))
ZAF_map$ITEM_DLE <- sub(";", ",", ZAF_map$ITEM_DLE)
### Need to remove fuel items for direct link CES-EXIO
ZAF_map <- ZAF_map %>%
filter(!((COICOP1 == "04" & COICOP2 > "50") |
(COICOP1 == "07" & COICOP2 == "22" & # Motor fuel
!grepl("Lubri|grease", Surv_Heading, ignore.case = TRUE)))) # Lubricant and grease will stay out of fuel.
### Create CES to ICP bridge matrices
CES_ICP_ZAF <- matrix(0, length(ZAF_map$CODE), max(ZAF_map$ICP_SEQ))
CES_ICP_ZAF[cbind(1:length(ZAF_map$CODE), ZAF_map$ICP_SEQ)] <- 1
row.names(CES_ICP_ZAF) <- ZAF_map$CODE
### TEST: Detect which CES items are classifed to UNBR.
a <- IDN_map[grep("UNBR", IDN_map$ICP_Heading),]
a <- IND_map[grep("UNBR", IND_map$ICP_Heading),]
a <- ZAF_map[grep("UNBR", ZAF_map$ICP_Heading),]